#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/fs/zev.h>
#include <sys/zev_callbacks.h>
#include <sys/zev_checksums.h>
#include <sys/zfs_znode.h>
#include <sys/time.h>
#include <sys/sa.h>
#include <sys/zap.h>
#include <sys/time.h>	/* NOTE(review): <sys/time.h> is included twice */

#define OFFSETOF(s, m) ((size_t)(&(((s *)0)->m)))

/* default queue created at attach time; it is persistent and blocking */
#define ZEV_DEFAULT_QUEUE_NAME "beaver"
#define ZEV_CONTROL_DEVICE_MINOR 0
/* minor numbers ZEV_MINOR_MIN..ZEV_MINOR_MAX are per-queue devices */
#define ZEV_MINOR_MIN (ZEV_CONTROL_DEVICE_MINOR + 1)
#define ZEV_MINOR_MAX (ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1)

/*
 * One event queue, backed by one minor device node.  Each queue is a
 * window into the single global message list (zev_queue_head/tail);
 * zq_oldest points at the oldest global message this queue has not
 * yet delivered.
 */
typedef struct zev_queue {
	char zq_name[ZEV_MAX_QUEUE_NAME_LEN+1];	/* device node name */
	minor_t zq_minor_number;
	dev_info_t *zq_dip;
	struct pollhead zq_pollhead;
	uint64_t zq_bytes_read;
	uint64_t zq_events_read;
	uint64_t zq_bytes_discarded;
	uint64_t zq_events_discarded;
	uint64_t zq_bytes_total;
	uint64_t zq_events_total;
	uint64_t zq_wakeup_threshold;	/* queue bytes before pollwakeup */
	uint16_t zq_flags;		/* ZEV_FL_* */
	uint16_t zq_need_wakeup;
	/* protected by zev_mutex */
	int zq_refcnt;
	uint64_t zq_queue_len;		/* bytes currently queued */
	uint64_t zq_queue_messages;	/* messages currently queued */
	uint64_t zq_max_queue_len;
	zev_msg_t *zq_oldest;		/* oldest undelivered global msg */
	boolean_t zq_busy;		/* opened exclusively */
	boolean_t zq_to_be_removed;
	zev_statistics_t zq_statistics;
	kcondvar_t zq_condvar;
} zev_queue_t;

static void *statep;			/* ddi_soft_state handle, one per minor */
struct pollhead zev_pollhead;

kmutex_t zev_mutex;			/* protects queues and global list */
kcondvar_t zev_condvar;			/* signalled when queue space frees up */
kmutex_t zev_queue_msg_mutex;		/* serializes zev_queue_message() */
krwlock_t zev_pool_list_rwlock;		/* protects muted-pool list */
static zev_statistics_t zev_statistics;
static boolean_t zev_attached;
static kmutex_t zev_mark_id_mutex;
static uint64_t zev_mark_id = 0;

static uint64_t zev_msg_sequence_number = 0;
static zev_queue_t *zev_queues[ZEV_MAX_QUEUES];	/* indexed by minor - ZEV_MINOR_MIN */
static int zev_queue_cnt = 0;

uint64_t zev_memory_allocated = 0;
uint64_t zev_memory_freed = 0;

/*
 * The longest potential message is from zev_zfs_mount() and
 * contains the mountpoint, which might be
close to MAXPATHLEN bytes long. 71 * 72 * Another candidate is zev_znode_rename_cb() and contains three inode 73 * numbers and two filenames of up to MAXNAMELEN bytes each. 74 */ 75 #define ZEV_MAX_MESSAGE_LEN 4096 76 77 static zev_msg_t *zev_queue_head = NULL; 78 static zev_msg_t *zev_queue_tail = NULL; 79 static uint64_t zev_queue_len = 0; 80 81 82 typedef struct zev_pool_list_entry { 83 struct zev_pool_list_entry *next; 84 char name[MAXPATHLEN]; 85 } zev_pool_list_entry_t; 86 87 static zev_pool_list_entry_t *zev_muted_pools_head = NULL; 88 89 static volatile int zev_wakeup_thread_run = 1; 90 static kthread_t *zev_poll_wakeup_thread = NULL; 91 92 void * 93 zev_alloc(ssize_t sz) 94 { 95 ZEV_MEM_ADD(sz); 96 return kmem_alloc(sz, KM_SLEEP); 97 } 98 99 void * 100 zev_zalloc(ssize_t sz) 101 { 102 ZEV_MEM_ADD(sz); 103 return kmem_zalloc(sz, KM_SLEEP); 104 } 105 106 void 107 zev_free(void *ptr, ssize_t sz) 108 { 109 ZEV_MEM_SUB(sz); \ 110 kmem_free(ptr, sz); 111 } 112 113 int 114 zev_queue_cmp(const void *a, const void *b) 115 { 116 const zev_queue_t *qa = a; 117 const zev_queue_t *qb = b; 118 if (qa->zq_minor_number > qb->zq_minor_number) 119 return 1; 120 if (qa->zq_minor_number < qb->zq_minor_number) 121 return -1; 122 return 0; 123 } 124 125 /* must be called with zev_mutex held */ 126 void 127 zev_queue_trim(void) 128 { 129 zev_msg_t *m; 130 uint64_t oldest_message; 131 zev_queue_t *q; 132 int i; 133 134 if (!zev_queue_tail) 135 return; 136 137 oldest_message = zev_queue_tail->seq + 1; /* does not exist, yet. 
 */
	/* find the oldest message still referenced by any queue */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_oldest)
			continue;
		if (oldest_message > q->zq_oldest->seq)
			oldest_message = q->zq_oldest->seq;
	}

	/* remove msgs between oldest_message and zev_queue_head */
	while(zev_queue_head && (oldest_message > zev_queue_head->seq)) {
		m = zev_queue_head;
		zev_queue_head = m->next;
		if (zev_queue_head == NULL) {
			zev_queue_tail = NULL;
		} else {
			zev_queue_head->prev = NULL;
		}
		/* a message never read by anybody counts as discarded */
		if (m->read == 0) {
			zev_statistics.zev_bytes_discarded += m->size;
			zev_statistics.zev_cnt_discarded_events++;
		}
		zev_statistics.zev_queue_len -= m->size;
		zev_queue_len--;
		zev_free(m, sizeof(*m) + m->size);
	}
}

/* must be called with zev_mutex held */
static void
zev_queue_hold(zev_queue_t *q)
{
	q->zq_refcnt++;
}

/*
 * Drop a reference; on the last reference a non-persistent queue is
 * torn down completely.  must be called with zev_mutex held.
 */
static void
zev_queue_release(zev_queue_t *q)
{
	q->zq_refcnt--;
	if (q->zq_refcnt > 0)
		return;

	ASSERT(q->zq_busy == B_FALSE);

	/* persistent queues will not be removed */
	if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0)
		return;

	/* remove queue from queue list */
	zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL;

	/* discard messages that no queue references anymore */
	zev_queue_trim();

	cv_destroy(&q->zq_condvar);
	ddi_remove_minor_node(q->zq_dip, q->zq_name);
	/* q itself lives in the soft state; freeing it frees the queue */
	ddi_soft_state_free(statep, q->zq_minor_number);
	ZEV_MEM_SUB(sizeof(zev_queue_t));
	zev_queue_cnt--;
}

/*
 * Create a new queue and its minor device node.  Returns 0 or errno.
 * Queue names are restricted to [a-z0-9.] and must not collide with
 * the control device or an existing queue.
 */
int
zev_queue_new(zev_queue_t **queue,
              dev_info_t *dip,
              char *name,
              uint64_t max_queue_len,
              uint16_t flags)
{
	zev_queue_t *q;
	zev_queue_t *tmp;
	zev_msg_t *msg;
	int name_exists = 0;
	minor_t minor;
	char *p;
	int i;

	if (max_queue_len > ZEV_MAX_QUEUE_LEN)
		return EINVAL;
	if
 (max_queue_len == 0)
		max_queue_len = ZEV_MAX_QUEUE_LEN;	/* 0 means "unlimited" cap */
	if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME))
		return EINVAL;
	/* only [a-z0-9.] allowed in queue/device names */
	for (p = name; *p; p++) {
		if (*p >= 'a' && *p <= 'z')
			continue;
		if (*p >= '0' && *p <= '9')
			continue;
		if (*p == '.')
			continue;
		return EINVAL;
	}

	mutex_enter(&zev_mutex);

	/* find free minor number.*/
	/* if this were a frequent operation we'd have a free-minor list */
	for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) {
		tmp = zev_queues[minor - ZEV_MINOR_MIN];
		if (tmp == NULL)
			break;
	}
	/* tmp != NULL here means every slot was taken */
	if (tmp) {
		mutex_exit(&zev_mutex);
		return ENOSPC;
	}

	if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) {
		mutex_exit(&zev_mutex);
		return ENOSPC;
	}
	ZEV_MEM_ADD(sizeof(zev_queue_t));

	/* the queue structure lives inside the soft state for this minor */
	q = ddi_get_soft_state(statep, minor);
	memset(q, 0, sizeof(*q));
	strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN);
	q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0';
	q->zq_max_queue_len = max_queue_len;
	q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN;
	q->zq_flags = flags;
	q->zq_refcnt = 1;
	q->zq_dip = dip;
	q->zq_minor_number = minor;
	cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL);

	/* insert into queue list */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		/* if this were a frequent operation we'd have a name tree */
		if (zev_queues[i - ZEV_MINOR_MIN] == NULL)
			continue;
		if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) {
			name_exists = 1;
			break;
		}
	}
	if (name_exists) {
		ddi_soft_state_free(statep, minor);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
		mutex_exit(&zev_mutex);
		return EEXIST;
	}
	zev_queues[minor - ZEV_MINOR_MIN] = q;
	zev_queue_cnt++;

	/* calculate current queue len and find head and tail */
	q->zq_oldest = zev_queue_tail;
	msg = zev_queue_tail;
	while ((msg != NULL) && (q->zq_queue_len <
 q->zq_max_queue_len)) {
		/* walk backwards from the newest message until the cap is hit */
		q->zq_queue_len += msg->size;
		q->zq_queue_messages++;
		q->zq_oldest = msg;
		msg = msg->prev;
	}

	mutex_exit(&zev_mutex);

	/* minor node creation must happen without zev_mutex held */
	if (ddi_create_minor_node(dip, name,
	    S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
		mutex_enter(&zev_mutex);
		zev_queues[minor - ZEV_MINOR_MIN] = NULL;
		zev_queue_cnt--;
		ddi_soft_state_free(statep, minor);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
		mutex_exit(&zev_mutex);
		return EFAULT;
	}

	*queue = q;
	return 0;
}

/*
 * poll() wakeup thread.  Used to check periodically whether we have
 * bytes left in the queue that have not yet been made into a
 * pollwakeup() call.  This is meant to ensure a maximum waiting
 * time until an event is presented as a poll wakeup, while at
 * the same time not making every single event into a poll wakeup
 * of its own.
 */

static void
zev_poll_wakeup(boolean_t flush_all)
{
	zev_queue_t *q;
	int i;

	/*
	 * This loop works with hold() and release() because
	 * pollwakeup() requires us to release our locks before calling it.
	 *
	 * from pollwakeup(9F):
	 *
	 *   "Driver defined locks should not be held across calls
	 *   to this function."
 */

	/* wake up threads for each individual queue */
	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_busy)
			continue;
		if (!q->zq_queue_len)
			continue;
		if ((flush_all) ||
		    (q->zq_queue_len > q->zq_wakeup_threshold)) {
			/* hold the queue so it can't vanish while unlocked */
			zev_queue_hold(q);
			mutex_exit(&zev_mutex);
			pollwakeup(&q->zq_pollhead, POLLIN);
			mutex_enter(&zev_mutex);
			zev_queue_release(q);
		}
	}
	mutex_exit(&zev_mutex);
}

/* kernel thread: flush pending poll wakeups every 100ms until told to stop */
static void
zev_poll_wakeup_thread_main(void)
{
	while (zev_wakeup_thread_run) {
		delay(drv_usectohz(100 * 1000)); /* sleep 100ms */

		zev_poll_wakeup(B_TRUE);
	}
	thread_exit();
}

/* add a pool to the muted list; EEXIST if already muted */
static int
zev_ioc_mute_pool(char *poolname)
{
	zev_pool_list_entry_t *pe;
	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	/* pool already muted? */
	for (pe=zev_muted_pools_head; pe; pe=pe->next) {
		if (!strcmp(pe->name, poolname)) {
			rw_exit(&zev_pool_list_rwlock);
			return EEXIST;
		}
	}
	pe = zev_zalloc(sizeof(*pe));
	/* KM_SLEEP allocation never fails; check kept for safety */
	if (!pe) {
		rw_exit(&zev_pool_list_rwlock);
		return ENOMEM;
	}
	(void) strncpy(pe->name, poolname, sizeof(pe->name));
	pe->next = zev_muted_pools_head;
	zev_muted_pools_head = pe;
	rw_exit(&zev_pool_list_rwlock);
	return (0);
}

/* remove a pool from the muted list; ENOENT if it was not muted */
static int
zev_ioc_unmute_pool(char *poolname)
{
	zev_pool_list_entry_t *pe, *peprev;

	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	/* pool muted?
*/ 399 peprev = NULL; 400 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 401 if (!strcmp(pe->name, poolname)) 402 break; 403 peprev = pe; 404 } 405 if (pe) { 406 rw_exit(&zev_pool_list_rwlock); 407 return ENOENT; 408 } 409 410 if (peprev != NULL) { 411 peprev->next = pe->next; 412 } else { 413 zev_muted_pools_head = pe->next; 414 } 415 zev_free(pe, sizeof(*pe)); 416 rw_exit(&zev_pool_list_rwlock); 417 return (0); 418 } 419 420 int 421 zev_skip_pool(objset_t *os) 422 { 423 zev_pool_list_entry_t *pe; 424 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 425 rw_enter(&zev_pool_list_rwlock, RW_READER); 426 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 427 if (!strcmp(pe->name, dp->dp_spa->spa_name)) { 428 rw_exit(&zev_pool_list_rwlock); 429 return 1; 430 } 431 } 432 rw_exit(&zev_pool_list_rwlock); 433 return 0; 434 } 435 436 static void 437 zev_update_statistics(int op, zev_statistics_t *stat) 438 { 439 switch (op) { 440 case ZEV_OP_ERROR: 441 stat->zev_cnt_errors++; 442 break; 443 case ZEV_OP_MARK: 444 stat->zev_cnt_marks++; 445 break; 446 case ZEV_OP_ZFS_MOUNT: 447 stat->zev_cnt_zfs_mount++; 448 break; 449 case ZEV_OP_ZFS_UMOUNT: 450 stat->zev_cnt_zfs_umount++; 451 break; 452 case ZEV_OP_ZVOL_WRITE: 453 stat->zev_cnt_zvol_write++; 454 break; 455 case ZEV_OP_ZVOL_TRUNCATE: 456 stat->zev_cnt_zvol_truncate++; 457 break; 458 case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE: 459 stat->zev_cnt_znode_close_after_update++; 460 break; 461 case ZEV_OP_ZNODE_CREATE: 462 stat->zev_cnt_znode_create++; 463 break; 464 case ZEV_OP_ZNODE_REMOVE: 465 stat->zev_cnt_znode_remove++; 466 break; 467 case ZEV_OP_ZNODE_LINK: 468 stat->zev_cnt_znode_link++; 469 break; 470 case ZEV_OP_ZNODE_SYMLINK: 471 stat->zev_cnt_znode_symlink++; 472 break; 473 case ZEV_OP_ZNODE_RENAME: 474 stat->zev_cnt_znode_rename++; 475 break; 476 case ZEV_OP_ZNODE_WRITE: 477 stat->zev_cnt_znode_write++; 478 break; 479 case ZEV_OP_ZNODE_TRUNCATE: 480 stat->zev_cnt_znode_truncate++; 481 break; 482 case 
 ZEV_OP_ZNODE_SETATTR:
		stat->zev_cnt_znode_setattr++;
		break;
	case ZEV_OP_ZNODE_ACL:
		stat->zev_cnt_znode_acl++;
		break;
	}
}

/*
 * Append "msg" (op type "op") to the global event list and account it
 * in every active queue.  Takes ownership of msg; it is freed either
 * here (on bad op) or later by zev_queue_trim().  May block when a
 * queue with ZEV_FL_BLOCK_WHILE_QUEUE_FULL is full.
 */
void
zev_queue_message(int op, zev_msg_t *msg)
{
	zev_queue_t *q;
	int wakeup = 0;
	zev_msg_t *m;
	int i;

	msg->next = NULL;
	msg->prev = NULL;
	msg->read = 0;

	if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
		zev_queue_error(op, "unknown op id encountered: %d", op);
		zev_free(msg, sizeof(*msg) + msg->size);
		return;
	}

	/*
	 * This mutex protects us against race conditions when several
	 * threads want to queue a message and one or more queues are
	 * full: we release zev_mutex to wait for the queues to become
	 * less-than-full, but we don't know in which order the waiting
	 * threads will be awoken.  If it's not the same order in which
	 * they went to sleep we might mark different messages as "newest"
	 * in different queues, and so we might have dupes or even
	 * skip messages.
	 */
	mutex_enter(&zev_queue_msg_mutex);

	mutex_enter(&zev_mutex);

	/*
	 * When the module is loaded, the default behavior is to
	 * put all events into a queue and block if the queue is full.
	 * This is done even before the pseudo device is attached.
	 * This way, no events are lost.
	 *
	 * To discard events entirely the "beaver" queue,
	 * which never discards anything, has to be removed.
	 */

	if (zev_queue_cnt == 0) {
		mutex_exit(&zev_mutex);
		mutex_exit(&zev_queue_msg_mutex);
		return;
	}

	/* put message into global queue */
	msg->seq = zev_msg_sequence_number++;
	while (zev_statistics.zev_max_queue_len &&
	    zev_statistics.zev_queue_len >= zev_statistics.zev_max_queue_len) {
		/* queue full. block until it's been shrunk.
 */
		cv_wait(&zev_condvar, &zev_mutex);
	}

	/* link at the tail of the global doubly-linked list */
	if (zev_queue_tail == NULL) {
		zev_queue_head = zev_queue_tail = msg;
	} else {
		zev_queue_tail->next = msg;
		msg->prev = zev_queue_tail;
		zev_queue_tail = msg;
	}
	zev_queue_len++;
	zev_statistics.zev_cnt_total_events++;
	zev_statistics.zev_queue_len += msg->size;

	/* update per-device queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;

		zev_queue_hold(q);

		/* make sure queue has enough room */
		while (q->zq_max_queue_len &&
		    q->zq_queue_len > q->zq_max_queue_len) {

			if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
				/* block until queue has been shrunk. */
				cv_wait(&zev_condvar, &zev_mutex);
			} else {
				/* discard msgs until queue is small enough */
				while (q->zq_queue_len > q->zq_max_queue_len) {
					m = q->zq_oldest;
					if (m == NULL)
						break;
					q->zq_events_discarded++;
					q->zq_bytes_discarded += m->size;
					q->zq_oldest = m->next;
					q->zq_queue_len -= m->size;
					q->zq_queue_messages--;
				}
			}
		}

		/* register new message at the end of the queue */
		q->zq_queue_len += msg->size;
		q->zq_queue_messages++;
		q->zq_bytes_total += msg->size;
		q->zq_events_total++;
		if (q->zq_oldest == NULL)
			q->zq_oldest = msg;

		zev_update_statistics(op, &q->zq_statistics);

		if (q->zq_queue_len > q->zq_wakeup_threshold)
			wakeup = 1;
		if (q->zq_queue_len == msg->size) /* queue was empty */
			cv_broadcast(&q->zq_condvar);

		zev_queue_release(q);
	}

	/* drop global messages no queue references anymore */
	zev_queue_trim();

	zev_update_statistics(op, &zev_statistics);
	mutex_exit(&zev_mutex);
	mutex_exit(&zev_queue_msg_mutex);

	/* one or more queues need a pollwakeup() */
	if (op == ZEV_OP_MARK) {
		zev_poll_wakeup(B_TRUE);
	} else if (wakeup) {
		zev_poll_wakeup(B_FALSE);
	}

	return;
}

/*
 * Queue a ZEV_OP_ERROR event describing a failure to generate an
 * event of type "op".  Formats the printf-style message into the
 * event payload.
 */
void
zev_queue_error(int op, char *fmt, ...)
{
	char buf[ZEV_MAX_MESSAGE_LEN];
	va_list ap;
	int len;
	zev_msg_t *msg = NULL;
	zev_error_t *rec;
	int msg_size;

	va_start(ap, fmt);
	len = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	/* vsnprintf returns the untruncated length; give up if too long */
	if (len >= sizeof(buf)) {
		cmn_err(CE_WARN, "zev: can't report error - "
		    "dropping event entirely.");
		return;
	}

	/* message layout: zev_msg_t header, then zev_error_t record + string */
	msg_size = sizeof(*rec) + len + 1;
	msg = zev_alloc(sizeof(*msg) + msg_size);
	msg->size = msg_size;
	rec = (zev_error_t *)(msg + 1);
	rec->record_len = msg_size;
	rec->op = ZEV_OP_ERROR;
	rec->op_time = ddi_get_time();
	rec->guid = 0;
	rec->failed_op = op;
	rec->errstr_len = len;
	(void) memcpy(ZEV_ERRSTR(rec), buf, len + 1);

	/* zev_queue_message() takes ownership of msg */
	zev_queue_message(ZEV_OP_ERROR, msg);
	return;
}

/*
 * Resolve the queue an ioctl refers to.  An empty name means "the
 * queue the ioctl was issued on" (not valid on the control device).
 * On success *out holds a referenced queue; caller must release it
 * under zev_mutex.
 */
static int
zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name)
{
	char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1];
	zev_queue_t *q;
	int i;

	*out = NULL;

	if (name->zev_namelen == 0) {
		if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR)
			return EINVAL;
		zev_queue_hold(req_q);
		*out = req_q;
		return 0;
	}

	if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	strncpy(namebuf, name->zev_name, name->zev_namelen);
	namebuf[name->zev_namelen] = '\0';

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (!strcmp(q->zq_name, namebuf)) {
			zev_queue_hold(q);
			mutex_exit(&zev_mutex);
			*out = q;
			return 0;
		}
	}
	mutex_exit(&zev_mutex);
	return ENOENT;
}

/* copy per-queue statistics out to userland */
static int
zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_get_queue_statistics_t gs;
	zev_queue_t *q;
	int ret;

	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
		return EFAULT;

	ret =
 zev_find_queue(&q, req_q, &gs.zev_queue_name);
	if (ret)
		return ret;

	/* ddi_copyout() can take a long time. Better make
	   a copy to be able to release the mutex faster. */
	mutex_enter(&zev_mutex);
	memcpy(&gs.zev_statistics, &q->zq_statistics,sizeof(gs.zev_statistics));
	/* overlay the live per-queue counters over the snapshot */
	gs.zev_statistics.zev_queue_len = q->zq_queue_len;
	gs.zev_statistics.zev_bytes_read = q->zq_bytes_read;
	gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded;
	gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len;
	gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded;
	gs.zev_statistics.zev_cnt_total_events = q->zq_events_total;
	zev_queue_release(q);
	mutex_exit(&zev_mutex);

	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0)
		return EFAULT;
	return 0;
}

/* set queue flags, maximum length and poll wakeup threshold */
static int
zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_set_queue_properties_t qp;
	zev_queue_t *q;
	uint64_t old_max;
	uint64_t old_flags;
	int ret;

	if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
		return EFAULT;
	if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN)
		return EINVAL;
	if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN)
		return EINVAL;

	ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
	if (ret)
		return ret;

	mutex_enter(&zev_mutex);

	/*
	 * Note: if the PERSISTENT flag is cleared, and the queue is not busy,
	 * the queue should be removed by zev_queue_release() in zev_ioctl.
752 */ 753 old_flags = qp.zev_flags; 754 q->zq_flags = qp.zev_flags; 755 if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) && 756 (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) { 757 /* queue is no longer blocking - wake blocked threads */ 758 cv_broadcast(&zev_condvar); 759 } 760 761 old_max = q->zq_max_queue_len; 762 q->zq_max_queue_len = qp.zev_max_queue_len; 763 if (q->zq_max_queue_len < old_max) 764 zev_queue_trim(); 765 if (q->zq_max_queue_len > old_max) 766 cv_broadcast(&zev_condvar); /* threads may be waiting */ 767 768 if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) && 769 (qp.zev_poll_wakeup_threshold <= q->zq_queue_len)) 770 pollwakeup(&q->zq_pollhead, POLLIN); 771 q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold; 772 773 zev_queue_release(q); 774 mutex_exit(&zev_mutex); 775 return 0; 776 } 777 778 static int 779 zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 780 { 781 zev_ioctl_get_queue_properties_t qp; 782 zev_queue_t *q; 783 int ret; 784 785 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 786 return EFAULT; 787 788 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 789 if (ret) 790 return ret; 791 792 mutex_enter(&zev_mutex); 793 qp.zev_max_queue_len = q->zq_max_queue_len; 794 qp.zev_flags = q->zq_flags; 795 qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold; 796 zev_queue_release(q); 797 mutex_exit(&zev_mutex); 798 799 if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0) 800 return EFAULT; 801 return 0; 802 } 803 804 static int 805 zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode) 806 { 807 zev_ioctl_add_queue_t aq; 808 zev_queue_t *new_q; 809 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 810 811 if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0) 812 return EFAULT; 813 814 if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 815 return EINVAL; 816 strncpy(name, aq.zev_name, aq.zev_namelen); 817 name[aq.zev_namelen] = '\0'; 818 819 return zev_queue_new(&new_q, req_q->zq_dip, name, 820 
	    aq.zev_max_queue_len, aq.zev_flags);
}

/*
 * Mark a queue for removal.  The queue must exist and must not be
 * currently open; the final teardown happens via zev_queue_release().
 */
static int
zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_remove_queue_t rq;
	zev_queue_t *q;
	char name[ZEV_MAX_QUEUE_NAME_LEN+1];
	int found = 0;
	int i;

	if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0)
		return EFAULT;

	if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	strncpy(name, rq.zev_queue_name.zev_name,
	    rq.zev_queue_name.zev_namelen);
	name[rq.zev_queue_name.zev_namelen] = '\0';

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (!strcmp(q->zq_name, name)) {
			found = 1;
			break;
		}
	}
	if (!found) {
		mutex_exit(&zev_mutex);
		return ENOENT;
	}

	if (q->zq_busy) {
		mutex_exit(&zev_mutex);
		return EBUSY;
	}
	/*
	 * clear flags, so that persistent queues are removed as well
	 * and the queue becomes non-blocking.
 */
	q->zq_flags = 0;
	/* drop the list's reference exactly once */
	if (q->zq_to_be_removed == B_FALSE) {
		q->zq_to_be_removed = B_TRUE;
		zev_queue_release(q);
	}
	/* some threads might be waiting for this queue to become writable */
	cv_broadcast(&zev_condvar);

	mutex_exit(&zev_mutex);
	return 0;
}

/* report checksum-cache statistics and net driver memory usage */
static int
zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_debug_info_t di;
	/* atomic_add_64_nv(.., 0) is an atomic read of the counters */
	uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0);
	uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0);

	zev_chksum_stats(&di.zev_chksum_cache_size,
	    &di.zev_chksum_cache_hits,
	    &di.zev_chksum_cache_misses);
	di.zev_memory_allocated = mem_allocated - mem_freed;
	if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0)
		return EFAULT;
	return 0;
}

/* list the names of all existing queues */
static int
zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_get_queue_list_t gql;
	zev_queue_t *q;
	int i = 0;
	int count = 0;

	memset(&gql, 0, sizeof(gql));

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		strncpy(gql.zev_queue_name[count].zev_name,
		    q->zq_name, ZEV_MAX_QUEUE_NAME_LEN);
		gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name);
		count++;
	}
	gql.zev_n_queues = count;
	mutex_exit(&zev_mutex);

	if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0)
		return EFAULT;
	return 0;
}

/* ARGSUSED */
static int
zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	zev_statistics_t zs;
	zev_ioctl_poolarg_t pa;
	zev_ioctl_mark_t mark;
	zev_mark_t *rec;
	int msg_size;
	zev_msg_t *msg;
	uint64_t len;
	uint64_t mark_id;
	minor_t minor;
	zev_queue_t *req_q;
	int ret = 0;

	minor = getminor(dev);
	mutex_enter(&zev_mutex);
	if ((req_q =
 ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	/* hold the request queue for the duration of the ioctl */
	zev_queue_hold(req_q);
	mutex_exit(&zev_mutex);
	/*
	 * all structures passed between kernel and userspace
	 * are now compatible between 64 and 32 bit. Model
	 * conversion can be ignored.
	 */
	switch (cmd) {
	case ZEV_IOC_GET_GLOBAL_STATISTICS:
		/* ddi_copyout() can take a long time. Better make
		   a copy to be able to release the mutex faster. */
		mutex_enter(&zev_mutex);
		(void) memcpy(&zs, &zev_statistics, sizeof(zs));
		mutex_exit(&zev_mutex);
		if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0)
			ret = EFAULT;
		break;
	case ZEV_IOC_GET_QUEUE_STATISTICS:
		ret = zev_ioc_get_queue_statistics(req_q, arg, mode);
		break;
	case ZEV_IOC_MUTE_POOL:
	case ZEV_IOC_UNMUTE_POOL:
		if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) {
			ret = EFAULT;
			break;
		}
		/* length check before NUL-terminating inside the buffer */
		if (pa.zev_poolname_len >=MAXPATHLEN) {
			ret = EINVAL;
			break;
		}
		pa.zev_poolname[pa.zev_poolname_len] = '\0';
		if (cmd == ZEV_IOC_MUTE_POOL) {
			ret = zev_ioc_mute_pool(pa.zev_poolname);
		} else {
			ret = zev_ioc_unmute_pool(pa.zev_poolname);
		}
		break;
	case ZEV_IOC_SET_MAX_QUEUE_LEN:
		if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) {
			ret = EFAULT;
			break;
		}
		if (len > ZEV_MAX_QUEUE_LEN) {
			ret = EINVAL;
			break;
		}
		mutex_enter(&zev_mutex);
		zev_statistics.zev_max_queue_len = len;
		/* writers blocked on the global limit may proceed now */
		cv_broadcast(&zev_condvar);
		mutex_exit(&zev_mutex);
		break;
	case ZEV_IOC_GET_QUEUE_PROPERTIES:
		ret = zev_ioc_get_queue_properties(req_q, arg, mode);
		break;
	case ZEV_IOC_SET_QUEUE_PROPERTIES:
		ret = zev_ioc_set_queue_properties(req_q, arg, mode);
		break;
	case ZEV_IOC_MARK:
		if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) {
			ret = EFAULT;
			break;
		}
		/* prepare message */
		msg_size = sizeof(*rec) +
mark.zev_payload_len + 1; 1006 msg = zev_alloc(sizeof(*msg) + msg_size); 1007 msg->size = msg_size; 1008 rec = (zev_mark_t *)(msg + 1); 1009 rec->record_len = msg_size; 1010 rec->op = ZEV_OP_MARK; 1011 rec->op_time = ddi_get_time(); 1012 rec->guid = mark.zev_guid; 1013 rec->payload_len = mark.zev_payload_len; 1014 /* get payload */ 1015 if (ddi_copyin(((char *)arg) + sizeof(mark), 1016 ZEV_PAYLOAD(rec), 1017 mark.zev_payload_len, mode) != 0) { 1018 zev_free(msg, msg_size); 1019 ret = EFAULT; 1020 break; 1021 } 1022 *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0'; 1023 /* get mark id and queue message */ 1024 mutex_enter(&zev_mark_id_mutex); 1025 mark_id = zev_mark_id++; 1026 mutex_exit(&zev_mark_id_mutex); 1027 rec->mark_id = mark_id; 1028 zev_queue_message(ZEV_OP_MARK, msg); 1029 /* report mark id to userland, ignore errors */ 1030 mark.zev_mark_id = mark_id; 1031 ddi_copyout(&mark, (void *)arg, sizeof(mark), mode); 1032 break; 1033 case ZEV_IOC_ADD_QUEUE: 1034 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1035 ret = EACCES; 1036 break; 1037 } 1038 ret = zev_ioc_add_queue(req_q, arg, mode); 1039 break; 1040 case ZEV_IOC_REMOVE_QUEUE: 1041 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1042 ret = EACCES; 1043 break; 1044 } 1045 ret = zev_ioc_remove_queue(req_q, arg, mode); 1046 break; 1047 case ZEV_IOC_GET_DEBUG_INFO: 1048 ret = zev_ioc_get_debug_info(req_q, arg, mode); 1049 break; 1050 case ZEV_IOC_GET_QUEUE_LIST: 1051 ret = zev_ioc_get_queue_list(req_q, arg, mode); 1052 break; 1053 case ZEV_IOC_GET_FILE_SIGNATURES: 1054 ret = zev_ioc_get_signatures(arg, mode); 1055 break; 1056 default: 1057 /* generic "ioctl unknown" error */ 1058 ret = ENOTTY; 1059 } 1060 1061 mutex_enter(&zev_mutex); 1062 zev_queue_release(req_q); 1063 mutex_exit(&zev_mutex); 1064 if (ret) 1065 SET_ERROR(ret); 1066 return (ret); 1067 } 1068 1069 static int 1070 zev_chpoll(dev_t dev, short events, int anyyet, 1071 short *reventsp, struct pollhead **phpp) 1072 { 1073 int minor; 1074 short revent = 0; 1075 
	zev_queue_t *q;

	/* use minor-specific queue context and its pollhead */
	minor = getminor(dev);
	if (minor == ZEV_CONTROL_DEVICE_MINOR)
		return (EINVAL);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	revent = 0;
	/* readable iff there is at least one undelivered message */
	if ((events & POLLIN)) {
		if (q->zq_oldest)
			revent |= POLLIN;
	}
	if (revent == 0) {
		if (!anyyet) {
			*phpp = &q->zq_pollhead;
		}
	}
	*reventsp = revent;
	mutex_exit(&zev_mutex);
	return (0);
}

/*
 * read(2) entry point: deliver as many whole messages as fit into the
 * caller's buffer.  Blocks (interruptibly) while the queue is empty.
 */
/* ARGSUSED */
static int
zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p)
{
	minor_t minor;
	offset_t off;
	int ret = 0;
	zev_msg_t *msg;
	char *data;
	zev_queue_t *q;

	minor = getminor(dev);
	if (minor == ZEV_CONTROL_DEVICE_MINOR)
		return (EINVAL);

	mutex_enter(&zev_mutex);
	q = ddi_get_soft_state(statep, minor);
	if (q == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	off = uio_p->uio_loffset;
	msg = q->zq_oldest;
	while (msg == NULL) {
		if (!ddi_can_receive_sig()) {
			/*
			 * read() shouldn't block because this thread
			 * can't receive signals. (e.g., it might be
			 * torn down by exit() right now.)
			 */
			mutex_exit(&zev_mutex);
			return 0;
		}
		if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) {
			/* signal received.
 */
			mutex_exit(&zev_mutex);
			return EINTR;
		}
		msg = q->zq_oldest;
	}
	/* the buffer must hold at least one whole message */
	if (msg->size > uio_p->uio_resid) {
		mutex_exit(&zev_mutex);
		return E2BIG;
	}
	while (msg && uio_p->uio_resid >= msg->size) {
		data = (char *)(msg + 1);
		ret = uiomove(data, msg->size, UIO_READ, uio_p);
		if (ret != 0) {
			mutex_exit(&zev_mutex);
			cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
			uio_p->uio_loffset = off;
			return (ret);
		}
		q->zq_oldest = msg->next;
		q->zq_bytes_read += msg->size;
		q->zq_queue_len -= msg->size;
		q->zq_queue_messages--;
		msg->read++;	/* mark as delivered at least once */
		msg = q->zq_oldest;
	}
	/* queue shrank: writers blocked on a full queue may proceed */
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);
	/* this is a queue-style device; keep the file offset pinned */
	uio_p->uio_loffset = off;
	return 0;
}

/* ARGSUSED */
static int
zev_close(dev_t dev, int flag, int otyp, cred_t *crepd)
{
	zev_queue_t *q;
	int minor;

	minor = getminor(dev);
	if (otyp != OTYP_CHR)
		return (EINVAL);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	if (q->zq_busy != B_TRUE) {
		mutex_exit(&zev_mutex);
		return (EINVAL);
	}
	q->zq_busy = B_FALSE;
	/* non-persistent queues drop the open()'s reference here */
	if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0)
		zev_queue_release(q);
	mutex_exit(&zev_mutex);
	return (0);
}

/* ARGSUSED */
static int
zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	zev_queue_t *q;
	minor_t minor;

	minor = getminor(*devp);
	if (otyp != OTYP_CHR)
		return (EINVAL);
	/* privileged callers only */
	if (drv_priv(credp) != 0)
		return (EPERM);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	if (minor == ZEV_CONTROL_DEVICE_MINOR) {
		/* control device may be used in parallel */
		q->zq_busy = B_TRUE;
		mutex_exit(&zev_mutex);
		return 0;
	}
	/* every non-control queue device is exclusive-open */
	if (q->zq_busy == B_TRUE) {
		mutex_exit(&zev_mutex);
		return (EBUSY);
	}
	q->zq_busy = B_TRUE;	/* can only be opened exclusively */
	mutex_exit(&zev_mutex);
	return (0);
}

/* character device entry points, see cb_ops(9S) */
static struct cb_ops zev_cb_ops = {
	zev_open,		/* open */
	zev_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	zev_read,		/* read */
	nodev,			/* write */
	zev_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	zev_chpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_MP | D_64BIT,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* aread */
	nodev,			/* awrite */
};

/*
 * Tear down everything attach created for instance 0: the minor nodes,
 * the pollwakeup thread, the "ctrl" dummy queue and all event queues.
 * Called from detach and from the attach failure path.
 */
static void
zev_free_instance(dev_info_t *dip)
{
	int instance;
	zev_queue_t *q;
	int i;

	instance = ddi_get_instance(dip);
	if (instance != 0) {	/* only instance 0 exists (see zev.conf) */
		cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)",
		    instance);
		return;
	}

	ddi_remove_minor_node(dip, NULL);

	/* stop pollwakeup thread */
	zev_wakeup_thread_run = 0;
	if (zev_poll_wakeup_thread != NULL) {
		thread_join(zev_poll_wakeup_thread->t_did);
		zev_poll_wakeup_thread = NULL;
	}

	mutex_enter(&zev_mutex);

	/* remove "ctrl" dummy queue */
	q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
	if (q) {
		ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
	}

	/* remove all other queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		/* at this point only the list itself may hold a reference */
		ASSERT(q->zq_refcnt == 1);
		zev_queue_release(q);
	}
	/* drop now-unreferenced messages from the global event list */
	zev_queue_trim();
	bzero(&zev_queues, sizeof(zev_queues));

	mutex_exit(&zev_mutex);
}

/* detach(9E) entry point; the body continues past this chunk boundary */
static int
zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance;
1297 zev_queue_t *q; 1298 1299 /* called once per instance with DDI_DETACH, 1300 may be called to suspend */ 1301 switch (cmd) { 1302 case DDI_DETACH: 1303 /* instance busy? */ 1304 instance = ddi_get_instance(dip); 1305 if (instance != 0) { /* hardcoded in zev.conf */ 1306 /* this module only supports one instance. */ 1307 return (DDI_FAILURE); 1308 } 1309 1310 mutex_enter(&zev_mutex); 1311 if (!zev_attached) { 1312 mutex_exit(&zev_mutex); 1313 return (DDI_FAILURE); 1314 } 1315 1316 /* check "ctrl" queue to see if t is busy */ 1317 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1318 if (q == NULL) { 1319 mutex_exit(&zev_mutex); 1320 return (DDI_FAILURE); 1321 } 1322 if (q->zq_busy) { 1323 mutex_exit(&zev_mutex); 1324 return (DDI_FAILURE); 1325 } 1326 /* are there any queues? */ 1327 if (zev_queue_cnt > 0) { 1328 mutex_exit(&zev_mutex); 1329 return (DDI_FAILURE); 1330 } 1331 1332 zev_attached = B_FALSE; 1333 mutex_exit(&zev_mutex); 1334 1335 /* switch ZFS event callbacks back to default */ 1336 rw_enter(&rz_zev_rwlock, RW_WRITER); 1337 rz_zev_callbacks = rz_zev_default_callbacks; 1338 rz_zev_set_active(B_FALSE); 1339 rw_exit(&rz_zev_rwlock); 1340 1341 /* no thread is inside of the callbacks anymore. 
*/ 1342 1343 /* free resources allocated for this instance */ 1344 zev_free_instance(dip); 1345 zev_chksum_fini(); 1346 #if 0 1347 cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64, 1348 zev_memory_allocated - zev_memory_freed); 1349 #endif 1350 return (DDI_SUCCESS); 1351 case DDI_SUSPEND: 1352 /* kernel must not suspend zev devices while ZFS is running */ 1353 return (DDI_FAILURE); 1354 default: 1355 return (DDI_FAILURE); 1356 } 1357 } 1358 1359 static int 1360 zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1361 { 1362 /* called once per instance with DDI_ATTACH, 1363 may be called to resume */ 1364 int instance; 1365 int error; 1366 zev_queue_t *q; 1367 switch (cmd) { 1368 case DDI_ATTACH: 1369 /* create instance state */ 1370 instance = ddi_get_instance(dip); 1371 if (instance != 0) { /* hardcoded in zev.conf */ 1372 /* this module only supports one instance. */ 1373 return (DDI_FAILURE); 1374 } 1375 1376 mutex_enter(&zev_mutex); 1377 if (zev_attached) { 1378 mutex_exit(&zev_mutex); 1379 return (DDI_FAILURE); 1380 } 1381 if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) != 1382 DDI_SUCCESS) { 1383 mutex_exit(&zev_mutex); 1384 return (DDI_FAILURE); 1385 } 1386 ZEV_MEM_ADD(sizeof(zev_queue_t)); 1387 zev_attached = B_TRUE; 1388 1389 /* init queue list */ 1390 bzero(&zev_queues, sizeof(zev_queues)); 1391 mutex_exit(&zev_mutex); 1392 1393 /* create a dummy queue for management of "ctrl" */ 1394 1395 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1396 q->zq_dip = dip; 1397 q->zq_refcnt = 1; 1398 q->zq_busy = B_FALSE; 1399 q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR; 1400 q->zq_flags = ZEV_FL_PERSISTENT; 1401 strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME); 1402 1403 /* create device node for "ctrl" */ 1404 if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME, 1405 S_IFCHR, ZEV_CONTROL_DEVICE_MINOR, 1406 DDI_PSEUDO, 0) == DDI_FAILURE) { 1407 goto fail; 1408 } 1409 1410 /* note: intentionally not adding ctrl queue to queue list. 
*/ 1411 1412 /* default queue */ 1413 error = zev_queue_new(&q, dip, 1414 ZEV_DEFAULT_QUEUE_NAME, 1415 ZEV_MAX_QUEUE_LEN, 1416 ZEV_FL_BLOCK_WHILE_QUEUE_FULL| 1417 ZEV_FL_PERSISTENT); 1418 if (error) 1419 goto fail; 1420 1421 /* start pollwakeup thread */ 1422 zev_wakeup_thread_run = 1; 1423 zev_poll_wakeup_thread = thread_create(NULL, 0, 1424 zev_poll_wakeup_thread_main, NULL, 0, &p0, 1425 TS_RUN, minclsyspri); 1426 1427 ddi_report_dev(dip); 1428 1429 zev_chksum_init(); 1430 1431 /* switch ZFS event callbacks to zev module callbacks */ 1432 rw_enter(&rz_zev_rwlock, RW_WRITER); 1433 rz_zev_callbacks = &zev_callbacks; 1434 rz_zev_set_active(B_TRUE); 1435 rw_exit(&rz_zev_rwlock); 1436 1437 return (DDI_SUCCESS); 1438 case DDI_RESUME: 1439 /* suspendeding zev devices should never happen */ 1440 return (DDI_SUCCESS); 1441 default: 1442 return (DDI_FAILURE); 1443 } 1444 fail: 1445 cmn_err(CE_WARN, "zev: attach failed"); 1446 zev_free_instance(dip); 1447 mutex_enter(&zev_mutex); 1448 zev_attached = B_FALSE; 1449 mutex_exit(&zev_mutex); 1450 return (DDI_FAILURE); 1451 } 1452 1453 /* ARGSUSED */ 1454 static int 1455 zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp) 1456 { 1457 minor_t minor; 1458 zev_queue_t *q; 1459 1460 /* arg is dev_t */ 1461 minor = getminor((dev_t)arg); 1462 mutex_enter(&zev_mutex); 1463 q = ddi_get_soft_state(statep, minor); 1464 if (q == NULL) { 1465 *resultp = NULL; 1466 mutex_exit(&zev_mutex); 1467 return (DDI_FAILURE); 1468 } 1469 1470 switch (infocmd) { 1471 case DDI_INFO_DEVT2DEVINFO: 1472 *resultp = q->zq_dip; 1473 break; 1474 case DDI_INFO_DEVT2INSTANCE: 1475 *resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip); 1476 break; 1477 default: 1478 mutex_exit(&zev_mutex); 1479 return (DDI_FAILURE); 1480 } 1481 mutex_exit(&zev_mutex); 1482 return (DDI_SUCCESS); 1483 } 1484 1485 static struct dev_ops zev_dev_ops = { 1486 DEVO_REV, /* driver build revision */ 1487 0, /* driver reference count */ 1488 zev_getinfo, /* 
getinfo */ 1489 nulldev, /* identify (obsolete) */ 1490 nulldev, /* probe (search for devices) */ 1491 zev_attach, /* attach */ 1492 zev_detach, /* detach */ 1493 nodev, /* reset (obsolete, use quiesce) */ 1494 &zev_cb_ops, /* character and block device ops */ 1495 NULL, /* bus driver ops */ 1496 NULL, /* power management, not needed */ 1497 ddi_quiesce_not_needed, /* quiesce */ 1498 }; 1499 1500 static struct modldrv zev_modldrv = { 1501 &mod_driverops, /* all loadable modules use this */ 1502 "zev ZFS event provider, v1.0", /* driver name and version info */ 1503 &zev_dev_ops /* ops method pointers */ 1504 }; 1505 1506 static struct modlinkage zev_modlinkage = { 1507 MODREV_1, /* fixed value */ 1508 { 1509 &zev_modldrv, /* driver linkage structure */ 1510 NULL /* list terminator */ 1511 } 1512 }; 1513 1514 int 1515 _init(void) 1516 { 1517 int error; 1518 1519 if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0) 1520 return (error); 1521 zev_attached = B_FALSE; 1522 1523 zev_queue_head = NULL; 1524 zev_queue_tail = NULL; 1525 zev_queue_len = 0; 1526 zev_muted_pools_head = NULL; 1527 zev_memory_allocated = 0; 1528 zev_memory_freed = 0; 1529 zev_queue_cnt = 0; 1530 1531 mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL); 1532 cv_init(&zev_condvar, NULL, CV_DRIVER, NULL); 1533 rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL); 1534 mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL); 1535 zev_mark_id = gethrtime(); 1536 mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL); 1537 zev_msg_sequence_number = gethrtime(); 1538 bzero(&zev_statistics, sizeof(zev_statistics)); 1539 bzero(&zev_pollhead, sizeof(zev_pollhead)); 1540 bzero(&zev_queues, sizeof(zev_queues)); 1541 zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN; 1542 if (zev_ioc_mute_pool("zg0")) { 1543 cmn_err(CE_WARN, "zev: could not init mute list"); 1544 goto FAIL; 1545 } 1546 1547 if ((error = mod_install(&zev_modlinkage)) != 0) { 1548 cmn_err(CE_WARN, "zev: could not 
install module"); 1549 goto FAIL; 1550 } 1551 1552 return (0); 1553 FAIL: 1554 /* free resources */ 1555 cmn_err(CE_WARN, "zev: _init failed"); 1556 mutex_destroy(&zev_mutex); 1557 ddi_soft_state_fini(&statep); 1558 return (error); 1559 } 1560 1561 int 1562 _info(struct modinfo *modinfop) 1563 { 1564 return (mod_info(&zev_modlinkage, modinfop)); 1565 } 1566 1567 int 1568 _fini(void) 1569 { 1570 int error = 0; 1571 zev_msg_t *msg; 1572 zev_pool_list_entry_t *pe, *npe; 1573 1574 mutex_enter(&zev_mutex); 1575 if (zev_attached == B_TRUE) { 1576 mutex_exit(&zev_mutex); 1577 return (SET_ERROR(EBUSY)); 1578 } 1579 if (zev_queue_cnt != 0) { 1580 /* should never happen */ 1581 mutex_exit(&zev_mutex); 1582 return (SET_ERROR(EBUSY)); 1583 } 1584 1585 /* 1586 * avoid deadlock if event list is full: make sure threads currently 1587 * blocking on the event list can append their event and then release 1588 * rz_zev_rwlock. Since there should be no queues left when we 1589 * reach this point we can simply empty the event list and then 1590 * wake everybody. 1591 */ 1592 while (zev_queue_head) { 1593 msg = zev_queue_head; 1594 zev_queue_head = msg->next; 1595 zev_free(msg, sizeof(*msg) + msg->size); 1596 } 1597 cv_broadcast(&zev_condvar); 1598 mutex_exit(&zev_mutex); 1599 1600 /* switch ZFS event callbacks back to default (again) */ 1601 rw_enter(&rz_zev_rwlock, RW_WRITER); 1602 rz_zev_callbacks = rz_zev_default_callbacks; 1603 rz_zev_set_active(B_FALSE); 1604 rw_exit(&rz_zev_rwlock); 1605 1606 /* no thread is inside of the callbacks anymore. Safe to remove. 
*/ 1607 1608 /* unload module callbacks */ 1609 if ((error = mod_remove(&zev_modlinkage)) != 0) { 1610 cmn_err(CE_WARN, "mod_remove failed: %d", error); 1611 return (error); 1612 } 1613 1614 /* free resources */ 1615 mutex_enter(&zev_mutex); 1616 while (zev_queue_head) { 1617 msg = zev_queue_head; 1618 zev_queue_head = msg->next; 1619 zev_free(msg, sizeof(*msg) + msg->size); 1620 } 1621 mutex_exit(&zev_mutex); 1622 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 1623 pe = zev_muted_pools_head; 1624 while (pe) { 1625 npe = pe; 1626 pe = pe->next; 1627 zev_free(npe, sizeof(*npe)); 1628 } 1629 rw_exit(&zev_pool_list_rwlock); 1630 ddi_soft_state_fini(&statep); 1631 rw_destroy(&zev_pool_list_rwlock); 1632 cv_destroy(&zev_condvar); 1633 mutex_destroy(&zev_mutex); 1634 mutex_destroy(&zev_mark_id_mutex); 1635 mutex_destroy(&zev_queue_msg_mutex); 1636 1637 return (0); 1638 } 1639 1640