#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/fs/zev.h>
#include <sys/zev_callbacks.h>
#include <sys/zfs_znode.h>
#include <sys/time.h>
#include <sys/sa.h>
#include <sys/zap.h>

#define OFFSETOF(s, m)          ((size_t)(&(((s *)0)->m)))

#define ZEV_DEFAULT_QUEUE_NAME          "beaver"
#define ZEV_CONTROL_DEVICE_MINOR        0
#define ZEV_MINOR_MIN                   (ZEV_CONTROL_DEVICE_MINOR + 1)
#define ZEV_MINOR_MAX                   (ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1)

typedef struct zev_queue {
        char                    zq_name[ZEV_MAX_QUEUE_NAME_LEN+1];
        minor_t                 zq_minor_number;
        dev_info_t              *zq_dip;
        struct pollhead         zq_pollhead;
        uint64_t                zq_bytes_read;
        uint64_t                zq_events_read;
        uint64_t                zq_bytes_discarded;
        uint64_t                zq_events_discarded;
        uint64_t                zq_bytes_total;
        uint64_t                zq_events_total;
        uint64_t                zq_wakeup_threshold;
        uint16_t                zq_flags;
        uint16_t                zq_need_wakeup;
        /* protected by zev_mutex */
        int                     zq_refcnt;
        uint64_t                zq_queue_len;
        uint64_t                zq_queue_messages;
        uint64_t                zq_max_queue_len;
        zev_msg_t               *zq_oldest;
        boolean_t               zq_busy;
        boolean_t               zq_to_be_removed;
        zev_statistics_t        zq_statistics;
        kcondvar_t              zq_condvar;
} zev_queue_t;

static void             *statep;
struct pollhead         zev_pollhead;

kmutex_t                zev_mutex;
kcondvar_t              zev_condvar;
kmutex_t                zev_queue_msg_mutex;
krwlock_t               zev_pool_list_rwlock;
static zev_statistics_t zev_statistics;
static boolean_t        zev_attached;
static kmutex_t         zev_mark_id_mutex;
static uint64_t         zev_mark_id = 0;

static uint64_t         zev_msg_sequence_number = 0;
static zev_queue_t      *zev_queues[ZEV_MAX_QUEUES];
static int              zev_queue_cnt = 0;

uint64_t zev_memory_allocated = 0;
uint64_t zev_memory_freed = 0;

/*
 * The longest potential message is from zev_zfs_mount() and
 * contains the mountpoint, which might be close to MAXPATHLEN bytes long.
 *
 * Another candidate is zev_znode_rename_cb() and contains three inode
 * numbers and two filenames of up to MAXNAMELEN bytes each.
 */
#define ZEV_MAX_MESSAGE_LEN     4096

static zev_msg_t *zev_queue_head = NULL;
static zev_msg_t *zev_queue_tail = NULL;
static uint64_t zev_queue_len = 0;

typedef struct zev_pool_list_entry {
        struct zev_pool_list_entry      *next;
        char                            name[MAXPATHLEN];
} zev_pool_list_entry_t;

static zev_pool_list_entry_t *zev_muted_pools_head = NULL;

static volatile int zev_wakeup_thread_run = 1;
static kthread_t *zev_poll_wakeup_thread = NULL;

int
zev_queue_cmp(const void *a, const void *b)
{
        const zev_queue_t *qa = a;
        const zev_queue_t *qb = b;
        if (qa->zq_minor_number > qb->zq_minor_number)
                return 1;
        if (qa->zq_minor_number < qb->zq_minor_number)
                return -1;
        return 0;
}

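/*
 * Message storage design: every event is kept exactly once, in a global
 * doubly-linked list (zev_queue_head/zev_queue_tail). A per-device queue
 * does not copy messages; it only keeps a cursor (zq_oldest) into the
 * global list plus its own byte and message counters. zev_queue_trim()
 * frees messages that no cursor can reach anymore.
 */
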
/* must be called with zev_mutex held */
void
zev_queue_trim(void)
{
        zev_msg_t *m;
        uint64_t oldest_message;
        zev_queue_t *q;
        int i;

        if (!zev_queue_tail)
                return;

        oldest_message = zev_queue_tail->seq + 1;  /* does not exist, yet. */
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (q == NULL)
                        continue;
                if (!q->zq_oldest)
                        continue;
                if (oldest_message > q->zq_oldest->seq)
                        oldest_message = q->zq_oldest->seq;
        }

        /* remove msgs between oldest_message and zev_queue_head */
        while (zev_queue_head && (oldest_message > zev_queue_head->seq)) {
                m = zev_queue_head;
                zev_queue_head = m->next;
                if (zev_queue_head == NULL) {
                        zev_queue_tail = NULL;
                } else {
                        zev_queue_head->prev = NULL;
                }
                if (m->read == 0) {
                        zev_statistics.zev_bytes_discarded += m->size;
                        zev_statistics.zev_cnt_discarded_events++;
                }
                zev_statistics.zev_queue_len -= m->size;
                zev_queue_len--;
                ZEV_FREE(m, sizeof(*m) + m->size);
        }
}

/* must be called with zev_mutex held */
static void
zev_queue_hold(zev_queue_t *q)
{
        q->zq_refcnt++;
}

/* must be called with zev_mutex held */
static void
zev_queue_release(zev_queue_t *q)
{
        q->zq_refcnt--;
        if (q->zq_refcnt > 0)
                return;

        ASSERT(q->zq_busy == B_FALSE);

        /* persistent queues will not be removed */
        if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0)
                return;

        /* remove queue from queue list */
        zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL;

        /* discard messages that no queue references anymore */
        zev_queue_trim();

        cv_destroy(&q->zq_condvar);
        ddi_remove_minor_node(q->zq_dip, q->zq_name);
        ddi_soft_state_free(statep, q->zq_minor_number);
        ZEV_MEM_SUB(sizeof(zev_queue_t));
        zev_queue_cnt--;
}

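/*
 * Create a new event queue and its minor device node. Returns 0 or an
 * errno value: EINVAL for a bad name or length, ENOSPC when no minor
 * number is free, EEXIST when the name is taken, EFAULT when the minor
 * node cannot be created. The new queue starts out with a cursor on the
 * youngest messages already in the global list, up to its maximum length.
 */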
int
zev_queue_new(zev_queue_t **queue,
              dev_info_t *dip,
              char *name,
              uint64_t max_queue_len,
              uint16_t flags)
{
        zev_queue_t *q;
        zev_queue_t *tmp;
        zev_msg_t *msg;
        int name_exists = 0;
        minor_t minor;
        char *p;
        int i;

        if (max_queue_len > ZEV_MAX_QUEUE_LEN)
                return EINVAL;
        if (max_queue_len == 0)
                max_queue_len = ZEV_MAX_QUEUE_LEN;
        if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME))
                return EINVAL;
        for (p = name; *p; p++) {
                if (*p >= 'a' && *p <= 'z')
                        continue;
                if (*p >= '0' && *p <= '9')
                        continue;
                if (*p == '.')
                        continue;
                return EINVAL;
        }

        mutex_enter(&zev_mutex);

        /* find a free minor number. */
        /* if this were a frequent operation we'd have a free-minor list */
        for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) {
                tmp = zev_queues[minor - ZEV_MINOR_MIN];
                if (tmp == NULL)
                        break;
        }
        if (tmp) {
                mutex_exit(&zev_mutex);
                return ENOSPC;
        }

        if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) {
                mutex_exit(&zev_mutex);
                return ENOSPC;
        }
        ZEV_MEM_ADD(sizeof(zev_queue_t));

        q = ddi_get_soft_state(statep, minor);
        memset(q, 0, sizeof(*q));
        strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN);
        q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0';
        q->zq_max_queue_len = max_queue_len;
        q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN;
        q->zq_flags = flags;
        q->zq_refcnt = 1;
        q->zq_dip = dip;
        q->zq_minor_number = minor;
        cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL);

        /* insert into queue list */
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                /* if this were a frequent operation we'd have a name tree */
                if (zev_queues[i - ZEV_MINOR_MIN] == NULL)
                        continue;
                if (!strcmp(q->zq_name,
                    zev_queues[i - ZEV_MINOR_MIN]->zq_name)) {
                        name_exists = 1;
                        break;
                }
        }
        if (name_exists) {
                ddi_soft_state_free(statep, minor);
                ZEV_MEM_SUB(sizeof(zev_queue_t));
                mutex_exit(&zev_mutex);
                return EEXIST;
        }
        zev_queues[minor - ZEV_MINOR_MIN] = q;
        zev_queue_cnt++;

        /* calculate current queue len and find head and tail */
        q->zq_oldest = zev_queue_tail;
        msg = zev_queue_tail;
        while ((msg != NULL) && (q->zq_queue_len < q->zq_max_queue_len)) {
                q->zq_queue_len += msg->size;
                q->zq_queue_messages++;
                q->zq_oldest = msg;
                msg = msg->prev;
        }

        mutex_exit(&zev_mutex);

        if (ddi_create_minor_node(dip, name,
            S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
                mutex_enter(&zev_mutex);
                zev_queues[minor - ZEV_MINOR_MIN] = NULL;
                zev_queue_cnt--;
                ddi_soft_state_free(statep, minor);
                ZEV_MEM_SUB(sizeof(zev_queue_t));
                mutex_exit(&zev_mutex);
                return EFAULT;
        }

        *queue = q;
        return 0;
}

/*
 * poll() wakeup thread. Used to check periodically whether we have
 * bytes left in the queue that have not yet been made into a
 * pollwakeup() call. This is meant to ensure a maximum waiting
 * time until an event is presented as a poll wakeup, while at
 * the same time not making every single event into a poll wakeup
 * of its own.
 */

static void
zev_poll_wakeup(boolean_t flush_all)
{
        zev_queue_t *q;
        int i;

        /*
         * This loop works with hold() and release() because
         * pollwakeup() requires us to release our locks before calling it.
         *
         * from pollwakeup(9F):
         *
         *   "Driver defined locks should not be held across calls
         *   to this function."
         */

        /* wake up threads for each individual queue */
        mutex_enter(&zev_mutex);
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (q == NULL)
                        continue;
                if (!q->zq_busy)
                        continue;
                if (!q->zq_queue_len)
                        continue;
                if ((flush_all) ||
                    (q->zq_queue_len > q->zq_wakeup_threshold)) {
                        zev_queue_hold(q);
                        mutex_exit(&zev_mutex);
                        pollwakeup(&q->zq_pollhead, POLLIN);
                        mutex_enter(&zev_mutex);
                        zev_queue_release(q);
                }
        }
        mutex_exit(&zev_mutex);
}

static void
zev_poll_wakeup_thread_main(void)
{
        while (zev_wakeup_thread_run) {
                delay(drv_usectohz(100 * 1000));  /* sleep 100ms */

                zev_poll_wakeup(B_TRUE);
        }
        thread_exit();
}

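/*
 * Pool mute list: pools on this list are reported by zev_skip_pool(),
 * which the ZFS event callbacks are expected to consult so that events
 * for muted pools are suppressed at the source.
 */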
static int
zev_ioc_mute_pool(char *poolname)
{
        zev_pool_list_entry_t *pe;
        rw_enter(&zev_pool_list_rwlock, RW_WRITER);
        /* pool already muted? */
        for (pe = zev_muted_pools_head; pe; pe = pe->next) {
                if (!strcmp(pe->name, poolname)) {
                        rw_exit(&zev_pool_list_rwlock);
                        return EEXIST;
                }
        }
        pe = ZEV_ZALLOC(sizeof(*pe));
        if (!pe) {
                rw_exit(&zev_pool_list_rwlock);
                return ENOMEM;
        }
        (void) strncpy(pe->name, poolname, sizeof(pe->name));
        pe->next = zev_muted_pools_head;
        zev_muted_pools_head = pe;
        rw_exit(&zev_pool_list_rwlock);
        return (0);
}

static int
zev_ioc_unmute_pool(char *poolname)
{
        zev_pool_list_entry_t *pe, *peprev;

        rw_enter(&zev_pool_list_rwlock, RW_WRITER);
        /* pool muted? */
        peprev = NULL;
        for (pe = zev_muted_pools_head; pe; pe = pe->next) {
                if (!strcmp(pe->name, poolname))
                        break;
                peprev = pe;
        }
        if (pe == NULL) {
                rw_exit(&zev_pool_list_rwlock);
                return ENOENT;
        }

        if (peprev != NULL) {
                peprev->next = pe->next;
        } else {
                zev_muted_pools_head = pe->next;
        }
        ZEV_FREE(pe, sizeof(*pe));
        rw_exit(&zev_pool_list_rwlock);
        return (0);
}

int
zev_skip_pool(objset_t *os)
{
        zev_pool_list_entry_t *pe;
        dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
        rw_enter(&zev_pool_list_rwlock, RW_READER);
        for (pe = zev_muted_pools_head; pe; pe = pe->next) {
                if (!strcmp(pe->name, dp->dp_spa->spa_name)) {
                        rw_exit(&zev_pool_list_rwlock);
                        return 1;
                }
        }
        rw_exit(&zev_pool_list_rwlock);
        return 0;
}

static void
zev_update_statistics(int op, zev_statistics_t *stat)
{
        switch (op) {
        case ZEV_OP_ERROR:
                stat->zev_cnt_errors++;
                break;
        case ZEV_OP_MARK:
                stat->zev_cnt_marks++;
                break;
        case ZEV_OP_ZFS_MOUNT:
                stat->zev_cnt_zfs_mount++;
                break;
        case ZEV_OP_ZFS_UMOUNT:
                stat->zev_cnt_zfs_umount++;
                break;
        case ZEV_OP_ZVOL_WRITE:
                stat->zev_cnt_zvol_write++;
                break;
        case ZEV_OP_ZVOL_TRUNCATE:
                stat->zev_cnt_zvol_truncate++;
                break;
        case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE:
                stat->zev_cnt_znode_close_after_update++;
                break;
        case ZEV_OP_ZNODE_CREATE:
                stat->zev_cnt_znode_create++;
                break;
        case ZEV_OP_ZNODE_REMOVE:
                stat->zev_cnt_znode_remove++;
                break;
        case ZEV_OP_ZNODE_LINK:
                stat->zev_cnt_znode_link++;
                break;
        case ZEV_OP_ZNODE_SYMLINK:
                stat->zev_cnt_znode_symlink++;
                break;
        case ZEV_OP_ZNODE_RENAME:
                stat->zev_cnt_znode_rename++;
                break;
        case ZEV_OP_ZNODE_WRITE:
                stat->zev_cnt_znode_write++;
                break;
        case ZEV_OP_ZNODE_TRUNCATE:
                stat->zev_cnt_znode_truncate++;
                break;
        case ZEV_OP_ZNODE_SETATTR:
                stat->zev_cnt_znode_setattr++;
                break;
        case ZEV_OP_ZNODE_ACL:
                stat->zev_cnt_znode_acl++;
                break;
        }
}

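/*
 * Enqueue one event message. The message is appended to the global list
 * and accounted to every per-device queue. If the global queue or a
 * blocking queue (ZEV_FL_BLOCK_WHILE_QUEUE_FULL) is full, the caller
 * sleeps on zev_condvar until readers drain it; non-blocking queues
 * discard their oldest messages instead. Takes ownership of 'msg' and
 * frees it when the message cannot be queued.
 */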
void
zev_queue_message(int op, zev_msg_t *msg)
{
        zev_queue_t *q;
        int wakeup = 0;
        zev_msg_t *m;
        int i;

        msg->next = NULL;
        msg->prev = NULL;
        msg->read = 0;

        if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
                zev_queue_error(op, "unknown op id encountered: %d", op);
                ZEV_FREE(msg, sizeof(*msg) + msg->size);
                return;
        }

        /*
         * This mutex protects us against race conditions when several
         * threads want to queue a message and one or more queues are
         * full: we release zev_mutex to wait for the queues to become
         * less-than-full, but we don't know in which order the waiting
         * threads will be awoken. If it's not the same order in which
         * they went to sleep we might mark different messages as "newest"
         * in different queues, and so we might have dupes or even
         * skip messages.
         */
        mutex_enter(&zev_queue_msg_mutex);

        mutex_enter(&zev_mutex);

        /*
         * When the module is loaded, the default behavior is to
         * put all events into a queue and block if the queue is full.
         * This is done even before the pseudo device is attached.
         * This way, no events are lost.
         *
         * To discard events entirely the "beaver" queue,
         * which never discards anything, has to be removed.
         */

        if (zev_queue_cnt == 0) {
                /* no queues left: drop and free the message */
                ZEV_FREE(msg, sizeof(*msg) + msg->size);
                mutex_exit(&zev_mutex);
                mutex_exit(&zev_queue_msg_mutex);
                return;
        }

        /* put message into global queue */
        msg->seq = zev_msg_sequence_number++;
        while (zev_statistics.zev_max_queue_len &&
            zev_statistics.zev_queue_len >= zev_statistics.zev_max_queue_len) {
                /* queue full.  block until it's been shrunk. */
                cv_wait(&zev_condvar, &zev_mutex);
        }

        if (zev_queue_tail == NULL) {
                zev_queue_head = zev_queue_tail = msg;
        } else {
                zev_queue_tail->next = msg;
                msg->prev = zev_queue_tail;
                zev_queue_tail = msg;
        }
        zev_queue_len++;
        zev_statistics.zev_cnt_total_events++;
        zev_statistics.zev_queue_len += msg->size;

        /* update per-device queues */
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;

                zev_queue_hold(q);

                /* make sure queue has enough room */
                while (q->zq_max_queue_len &&
                    q->zq_queue_len > q->zq_max_queue_len) {

                        if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
                                /* block until queue has been shrunk. */
                                cv_wait(&zev_condvar, &zev_mutex);
                        } else {
                                /* discard msgs until queue is small enough */
                                while (q->zq_queue_len > q->zq_max_queue_len) {
                                        m = q->zq_oldest;
                                        if (m == NULL)
                                                break;
                                        q->zq_events_discarded++;
                                        q->zq_bytes_discarded += m->size;
                                        q->zq_oldest = m->next;
                                        q->zq_queue_len -= m->size;
                                        q->zq_queue_messages--;
                                }
                        }
                }

                /* register new message at the end of the queue */
                q->zq_queue_len += msg->size;
                q->zq_queue_messages++;
                q->zq_bytes_total += msg->size;
                q->zq_events_total++;
                if (q->zq_oldest == NULL)
                        q->zq_oldest = msg;

                zev_update_statistics(op, &q->zq_statistics);

                if (q->zq_queue_len > q->zq_wakeup_threshold)
                        wakeup = 1;
                if (q->zq_queue_len == msg->size)  /* queue was empty */
                        cv_broadcast(&q->zq_condvar);

                zev_queue_release(q);
        }

        zev_queue_trim();

        zev_update_statistics(op, &zev_statistics);
        mutex_exit(&zev_mutex);
        mutex_exit(&zev_queue_msg_mutex);

        /* one or more queues need a pollwakeup() */
        if (op == ZEV_OP_MARK) {
                zev_poll_wakeup(B_TRUE);
        } else if (wakeup) {
                zev_poll_wakeup(B_FALSE);
        }

        return;
}

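/*
 * Report a driver-level error as a regular ZEV_OP_ERROR message so that
 * readers see it in-band. If the formatted text does not fit into
 * ZEV_MAX_MESSAGE_LEN the report is dropped with a console warning.
 */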
602 { 603 char buf[ZEV_MAX_MESSAGE_LEN]; 604 va_list ap; 605 int len; 606 zev_msg_t *msg = NULL; 607 zev_error_t *rec; 608 int msg_size; 609 610 va_start(ap, fmt); 611 len = vsnprintf(buf, sizeof(buf), fmt, ap); 612 va_end(ap); 613 if (len >= sizeof(buf)) { 614 cmn_err(CE_WARN, "zev: can't report error - " 615 "dropping event entirely."); 616 return; 617 } 618 619 msg_size = sizeof(*rec) + len + 1; 620 msg = ZEV_ALLOC(sizeof(*msg) + msg_size); 621 msg->size = msg_size; 622 rec = (zev_error_t *)(msg + 1); 623 rec->record_len = msg_size; 624 rec->op = ZEV_OP_ERROR; 625 rec->op_time = ddi_get_time(); 626 rec->guid = 0; 627 rec->failed_op = op; 628 rec->errstr_len = len; 629 (void) memcpy(ZEV_ERRSTR(rec), buf, len + 1); 630 631 zev_queue_message(ZEV_OP_ERROR, msg); 632 return; 633 } 634 635 static int 636 zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name) 637 { 638 char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1]; 639 zev_queue_t *q; 640 int i; 641 642 *out = NULL; 643 644 if (name->zev_namelen == 0) { 645 if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR) 646 return EINVAL; 647 zev_queue_hold(req_q); 648 *out = req_q; 649 return 0; 650 } 651 652 if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 653 return EINVAL; 654 strncpy(namebuf, name->zev_name, name->zev_namelen); 655 namebuf[name->zev_namelen] = '\0'; 656 657 mutex_enter(&zev_mutex); 658 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 659 q = zev_queues[i - ZEV_MINOR_MIN]; 660 if (!q) 661 continue; 662 if (!strcmp(q->zq_name, namebuf)) { 663 zev_queue_hold(q); 664 mutex_exit(&zev_mutex); 665 *out = q; 666 return 0; 667 } 668 } 669 mutex_exit(&zev_mutex); 670 return ENOENT; 671 } 672 673 static int 674 zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode) 675 { 676 zev_ioctl_get_queue_statistics_t gs; 677 zev_queue_t *q; 678 int ret; 679 680 if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0) 681 return EFAULT; 682 683 ret = zev_find_queue(&q, req_q, &gs.zev_queue_name); 684 if (ret) 685 return ret; 686 687 /* ddi_copyout() can take a long time. Better make 688 a copy to be able to release the mutex faster. */ 689 mutex_enter(&zev_mutex); 690 memcpy(&gs.zev_statistics, &q->zq_statistics,sizeof(gs.zev_statistics)); 691 gs.zev_statistics.zev_queue_len = q->zq_queue_len; 692 gs.zev_statistics.zev_bytes_read = q->zq_bytes_read; 693 gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded; 694 gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len; 695 gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded; 696 gs.zev_statistics.zev_cnt_total_events = q->zq_events_total; 697 zev_queue_release(q); 698 mutex_exit(&zev_mutex); 699 700 if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) 701 return EFAULT; 702 return 0; 703 } 704 705 static int 706 zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 707 { 708 zev_ioctl_set_queue_properties_t qp; 709 zev_queue_t *q; 710 uint64_t old_max; 711 uint64_t old_flags; 712 int ret; 713 714 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 715 return EFAULT; 716 if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN) 717 return EINVAL; 718 if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN) 719 return EINVAL; 720 721 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 722 if (ret) 723 return ret; 724 725 mutex_enter(&zev_mutex); 726 727 /* 728 * Note: if the PERSISTENT flag is cleared, and the queue is not busy, 729 * the queue should be removed by zev_queue_release() in zev_ioctl(). 
static int
zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_set_queue_properties_t qp;
        zev_queue_t *q;
        uint64_t old_max;
        uint64_t old_flags;
        int ret;

        if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
                return EFAULT;
        if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN)
                return EINVAL;
        if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN)
                return EINVAL;

        ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
        if (ret)
                return ret;

        mutex_enter(&zev_mutex);

        /*
         * Note: if the PERSISTENT flag is cleared, and the queue is not busy,
         * the queue should be removed by zev_queue_release() in zev_ioctl().
         */
        old_flags = q->zq_flags;        /* save previous flags, then update */
        q->zq_flags = qp.zev_flags;
        if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) &&
            (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) {
                /* queue is no longer blocking - wake blocked threads */
                cv_broadcast(&zev_condvar);
        }

        old_max = q->zq_max_queue_len;
        q->zq_max_queue_len = qp.zev_max_queue_len;
        if (q->zq_max_queue_len < old_max)
                zev_queue_trim();
        if (q->zq_max_queue_len > old_max)
                cv_broadcast(&zev_condvar);  /* threads may be waiting */

        if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) &&
            (qp.zev_poll_wakeup_threshold <= q->zq_queue_len))
                pollwakeup(&q->zq_pollhead, POLLIN);
        q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold;

        zev_queue_release(q);
        mutex_exit(&zev_mutex);
        return 0;
}

static int
zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_get_queue_properties_t qp;
        zev_queue_t *q;
        int ret;

        if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
                return EFAULT;

        ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
        if (ret)
                return ret;

        mutex_enter(&zev_mutex);
        qp.zev_max_queue_len = q->zq_max_queue_len;
        qp.zev_flags = q->zq_flags;
        qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold;
        zev_queue_release(q);
        mutex_exit(&zev_mutex);

        if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0)
                return EFAULT;
        return 0;
}

static int
zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_add_queue_t aq;
        zev_queue_t *new_q;
        char name[ZEV_MAX_QUEUE_NAME_LEN+1];

        if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0)
                return EFAULT;

        if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
                return EINVAL;
        strncpy(name, aq.zev_name, aq.zev_namelen);
        name[aq.zev_namelen] = '\0';

        return zev_queue_new(&new_q, req_q->zq_dip, name,
            aq.zev_max_queue_len, aq.zev_flags);
}

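/*
 * Queue removal only drops the queue's initial reference; the queue is
 * actually freed by zev_queue_release() when its reference count drops
 * to zero. Clearing zq_flags first makes even a persistent queue
 * removable and stops it from blocking writers.
 */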
static int
zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_remove_queue_t rq;
        zev_queue_t *q;
        char name[ZEV_MAX_QUEUE_NAME_LEN+1];
        int found = 0;
        int i;

        if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0)
                return EFAULT;

        if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
                return EINVAL;
        strncpy(name, rq.zev_queue_name.zev_name,
            rq.zev_queue_name.zev_namelen);
        name[rq.zev_queue_name.zev_namelen] = '\0';

        mutex_enter(&zev_mutex);
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                if (!strcmp(q->zq_name, name)) {
                        found = 1;
                        break;
                }
        }
        if (!found) {
                mutex_exit(&zev_mutex);
                return ENOENT;
        }

        if (q->zq_busy) {
                mutex_exit(&zev_mutex);
                return EBUSY;
        }
        /*
         * clear flags, so that persistent queues are removed as well
         * and the queue becomes non-blocking.
         */
        q->zq_flags = 0;
        if (q->zq_to_be_removed == B_FALSE) {
                q->zq_to_be_removed = B_TRUE;
                zev_queue_release(q);
        }
        /* some threads might be waiting for this queue to become writable */
        cv_broadcast(&zev_condvar);

        mutex_exit(&zev_mutex);
        return 0;
}

static int
zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_debug_info_t di;
        uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0);
        uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0);

        memset(&di, 0, sizeof(di));  /* don't leak kernel stack to userland */
        di.zev_memory_allocated = mem_allocated - mem_freed;
        if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0)
                return EFAULT;
        return 0;
}

static int
zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_get_queue_list_t gql;
        zev_queue_t *q;
        int i = 0;
        int count = 0;

        memset(&gql, 0, sizeof(gql));

        mutex_enter(&zev_mutex);
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                strncpy(gql.zev_queue_name[count].zev_name,
                    q->zq_name, ZEV_MAX_QUEUE_NAME_LEN);
                gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name);
                count++;
        }
        gql.zev_n_queues = count;
        mutex_exit(&zev_mutex);

        if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0)
                return EFAULT;
        return 0;
}

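/*
 * Typical userland use (a minimal sketch; the actual device paths are
 * determined by packaging, and the ioctl numbers and structure layouts
 * are those exported by <sys/fs/zev.h>):
 *
 *      int ctrl = open("/dev/zev/ctrl", O_RDONLY);  // control node, minor 0
 *      zev_ioctl_add_queue_t aq = { ... };          // name, namelen,
 *                                                   // max len, flags
 *      ioctl(ctrl, ZEV_IOC_ADD_QUEUE, &aq);         // creates queue node
 *
 * All queue-management ioctls work the same way: copy a fixed-size
 * structure in, operate under zev_mutex, copy the result back out.
 */
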
/* ARGSUSED */
static int
zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
        zev_statistics_t zs;
        zev_ioctl_poolarg_t pa;
        zev_ioctl_mark_t mark;
        zev_mark_t *rec;
        int msg_size;
        zev_msg_t *msg;
        uint64_t len;
        uint64_t mark_id;
        minor_t minor;
        zev_queue_t *req_q;
        int ret = 0;

        minor = getminor(dev);
        mutex_enter(&zev_mutex);
        if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        zev_queue_hold(req_q);
        mutex_exit(&zev_mutex);
        /*
         * all structures passed between kernel and userspace
         * are now compatible between 64 and 32 bit.  Model
         * conversion can be ignored.
         */
        switch (cmd) {
        case ZEV_IOC_GET_GLOBAL_STATISTICS:
                /*
                 * ddi_copyout() can take a long time.  Better make
                 * a copy to be able to release the mutex faster.
                 */
                mutex_enter(&zev_mutex);
                (void) memcpy(&zs, &zev_statistics, sizeof(zs));
                mutex_exit(&zev_mutex);
                if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0)
                        ret = EFAULT;
                break;
        case ZEV_IOC_GET_QUEUE_STATISTICS:
                ret = zev_ioc_get_queue_statistics(req_q, arg, mode);
                break;
        case ZEV_IOC_MUTE_POOL:
        case ZEV_IOC_UNMUTE_POOL:
                if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) {
                        ret = EFAULT;
                        break;
                }
                if (pa.zev_poolname_len >= MAXPATHLEN) {
                        ret = EINVAL;
                        break;
                }
                pa.zev_poolname[pa.zev_poolname_len] = '\0';
                if (cmd == ZEV_IOC_MUTE_POOL) {
                        ret = zev_ioc_mute_pool(pa.zev_poolname);
                } else {
                        ret = zev_ioc_unmute_pool(pa.zev_poolname);
                }
                break;
        case ZEV_IOC_SET_MAX_QUEUE_LEN:
                if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) {
                        ret = EFAULT;
                        break;
                }
                if (len > ZEV_MAX_QUEUE_LEN) {
                        ret = EINVAL;
                        break;
                }
                mutex_enter(&zev_mutex);
                zev_statistics.zev_max_queue_len = len;
                cv_broadcast(&zev_condvar);
                mutex_exit(&zev_mutex);
                break;
        case ZEV_IOC_GET_QUEUE_PROPERTIES:
                ret = zev_ioc_get_queue_properties(req_q, arg, mode);
                break;
        case ZEV_IOC_SET_QUEUE_PROPERTIES:
                ret = zev_ioc_set_queue_properties(req_q, arg, mode);
                break;
        case ZEV_IOC_MARK:
                if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) {
                        ret = EFAULT;
                        break;
                }
                /* limit payload so the total record stays a sane size */
                if (mark.zev_payload_len >
                    ZEV_MAX_MESSAGE_LEN - sizeof(*rec) - 1) {
                        ret = EINVAL;
                        break;
                }
                /* prepare message */
                msg_size = sizeof(*rec) + mark.zev_payload_len + 1;
                msg = ZEV_ALLOC(sizeof(*msg) + msg_size);
                msg->size = msg_size;
                rec = (zev_mark_t *)(msg + 1);
                rec->record_len = msg_size;
                rec->op = ZEV_OP_MARK;
                rec->op_time = ddi_get_time();
                rec->guid = mark.zev_guid;
                rec->payload_len = mark.zev_payload_len;
                /* get payload */
                if (ddi_copyin(((char *)arg) + sizeof(mark),
                    ZEV_PAYLOAD(rec),
                    mark.zev_payload_len, mode) != 0) {
                        ZEV_FREE(msg, sizeof(*msg) + msg_size);
                        ret = EFAULT;
                        break;
                }
                *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0';
                /* get mark id and queue message */
                mutex_enter(&zev_mark_id_mutex);
                mark_id = zev_mark_id++;
                mutex_exit(&zev_mark_id_mutex);
                rec->mark_id = mark_id;
                zev_queue_message(ZEV_OP_MARK, msg);
                /* report mark id to userland, ignore errors */
                mark.zev_mark_id = mark_id;
                (void) ddi_copyout(&mark, (void *)arg, sizeof(mark), mode);
                break;
        case ZEV_IOC_ADD_QUEUE:
                if (minor != ZEV_CONTROL_DEVICE_MINOR) {
                        ret = EACCES;
                        break;
                }
                ret = zev_ioc_add_queue(req_q, arg, mode);
                break;
        case ZEV_IOC_REMOVE_QUEUE:
                if (minor != ZEV_CONTROL_DEVICE_MINOR) {
                        ret = EACCES;
                        break;
                }
                ret = zev_ioc_remove_queue(req_q, arg, mode);
                break;
        case ZEV_IOC_GET_DEBUG_INFO:
                ret = zev_ioc_get_debug_info(req_q, arg, mode);
                break;
        case ZEV_IOC_GET_QUEUE_LIST:
                ret = zev_ioc_get_queue_list(req_q, arg, mode);
                break;
        default:
                /* generic "ioctl unknown" error */
                ret = ENOTTY;
        }

        mutex_enter(&zev_mutex);
        zev_queue_release(req_q);
        mutex_exit(&zev_mutex);
        return (ret);
}

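/*
 * Poll support: POLLIN is raised either directly from zev_queue_message()
 * when a queue crosses its wakeup threshold, or by the periodic wakeup
 * thread, which flushes all open, non-empty queues every 100ms so that
 * no event waits indefinitely for a wakeup.
 */
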
static int
zev_chpoll(dev_t dev, short events, int anyyet,
    short *reventsp, struct pollhead **phpp)
{
        int minor;
        short revent = 0;
        zev_queue_t *q;

        /* use minor-specific queue context and its pollhead */
        minor = getminor(dev);
        if (minor == ZEV_CONTROL_DEVICE_MINOR)
                return (EINVAL);
        mutex_enter(&zev_mutex);
        if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        revent = 0;
        if ((events & POLLIN)) {
                if (q->zq_oldest)
                        revent |= POLLIN;
        }
        if (revent == 0) {
                if (!anyyet) {
                        *phpp = &q->zq_pollhead;
                }
        }
        *reventsp = revent;
        mutex_exit(&zev_mutex);
        return (0);
}

/* ARGSUSED */
static int
zev_read(dev_t dev, struct uio *uio_p, cred_t *cred_p)
{
        minor_t minor;
        offset_t off;
        int ret = 0;
        zev_msg_t *msg;
        char *data;
        zev_queue_t *q;

        minor = getminor(dev);
        if (minor == ZEV_CONTROL_DEVICE_MINOR)
                return (EINVAL);

        mutex_enter(&zev_mutex);
        q = ddi_get_soft_state(statep, minor);
        if (q == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        off = uio_p->uio_loffset;
        msg = q->zq_oldest;
        while (msg == NULL) {
                if (!ddi_can_receive_sig()) {
                        /*
                         * read() shouldn't block because this thread
                         * can't receive signals. (e.g., it might be
                         * torn down by exit() right now.)
                         */
                        mutex_exit(&zev_mutex);
                        return 0;
                }
                if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) {
                        /* signal received. */
                        mutex_exit(&zev_mutex);
                        return EINTR;
                }
                msg = q->zq_oldest;
        }
        if (msg->size > uio_p->uio_resid) {
                mutex_exit(&zev_mutex);
                return E2BIG;
        }
        while (msg && uio_p->uio_resid >= msg->size) {
                data = (char *)(msg + 1);
                ret = uiomove(data, msg->size, UIO_READ, uio_p);
                if (ret != 0) {
                        mutex_exit(&zev_mutex);
                        cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
                        uio_p->uio_loffset = off;
                        return (ret);
                }
                q->zq_oldest = msg->next;
                q->zq_bytes_read += msg->size;
                q->zq_queue_len -= msg->size;
                q->zq_queue_messages--;
                msg->read++;
                msg = q->zq_oldest;
        }
        cv_broadcast(&zev_condvar);
        mutex_exit(&zev_mutex);
        uio_p->uio_loffset = off;
        return 0;
}

/* ARGSUSED */
static int
zev_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
        zev_queue_t *q;
        int minor;

        minor = getminor(dev);
        if (otyp != OTYP_CHR)
                return (EINVAL);
        mutex_enter(&zev_mutex);
        if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        if (q->zq_busy != B_TRUE) {
                mutex_exit(&zev_mutex);
                return (EINVAL);
        }
        q->zq_busy = B_FALSE;
        if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0)
                zev_queue_release(q);
        mutex_exit(&zev_mutex);
        return (0);
}

/* ARGSUSED */
static int
zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
        zev_queue_t *q;
        minor_t minor;

        minor = getminor(*devp);
        if (otyp != OTYP_CHR)
                return (EINVAL);
        if (drv_priv(credp) != 0)
                return (EPERM);
        mutex_enter(&zev_mutex);
        if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        if (minor == ZEV_CONTROL_DEVICE_MINOR) {
                /* control device may be used in parallel */
                q->zq_busy = B_TRUE;
                mutex_exit(&zev_mutex);
                return 0;
        }
        if (q->zq_busy == B_TRUE) {
                mutex_exit(&zev_mutex);
                return (EBUSY);
        }
        q->zq_busy = B_TRUE;  /* can only be opened exclusively */
        mutex_exit(&zev_mutex);
        return (0);
}

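/*
 * A reader typically blocks in poll() and then drains its queue; each
 * read() returns as many whole messages as fit into the buffer, never a
 * partial one (E2BIG if the buffer cannot hold even the first message).
 * Sketch (illustrative only):
 *
 *      struct pollfd pfd = { .fd = qfd, .events = POLLIN };
 *      char buf[4096];                       // >= ZEV_MAX_MESSAGE_LEN
 *      while (poll(&pfd, 1, -1) > 0) {
 *              ssize_t n = read(qfd, buf, sizeof(buf));
 *              // parse records; each carries record_len/op/op_time headers
 *      }
 */
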
static struct cb_ops zev_cb_ops = {
        zev_open,               /* open */
        zev_close,              /* close */
        nodev,                  /* strategy */
        nodev,                  /* print */
        nodev,                  /* dump */
        zev_read,               /* read */
        nodev,                  /* write */
        zev_ioctl,              /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        zev_chpoll,             /* chpoll */
        ddi_prop_op,            /* prop_op */
        NULL,                   /* streamtab */
        D_MP | D_64BIT,         /* cb_flag */
        CB_REV,                 /* cb_rev */
        nodev,                  /* aread */
        nodev,                  /* awrite */
};

static void
zev_free_instance(dev_info_t *dip)
{
        int instance;
        zev_queue_t *q;
        int i;

        instance = ddi_get_instance(dip);
        if (instance != 0) {
                cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)",
                    instance);
                return;
        }

        ddi_remove_minor_node(dip, NULL);

        /* stop pollwakeup thread */
        zev_wakeup_thread_run = 0;
        if (zev_poll_wakeup_thread != NULL) {
                thread_join(zev_poll_wakeup_thread->t_did);
                zev_poll_wakeup_thread = NULL;
        }

        mutex_enter(&zev_mutex);

        /* remove "ctrl" dummy queue */
        q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
        if (q) {
                ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR);
                ZEV_MEM_SUB(sizeof(zev_queue_t));
        }

        /* remove all other queues */
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                ASSERT(q->zq_refcnt == 1);
                zev_queue_release(q);
        }
        zev_queue_trim();
        bzero(&zev_queues, sizeof(zev_queues));

        mutex_exit(&zev_mutex);
}

static int
zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        int instance;
        zev_queue_t *q;

        /* called once per instance with DDI_DETACH,
           may be called to suspend */
        switch (cmd) {
        case DDI_DETACH:
                /* instance busy? */
                instance = ddi_get_instance(dip);
                if (instance != 0) {  /* hardcoded in zev.conf */
                        /* this module only supports one instance. */
                        return (DDI_FAILURE);
                }

                mutex_enter(&zev_mutex);
                if (!zev_attached) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }

                /* check "ctrl" queue to see if it is busy */
                q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
                if (q == NULL) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }
                if (q->zq_busy) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }
                /* are there any queues? */
                if (zev_queue_cnt > 0) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }

                zev_attached = B_FALSE;
                mutex_exit(&zev_mutex);

                /* switch ZFS event callbacks back to default */
                rw_enter(&rz_zev_rwlock, RW_WRITER);
                rz_zev_callbacks = rz_zev_default_callbacks;
                rw_exit(&rz_zev_rwlock);

                /* no thread is inside of the callbacks anymore. */

                /* free resources allocated for this instance */
                zev_free_instance(dip);
#if 0
                cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64,
                    zev_memory_allocated - zev_memory_freed);
#endif
                return (DDI_SUCCESS);
        case DDI_SUSPEND:
                /* kernel must not suspend zev devices while ZFS is running */
                return (DDI_FAILURE);
        default:
                return (DDI_FAILURE);
        }
}

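/*
 * Attach wires the module into ZFS: while rz_zev_callbacks points at
 * zev_callbacks, every ZFS event is delivered to this driver. Detach
 * restores rz_zev_default_callbacks before any resources are torn down,
 * so no callback can run into freed state.
 */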
static int
zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        /* called once per instance with DDI_ATTACH,
           may be called to resume */
        int instance;
        int error;
        zev_queue_t *q;
        switch (cmd) {
        case DDI_ATTACH:
                /* create instance state */
                instance = ddi_get_instance(dip);
                if (instance != 0) {  /* hardcoded in zev.conf */
                        /* this module only supports one instance. */
                        return (DDI_FAILURE);
                }

                mutex_enter(&zev_mutex);
                if (zev_attached) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }
                if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) !=
                    DDI_SUCCESS) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }
                ZEV_MEM_ADD(sizeof(zev_queue_t));
                zev_attached = B_TRUE;

                /* init queue list */
                bzero(&zev_queues, sizeof(zev_queues));
                mutex_exit(&zev_mutex);

                /* create a dummy queue for management of "ctrl" */
                q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
                q->zq_dip = dip;
                q->zq_refcnt = 1;
                q->zq_busy = B_FALSE;
                q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR;
                q->zq_flags = ZEV_FL_PERSISTENT;
                strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME);

                /* create device node for "ctrl" */
                if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME,
                    S_IFCHR, ZEV_CONTROL_DEVICE_MINOR,
                    DDI_PSEUDO, 0) == DDI_FAILURE) {
                        goto fail;
                }

                /* note: intentionally not adding ctrl queue to queue list. */

                /* default queue */
                error = zev_queue_new(&q, dip,
                    ZEV_DEFAULT_QUEUE_NAME,
                    ZEV_MAX_QUEUE_LEN,
                    ZEV_FL_BLOCK_WHILE_QUEUE_FULL |
                    ZEV_FL_PERSISTENT);
                if (error)
                        goto fail;

                /* start pollwakeup thread */
                zev_wakeup_thread_run = 1;
                zev_poll_wakeup_thread = thread_create(NULL, 0,
                    zev_poll_wakeup_thread_main, NULL, 0, &p0,
                    TS_RUN, minclsyspri);

                ddi_report_dev(dip);

                /* switch ZFS event callbacks to zev module callbacks */
                rw_enter(&rz_zev_rwlock, RW_WRITER);
                rz_zev_callbacks = &zev_callbacks;
                rw_exit(&rz_zev_rwlock);

                return (DDI_SUCCESS);
        case DDI_RESUME:
                /* suspending zev devices should never happen */
                return (DDI_SUCCESS);
        default:
                return (DDI_FAILURE);
        }
fail:
        cmn_err(CE_WARN, "zev: attach failed");
        zev_free_instance(dip);
        mutex_enter(&zev_mutex);
        zev_attached = B_FALSE;
        mutex_exit(&zev_mutex);
        return (DDI_FAILURE);
}

/* ARGSUSED */
static int
zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
{
        minor_t minor;
        zev_queue_t *q;

        /* arg is dev_t */
        minor = getminor((dev_t)arg);
        mutex_enter(&zev_mutex);
        q = ddi_get_soft_state(statep, minor);
        if (q == NULL) {
                *resultp = NULL;
                mutex_exit(&zev_mutex);
                return (DDI_FAILURE);
        }

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *resultp = q->zq_dip;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip);
                break;
        default:
                mutex_exit(&zev_mutex);
                return (DDI_FAILURE);
        }
        mutex_exit(&zev_mutex);
        return (DDI_SUCCESS);
}

static struct dev_ops zev_dev_ops = {
        DEVO_REV,                       /* driver build revision */
        0,                              /* driver reference count */
        zev_getinfo,                    /* getinfo */
        nulldev,                        /* identify (obsolete) */
        nulldev,                        /* probe (search for devices) */
        zev_attach,                     /* attach */
        zev_detach,                     /* detach */
        nodev,                          /* reset (obsolete, use quiesce) */
        &zev_cb_ops,                    /* character and block device ops */
        NULL,                           /* bus driver ops */
        NULL,                           /* power management, not needed */
        ddi_quiesce_not_needed,         /* quiesce */
};

static struct modldrv zev_modldrv = {
        &mod_driverops,                 /* all loadable modules use this */
        "zev ZFS event provider, v1.0", /* driver name and version info */
        &zev_dev_ops                    /* ops method pointers */
};

static struct modlinkage zev_modlinkage = {
        MODREV_1,                       /* fixed value */
        {
                &zev_modldrv,           /* driver linkage structure */
                NULL                    /* list terminator */
        }
};

int
_init(void)
{
        int error;

        if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0)
                return (error);
        zev_attached = B_FALSE;

        zev_queue_head = NULL;
        zev_queue_tail = NULL;
        zev_queue_len = 0;
        zev_muted_pools_head = NULL;
        zev_memory_allocated = 0;
        zev_memory_freed = 0;
        zev_queue_cnt = 0;

        mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL);
        cv_init(&zev_condvar, NULL, CV_DRIVER, NULL);
        rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL);
        mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL);
        zev_mark_id = gethrtime();
        mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL);
        zev_msg_sequence_number = gethrtime();
        bzero(&zev_statistics, sizeof(zev_statistics));
        bzero(&zev_pollhead, sizeof(zev_pollhead));
        bzero(&zev_queues, sizeof(zev_queues));
        zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN;
        if ((error = zev_ioc_mute_pool("zg0")) != 0) {
                cmn_err(CE_WARN, "zev: could not init mute list");
                goto FAIL;
        }

        if ((error = mod_install(&zev_modlinkage)) != 0) {
                cmn_err(CE_WARN, "zev: could not install module");
                goto FAIL;
        }

        return (0);
FAIL:
        /* free resources */
        cmn_err(CE_WARN, "zev: _init failed");
        mutex_destroy(&zev_queue_msg_mutex);
        mutex_destroy(&zev_mark_id_mutex);
        rw_destroy(&zev_pool_list_rwlock);
        cv_destroy(&zev_condvar);
        mutex_destroy(&zev_mutex);
        ddi_soft_state_fini(&statep);
        return (error);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&zev_modlinkage, modinfop));
}

int
_fini(void)
{
        int error = 0;
        zev_msg_t *msg;
        zev_pool_list_entry_t *pe, *npe;

        mutex_enter(&zev_mutex);
        if (zev_attached == B_TRUE) {
                mutex_exit(&zev_mutex);
                return (SET_ERROR(EBUSY));
        }
        if (zev_queue_cnt != 0) {
                /* should never happen */
                mutex_exit(&zev_mutex);
                return (SET_ERROR(EBUSY));
        }

        /*
         * avoid deadlock if event list is full: make sure threads currently
         * blocking on the event list can append their event and then release
         * rz_zev_rwlock.  Since there should be no queues left when we
         * reach this point we can simply empty the event list and then
         * wake everybody.
         */
        while (zev_queue_head) {
                msg = zev_queue_head;
                zev_queue_head = msg->next;
                /* adjust accounting so blocked writers can pass */
                zev_statistics.zev_queue_len -= msg->size;
                zev_queue_len--;
                ZEV_FREE(msg, sizeof(*msg) + msg->size);
        }
        zev_queue_tail = NULL;
        cv_broadcast(&zev_condvar);
        mutex_exit(&zev_mutex);

        /* switch ZFS event callbacks back to default (again) */
        rw_enter(&rz_zev_rwlock, RW_WRITER);
        rz_zev_callbacks = rz_zev_default_callbacks;
        rw_exit(&rz_zev_rwlock);

        /* no thread is inside of the callbacks anymore.  Safe to remove. */

        /* unload module callbacks */
        if ((error = mod_remove(&zev_modlinkage)) != 0) {
                cmn_err(CE_WARN, "mod_remove failed: %d", error);
                return (error);
        }

        /* free resources */
        mutex_enter(&zev_mutex);
        while (zev_queue_head) {
                msg = zev_queue_head;
                zev_queue_head = msg->next;
                ZEV_FREE(msg, sizeof(*msg) + msg->size);
        }
        mutex_exit(&zev_mutex);
        rw_enter(&zev_pool_list_rwlock, RW_WRITER);
        pe = zev_muted_pools_head;
        while (pe) {
                npe = pe;
                pe = pe->next;
                ZEV_FREE(npe, sizeof(*npe));
        }
        rw_exit(&zev_pool_list_rwlock);
        ddi_soft_state_fini(&statep);
        rw_destroy(&zev_pool_list_rwlock);
        cv_destroy(&zev_condvar);
        mutex_destroy(&zev_mutex);
        mutex_destroy(&zev_mark_id_mutex);
        mutex_destroy(&zev_queue_msg_mutex);

        return (0);
}