1 #include <sys/modctl.h> 2 #include <sys/ddi.h> 3 #include <sys/sunddi.h> 4 #include <sys/conf.h> 5 #include <sys/devops.h> 6 #include <sys/stat.h> 7 #include <sys/fs/zev.h> 8 #include <sys/zev_callbacks.h> 9 #include <sys/zfs_znode.h> 10 #include <sys/time.h> 11 #include <sys/sa.h> 12 #include <sys/zap.h> 13 #include <sys/time.h> 14 15 typedef struct zev_state { 16 kmutex_t mutex; 17 dev_info_t *dip; 18 boolean_t busy; 19 } zev_state_t; 20 21 static void *statep; 22 struct pollhead zev_pollhead; 23 24 kmutex_t zev_mutex; 25 kcondvar_t zev_condvar; 26 krwlock_t zev_pool_list_rwlock; 27 static zev_statistics_t zev_statistics; 28 static boolean_t zev_busy; 29 static kmutex_t zev_mark_id_mutex; 30 static uint64_t zev_mark_id = 0; 31 32 /* 33 * The longest potential message is from zev_zfs_mount() and 34 * contains the mountpoint, which might be close to MAXPATHLEN bytes long. 35 * 36 * Another candidate is zev_znode_rename_cb() and contains three inode 37 * numbers and two filenames of up to MAXNAMELEN bytes each. 38 */ 39 #define ZEV_MAX_MESSAGE_LEN 4096 40 41 /* If the queue size reaches 1GB, stop ZFS ops and block the threads. */ 42 #define ZEV_MAX_QUEUE_LEN (1 * 1024 * 1024 * 1024) 43 44 /* Don't wake up poll()ing processes for every single message. */ 45 #define ZEV_MIN_POLL_WAKEUP_QUEUE_LEN 8192 46 47 static zev_msg_t *zev_queue_head = NULL; 48 static zev_msg_t *zev_queue_tail = NULL; 49 static uint64_t zev_queue_len = 0; 50 51 52 typedef struct zev_pool_list_entry { 53 struct zev_pool_list_entry *next; 54 char name[MAXPATHLEN]; 55 } zev_pool_list_entry_t; 56 57 static zev_pool_list_entry_t *zev_muted_pools_head = NULL; 58 59 /* 60 * poll() wakeup thread. Used to check periodically whether we have 61 * bytes left in the queue that have not yet been made into a 62 * pollwakeup() call. This is meant to insure a maximum waiting 63 * time until an event is presented as a poll wakeup, while at 64 * the same time not making every single event into a poll wakeup 65 * of it's own. 66 */ 67 68 static volatile int zev_wakeup_thread_run = 1; 69 static kthread_t *zev_poll_wakeup_thread = NULL; 70 71 static void 72 zev_poll_wakeup_thread_main(void) 73 { 74 int wakeup; 75 while (zev_wakeup_thread_run) { 76 delay(drv_usectohz(100 * 1000)); /* sleep 100ms */ 77 /* check message queue */ 78 mutex_enter(&zev_mutex); 79 wakeup = 0; 80 if (zev_queue_head) 81 wakeup = 1; 82 mutex_exit(&zev_mutex); 83 if (wakeup) 84 pollwakeup(&zev_pollhead, POLLIN); 85 } 86 thread_exit(); 87 } 88 89 static int 90 zev_ioc_mute_pool(char *poolname) 91 { 92 zev_pool_list_entry_t *pe; 93 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 94 /* pool already muted? */ 95 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 96 if (!strcmp(pe->name, poolname)) { 97 rw_exit(&zev_pool_list_rwlock); 98 return EEXIST; 99 } 100 } 101 pe = kmem_zalloc(sizeof(*pe), KM_SLEEP); 102 if (!pe) { 103 rw_exit(&zev_pool_list_rwlock); 104 return ENOMEM; 105 } 106 (void) strncpy(pe->name, poolname, sizeof(pe->name)); 107 pe->next = zev_muted_pools_head; 108 zev_muted_pools_head = pe; 109 rw_exit(&zev_pool_list_rwlock); 110 return (0); 111 } 112 113 static int 114 zev_ioc_unmute_pool(char *poolname) 115 { 116 zev_pool_list_entry_t *pe, *peprev; 117 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 118 /* pool muted? */ 119 peprev = NULL; 120 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 121 if (!strcmp(pe->name, poolname)) { 122 goto found; 123 } 124 peprev = pe; 125 } 126 rw_exit(&zev_pool_list_rwlock); 127 return ENOENT; 128 found: 129 if (peprev != NULL) { 130 peprev->next = pe->next; 131 } else { 132 zev_muted_pools_head = pe->next; 133 } 134 kmem_free(pe, sizeof(*pe)); 135 rw_exit(&zev_pool_list_rwlock); 136 return (0); 137 } 138 139 int 140 zev_skip_pool(objset_t *os) 141 { 142 zev_pool_list_entry_t *pe; 143 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 144 rw_enter(&zev_pool_list_rwlock, RW_READER); 145 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 146 if (!strcmp(pe->name, dp->dp_spa->spa_name)) { 147 rw_exit(&zev_pool_list_rwlock); 148 return 1; 149 } 150 } 151 rw_exit(&zev_pool_list_rwlock); 152 return 0; 153 } 154 155 void 156 zev_queue_message(int op, zev_msg_t *msg) 157 { 158 int wakeup = 0; 159 160 msg->next = NULL; 161 162 if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) { 163 zev_queue_error(op, "unknown op id encountered: %d", op); 164 kmem_free(msg, sizeof(*msg) + msg->size); 165 return; 166 } 167 168 mutex_enter(&zev_mutex); 169 while (zev_statistics.zev_max_queue_len && 170 zev_statistics.zev_queue_len >= zev_statistics.zev_max_queue_len) { 171 /* queue full. block until it's been shrunk. */ 172 cv_wait(&zev_condvar, &zev_mutex); 173 } 174 175 if (zev_queue_tail == NULL) { 176 zev_queue_head = zev_queue_tail = msg; 177 } else { 178 zev_queue_tail->next = msg; 179 zev_queue_tail = msg; 180 } 181 zev_queue_len++; 182 183 /* update statistics */ 184 zev_statistics.zev_cnt_total_events++; 185 zev_statistics.zev_queue_len += msg->size; 186 if (zev_statistics.zev_queue_len > 187 zev_statistics.zev_poll_wakeup_queue_len) 188 wakeup = 1; 189 switch (op) { 190 case ZEV_OP_ERROR: 191 zev_statistics.zev_cnt_errors++; 192 break; 193 case ZEV_OP_MARK: 194 zev_statistics.zev_cnt_marks++; 195 break; 196 case ZEV_OP_ZFS_MOUNT: 197 zev_statistics.zev_cnt_zfs_mount++; 198 break; 199 case ZEV_OP_ZFS_UMOUNT: 200 zev_statistics.zev_cnt_zfs_umount++; 201 break; 202 case ZEV_OP_ZVOL_WRITE: 203 zev_statistics.zev_cnt_zvol_write++; 204 break; 205 case ZEV_OP_ZVOL_TRUNCATE: 206 zev_statistics.zev_cnt_zvol_truncate++; 207 break; 208 case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE: 209 zev_statistics.zev_cnt_znode_close_after_update++; 210 break; 211 case ZEV_OP_ZNODE_CREATE: 212 zev_statistics.zev_cnt_znode_create++; 213 break; 214 case ZEV_OP_ZNODE_REMOVE: 215 zev_statistics.zev_cnt_znode_remove++; 216 break; 217 case ZEV_OP_ZNODE_LINK: 218 zev_statistics.zev_cnt_znode_link++; 219 break; 220 case ZEV_OP_ZNODE_SYMLINK: 221 zev_statistics.zev_cnt_znode_symlink++; 222 break; 223 case ZEV_OP_ZNODE_RENAME: 224 zev_statistics.zev_cnt_znode_rename++; 225 break; 226 case ZEV_OP_ZNODE_WRITE: 227 zev_statistics.zev_cnt_znode_write++; 228 break; 229 case ZEV_OP_ZNODE_TRUNCATE: 230 zev_statistics.zev_cnt_znode_truncate++; 231 break; 232 case ZEV_OP_ZNODE_SETATTR: 233 zev_statistics.zev_cnt_znode_setattr++; 234 break; 235 case ZEV_OP_ZNODE_ACL: 236 zev_statistics.zev_cnt_znode_acl++; 237 break; 238 } 239 mutex_exit(&zev_mutex); 240 241 /* chpoll event, if necessary. */ 242 if (wakeup) 243 pollwakeup(&zev_pollhead, POLLIN); 244 245 return; 246 } 247 248 void 249 zev_queue_error(int op, char *fmt, ...) 250 { 251 char buf[ZEV_MAX_MESSAGE_LEN]; 252 va_list ap; 253 int len; 254 zev_msg_t *msg = NULL; 255 zev_error_t *rec; 256 int msg_size; 257 258 va_start(ap, fmt); 259 len = vsnprintf(buf, sizeof(buf), fmt, ap); 260 va_end(ap); 261 if (len >= sizeof(buf)) { 262 cmn_err(CE_WARN, "zev: can't report error - " 263 "dropping event entirely."); 264 return; 265 } 266 267 msg_size = sizeof(*rec) + len + 1; 268 msg = kmem_alloc(sizeof(*msg) + msg_size, KM_SLEEP); 269 msg->size = msg_size; 270 rec = (zev_error_t *)(msg + 1); 271 rec->record_len = msg_size; 272 rec->op = ZEV_OP_ERROR; 273 rec->op_time = ddi_get_time(); 274 rec->guid = 0; 275 rec->failed_op = op; 276 rec->errstr_len = len; 277 (void) memcpy(ZEV_ERRSTR(rec), buf, len + 1); 278 279 zev_queue_message(ZEV_OP_ERROR, msg); 280 return; 281 } 282 283 int 284 zev_ioc_get_gen(intptr_t arg, int mode) 285 { 286 zev_ioctl_get_gen_t gg; 287 file_t *fp; 288 uint64_t gen; 289 uint64_t crtime[2]; 290 uint64_t dummy; 291 int ret = 0; 292 zfsvfs_t *zfsvfs; 293 objset_t *osp; 294 sa_attr_type_t *sa_table; 295 sa_handle_t *hdl; 296 dmu_buf_t *db; 297 sa_bulk_attr_t bulk[4]; 298 int count = 0; 299 dmu_object_info_t doi; 300 dsl_pool_t *dp; 301 timestruc_t crtime_s; 302 303 if (ddi_copyin((void *)arg, &gg, sizeof(gg), mode) != 0) 304 return EFAULT; 305 fp = getf(gg.fd); 306 if (fp == NULL) 307 return EBADF; 308 if (fp->f_vnode->v_vfsp->vfs_fstype != zfsfstype) { 309 ret = EINVAL; 310 goto out; 311 } 312 zfsvfs = (zfsvfs_t *)(fp->f_vnode->v_vfsp->vfs_data); 313 osp = zfsvfs->z_os; 314 dsl_dataset_name(zfsvfs->z_os->os_dsl_dataset, gg.dataset); 315 /* get object attributes */ 316 ret = sa_setup(osp, gg.inode, zfs_attr_table, ZPL_END, &sa_table); 317 if (ret) 318 goto out; 319 ret = sa_buf_hold(osp, gg.inode, FTAG, &db); 320 if (ret) 321 goto out; 322 dmu_object_info_from_db(db, &doi); 323 if ((doi.doi_bonus_type != DMU_OT_SA && 324 doi.doi_bonus_type != DMU_OT_ZNODE) || 325 doi.doi_bonus_type == DMU_OT_ZNODE && 326 doi.doi_bonus_size < sizeof (znode_phys_t)) { 327 sa_buf_rele(db, FTAG); 328 ret = ENOTSUP; 329 goto out; 330 } 331 ret = sa_handle_get(osp, gg.inode, NULL, SA_HDL_PRIVATE, &hdl); 332 if (ret) { 333 sa_buf_rele(db, FTAG); 334 goto out; 335 } 336 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 337 &dummy, sizeof(dummy)); 338 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, 339 &gen, sizeof(gen)); 340 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, 341 &dummy, sizeof(dummy)); 342 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CRTIME], NULL, 343 &crtime, sizeof(crtime)); 344 ret = sa_bulk_lookup(hdl, bulk, count); 345 sa_handle_destroy(hdl); 346 sa_buf_rele(db, FTAG); 347 if (ret) 348 goto out; 349 dp = osp->os_dsl_dataset->ds_dir->dd_pool; 350 ZFS_TIME_DECODE(&crtime_s, crtime); 351 gg.generation = gen; 352 gg.crtime = crtime_s.tv_sec; 353 gg.guid = zfsvfs->z_os->os_dsl_dataset->ds_phys->ds_guid; 354 ddi_copyout(&gg, (void *)arg, sizeof(gg), mode); 355 out: 356 releasef(gg.fd); 357 return ret; 358 } 359 360 /* ARGSUSED */ 361 static int 362 zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 363 { 364 int instance; 365 zev_statistics_t zs; 366 zev_ioctl_poolarg_t pa; 367 zev_ioctl_mark_t mark; 368 zev_mark_t *rec; 369 int msg_size; 370 zev_msg_t *msg; 371 uint64_t len; 372 uint64_t mark_id; 373 374 instance = getminor(dev); 375 if (ddi_get_soft_state(statep, instance) == NULL) 376 return (ENXIO); 377 /* 378 * all structures passed between kernel and userspace 379 * are now compatible between 64 and 32 bit. Model 380 * conversion can be ignore. 381 */ 382 #if 0 383 /* Remember to do 32/64 bit mode adjustments if 384 necessary. See "Writing Device Drivers", 280pp */ 385 if (ddi_model_convert_from(mode) != DDI_MODEL_NONE) { 386 /* userland has another data model. (most 387 likely 32-bit) -> not supported. */ 388 return (EINVAL); 389 } 390 #endif 391 switch (cmd) { 392 case ZEV_IOC_GET_STATISTICS: 393 /* ddi_copyout() can take a long time. Better make 394 a copy to be able to release the mutex faster. */ 395 mutex_enter(&zev_mutex); 396 (void) memcpy(&zs, &zev_statistics, sizeof(zs)); 397 mutex_exit(&zev_mutex); 398 if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0) 399 return EFAULT; 400 break; 401 case ZEV_IOC_MUTE_POOL: 402 case ZEV_IOC_UNMUTE_POOL: 403 if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) 404 return EFAULT; 405 if (pa.zev_poolname_len >=MAXPATHLEN) 406 return EINVAL; 407 pa.zev_poolname[pa.zev_poolname_len] = '\0'; 408 if (cmd == ZEV_IOC_MUTE_POOL) { 409 return zev_ioc_mute_pool(pa.zev_poolname); 410 } else { 411 return zev_ioc_unmute_pool(pa.zev_poolname); 412 } 413 case ZEV_IOC_SET_MAX_QUEUE_LEN: 414 if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) 415 return EFAULT; 416 if (len > ZEV_MAX_QUEUE_LEN) 417 return EINVAL; 418 mutex_enter(&zev_mutex); 419 zev_statistics.zev_max_queue_len = len; 420 cv_broadcast(&zev_condvar); 421 mutex_exit(&zev_mutex); 422 break; 423 case ZEV_IOC_SET_POLL_WAKEUP_QUEUE_LEN: 424 if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) 425 return EFAULT; 426 mutex_enter(&zev_mutex); 427 zev_statistics.zev_poll_wakeup_queue_len = len; 428 mutex_exit(&zev_mutex); 429 break; 430 case ZEV_IOC_MARK: 431 if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) 432 return EFAULT; 433 cmn_err(CE_WARN, "mark: guid=%lu payload_len=%d", (long unsigned int)mark.zev_guid, mark.zev_payload_len); 434 /* prepare message */ 435 msg_size = sizeof(*rec) + mark.zev_payload_len + 1; 436 msg = kmem_alloc(sizeof(*msg) + msg_size, KM_SLEEP); 437 msg->size = msg_size; 438 rec = (zev_mark_t *)(msg + 1); 439 rec->record_len = msg_size; 440 rec->op = ZEV_OP_MARK; 441 rec->op_time = ddi_get_time(); 442 rec->guid = mark.zev_guid; 443 rec->payload_len = mark.zev_payload_len; 444 /* get payload */ 445 if (ddi_copyin(((char *)arg) + sizeof(mark), 446 ZEV_PAYLOAD(rec), 447 mark.zev_payload_len, mode) != 0) { 448 kmem_free(msg, msg_size); 449 return EFAULT; 450 } 451 *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0'; 452 /* get mark id and queue message */ 453 mutex_enter(&zev_mark_id_mutex); 454 mark_id = zev_mark_id++; 455 mutex_exit(&zev_mark_id_mutex); 456 rec->mark_id = mark_id; 457 zev_queue_message(ZEV_OP_MARK, msg); 458 /* report mark id to userland, ignore errors */ 459 mark.zev_mark_id = mark_id; 460 ddi_copyout(&mark, (void *)arg, sizeof(mark), mode); 461 break; 462 case ZEV_IOC_GET_GEN: 463 return zev_ioc_get_gen(arg, mode); 464 default: 465 /* generic "ioctl unknown" error */ 466 return ENOTTY; 467 } 468 return (0); 469 } 470 471 static int 472 zev_chpoll(dev_t dev, short events, int anyyet, 473 short *reventsp, struct pollhead **phpp) 474 { 475 int instance; 476 short revent = 0; 477 478 instance = getminor(dev); 479 if (ddi_get_soft_state(statep, instance) == NULL) 480 return (ENXIO); 481 revent = 0; 482 if ((events & POLLIN)) { 483 mutex_enter(&zev_mutex); 484 if (zev_queue_head) 485 revent |= POLLIN; 486 mutex_exit(&zev_mutex); 487 } 488 if (revent == 0) { 489 if (!anyyet) { 490 *phpp = &zev_pollhead; 491 } 492 } 493 *reventsp = revent; 494 return (0); 495 } 496 497 /* ARGSUSED */ 498 static int 499 zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p) 500 { 501 int instance; 502 offset_t off; 503 int ret = 0; 504 zev_msg_t *msg; 505 char *data; 506 507 instance = getminor(dev); 508 if (ddi_get_soft_state(statep, instance) == NULL) 509 return (ENXIO); 510 off = uio_p->uio_loffset; 511 mutex_enter(&zev_mutex); 512 msg = zev_queue_head; 513 if (msg == NULL) { 514 mutex_exit(&zev_mutex); 515 return 0; 516 } 517 if (msg->size > uio_p->uio_resid) { 518 mutex_exit(&zev_mutex); 519 return E2BIG; 520 } 521 while (msg && uio_p->uio_resid >= msg->size) { 522 data = (char *)(msg + 1); 523 ret = uiomove(data, msg->size, UIO_READ, uio_p); 524 if (ret != 0) { 525 mutex_exit(&zev_mutex); 526 cmn_err(CE_WARN, "zev: uiomove failed; messages lost"); 527 uio_p->uio_loffset = off; 528 return (ret); 529 } 530 zev_queue_head = msg->next; 531 if (zev_queue_head == NULL) 532 zev_queue_tail = NULL; 533 zev_statistics.zev_bytes_read += msg->size; 534 zev_statistics.zev_queue_len -= msg->size; 535 zev_queue_len--; 536 kmem_free(msg, sizeof(*msg) + msg->size); 537 msg = zev_queue_head; 538 } 539 cv_broadcast(&zev_condvar); 540 mutex_exit(&zev_mutex); 541 uio_p->uio_loffset = off; 542 return 0; 543 } 544 545 /* ARGSUSED */ 546 static int 547 zev_close(dev_t dev, int flag, int otyp, cred_t *crepd) 548 { 549 zev_state_t *sp; 550 int instance; 551 552 instance = getminor(dev); 553 if ((sp = ddi_get_soft_state(statep, instance)) == NULL) 554 return (ENXIO); 555 if (otyp != OTYP_CHR) 556 return (EINVAL); 557 mutex_enter(&sp->mutex); 558 if (sp->busy != B_TRUE) { 559 mutex_exit(&sp->mutex); 560 return (EINVAL); 561 } 562 sp->busy = B_FALSE; 563 mutex_exit(&sp->mutex); 564 return (0); 565 } 566 567 /* ARGSUSED */ 568 static int 569 zev_open(dev_t *devp, int flag, int otyp, cred_t *credp) 570 { 571 zev_state_t *sp; 572 int instance; 573 574 instance = getminor(*devp); 575 if ((sp = ddi_get_soft_state(statep, instance)) == NULL) 576 return (ENXIO); 577 if (otyp != OTYP_CHR) 578 return (EINVAL); 579 if (drv_priv(credp) != 0) 580 return (EPERM); 581 mutex_enter(&sp->mutex); 582 if (sp->busy == B_TRUE) { 583 /* XXX: wait for the instance to become available? */ 584 /* XXX: if we wait, the wait should be signal-interruptable. */ 585 mutex_exit(&sp->mutex); 586 return (EBUSY); 587 } 588 sp->busy = B_TRUE; /* can only be opened exclusively */ 589 mutex_exit(&sp->mutex); 590 return (0); 591 } 592 593 static struct cb_ops zev_cb_ops = { 594 zev_open, /* open */ 595 zev_close, /* close */ 596 nodev, /* strategy */ 597 nodev, /* print */ 598 nodev, /* dump */ 599 zev_read, /* read */ 600 nodev, /* write */ 601 zev_ioctl, /* ioctl */ 602 nodev, /* devmap */ 603 nodev, /* mmap */ 604 nodev, /* segmap */ 605 zev_chpoll, /* chpoll */ 606 ddi_prop_op, /* prop_op */ 607 NULL, /* streamtab */ 608 D_MP | D_64BIT, /* cb_flag */ 609 CB_REV, /* cb_rev */ 610 nodev, /* aread */ 611 nodev, /* awrite */ 612 }; 613 614 static void 615 zev_free_instance(dev_info_t *dip) 616 { 617 int instance; 618 zev_state_t *sp; 619 instance = ddi_get_instance(dip); 620 //ddi_remove_minor_node(dip, ddi_get_name(dip)); 621 ddi_remove_minor_node(dip, NULL); 622 sp = ddi_get_soft_state(statep, instance); 623 if (sp) { 624 mutex_destroy(&sp->mutex); 625 ddi_soft_state_free(statep, instance); 626 } 627 } 628 629 static int 630 zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 631 { 632 int instance; 633 zev_state_t *sp; 634 /* called once per instance with DDI_DETACH, 635 may be called to suspend */ 636 switch (cmd) { 637 case DDI_DETACH: 638 /* instance busy? */ 639 instance = ddi_get_instance(dip); 640 if ((sp = ddi_get_soft_state(statep, instance)) == NULL) 641 return (DDI_FAILURE); 642 mutex_enter(&sp->mutex); 643 if (sp->busy == B_TRUE) { 644 mutex_exit(&sp->mutex); 645 return (DDI_FAILURE); 646 } 647 mutex_exit(&sp->mutex); 648 /* free resources allocated for this instance */ 649 zev_free_instance(dip); 650 return (DDI_SUCCESS); 651 case DDI_SUSPEND: 652 /* kernel must not suspend zev devices while ZFS is running */ 653 return (DDI_FAILURE); 654 default: 655 return (DDI_FAILURE); 656 } 657 } 658 659 static int 660 zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 661 { 662 /* called once per instance with DDI_ATTACH, 663 may be called to resume */ 664 int instance; 665 zev_state_t *sp; 666 switch (cmd) { 667 case DDI_ATTACH: 668 instance = ddi_get_instance(dip); 669 if (ddi_soft_state_zalloc(statep, instance) != DDI_SUCCESS) { 670 return (DDI_FAILURE); 671 } 672 sp = ddi_get_soft_state(statep, instance); 673 ddi_set_driver_private(dip, sp); 674 sp->dip = dip; 675 sp->busy = B_FALSE; 676 mutex_init(&sp->mutex, NULL, MUTEX_DRIVER, NULL); 677 if (ddi_create_minor_node(dip, ddi_get_name(dip), 678 S_IFCHR, instance, DDI_PSEUDO, 0) == DDI_FAILURE) { 679 zev_free_instance(dip); 680 return (DDI_FAILURE); 681 } 682 ddi_report_dev(dip); 683 return (DDI_SUCCESS); 684 case DDI_RESUME: 685 /* suspendeding zev devices should never happen */ 686 return (DDI_SUCCESS); 687 default: 688 return (DDI_FAILURE); 689 } 690 } 691 692 /* ARGSUSED */ 693 static int 694 zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp) 695 { 696 int instance; 697 zev_state_t *sp; 698 switch (infocmd) { 699 case DDI_INFO_DEVT2DEVINFO: 700 /* arg is dev_t */ 701 instance = getminor((dev_t)arg); 702 if ((sp = ddi_get_soft_state(statep, instance)) != NULL) { 703 *resultp = sp->dip; 704 return (DDI_SUCCESS); 705 } 706 *resultp = NULL; 707 return (DDI_FAILURE); 708 case DDI_INFO_DEVT2INSTANCE: 709 /* arg is dev_t */ 710 instance = getminor((dev_t)arg); 711 *resultp = (void *)(uintptr_t)instance; 712 return (DDI_FAILURE); 713 } 714 return (DDI_FAILURE); 715 } 716 717 static struct dev_ops zev_dev_ops = { 718 DEVO_REV, /* driver build revision */ 719 0, /* driver reference count */ 720 zev_getinfo, /* getinfo */ 721 nulldev, /* identify (obsolete) */ 722 nulldev, /* probe (search for devices) */ 723 zev_attach, /* attach */ 724 zev_detach, /* detach */ 725 nodev, /* reset (obsolete, use quiesce) */ 726 &zev_cb_ops, /* character and block device ops */ 727 NULL, /* bus driver ops */ 728 NULL, /* power management, not needed */ 729 ddi_quiesce_not_needed, /* quiesce */ 730 }; 731 732 static struct modldrv zev_modldrv = { 733 &mod_driverops, /* all loadable modules use this */ 734 "zev ZFS event provider, v1.0", /* driver name and version info */ 735 &zev_dev_ops /* ops method pointers */ 736 }; 737 738 static struct modlinkage zev_modlinkage = { 739 MODREV_1, /* fixed value */ 740 { 741 &zev_modldrv, /* driver linkage structure */ 742 NULL /* list terminator */ 743 } 744 }; 745 746 int 747 _init(void) 748 { 749 int error; 750 boolean_t module_installed = B_FALSE; 751 752 if ((error = ddi_soft_state_init(&statep, sizeof(zev_state_t), 1)) != 0) 753 return (error); 754 zev_busy = B_FALSE; 755 756 mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL); 757 cv_init(&zev_condvar, NULL, CV_DRIVER, NULL); 758 rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL); 759 mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL); 760 zev_mark_id = gethrtime(); 761 bzero(&zev_statistics, sizeof(zev_statistics)); 762 zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN; 763 zev_statistics.zev_poll_wakeup_queue_len = 764 ZEV_MIN_POLL_WAKEUP_QUEUE_LEN; 765 if (zev_ioc_mute_pool("zg0")) { 766 cmn_err(CE_WARN, "zev: could not init mute list"); 767 goto FAIL; 768 } 769 770 if ((error = mod_install(&zev_modlinkage)) != 0) { 771 cmn_err(CE_WARN, "zev: could not install module"); 772 goto FAIL; 773 } 774 module_installed = B_TRUE; 775 776 /* 777 * Note: _init() seems to be a bad place to access other modules' 778 * device files, as it can cause a kernel panic. 779 * 780 * For example, our _init() is called if our module isn't loaded 781 * when someone causes a readdir() in "/devices/pseudo". For that, 782 * devfs_readdir() is used, which obtains an rwlock for the 783 * directory. 784 * 785 * Then, if we open a device file here, we will indirectly call 786 * devfs_lookup(), which tries to obtain the same rwlock 787 * again, which this thread already has. That will result in 788 * a kernel panic. ("recursive entry") 789 * 790 * Therefor, we have switched from a zfs ioctl() to directly 791 * accessing symbols in the zfs module. 792 */ 793 794 /* switch ZFS event callbacks to zev module callback functions */ 795 rw_enter(&rz_zev_rwlock, RW_WRITER); 796 rz_zev_callbacks = &zev_callbacks; 797 rw_exit(&rz_zev_rwlock); 798 799 zev_poll_wakeup_thread = thread_create(NULL, 0, 800 zev_poll_wakeup_thread_main, NULL, 0, &p0, TS_RUN, minclsyspri); 801 return (0); 802 FAIL: 803 /* free resources */ 804 if (module_installed == B_TRUE) 805 (void) mod_remove(&zev_modlinkage); 806 mutex_destroy(&zev_mutex); 807 ddi_soft_state_fini(&statep); 808 return (error); 809 } 810 811 int 812 _info(struct modinfo *modinfop) 813 { 814 return (mod_info(&zev_modlinkage, modinfop)); 815 } 816 817 int 818 _fini(void) 819 { 820 int error = 0; 821 zev_msg_t *msg; 822 zev_pool_list_entry_t *pe, *npe; 823 824 mutex_enter(&zev_mutex); 825 if (zev_busy == B_TRUE) { 826 mutex_exit(&zev_mutex); 827 return (SET_ERROR(EBUSY)); 828 } 829 mutex_exit(&zev_mutex); 830 831 /* switch ZFS event callbacks back to default */ 832 rw_enter(&rz_zev_rwlock, RW_WRITER); 833 rz_zev_callbacks = rz_zev_default_callbacks; 834 rw_exit(&rz_zev_rwlock); 835 836 /* no thread is inside of the callbacks anymore. Safe to remove. */ 837 zev_wakeup_thread_run = 0; 838 if (zev_poll_wakeup_thread != 0) { 839 thread_join(zev_poll_wakeup_thread->t_did); 840 zev_poll_wakeup_thread = 0; 841 } 842 if ((error = mod_remove(&zev_modlinkage)) != 0) { 843 cmn_err(CE_WARN, "mod_remove failed: %d", error); 844 return (error); 845 } 846 847 /* free resources */ 848 mutex_enter(&zev_mutex); 849 while (zev_queue_head) { 850 msg = zev_queue_head; 851 zev_queue_head = msg->next; 852 if (msg) 853 kmem_free(msg, sizeof(*msg) + msg->size); 854 } 855 mutex_exit(&zev_mutex); 856 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 857 pe = zev_muted_pools_head; 858 while (pe) { 859 npe = pe; 860 pe = pe->next; 861 kmem_free(npe, sizeof(*npe)); 862 } 863 rw_exit(&zev_pool_list_rwlock); 864 ddi_soft_state_fini(&statep); 865 rw_destroy(&zev_pool_list_rwlock); 866 cv_destroy(&zev_condvar); 867 mutex_destroy(&zev_mutex); 868 mutex_destroy(&zev_mark_id_mutex); 869 870 return (0); 871 } 872 873