1 #include <sys/modctl.h> 2 #include <sys/ddi.h> 3 #include <sys/sunddi.h> 4 #include <sys/conf.h> 5 #include <sys/devops.h> 6 #include <sys/stat.h> 7 #include <sys/fs/zev.h> 8 #include <sys/zev_callbacks.h> 9 #include <sys/zev_checksums.h> 10 #include <sys/zfs_znode.h> 11 #include <sys/time.h> 12 #include <sys/sa.h> 13 #include <sys/zap.h> 14 #include <sys/time.h> 15 16 #define OFFSETOF(s, m) ((size_t)(&(((s *)0)->m))) 17 18 #define ZEV_DEFAULT_QUEUE_NAME "beaver" 19 #define ZEV_CONTROL_DEVICE_MINOR 0 20 #define ZEV_MINOR_MIN (ZEV_CONTROL_DEVICE_MINOR + 1) 21 #define ZEV_MINOR_MAX (ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1) 22 23 typedef struct zev_queue { 24 char zq_name[ZEV_MAX_QUEUE_NAME_LEN+1]; 25 minor_t zq_minor_number; 26 dev_info_t *zq_dip; 27 struct pollhead zq_pollhead; 28 uint64_t zq_bytes_read; 29 uint64_t zq_events_read; 30 uint64_t zq_bytes_discarded; 31 uint64_t zq_events_discarded; 32 uint64_t zq_bytes_total; 33 uint64_t zq_events_total; 34 uint64_t zq_wakeup_threshold; 35 uint16_t zq_flags; 36 uint16_t zq_need_wakeup; 37 /* protected by zev_mutex */ 38 int zq_refcnt; 39 uint64_t zq_queue_len; 40 uint64_t zq_queue_messages; 41 uint64_t zq_max_queue_len; 42 zev_msg_t *zq_oldest; 43 boolean_t zq_busy; 44 boolean_t zq_to_be_removed; 45 zev_statistics_t zq_statistics; 46 kcondvar_t zq_condvar; 47 } zev_queue_t; 48 49 static void *statep; 50 struct pollhead zev_pollhead; 51 52 kmutex_t zev_mutex; 53 kcondvar_t zev_condvar; 54 kmutex_t zev_queue_msg_mutex; 55 krwlock_t zev_pool_list_rwlock; 56 static zev_statistics_t zev_statistics; 57 static boolean_t zev_attached; 58 static kmutex_t zev_mark_id_mutex; 59 static uint64_t zev_mark_id = 0; 60 61 static uint64_t zev_msg_sequence_number = 0; 62 static zev_queue_t *zev_queues[ZEV_MAX_QUEUES]; 63 static int zev_queue_cnt = 0; 64 65 uint64_t zev_memory_allocated = 0; 66 uint64_t zev_memory_freed = 0; 67 68 /* 69 * The longest potential message is from zev_zfs_mount() and 70 * contains the mountpoint, which might be close to MAXPATHLEN bytes long. 71 * 72 * Another candidate is zev_znode_rename_cb() and contains three inode 73 * numbers and two filenames of up to MAXNAMELEN bytes each. 74 */ 75 #define ZEV_MAX_MESSAGE_LEN 4096 76 77 static zev_msg_t *zev_queue_head = NULL; 78 static zev_msg_t *zev_queue_tail = NULL; 79 static uint64_t zev_queue_len = 0; 80 81 82 typedef struct zev_pool_list_entry { 83 struct zev_pool_list_entry *next; 84 char name[MAXPATHLEN]; 85 } zev_pool_list_entry_t; 86 87 static zev_pool_list_entry_t *zev_muted_pools_head = NULL; 88 89 static volatile int zev_wakeup_thread_run = 1; 90 static kthread_t *zev_poll_wakeup_thread = NULL; 91 92 void * 93 zev_alloc(ssize_t sz) 94 { 95 ZEV_MEM_ADD(sz); 96 return kmem_alloc(sz, KM_SLEEP); 97 } 98 99 void * 100 zev_zalloc(ssize_t sz) 101 { 102 ZEV_MEM_ADD(sz); 103 return kmem_zalloc(sz, KM_SLEEP); 104 } 105 106 void 107 zev_free(void *ptr, ssize_t sz) 108 { 109 ZEV_MEM_SUB(sz); \ 110 kmem_free(ptr, sz); 111 } 112 113 int 114 zev_queue_cmp(const void *a, const void *b) 115 { 116 const zev_queue_t *qa = a; 117 const zev_queue_t *qb = b; 118 if (qa->zq_minor_number > qb->zq_minor_number) 119 return 1; 120 if (qa->zq_minor_number < qb->zq_minor_number) 121 return -1; 122 return 0; 123 } 124 125 /* must be called with zev_mutex held */ 126 void 127 zev_queue_trim(void) 128 { 129 zev_msg_t *m; 130 uint64_t oldest_message; 131 zev_queue_t *q; 132 int i; 133 134 if (!zev_queue_tail) 135 return; 136 137 oldest_message = zev_queue_tail->seq + 1; /* does not exist, yet. */ 138 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 139 q = zev_queues[i - ZEV_MINOR_MIN]; 140 if (q == NULL) 141 continue; 142 if (!q->zq_oldest) 143 continue; 144 if (oldest_message > q->zq_oldest->seq) 145 oldest_message = q->zq_oldest->seq; 146 } 147 148 /* remove msgs between oldest_message and zev_queue_head */ 149 while(zev_queue_head && (oldest_message > zev_queue_head->seq)) { 150 m = zev_queue_head; 151 zev_queue_head = m->next; 152 if (zev_queue_head == NULL) { 153 zev_queue_tail = NULL; 154 } else { 155 zev_queue_head->prev = NULL; 156 } 157 if (m->read == 0) { 158 zev_statistics.zev_bytes_discarded += m->size; 159 zev_statistics.zev_cnt_discarded_events++; 160 } 161 zev_statistics.zev_queue_len -= m->size; 162 zev_queue_len--; 163 zev_free(m, sizeof(*m) + m->size); 164 } 165 } 166 167 /* must be called with zev_mutex held */ 168 static void 169 zev_queue_hold(zev_queue_t *q) 170 { 171 q->zq_refcnt++; 172 } 173 174 /* must be called with zev_mutex held */ 175 static void 176 zev_queue_release(zev_queue_t *q) 177 { 178 q->zq_refcnt--; 179 if (q->zq_refcnt > 0) 180 return; 181 182 ASSERT(q->zq_busy == B_FALSE); 183 184 /* persistent queues will not be removed */ 185 if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0) 186 return; 187 188 /* remove queue from queue list */ 189 zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL; 190 191 /* discard messages that no queue references anymore */ 192 zev_queue_trim(); 193 194 cv_destroy(&q->zq_condvar); 195 ddi_remove_minor_node(q->zq_dip, q->zq_name); 196 ddi_soft_state_free(statep, q->zq_minor_number); 197 ZEV_MEM_SUB(sizeof(zev_queue_t)); 198 zev_queue_cnt--; 199 } 200 201 int 202 zev_queue_new(zev_queue_t **queue, 203 dev_info_t *dip, 204 char *name, 205 uint64_t max_queue_len, 206 uint16_t flags) 207 { 208 zev_queue_t *q; 209 zev_queue_t *tmp; 210 zev_msg_t *msg; 211 int name_exists = 0; 212 minor_t minor; 213 char *p; 214 int i; 215 216 if (max_queue_len > ZEV_MAX_QUEUE_LEN) 217 return EINVAL; 218 if (max_queue_len == 0) 219 max_queue_len = ZEV_MAX_QUEUE_LEN; 220 if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME)) 221 return EINVAL; 222 for (p = name; *p; p++) { 223 if (*p >= 'a' && *p <= 'z') 224 continue; 225 if (*p >= '0' && *p <= '9') 226 continue; 227 if (*p == '.') 228 continue; 229 return EINVAL; 230 } 231 232 mutex_enter(&zev_mutex); 233 234 /* find free minor number.*/ 235 /* if this were a frequent operation we'd have a free-minor list */ 236 for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) { 237 tmp = zev_queues[minor - ZEV_MINOR_MIN]; 238 if (tmp == NULL) 239 break; 240 } 241 if (tmp) { 242 mutex_exit(&zev_mutex); 243 return ENOSPC; 244 } 245 246 if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) { 247 mutex_exit(&zev_mutex); 248 return ENOSPC; 249 } 250 ZEV_MEM_ADD(sizeof(zev_queue_t)); 251 252 q = ddi_get_soft_state(statep, minor); 253 memset(q, 0, sizeof(*q)); 254 strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN); 255 q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0'; 256 q->zq_max_queue_len = max_queue_len; 257 q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN; 258 q->zq_flags = flags; 259 q->zq_refcnt = 1; 260 q->zq_dip = dip; 261 q->zq_minor_number = minor; 262 cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL); 263 264 /* insert into queue list */ 265 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 266 /* if this were a frequent operation we'd have a name tree */ 267 if (zev_queues[i - ZEV_MINOR_MIN] == NULL) 268 continue; 269 if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) { 270 name_exists = 1; 271 break; 272 } 273 } 274 if (name_exists) { 275 ddi_soft_state_free(statep, minor); 276 ZEV_MEM_SUB(sizeof(zev_queue_t)); 277 mutex_exit(&zev_mutex); 278 return EEXIST; 279 } 280 zev_queues[minor - ZEV_MINOR_MIN] = q; 281 zev_queue_cnt++; 282 283 /* calculate current queue len and find head and tail */ 284 q->zq_oldest = zev_queue_tail; 285 msg = zev_queue_tail; 286 while ((msg != NULL) && (q->zq_queue_len < q->zq_max_queue_len)) { 287 q->zq_queue_len += msg->size; 288 q->zq_queue_messages++; 289 q->zq_oldest = msg; 290 msg = msg->prev; 291 } 292 293 mutex_exit(&zev_mutex); 294 295 if (ddi_create_minor_node(dip, name, 296 S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 297 mutex_enter(&zev_mutex); 298 zev_queues[minor - ZEV_MINOR_MIN] = NULL; 299 zev_queue_cnt--; 300 ddi_soft_state_free(statep, minor); 301 ZEV_MEM_SUB(sizeof(zev_queue_t)); 302 mutex_exit(&zev_mutex); 303 return EFAULT; 304 } 305 306 *queue = q; 307 return 0; 308 } 309 310 /* 311 * poll() wakeup thread. Used to check periodically whether we have 312 * bytes left in the queue that have not yet been made into a 313 * pollwakeup() call. This is meant to insure a maximum waiting 314 * time until an event is presented as a poll wakeup, while at 315 * the same time not making every single event into a poll wakeup 316 * of it's own. 317 */ 318 319 static void 320 zev_poll_wakeup(boolean_t flush_all) 321 { 322 zev_queue_t *q; 323 int i; 324 325 /* 326 * This loop works with hold() and release() because 327 * pollwakeup() requires us to release our locks before calling it. 328 * 329 * from pollwakeup(9F): 330 * 331 * "Driver defined locks should not be held across calls 332 * to this function." 333 */ 334 335 /* wake up threads for each individual queue */ 336 mutex_enter(&zev_mutex); 337 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 338 q = zev_queues[i - ZEV_MINOR_MIN]; 339 if (q == NULL) 340 continue; 341 if (!q->zq_busy) 342 continue; 343 if (!q->zq_queue_len) 344 continue; 345 if ((flush_all) || 346 (q->zq_queue_len > q->zq_wakeup_threshold)) { 347 zev_queue_hold(q); 348 mutex_exit(&zev_mutex); 349 pollwakeup(&q->zq_pollhead, POLLIN); 350 mutex_enter(&zev_mutex); 351 zev_queue_release(q); 352 } 353 } 354 mutex_exit(&zev_mutex); 355 } 356 357 static void 358 zev_poll_wakeup_thread_main(void) 359 { 360 while (zev_wakeup_thread_run) { 361 delay(drv_usectohz(100 * 1000)); /* sleep 100ms */ 362 363 zev_poll_wakeup(B_TRUE); 364 } 365 thread_exit(); 366 } 367 368 static int 369 zev_ioc_mute_pool(char *poolname) 370 { 371 zev_pool_list_entry_t *pe; 372 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 373 /* pool already muted? */ 374 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 375 if (!strcmp(pe->name, poolname)) { 376 rw_exit(&zev_pool_list_rwlock); 377 return EEXIST; 378 } 379 } 380 pe = zev_zalloc(sizeof(*pe)); 381 if (!pe) { 382 rw_exit(&zev_pool_list_rwlock); 383 return ENOMEM; 384 } 385 (void) strncpy(pe->name, poolname, sizeof(pe->name)); 386 pe->next = zev_muted_pools_head; 387 zev_muted_pools_head = pe; 388 rw_exit(&zev_pool_list_rwlock); 389 return (0); 390 } 391 392 static int 393 zev_ioc_unmute_pool(char *poolname) 394 { 395 zev_pool_list_entry_t *pe, *peprev; 396 397 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 398 /* pool muted? */ 399 peprev = NULL; 400 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 401 if (!strcmp(pe->name, poolname)) 402 break; 403 peprev = pe; 404 } 405 if (pe) { 406 rw_exit(&zev_pool_list_rwlock); 407 return ENOENT; 408 } 409 410 if (peprev != NULL) { 411 peprev->next = pe->next; 412 } else { 413 zev_muted_pools_head = pe->next; 414 } 415 zev_free(pe, sizeof(*pe)); 416 rw_exit(&zev_pool_list_rwlock); 417 return (0); 418 } 419 420 int 421 zev_skip_pool(objset_t *os) 422 { 423 zev_pool_list_entry_t *pe; 424 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 425 rw_enter(&zev_pool_list_rwlock, RW_READER); 426 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 427 if (!strcmp(pe->name, dp->dp_spa->spa_name)) { 428 rw_exit(&zev_pool_list_rwlock); 429 return 1; 430 } 431 } 432 rw_exit(&zev_pool_list_rwlock); 433 return 0; 434 } 435 436 static void 437 zev_update_statistics(int op, zev_statistics_t *stat) 438 { 439 switch (op) { 440 case ZEV_OP_ERROR: 441 stat->zev_cnt_errors++; 442 break; 443 case ZEV_OP_MARK: 444 stat->zev_cnt_marks++; 445 break; 446 case ZEV_OP_ZFS_MOUNT: 447 stat->zev_cnt_zfs_mount++; 448 break; 449 case ZEV_OP_ZFS_UMOUNT: 450 stat->zev_cnt_zfs_umount++; 451 break; 452 case ZEV_OP_ZVOL_WRITE: 453 stat->zev_cnt_zvol_write++; 454 break; 455 case ZEV_OP_ZVOL_TRUNCATE: 456 stat->zev_cnt_zvol_truncate++; 457 break; 458 case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE: 459 stat->zev_cnt_znode_close_after_update++; 460 break; 461 case ZEV_OP_ZNODE_CREATE: 462 stat->zev_cnt_znode_create++; 463 break; 464 case ZEV_OP_ZNODE_REMOVE: 465 stat->zev_cnt_znode_remove++; 466 break; 467 case ZEV_OP_ZNODE_LINK: 468 stat->zev_cnt_znode_link++; 469 break; 470 case ZEV_OP_ZNODE_SYMLINK: 471 stat->zev_cnt_znode_symlink++; 472 break; 473 case ZEV_OP_ZNODE_RENAME: 474 stat->zev_cnt_znode_rename++; 475 break; 476 case ZEV_OP_ZNODE_WRITE: 477 stat->zev_cnt_znode_write++; 478 break; 479 case ZEV_OP_ZNODE_TRUNCATE: 480 stat->zev_cnt_znode_truncate++; 481 break; 482 case ZEV_OP_ZNODE_SETATTR: 483 stat->zev_cnt_znode_setattr++; 484 break; 485 case ZEV_OP_ZNODE_ACL: 486 stat->zev_cnt_znode_acl++; 487 break; 488 } 489 } 490 491 void 492 zev_queue_message(int op, zev_msg_t *msg) 493 { 494 zev_queue_t *q; 495 int wakeup = 0; 496 zev_msg_t *m; 497 int i; 498 499 msg->next = NULL; 500 msg->prev = NULL; 501 msg->read = 0; 502 503 if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) { 504 zev_queue_error(op, "unknown op id encountered: %d", op); 505 zev_free(msg, sizeof(*msg) + msg->size); 506 return; 507 } 508 509 /* 510 * This mutex protects us agains race conditions when several 511 * threads want to queue a message and one or more queues are 512 * full: we release zev_mutex to wait for the queues to become 513 * less-than-full, but we don't know in which order the waiting 514 * threads will be awoken. If it's not the same order in which 515 * they went to sleep we might mark different messages as "newest" 516 * in different queues, and so we might have dupes or even 517 * skip messages. 518 */ 519 mutex_enter(&zev_queue_msg_mutex); 520 521 mutex_enter(&zev_mutex); 522 523 /* 524 * When the module is loaded, the default behavior ist to 525 * put all events into a queue and block if the queue is full. 526 * This is done even before the pseudo device is attached. 527 * This way, no events are lost. 528 * 529 * To discard events entirely the "beaver" queue, 530 * which never discards anything, has to be removed. 531 */ 532 533 if (zev_queue_cnt == 0) { 534 mutex_exit(&zev_mutex); 535 mutex_exit(&zev_queue_msg_mutex); 536 return; 537 } 538 539 /* put message into global queue */ 540 msg->seq = zev_msg_sequence_number++; 541 while (zev_statistics.zev_max_queue_len && 542 zev_statistics.zev_queue_len >= zev_statistics.zev_max_queue_len) { 543 /* queue full. block until it's been shrunk. */ 544 cv_wait(&zev_condvar, &zev_mutex); 545 } 546 547 if (zev_queue_tail == NULL) { 548 zev_queue_head = zev_queue_tail = msg; 549 } else { 550 zev_queue_tail->next = msg; 551 msg->prev = zev_queue_tail; 552 zev_queue_tail = msg; 553 } 554 zev_queue_len++; 555 zev_statistics.zev_cnt_total_events++; 556 zev_statistics.zev_queue_len += msg->size; 557 558 /* update per-device queues */ 559 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 560 q = zev_queues[i - ZEV_MINOR_MIN]; 561 if (!q) 562 continue; 563 564 zev_queue_hold(q); 565 566 /* make sure queue has enough room */ 567 while (q->zq_max_queue_len && 568 q->zq_queue_len > q->zq_max_queue_len) { 569 570 if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) { 571 /* block until queue has been shrunk. */ 572 cv_wait(&zev_condvar, &zev_mutex); 573 } else { 574 /* discard msgs until queue is small enough */ 575 while (q->zq_queue_len > q->zq_max_queue_len) { 576 m = q->zq_oldest; 577 if (m == NULL) 578 break; 579 q->zq_events_discarded++; 580 q->zq_bytes_discarded += m->size; 581 q->zq_oldest = m->next; 582 q->zq_queue_len -= m->size; 583 q->zq_queue_messages--; 584 } 585 } 586 } 587 588 /* register new message at the end of the queue */ 589 q->zq_queue_len += msg->size; 590 q->zq_queue_messages++; 591 q->zq_bytes_total += msg->size; 592 q->zq_events_total++; 593 if (q->zq_oldest == NULL) 594 q->zq_oldest = msg; 595 596 zev_update_statistics(op, &q->zq_statistics); 597 598 if (q->zq_queue_len > q->zq_wakeup_threshold) 599 wakeup = 1; 600 if (q->zq_queue_len == msg->size) /* queue was empty */ 601 cv_broadcast(&q->zq_condvar); 602 603 zev_queue_release(q); 604 } 605 606 zev_queue_trim(); 607 608 zev_update_statistics(op, &zev_statistics); 609 mutex_exit(&zev_mutex); 610 mutex_exit(&zev_queue_msg_mutex); 611 612 /* one or more queues need a pollwakeup() */ 613 if (op == ZEV_OP_MARK) { 614 zev_poll_wakeup(B_TRUE); 615 } else if (wakeup) { 616 zev_poll_wakeup(B_FALSE); 617 } 618 619 return; 620 } 621 622 void 623 zev_queue_error(int op, char *fmt, ...) 624 { 625 char buf[ZEV_MAX_MESSAGE_LEN]; 626 va_list ap; 627 int len; 628 zev_msg_t *msg = NULL; 629 zev_error_t *rec; 630 int msg_size; 631 632 va_start(ap, fmt); 633 len = vsnprintf(buf, sizeof(buf), fmt, ap); 634 va_end(ap); 635 if (len >= sizeof(buf)) { 636 cmn_err(CE_WARN, "zev: can't report error - " 637 "dropping event entirely."); 638 return; 639 } 640 641 msg_size = sizeof(*rec) + len + 1; 642 msg = zev_alloc(sizeof(*msg) + msg_size); 643 msg->size = msg_size; 644 rec = (zev_error_t *)(msg + 1); 645 rec->record_len = msg_size; 646 rec->op = ZEV_OP_ERROR; 647 rec->op_time = ddi_get_time(); 648 rec->guid = 0; 649 rec->failed_op = op; 650 rec->errstr_len = len; 651 (void) memcpy(ZEV_ERRSTR(rec), buf, len + 1); 652 653 zev_queue_message(ZEV_OP_ERROR, msg); 654 return; 655 } 656 657 static int 658 zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name) 659 { 660 char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1]; 661 zev_queue_t *q; 662 int i; 663 664 *out = NULL; 665 666 if (name->zev_namelen == 0) { 667 if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR) 668 return EINVAL; 669 zev_queue_hold(req_q); 670 *out = req_q; 671 return 0; 672 } 673 674 if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 675 return EINVAL; 676 strncpy(namebuf, name->zev_name, name->zev_namelen); 677 namebuf[name->zev_namelen] = '\0'; 678 679 mutex_enter(&zev_mutex); 680 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 681 q = zev_queues[i - ZEV_MINOR_MIN]; 682 if (!q) 683 continue; 684 if (!strcmp(q->zq_name, namebuf)) { 685 zev_queue_hold(q); 686 mutex_exit(&zev_mutex); 687 *out = q; 688 return 0; 689 } 690 } 691 mutex_exit(&zev_mutex); 692 return ENOENT; 693 } 694 695 static int 696 zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode) 697 { 698 zev_ioctl_get_queue_statistics_t gs; 699 zev_queue_t *q; 700 int ret; 701 702 if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0) 703 return EFAULT; 704 705 ret = zev_find_queue(&q, req_q, &gs.zev_queue_name); 706 if (ret) 707 return ret; 708 709 /* ddi_copyout() can take a long time. Better make 710 a copy to be able to release the mutex faster. */ 711 mutex_enter(&zev_mutex); 712 memcpy(&gs.zev_statistics, &q->zq_statistics,sizeof(gs.zev_statistics)); 713 gs.zev_statistics.zev_queue_len = q->zq_queue_len; 714 gs.zev_statistics.zev_bytes_read = q->zq_bytes_read; 715 gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded; 716 gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len; 717 gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded; 718 gs.zev_statistics.zev_cnt_total_events = q->zq_events_total; 719 zev_queue_release(q); 720 mutex_exit(&zev_mutex); 721 722 if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) 723 return EFAULT; 724 return 0; 725 } 726 727 static int 728 zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 729 { 730 zev_ioctl_set_queue_properties_t qp; 731 zev_queue_t *q; 732 uint64_t old_max; 733 uint64_t old_flags; 734 int ret; 735 736 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 737 return EFAULT; 738 if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN) 739 return EINVAL; 740 if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN) 741 return EINVAL; 742 743 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 744 if (ret) 745 return ret; 746 747 mutex_enter(&zev_mutex); 748 749 /* 750 * Note: if the PERSISTENT flag is cleared, and the queue is not busy, 751 * the queue should be removed by zev_queue_release() in zev_ioctl(). 752 */ 753 old_flags = qp.zev_flags; 754 q->zq_flags = qp.zev_flags; 755 if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) && 756 (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) { 757 /* queue is no longer blocking - wake blocked threads */ 758 cv_broadcast(&zev_condvar); 759 } 760 761 old_max = q->zq_max_queue_len; 762 q->zq_max_queue_len = qp.zev_max_queue_len; 763 if (q->zq_max_queue_len < old_max) 764 zev_queue_trim(); 765 if (q->zq_max_queue_len > old_max) 766 cv_broadcast(&zev_condvar); /* threads may be waiting */ 767 768 if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) && 769 (qp.zev_poll_wakeup_threshold <= q->zq_queue_len)) 770 pollwakeup(&q->zq_pollhead, POLLIN); 771 q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold; 772 773 zev_queue_release(q); 774 mutex_exit(&zev_mutex); 775 return 0; 776 } 777 778 static int 779 zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 780 { 781 zev_ioctl_get_queue_properties_t qp; 782 zev_queue_t *q; 783 int ret; 784 785 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 786 return EFAULT; 787 788 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 789 if (ret) 790 return ret; 791 792 mutex_enter(&zev_mutex); 793 qp.zev_max_queue_len = q->zq_max_queue_len; 794 qp.zev_flags = q->zq_flags; 795 qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold; 796 zev_queue_release(q); 797 mutex_exit(&zev_mutex); 798 799 if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0) 800 return EFAULT; 801 return 0; 802 } 803 804 static int 805 zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode) 806 { 807 zev_ioctl_add_queue_t aq; 808 zev_queue_t *new_q; 809 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 810 811 if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0) 812 return EFAULT; 813 814 if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 815 return EINVAL; 816 strncpy(name, aq.zev_name, aq.zev_namelen); 817 name[aq.zev_namelen] = '\0'; 818 819 return zev_queue_new(&new_q, req_q->zq_dip, name, 820 aq.zev_max_queue_len, aq.zev_flags); 821 } 822 823 static int 824 zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode) 825 { 826 zev_ioctl_remove_queue_t rq; 827 zev_queue_t *q; 828 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 829 int found = 0; 830 int i; 831 832 if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0) 833 return EFAULT; 834 835 if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 836 return EINVAL; 837 strncpy(name, rq.zev_queue_name.zev_name, 838 rq.zev_queue_name.zev_namelen); 839 name[rq.zev_queue_name.zev_namelen] = '\0'; 840 841 mutex_enter(&zev_mutex); 842 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 843 q = zev_queues[i - ZEV_MINOR_MIN]; 844 if (!q) 845 continue; 846 if (!strcmp(q->zq_name, name)) { 847 found = 1; 848 break; 849 } 850 } 851 if (!found) { 852 mutex_exit(&zev_mutex); 853 return ENOENT; 854 } 855 856 if (q->zq_busy) { 857 mutex_exit(&zev_mutex); 858 return EBUSY; 859 } 860 /* 861 * clear flags, so that persistent queues are removed aswell 862 * and the queue becomes non-blocking. 863 */ 864 q->zq_flags = 0; 865 if (q->zq_to_be_removed == B_FALSE) { 866 q->zq_to_be_removed = B_TRUE; 867 zev_queue_release(q); 868 } 869 /* some threads might be waiting for this queue to become writable */ 870 cv_broadcast(&zev_condvar); 871 872 mutex_exit(&zev_mutex); 873 return 0; 874 } 875 876 static int 877 zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode) 878 { 879 zev_ioctl_debug_info_t di; 880 uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0); 881 uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0); 882 883 zev_chksum_stats(&di.zev_chksum_cache_size, 884 &di.zev_chksum_cache_hits, 885 &di.zev_chksum_cache_misses); 886 di.zev_memory_allocated = mem_allocated - mem_freed; 887 if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0) 888 return EFAULT; 889 return 0; 890 } 891 892 static int 893 zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode) 894 { 895 zev_ioctl_get_queue_list_t gql; 896 zev_queue_t *q; 897 int i = 0; 898 int count = 0; 899 900 memset(&gql, 0, sizeof(gql)); 901 902 mutex_enter(&zev_mutex); 903 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 904 q = zev_queues[i - ZEV_MINOR_MIN]; 905 if (!q) 906 continue; 907 strncpy(gql.zev_queue_name[count].zev_name, 908 q->zq_name, ZEV_MAX_QUEUE_NAME_LEN); 909 gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name); 910 count++; 911 } 912 gql.zev_n_queues = count; 913 mutex_exit(&zev_mutex); 914 915 if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0) 916 return EFAULT; 917 return 0; 918 } 919 920 /* ARGSUSED */ 921 static int 922 zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 923 { 924 zev_statistics_t zs; 925 zev_ioctl_poolarg_t pa; 926 zev_ioctl_mark_t mark; 927 zev_mark_t *rec; 928 int msg_size; 929 zev_msg_t *msg; 930 uint64_t len; 931 uint64_t mark_id; 932 minor_t minor; 933 zev_queue_t *req_q; 934 int ret = 0; 935 936 minor = getminor(dev); 937 mutex_enter(&zev_mutex); 938 if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) { 939 mutex_exit(&zev_mutex); 940 return (ENXIO); 941 } 942 zev_queue_hold(req_q); 943 mutex_exit(&zev_mutex); 944 /* 945 * all structures passed between kernel and userspace 946 * are now compatible between 64 and 32 bit. Model 947 * conversion can be ignored. 948 */ 949 switch (cmd) { 950 case ZEV_IOC_GET_GLOBAL_STATISTICS: 951 /* ddi_copyout() can take a long time. Better make 952 a copy to be able to release the mutex faster. */ 953 mutex_enter(&zev_mutex); 954 (void) memcpy(&zs, &zev_statistics, sizeof(zs)); 955 mutex_exit(&zev_mutex); 956 if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0) 957 ret = EFAULT; 958 break; 959 case ZEV_IOC_GET_QUEUE_STATISTICS: 960 ret = zev_ioc_get_queue_statistics(req_q, arg, mode); 961 break; 962 case ZEV_IOC_MUTE_POOL: 963 case ZEV_IOC_UNMUTE_POOL: 964 if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) { 965 ret = EFAULT; 966 break; 967 } 968 if (pa.zev_poolname_len >=MAXPATHLEN) { 969 ret = EINVAL; 970 break; 971 } 972 pa.zev_poolname[pa.zev_poolname_len] = '\0'; 973 if (cmd == ZEV_IOC_MUTE_POOL) { 974 ret = zev_ioc_mute_pool(pa.zev_poolname); 975 } else { 976 ret = zev_ioc_unmute_pool(pa.zev_poolname); 977 } 978 break; 979 case ZEV_IOC_SET_MAX_QUEUE_LEN: 980 if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) { 981 ret = EFAULT; 982 break; 983 } 984 if (len > ZEV_MAX_QUEUE_LEN) { 985 ret = EINVAL; 986 break; 987 } 988 mutex_enter(&zev_mutex); 989 zev_statistics.zev_max_queue_len = len; 990 cv_broadcast(&zev_condvar); 991 mutex_exit(&zev_mutex); 992 break; 993 case ZEV_IOC_GET_QUEUE_PROPERTIES: 994 ret = zev_ioc_get_queue_properties(req_q, arg, mode); 995 break; 996 case ZEV_IOC_SET_QUEUE_PROPERTIES: 997 ret = zev_ioc_set_queue_properties(req_q, arg, mode); 998 break; 999 case ZEV_IOC_MARK: 1000 if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) { 1001 ret = EFAULT; 1002 break; 1003 } 1004 /* prepare message */ 1005 msg_size = sizeof(*rec) + mark.zev_payload_len + 1; 1006 msg = zev_alloc(sizeof(*msg) + msg_size); 1007 msg->size = msg_size; 1008 rec = (zev_mark_t *)(msg + 1); 1009 rec->record_len = msg_size; 1010 rec->op = ZEV_OP_MARK; 1011 rec->op_time = ddi_get_time(); 1012 rec->guid = mark.zev_guid; 1013 rec->payload_len = mark.zev_payload_len; 1014 /* get payload */ 1015 if (ddi_copyin(((char *)arg) + sizeof(mark), 1016 ZEV_PAYLOAD(rec), 1017 mark.zev_payload_len, mode) != 0) { 1018 zev_free(msg, msg_size); 1019 ret = EFAULT; 1020 break; 1021 } 1022 *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0'; 1023 /* get mark id and queue message */ 1024 mutex_enter(&zev_mark_id_mutex); 1025 mark_id = zev_mark_id++; 1026 mutex_exit(&zev_mark_id_mutex); 1027 rec->mark_id = mark_id; 1028 zev_queue_message(ZEV_OP_MARK, msg); 1029 /* report mark id to userland, ignore errors */ 1030 mark.zev_mark_id = mark_id; 1031 ddi_copyout(&mark, (void *)arg, sizeof(mark), mode); 1032 break; 1033 case ZEV_IOC_ADD_QUEUE: 1034 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1035 ret = EACCES; 1036 break; 1037 } 1038 ret = zev_ioc_add_queue(req_q, arg, mode); 1039 break; 1040 case ZEV_IOC_REMOVE_QUEUE: 1041 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1042 ret = EACCES; 1043 break; 1044 } 1045 ret = zev_ioc_remove_queue(req_q, arg, mode); 1046 break; 1047 case ZEV_IOC_GET_DEBUG_INFO: 1048 ret = zev_ioc_get_debug_info(req_q, arg, mode); 1049 break; 1050 case ZEV_IOC_GET_QUEUE_LIST: 1051 ret = zev_ioc_get_queue_list(req_q, arg, mode); 1052 break; 1053 default: 1054 /* generic "ioctl unknown" error */ 1055 ret = ENOTTY; 1056 } 1057 1058 mutex_enter(&zev_mutex); 1059 zev_queue_release(req_q); 1060 mutex_exit(&zev_mutex); 1061 return (ret); 1062 } 1063 1064 static int 1065 zev_chpoll(dev_t dev, short events, int anyyet, 1066 short *reventsp, struct pollhead **phpp) 1067 { 1068 int minor; 1069 short revent = 0; 1070 zev_queue_t *q; 1071 1072 /* use minor-specific queue context and it's pollhead */ 1073 minor = getminor(dev); 1074 if (minor == ZEV_CONTROL_DEVICE_MINOR) 1075 return (EINVAL); 1076 mutex_enter(&zev_mutex); 1077 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1078 mutex_exit(&zev_mutex); 1079 return (ENXIO); 1080 } 1081 revent = 0; 1082 if ((events & POLLIN)) { 1083 if (q->zq_oldest) 1084 revent |= POLLIN; 1085 } 1086 if (revent == 0) { 1087 if (!anyyet) { 1088 *phpp = &q->zq_pollhead; 1089 } 1090 } 1091 *reventsp = revent; 1092 mutex_exit(&zev_mutex); 1093 return (0); 1094 } 1095 1096 /* ARGSUSED */ 1097 static int 1098 zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p) 1099 { 1100 minor_t minor; 1101 offset_t off; 1102 int ret = 0; 1103 zev_msg_t *msg; 1104 char *data; 1105 zev_queue_t *q; 1106 1107 minor = getminor(dev); 1108 if (minor == ZEV_CONTROL_DEVICE_MINOR) 1109 return (EINVAL); 1110 1111 mutex_enter(&zev_mutex); 1112 q = ddi_get_soft_state(statep, minor); 1113 if (q == NULL) { 1114 mutex_exit(&zev_mutex); 1115 return (ENXIO); 1116 } 1117 off = uio_p->uio_loffset; 1118 msg = q->zq_oldest; 1119 while (msg == NULL) { 1120 if (!ddi_can_receive_sig()) { 1121 /* 1122 * read() shouldn't block because this thread 1123 * can't receive signals. (e.g., it might be 1124 * torn down by exit() right now.) 1125 */ 1126 mutex_exit(&zev_mutex); 1127 return 0; 1128 } 1129 if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) { 1130 /* signal received. */ 1131 mutex_exit(&zev_mutex); 1132 return EINTR; 1133 } 1134 msg = q->zq_oldest; 1135 } 1136 if (msg->size > uio_p->uio_resid) { 1137 mutex_exit(&zev_mutex); 1138 return E2BIG; 1139 } 1140 while (msg && uio_p->uio_resid >= msg->size) { 1141 data = (char *)(msg + 1); 1142 ret = uiomove(data, msg->size, UIO_READ, uio_p); 1143 if (ret != 0) { 1144 mutex_exit(&zev_mutex); 1145 cmn_err(CE_WARN, "zev: uiomove failed; messages lost"); 1146 uio_p->uio_loffset = off; 1147 return (ret); 1148 } 1149 q->zq_oldest = msg->next; 1150 q->zq_bytes_read += msg->size; 1151 q->zq_queue_len -= msg->size; 1152 q->zq_queue_messages--; 1153 msg->read++; 1154 msg = q->zq_oldest; 1155 } 1156 cv_broadcast(&zev_condvar); 1157 mutex_exit(&zev_mutex); 1158 uio_p->uio_loffset = off; 1159 return 0; 1160 } 1161 1162 /* ARGSUSED */ 1163 static int 1164 zev_close(dev_t dev, int flag, int otyp, cred_t *crepd) 1165 { 1166 zev_queue_t *q; 1167 int minor; 1168 1169 minor = getminor(dev); 1170 if (otyp != OTYP_CHR) 1171 return (EINVAL); 1172 mutex_enter(&zev_mutex); 1173 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1174 mutex_exit(&zev_mutex); 1175 return (ENXIO); 1176 } 1177 if (q->zq_busy != B_TRUE) { 1178 mutex_exit(&zev_mutex); 1179 return (EINVAL); 1180 } 1181 q->zq_busy = B_FALSE; 1182 if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0) 1183 zev_queue_release(q); 1184 mutex_exit(&zev_mutex); 1185 return (0); 1186 } 1187 1188 /* ARGSUSED */ 1189 static int 1190 zev_open(dev_t *devp, int flag, int otyp, cred_t *credp) 1191 { 1192 zev_queue_t *q; 1193 minor_t minor; 1194 1195 minor = getminor(*devp); 1196 if (otyp != OTYP_CHR) 1197 return (EINVAL); 1198 if (drv_priv(credp) != 0) 1199 return (EPERM); 1200 mutex_enter(&zev_mutex); 1201 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1202 mutex_exit(&zev_mutex); 1203 return (ENXIO); 1204 } 1205 if (minor == ZEV_CONTROL_DEVICE_MINOR) { 1206 /* control device may be used in parallel */ 1207 q->zq_busy = B_TRUE; 1208 mutex_exit(&zev_mutex); 1209 return 0; 1210 } 1211 if (q->zq_busy == B_TRUE) { 1212 mutex_exit(&zev_mutex); 1213 return (EBUSY); 1214 } 1215 q->zq_busy = B_TRUE; /* can only be opened exclusively */ 1216 mutex_exit(&zev_mutex); 1217 return (0); 1218 } 1219 1220 static struct cb_ops zev_cb_ops = { 1221 zev_open, /* open */ 1222 zev_close, /* close */ 1223 nodev, /* strategy */ 1224 nodev, /* print */ 1225 nodev, /* dump */ 1226 zev_read, /* read */ 1227 nodev, /* write */ 1228 zev_ioctl, /* ioctl */ 1229 nodev, /* devmap */ 1230 nodev, /* mmap */ 1231 nodev, /* segmap */ 1232 zev_chpoll, /* chpoll */ 1233 ddi_prop_op, /* prop_op */ 1234 NULL, /* streamtab */ 1235 D_MP | D_64BIT, /* cb_flag */ 1236 CB_REV, /* cb_rev */ 1237 nodev, /* aread */ 1238 nodev, /* awrite */ 1239 }; 1240 1241 static void 1242 zev_free_instance(dev_info_t *dip) 1243 { 1244 int instance; 1245 zev_queue_t *q; 1246 int i; 1247 1248 instance = ddi_get_instance(dip); 1249 if (instance != 0) { 1250 cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)", 1251 instance); 1252 return; 1253 } 1254 1255 ddi_remove_minor_node(dip, NULL); 1256 1257 /* stop pollwakeup thread */ 1258 zev_wakeup_thread_run = 0; 1259 if (zev_poll_wakeup_thread != NULL) { 1260 thread_join(zev_poll_wakeup_thread->t_did); 1261 zev_poll_wakeup_thread = NULL; 1262 } 1263 1264 mutex_enter(&zev_mutex); 1265 1266 /* remove "ctrl" dummy queue */ 1267 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1268 if (q) { 1269 ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR); 1270 ZEV_MEM_SUB(sizeof(zev_queue_t)); 1271 } 1272 1273 /* remove all other queues */ 1274 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 1275 q = zev_queues[i- ZEV_MINOR_MIN]; 1276 if (!q) 1277 continue; 1278 ASSERT(q->zq_refcnt == 1); 1279 zev_queue_release(q); 1280 } 1281 zev_queue_trim(); 1282 bzero(&zev_queues, sizeof(zev_queues)); 1283 1284 mutex_exit(&zev_mutex); 1285 1286 } 1287 1288 static int 1289 zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1290 { 1291 int instance; 1292 zev_queue_t *q; 1293 1294 /* called once per instance with DDI_DETACH, 1295 may be called to suspend */ 1296 switch (cmd) { 1297 case DDI_DETACH: 1298 /* instance busy? */ 1299 instance = ddi_get_instance(dip); 1300 if (instance != 0) { /* hardcoded in zev.conf */ 1301 /* this module only supports one instance. */ 1302 return (DDI_FAILURE); 1303 } 1304 1305 mutex_enter(&zev_mutex); 1306 if (!zev_attached) { 1307 mutex_exit(&zev_mutex); 1308 return (DDI_FAILURE); 1309 } 1310 1311 /* check "ctrl" queue to see if t is busy */ 1312 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1313 if (q == NULL) { 1314 mutex_exit(&zev_mutex); 1315 return (DDI_FAILURE); 1316 } 1317 if (q->zq_busy) { 1318 mutex_exit(&zev_mutex); 1319 return (DDI_FAILURE); 1320 } 1321 /* are there any queues? */ 1322 if (zev_queue_cnt > 0) { 1323 mutex_exit(&zev_mutex); 1324 return (DDI_FAILURE); 1325 } 1326 1327 zev_attached = B_FALSE; 1328 mutex_exit(&zev_mutex); 1329 1330 /* switch ZFS event callbacks back to default */ 1331 rw_enter(&rz_zev_rwlock, RW_WRITER); 1332 rz_zev_callbacks = rz_zev_default_callbacks; 1333 rz_zev_set_active(B_FALSE); 1334 rw_exit(&rz_zev_rwlock); 1335 1336 /* no thread is inside of the callbacks anymore. */ 1337 1338 /* free resources allocated for this instance */ 1339 zev_free_instance(dip); 1340 zev_chksum_fini(); 1341 #if 0 1342 cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64, 1343 zev_memory_allocated - zev_memory_freed); 1344 #endif 1345 return (DDI_SUCCESS); 1346 case DDI_SUSPEND: 1347 /* kernel must not suspend zev devices while ZFS is running */ 1348 return (DDI_FAILURE); 1349 default: 1350 return (DDI_FAILURE); 1351 } 1352 } 1353 1354 static int 1355 zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1356 { 1357 /* called once per instance with DDI_ATTACH, 1358 may be called to resume */ 1359 int instance; 1360 int error; 1361 zev_queue_t *q; 1362 switch (cmd) { 1363 case DDI_ATTACH: 1364 /* create instance state */ 1365 instance = ddi_get_instance(dip); 1366 if (instance != 0) { /* hardcoded in zev.conf */ 1367 /* this module only supports one instance. */ 1368 return (DDI_FAILURE); 1369 } 1370 1371 mutex_enter(&zev_mutex); 1372 if (zev_attached) { 1373 mutex_exit(&zev_mutex); 1374 return (DDI_FAILURE); 1375 } 1376 if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) != 1377 DDI_SUCCESS) { 1378 mutex_exit(&zev_mutex); 1379 return (DDI_FAILURE); 1380 } 1381 ZEV_MEM_ADD(sizeof(zev_queue_t)); 1382 zev_attached = B_TRUE; 1383 1384 /* init queue list */ 1385 bzero(&zev_queues, sizeof(zev_queues)); 1386 mutex_exit(&zev_mutex); 1387 1388 /* create a dummy queue for management of "ctrl" */ 1389 1390 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1391 q->zq_dip = dip; 1392 q->zq_refcnt = 1; 1393 q->zq_busy = B_FALSE; 1394 q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR; 1395 q->zq_flags = ZEV_FL_PERSISTENT; 1396 strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME); 1397 1398 /* create device node for "ctrl" */ 1399 if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME, 1400 S_IFCHR, ZEV_CONTROL_DEVICE_MINOR, 1401 DDI_PSEUDO, 0) == DDI_FAILURE) { 1402 goto fail; 1403 } 1404 1405 /* note: intentionally not adding ctrl queue to queue list. */ 1406 1407 /* default queue */ 1408 error = zev_queue_new(&q, dip, 1409 ZEV_DEFAULT_QUEUE_NAME, 1410 ZEV_MAX_QUEUE_LEN, 1411 ZEV_FL_BLOCK_WHILE_QUEUE_FULL| 1412 ZEV_FL_PERSISTENT); 1413 if (error) 1414 goto fail; 1415 1416 /* start pollwakeup thread */ 1417 zev_wakeup_thread_run = 1; 1418 zev_poll_wakeup_thread = thread_create(NULL, 0, 1419 zev_poll_wakeup_thread_main, NULL, 0, &p0, 1420 TS_RUN, minclsyspri); 1421 1422 ddi_report_dev(dip); 1423 1424 zev_chksum_init(); 1425 1426 /* switch ZFS event callbacks to zev module callbacks */ 1427 rw_enter(&rz_zev_rwlock, RW_WRITER); 1428 rz_zev_callbacks = &zev_callbacks; 1429 rz_zev_set_active(B_TRUE); 1430 rw_exit(&rz_zev_rwlock); 1431 1432 return (DDI_SUCCESS); 1433 case DDI_RESUME: 1434 /* suspendeding zev devices should never happen */ 1435 return (DDI_SUCCESS); 1436 default: 1437 return (DDI_FAILURE); 1438 } 1439 fail: 1440 cmn_err(CE_WARN, "zev: attach failed"); 1441 zev_free_instance(dip); 1442 mutex_enter(&zev_mutex); 1443 zev_attached = B_FALSE; 1444 mutex_exit(&zev_mutex); 1445 return (DDI_FAILURE); 1446 } 1447 1448 /* ARGSUSED */ 1449 static int 1450 zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp) 1451 { 1452 minor_t minor; 1453 zev_queue_t *q; 1454 1455 /* arg is dev_t */ 1456 minor = getminor((dev_t)arg); 1457 mutex_enter(&zev_mutex); 1458 q = ddi_get_soft_state(statep, minor); 1459 if (q == NULL) { 1460 *resultp = NULL; 1461 mutex_exit(&zev_mutex); 1462 return (DDI_FAILURE); 1463 } 1464 1465 switch (infocmd) { 1466 case DDI_INFO_DEVT2DEVINFO: 1467 *resultp = q->zq_dip; 1468 break; 1469 case DDI_INFO_DEVT2INSTANCE: 1470 *resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip); 1471 break; 1472 default: 1473 mutex_exit(&zev_mutex); 1474 return (DDI_FAILURE); 1475 } 1476 mutex_exit(&zev_mutex); 1477 return (DDI_SUCCESS); 1478 } 1479 1480 static struct dev_ops zev_dev_ops = { 1481 DEVO_REV, /* driver build revision */ 1482 0, /* driver reference count */ 1483 zev_getinfo, /* getinfo */ 1484 nulldev, /* identify (obsolete) */ 1485 nulldev, /* probe (search for devices) */ 1486 zev_attach, /* attach */ 1487 zev_detach, /* detach */ 1488 nodev, /* reset (obsolete, use quiesce) */ 1489 &zev_cb_ops, /* character and block device ops */ 1490 NULL, /* bus driver ops */ 1491 NULL, /* power management, not needed */ 1492 ddi_quiesce_not_needed, /* quiesce */ 1493 }; 1494 1495 static struct modldrv zev_modldrv = { 1496 &mod_driverops, /* all loadable modules use this */ 1497 "zev ZFS event provider, v1.0", /* driver name and version info */ 1498 &zev_dev_ops /* ops method pointers */ 1499 }; 1500 1501 static struct modlinkage zev_modlinkage = { 1502 MODREV_1, /* fixed value */ 1503 { 1504 &zev_modldrv, /* driver linkage structure */ 1505 NULL /* list terminator */ 1506 } 1507 }; 1508 1509 int 1510 _init(void) 1511 { 1512 int error; 1513 1514 if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0) 1515 return (error); 1516 zev_attached = B_FALSE; 1517 1518 zev_queue_head = NULL; 1519 zev_queue_tail = NULL; 1520 zev_queue_len = 0; 1521 zev_muted_pools_head = NULL; 1522 zev_memory_allocated = 0; 1523 zev_memory_freed = 0; 1524 zev_queue_cnt = 0; 1525 1526 mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL); 1527 cv_init(&zev_condvar, NULL, CV_DRIVER, NULL); 1528 rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL); 1529 mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL); 1530 zev_mark_id = gethrtime(); 1531 mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL); 1532 zev_msg_sequence_number = gethrtime(); 1533 bzero(&zev_statistics, sizeof(zev_statistics)); 1534 bzero(&zev_pollhead, sizeof(zev_pollhead)); 1535 bzero(&zev_queues, sizeof(zev_queues)); 1536 zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN; 1537 if (zev_ioc_mute_pool("zg0")) { 1538 cmn_err(CE_WARN, "zev: could not init mute list"); 1539 goto FAIL; 1540 } 1541 1542 if ((error = mod_install(&zev_modlinkage)) != 0) { 1543 cmn_err(CE_WARN, "zev: could not install module"); 1544 goto FAIL; 1545 } 1546 1547 return (0); 1548 FAIL: 1549 /* free resources */ 1550 cmn_err(CE_WARN, "zev: _init failed"); 1551 mutex_destroy(&zev_mutex); 1552 ddi_soft_state_fini(&statep); 1553 return (error); 1554 } 1555 1556 int 1557 _info(struct modinfo *modinfop) 1558 { 1559 return (mod_info(&zev_modlinkage, modinfop)); 1560 } 1561 1562 int 1563 _fini(void) 1564 { 1565 int error = 0; 1566 zev_msg_t *msg; 1567 zev_pool_list_entry_t *pe, *npe; 1568 1569 mutex_enter(&zev_mutex); 1570 if (zev_attached == B_TRUE) { 1571 mutex_exit(&zev_mutex); 1572 return (SET_ERROR(EBUSY)); 1573 } 1574 if (zev_queue_cnt != 0) { 1575 /* should never happen */ 1576 mutex_exit(&zev_mutex); 1577 return (SET_ERROR(EBUSY)); 1578 } 1579 1580 /* 1581 * avoid deadlock if event list is full: make sure threads currently 1582 * blocking on the event list can append their event and then release 1583 * rz_zev_rwlock. Since there should be no queues left when we 1584 * reach this point we can simply empty the event list and then 1585 * wake everybody. 1586 */ 1587 while (zev_queue_head) { 1588 msg = zev_queue_head; 1589 zev_queue_head = msg->next; 1590 zev_free(msg, sizeof(*msg) + msg->size); 1591 } 1592 cv_broadcast(&zev_condvar); 1593 mutex_exit(&zev_mutex); 1594 1595 /* switch ZFS event callbacks back to default (again) */ 1596 rw_enter(&rz_zev_rwlock, RW_WRITER); 1597 rz_zev_callbacks = rz_zev_default_callbacks; 1598 rz_zev_set_active(B_FALSE); 1599 rw_exit(&rz_zev_rwlock); 1600 1601 /* no thread is inside of the callbacks anymore. Safe to remove. */ 1602 1603 /* unload module callbacks */ 1604 if ((error = mod_remove(&zev_modlinkage)) != 0) { 1605 cmn_err(CE_WARN, "mod_remove failed: %d", error); 1606 return (error); 1607 } 1608 1609 /* free resources */ 1610 mutex_enter(&zev_mutex); 1611 while (zev_queue_head) { 1612 msg = zev_queue_head; 1613 zev_queue_head = msg->next; 1614 zev_free(msg, sizeof(*msg) + msg->size); 1615 } 1616 mutex_exit(&zev_mutex); 1617 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 1618 pe = zev_muted_pools_head; 1619 while (pe) { 1620 npe = pe; 1621 pe = pe->next; 1622 zev_free(npe, sizeof(*npe)); 1623 } 1624 rw_exit(&zev_pool_list_rwlock); 1625 ddi_soft_state_fini(&statep); 1626 rw_destroy(&zev_pool_list_rwlock); 1627 cv_destroy(&zev_condvar); 1628 mutex_destroy(&zev_mutex); 1629 mutex_destroy(&zev_mark_id_mutex); 1630 mutex_destroy(&zev_queue_msg_mutex); 1631 1632 return (0); 1633 } 1634 1635