1 #include <sys/modctl.h> 2 #include <sys/ddi.h> 3 #include <sys/sunddi.h> 4 #include <sys/conf.h> 5 #include <sys/devops.h> 6 #include <sys/stat.h> 7 #include <sys/fs/zev.h> 8 #include <sys/zev_callbacks.h> 9 #include <sys/zev_checksums.h> 10 #include <sys/zfs_znode.h> 11 #include <sys/time.h> 12 #include <sys/sa.h> 13 #include <sys/zap.h> 14 #include <sys/time.h> 15 16 #define OFFSETOF(s, m) ((size_t)(&(((s *)0)->m))) 17 18 #define XSTRING(x) STRING(x) 19 #define STRING(x) #x 20 21 #define ZEV_DEFAULT_QUEUE_NAME "beaver" 22 #define ZEV_CONTROL_DEVICE_MINOR 0 23 #define ZEV_MINOR_MIN (ZEV_CONTROL_DEVICE_MINOR + 1) 24 #define ZEV_MINOR_MAX (ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1) 25 26 typedef struct zev_queue { 27 char zq_name[ZEV_MAX_QUEUE_NAME_LEN+1]; 28 minor_t zq_minor_number; 29 dev_info_t *zq_dip; 30 struct pollhead zq_pollhead; 31 uint64_t zq_bytes_read; 32 uint64_t zq_events_read; 33 uint64_t zq_bytes_discarded; 34 uint64_t zq_events_discarded; 35 uint64_t zq_bytes_total; 36 uint64_t zq_events_total; 37 uint64_t zq_wakeup_threshold; 38 uint16_t zq_flags; 39 uint16_t zq_need_wakeup; 40 /* protected by zev_mutex */ 41 int zq_refcnt; 42 uint64_t zq_queue_len; 43 uint64_t zq_queue_messages; 44 uint64_t zq_max_queue_len; 45 zev_msg_t *zq_oldest; 46 boolean_t zq_busy; 47 boolean_t zq_to_be_removed; 48 zev_statistics_t zq_statistics; 49 kcondvar_t zq_condvar; 50 } zev_queue_t; 51 52 static void *statep; 53 struct pollhead zev_pollhead; 54 55 kmutex_t zev_mutex; 56 kcondvar_t zev_condvar; 57 kmutex_t zev_queue_msg_mutex; 58 krwlock_t zev_pool_list_rwlock; 59 static zev_statistics_t zev_statistics; 60 static boolean_t zev_attached; 61 static kmutex_t zev_mark_id_mutex; 62 static uint64_t zev_mark_id = 0; 63 64 static uint64_t zev_msg_sequence_number = 0; 65 static zev_queue_t *zev_queues[ZEV_MAX_QUEUES]; 66 static int zev_queue_cnt = 0; 67 static int zev_have_blocking_queues = 1; 68 69 uint64_t zev_memory_allocated = 0; 70 uint64_t zev_memory_freed = 0; 71 72 /* 73 * The longest potential message is from zev_zfs_mount() and 74 * contains the mountpoint, which might be close to MAXPATHLEN bytes long. 75 * 76 * Another candidate is zev_znode_rename_cb() and contains three inode 77 * numbers and two filenames of up to MAXNAMELEN bytes each. 78 */ 79 #define ZEV_MAX_MESSAGE_LEN 4096 80 81 static zev_msg_t *zev_queue_head = NULL; 82 static zev_msg_t *zev_queue_tail = NULL; 83 static uint64_t zev_queue_len = 0; 84 85 86 typedef struct zev_pool_list_entry { 87 struct zev_pool_list_entry *next; 88 char name[MAXPATHLEN]; 89 } zev_pool_list_entry_t; 90 91 static zev_pool_list_entry_t *zev_muted_pools_head = NULL; 92 93 static volatile int zev_wakeup_thread_run = 1; 94 static kthread_t *zev_poll_wakeup_thread = NULL; 95 96 void * 97 zev_alloc(ssize_t sz) 98 { 99 ZEV_MEM_ADD(sz); 100 return kmem_alloc(sz, KM_SLEEP); 101 } 102 103 void * 104 zev_zalloc(ssize_t sz) 105 { 106 ZEV_MEM_ADD(sz); 107 return kmem_zalloc(sz, KM_SLEEP); 108 } 109 110 void 111 zev_free(void *ptr, ssize_t sz) 112 { 113 ZEV_MEM_SUB(sz); \ 114 kmem_free(ptr, sz); 115 } 116 117 /* must be called with zev_mutex held */ 118 static void 119 zev_update_blockflag(void) 120 { 121 zev_queue_t *q; 122 int had_blocking_queues; 123 int i; 124 125 had_blocking_queues = zev_have_blocking_queues; 126 127 /* do we still have blocking queues? */ 128 zev_have_blocking_queues = 0; 129 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 130 q = zev_queues[i - ZEV_MINOR_MIN]; 131 if (!q) 132 continue; 133 if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) { 134 zev_have_blocking_queues = 1; 135 break; 136 } 137 } 138 /* no blocking queues */ 139 if (had_blocking_queues) 140 cv_broadcast(&zev_condvar); 141 } 142 143 int 144 zev_queue_cmp(const void *a, const void *b) 145 { 146 const zev_queue_t *qa = a; 147 const zev_queue_t *qb = b; 148 if (qa->zq_minor_number > qb->zq_minor_number) 149 return 1; 150 if (qa->zq_minor_number < qb->zq_minor_number) 151 return -1; 152 return 0; 153 } 154 155 /* must be called with zev_mutex held */ 156 void 157 zev_queue_trim(void) 158 { 159 zev_msg_t *m; 160 uint64_t oldest_message; 161 zev_queue_t *q; 162 int i; 163 164 if (!zev_queue_tail) 165 return; 166 167 oldest_message = zev_queue_tail->seq + 1; /* does not exist, yet. */ 168 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 169 q = zev_queues[i - ZEV_MINOR_MIN]; 170 if (q == NULL) 171 continue; 172 if (!q->zq_oldest) 173 continue; 174 if (oldest_message > q->zq_oldest->seq) 175 oldest_message = q->zq_oldest->seq; 176 } 177 178 /* remove msgs between oldest_message and zev_queue_head */ 179 while(zev_queue_head && (oldest_message > zev_queue_head->seq)) { 180 m = zev_queue_head; 181 zev_queue_head = m->next; 182 if (zev_queue_head == NULL) { 183 zev_queue_tail = NULL; 184 } else { 185 zev_queue_head->prev = NULL; 186 } 187 if (m->read == 0) { 188 zev_statistics.zev_bytes_discarded += m->size; 189 zev_statistics.zev_cnt_discarded_events++; 190 } 191 zev_statistics.zev_queue_len -= m->size; 192 zev_queue_len--; 193 zev_free(m, sizeof(*m) + m->size); 194 } 195 } 196 197 /* must be called with zev_mutex held */ 198 static void 199 zev_queue_hold(zev_queue_t *q) 200 { 201 q->zq_refcnt++; 202 } 203 204 /* must be called with zev_mutex held */ 205 static void 206 zev_queue_release(zev_queue_t *q) 207 { 208 q->zq_refcnt--; 209 if (q->zq_refcnt > 0) 210 return; 211 212 ASSERT(q->zq_busy == B_FALSE); 213 214 /* persistent queues will not be removed */ 215 if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0) 216 return; 217 218 /* remove queue from queue list */ 219 zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL; 220 221 /* discard messages that no queue references anymore */ 222 zev_queue_trim(); 223 224 cv_destroy(&q->zq_condvar); 225 ddi_remove_minor_node(q->zq_dip, q->zq_name); 226 ddi_soft_state_free(statep, q->zq_minor_number); 227 ZEV_MEM_SUB(sizeof(zev_queue_t)); 228 zev_queue_cnt--; 229 zev_update_blockflag(); 230 } 231 232 int 233 zev_queue_new(zev_queue_t **queue, 234 dev_info_t *dip, 235 char *name, 236 uint64_t max_queue_len, 237 uint16_t flags) 238 { 239 zev_queue_t *q; 240 zev_queue_t *tmp; 241 zev_msg_t *msg; 242 int name_exists = 0; 243 minor_t minor; 244 char *p; 245 int i; 246 247 if (max_queue_len > ZEV_MAX_QUEUE_LEN) 248 return EINVAL; 249 if (max_queue_len == 0) 250 max_queue_len = ZEV_MAX_QUEUE_LEN; 251 if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME)) 252 return EINVAL; 253 for (p = name; *p; p++) { 254 if (*p >= 'a' && *p <= 'z') 255 continue; 256 if (*p >= '0' && *p <= '9') 257 continue; 258 if (*p == '.') 259 continue; 260 return EINVAL; 261 } 262 263 mutex_enter(&zev_mutex); 264 265 /* find free minor number.*/ 266 /* if this were a frequent operation we'd have a free-minor list */ 267 for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) { 268 tmp = zev_queues[minor - ZEV_MINOR_MIN]; 269 if (tmp == NULL) 270 break; 271 } 272 if (tmp) { 273 mutex_exit(&zev_mutex); 274 return ENOSPC; 275 } 276 277 if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) { 278 mutex_exit(&zev_mutex); 279 return ENOSPC; 280 } 281 ZEV_MEM_ADD(sizeof(zev_queue_t)); 282 283 q = ddi_get_soft_state(statep, minor); 284 memset(q, 0, sizeof(*q)); 285 strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN); 286 q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0'; 287 q->zq_max_queue_len = max_queue_len; 288 q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN; 289 q->zq_flags = flags; 290 q->zq_refcnt = 1; 291 q->zq_dip = dip; 292 q->zq_minor_number = minor; 293 cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL); 294 295 /* insert into queue list */ 296 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 297 /* if this were a frequent operation we'd have a name tree */ 298 if (zev_queues[i - ZEV_MINOR_MIN] == NULL) 299 continue; 300 if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) { 301 name_exists = 1; 302 break; 303 } 304 } 305 if (name_exists) { 306 ddi_soft_state_free(statep, minor); 307 ZEV_MEM_SUB(sizeof(zev_queue_t)); 308 mutex_exit(&zev_mutex); 309 return EEXIST; 310 } 311 zev_queues[minor - ZEV_MINOR_MIN] = q; 312 zev_queue_cnt++; 313 314 /* calculate current queue len and find head and tail */ 315 if (!(q->zq_flags & ZEV_FL_INITIALLY_EMPTY)) { 316 q->zq_oldest = zev_queue_tail; 317 msg = zev_queue_tail; 318 while ((msg) && (q->zq_queue_len < q->zq_max_queue_len)) { 319 q->zq_queue_len += msg->size; 320 q->zq_queue_messages++; 321 q->zq_oldest = msg; 322 msg = msg->prev; 323 } 324 } 325 326 zev_update_blockflag(); 327 328 mutex_exit(&zev_mutex); 329 330 if (ddi_create_minor_node(dip, name, 331 S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 332 mutex_enter(&zev_mutex); 333 zev_queues[minor - ZEV_MINOR_MIN] = NULL; 334 zev_queue_cnt--; 335 ddi_soft_state_free(statep, minor); 336 ZEV_MEM_SUB(sizeof(zev_queue_t)); 337 zev_update_blockflag(); 338 mutex_exit(&zev_mutex); 339 return EFAULT; 340 } 341 342 *queue = q; 343 return 0; 344 } 345 346 /* 347 * poll() wakeup thread. Used to check periodically whether we have 348 * bytes left in the queue that have not yet been made into a 349 * pollwakeup() call. This is meant to insure a maximum waiting 350 * time until an event is presented as a poll wakeup, while at 351 * the same time not making every single event into a poll wakeup 352 * of it's own. 353 */ 354 355 static void 356 zev_poll_wakeup(boolean_t flush_all) 357 { 358 zev_queue_t *q; 359 int i; 360 361 /* 362 * This loop works with hold() and release() because 363 * pollwakeup() requires us to release our locks before calling it. 364 * 365 * from pollwakeup(9F): 366 * 367 * "Driver defined locks should not be held across calls 368 * to this function." 369 */ 370 371 /* wake up threads for each individual queue */ 372 mutex_enter(&zev_mutex); 373 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 374 q = zev_queues[i - ZEV_MINOR_MIN]; 375 if (q == NULL) 376 continue; 377 if (!q->zq_busy) 378 continue; 379 if (!q->zq_queue_len) 380 continue; 381 if ((flush_all) || 382 (q->zq_queue_len > q->zq_wakeup_threshold)) { 383 zev_queue_hold(q); 384 mutex_exit(&zev_mutex); 385 pollwakeup(&q->zq_pollhead, POLLIN); 386 mutex_enter(&zev_mutex); 387 zev_queue_release(q); 388 } 389 } 390 mutex_exit(&zev_mutex); 391 } 392 393 static void 394 zev_poll_wakeup_thread_main(void) 395 { 396 while (zev_wakeup_thread_run) { 397 delay(drv_usectohz(100 * 1000)); /* sleep 100ms */ 398 399 zev_poll_wakeup(B_TRUE); 400 } 401 thread_exit(); 402 } 403 404 static int 405 zev_ioc_mute_pool(char *poolname) 406 { 407 zev_pool_list_entry_t *pe; 408 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 409 /* pool already muted? */ 410 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 411 if (!strcmp(pe->name, poolname)) { 412 rw_exit(&zev_pool_list_rwlock); 413 return EEXIST; 414 } 415 } 416 pe = zev_zalloc(sizeof(*pe)); 417 if (!pe) { 418 rw_exit(&zev_pool_list_rwlock); 419 return ENOMEM; 420 } 421 (void) strncpy(pe->name, poolname, sizeof(pe->name)); 422 pe->next = zev_muted_pools_head; 423 zev_muted_pools_head = pe; 424 rw_exit(&zev_pool_list_rwlock); 425 return (0); 426 } 427 428 static int 429 zev_ioc_unmute_pool(char *poolname) 430 { 431 zev_pool_list_entry_t *pe, *peprev; 432 433 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 434 /* pool muted? */ 435 peprev = NULL; 436 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 437 if (!strcmp(pe->name, poolname)) 438 break; 439 peprev = pe; 440 } 441 if (pe) { 442 rw_exit(&zev_pool_list_rwlock); 443 return ENOENT; 444 } 445 446 if (peprev != NULL) { 447 peprev->next = pe->next; 448 } else { 449 zev_muted_pools_head = pe->next; 450 } 451 zev_free(pe, sizeof(*pe)); 452 rw_exit(&zev_pool_list_rwlock); 453 return (0); 454 } 455 456 int 457 zev_skip_pool(objset_t *os) 458 { 459 zev_pool_list_entry_t *pe; 460 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 461 rw_enter(&zev_pool_list_rwlock, RW_READER); 462 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 463 if (!strcmp(pe->name, dp->dp_spa->spa_name)) { 464 rw_exit(&zev_pool_list_rwlock); 465 return 1; 466 } 467 } 468 rw_exit(&zev_pool_list_rwlock); 469 return 0; 470 } 471 472 int 473 zev_skip_fs(zfsvfs_t *fs) 474 { 475 dsl_dir_t *d = fs->z_os->os_dsl_dataset->ds_dir; 476 dsl_dir_t *prev = NULL; 477 478 while (d && d != prev) { 479 if (strstr(d->dd_myname, "_root")) 480 return 0; 481 prev = d; 482 d = d->dd_parent; 483 } 484 return 1; 485 } 486 487 static void 488 zev_update_statistics(int op, zev_statistics_t *stat) 489 { 490 switch (op) { 491 case ZEV_OP_ERROR: 492 stat->zev_cnt_errors++; 493 break; 494 case ZEV_OP_MARK: 495 stat->zev_cnt_marks++; 496 break; 497 case ZEV_OP_ZFS_MOUNT: 498 stat->zev_cnt_zfs_mount++; 499 break; 500 case ZEV_OP_ZFS_UMOUNT: 501 stat->zev_cnt_zfs_umount++; 502 break; 503 case ZEV_OP_ZVOL_WRITE: 504 stat->zev_cnt_zvol_write++; 505 break; 506 case ZEV_OP_ZVOL_TRUNCATE: 507 stat->zev_cnt_zvol_truncate++; 508 break; 509 case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE: 510 stat->zev_cnt_znode_close_after_update++; 511 break; 512 case ZEV_OP_ZNODE_CREATE: 513 stat->zev_cnt_znode_create++; 514 break; 515 case ZEV_OP_ZNODE_REMOVE: 516 stat->zev_cnt_znode_remove++; 517 break; 518 case ZEV_OP_ZNODE_LINK: 519 stat->zev_cnt_znode_link++; 520 break; 521 case ZEV_OP_ZNODE_SYMLINK: 522 stat->zev_cnt_znode_symlink++; 523 break; 524 case ZEV_OP_ZNODE_RENAME: 525 stat->zev_cnt_znode_rename++; 526 break; 527 case ZEV_OP_ZNODE_WRITE: 528 stat->zev_cnt_znode_write++; 529 break; 530 case ZEV_OP_ZNODE_TRUNCATE: 531 stat->zev_cnt_znode_truncate++; 532 break; 533 case ZEV_OP_ZNODE_SETATTR: 534 stat->zev_cnt_znode_setattr++; 535 break; 536 case ZEV_OP_ZNODE_ACL: 537 stat->zev_cnt_znode_acl++; 538 break; 539 } 540 } 541 542 void 543 zev_queue_message(int op, zev_msg_t *msg) 544 { 545 zev_queue_t *q; 546 int wakeup = 0; 547 zev_msg_t *m; 548 int i; 549 550 msg->next = NULL; 551 msg->prev = NULL; 552 msg->read = 0; 553 554 if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) { 555 zev_queue_error(op, "unknown op id encountered: %d", op); 556 zev_free(msg, sizeof(*msg) + msg->size); 557 return; 558 } 559 560 /* 561 * This mutex protects us agains race conditions when several 562 * threads want to queue a message and one or more queues are 563 * full: we release zev_mutex to wait for the queues to become 564 * less-than-full, but we don't know in which order the waiting 565 * threads will be awoken. If it's not the same order in which 566 * they went to sleep we might mark different messages as "newest" 567 * in different queues, and so we might have dupes or even 568 * skip messages. 569 */ 570 mutex_enter(&zev_queue_msg_mutex); 571 572 mutex_enter(&zev_mutex); 573 574 /* 575 * When the module is loaded, the default behavior ist to 576 * put all events into a queue and block if the queue is full. 577 * This is done even before the pseudo device is attached. 578 * This way, no events are lost. 579 * 580 * To discard events entirely the "beaver" queue, 581 * which never discards anything, has to be removed. 582 */ 583 584 if (zev_queue_cnt == 0) { 585 mutex_exit(&zev_mutex); 586 mutex_exit(&zev_queue_msg_mutex); 587 return; 588 } 589 590 /* put message into global queue */ 591 msg->seq = zev_msg_sequence_number++; 592 593 /* do we need to make room? */ 594 while (zev_statistics.zev_max_queue_len && 595 zev_statistics.zev_queue_len > zev_statistics.zev_max_queue_len) { 596 597 if (zev_have_blocking_queues) { 598 /* queue full. block until it's been shrunk. */ 599 cv_wait(&zev_condvar, &zev_mutex); 600 continue; 601 } 602 603 /* discard events until this message fits into all queues */ 604 605 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 606 q = zev_queues[i - ZEV_MINOR_MIN]; 607 if (!q) 608 continue; 609 /* discard msgs until queue is small enough */ 610 while (q->zq_queue_len && 611 q->zq_queue_len > q->zq_max_queue_len) { 612 m = q->zq_oldest; 613 if (m == NULL) 614 break; 615 q->zq_events_discarded++; 616 q->zq_bytes_discarded += m->size; 617 q->zq_oldest = m->next; 618 q->zq_queue_len -= m->size; 619 q->zq_queue_messages--; 620 } 621 } 622 623 zev_queue_trim(); 624 ASSERT(zev_statistics.zev_queue_len == 0 || 625 zev_statistics.zev_queue_len <= 626 zev_statistics.zev_max_queue_len); 627 } 628 629 if (zev_queue_tail == NULL) { 630 zev_queue_head = zev_queue_tail = msg; 631 } else { 632 zev_queue_tail->next = msg; 633 msg->prev = zev_queue_tail; 634 zev_queue_tail = msg; 635 } 636 zev_queue_len++; 637 zev_statistics.zev_cnt_total_events++; 638 zev_statistics.zev_queue_len += msg->size; 639 640 /* update per-device queues */ 641 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 642 q = zev_queues[i - ZEV_MINOR_MIN]; 643 if (!q) 644 continue; 645 646 zev_queue_hold(q); 647 648 /* make sure queue has enough room */ 649 while (q->zq_max_queue_len && 650 q->zq_queue_len > q->zq_max_queue_len) { 651 652 if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) { 653 /* block until queue has been shrunk. */ 654 cv_wait(&zev_condvar, &zev_mutex); 655 } else { 656 /* discard msgs until queue is small enough */ 657 while (q->zq_queue_len > q->zq_max_queue_len) { 658 m = q->zq_oldest; 659 if (m == NULL) 660 break; 661 q->zq_events_discarded++; 662 q->zq_bytes_discarded += m->size; 663 q->zq_oldest = m->next; 664 q->zq_queue_len -= m->size; 665 q->zq_queue_messages--; 666 } 667 } 668 } 669 670 /* register new message at the end of the queue */ 671 q->zq_queue_len += msg->size; 672 q->zq_queue_messages++; 673 q->zq_bytes_total += msg->size; 674 q->zq_events_total++; 675 if (q->zq_oldest == NULL) 676 q->zq_oldest = msg; 677 678 zev_update_statistics(op, &q->zq_statistics); 679 680 if (q->zq_queue_len > q->zq_wakeup_threshold) 681 wakeup = 1; 682 if (q->zq_queue_len == msg->size) /* queue was empty */ 683 cv_broadcast(&q->zq_condvar); 684 685 zev_queue_release(q); 686 } 687 688 zev_queue_trim(); 689 690 zev_update_statistics(op, &zev_statistics); 691 mutex_exit(&zev_mutex); 692 mutex_exit(&zev_queue_msg_mutex); 693 694 /* one or more queues need a pollwakeup() */ 695 if (op == ZEV_OP_MARK) { 696 zev_poll_wakeup(B_TRUE); 697 } else if (wakeup) { 698 zev_poll_wakeup(B_FALSE); 699 } 700 701 return; 702 } 703 704 void 705 zev_queue_error(int op, char *fmt, ...) 706 { 707 char buf[ZEV_MAX_MESSAGE_LEN]; 708 va_list ap; 709 int len; 710 zev_msg_t *msg = NULL; 711 zev_error_t *rec; 712 int msg_size; 713 714 va_start(ap, fmt); 715 len = vsnprintf(buf, sizeof(buf), fmt, ap); 716 va_end(ap); 717 if (len >= sizeof(buf)) { 718 cmn_err(CE_WARN, "zev: can't report error - " 719 "dropping event entirely."); 720 return; 721 } 722 723 msg_size = sizeof(*rec) + len + 1; 724 msg = zev_alloc(sizeof(*msg) + msg_size); 725 msg->size = msg_size; 726 rec = (zev_error_t *)(msg + 1); 727 rec->record_len = msg_size; 728 rec->op = ZEV_OP_ERROR; 729 rec->op_time = ddi_get_time(); 730 rec->guid = 0; 731 rec->failed_op = op; 732 rec->errstr_len = len; 733 (void) memcpy(ZEV_ERRSTR(rec), buf, len + 1); 734 735 zev_queue_message(ZEV_OP_ERROR, msg); 736 return; 737 } 738 739 static int 740 zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name) 741 { 742 char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1]; 743 zev_queue_t *q; 744 int i; 745 746 *out = NULL; 747 748 if (name->zev_namelen == 0) { 749 if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR) 750 return EINVAL; 751 zev_queue_hold(req_q); 752 *out = req_q; 753 return 0; 754 } 755 756 if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 757 return EINVAL; 758 strncpy(namebuf, name->zev_name, name->zev_namelen); 759 namebuf[name->zev_namelen] = '\0'; 760 761 mutex_enter(&zev_mutex); 762 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 763 q = zev_queues[i - ZEV_MINOR_MIN]; 764 if (!q) 765 continue; 766 if (!strcmp(q->zq_name, namebuf)) { 767 zev_queue_hold(q); 768 mutex_exit(&zev_mutex); 769 *out = q; 770 return 0; 771 } 772 } 773 mutex_exit(&zev_mutex); 774 return ENOENT; 775 } 776 777 static int 778 zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode) 779 { 780 zev_ioctl_get_queue_statistics_t gs; 781 zev_queue_t *q; 782 int ret; 783 784 if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0) 785 return EFAULT; 786 787 ret = zev_find_queue(&q, req_q, &gs.zev_queue_name); 788 if (ret) 789 return ret; 790 791 /* ddi_copyout() can take a long time. Better make 792 a copy to be able to release the mutex faster. */ 793 mutex_enter(&zev_mutex); 794 memcpy(&gs.zev_statistics, &q->zq_statistics,sizeof(gs.zev_statistics)); 795 gs.zev_statistics.zev_queue_len = q->zq_queue_len; 796 gs.zev_statistics.zev_bytes_read = q->zq_bytes_read; 797 gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded; 798 gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len; 799 gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded; 800 gs.zev_statistics.zev_cnt_total_events = q->zq_events_total; 801 zev_queue_release(q); 802 mutex_exit(&zev_mutex); 803 804 if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) 805 return EFAULT; 806 return 0; 807 } 808 809 static int 810 zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 811 { 812 zev_ioctl_set_queue_properties_t qp; 813 zev_queue_t *q; 814 uint64_t old_max; 815 uint64_t old_flags; 816 int ret; 817 818 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 819 return EFAULT; 820 if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN) 821 return EINVAL; 822 if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN) 823 return EINVAL; 824 825 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 826 if (ret) 827 return ret; 828 829 mutex_enter(&zev_mutex); 830 831 /* 832 * Note: if the PERSISTENT flag is cleared, and the queue is not busy, 833 * the queue should be removed by zev_queue_release() in zev_ioctl(). 834 */ 835 old_flags = qp.zev_flags; 836 q->zq_flags = qp.zev_flags; 837 if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) && 838 (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) { 839 /* queue is no longer blocking - wake blocked threads */ 840 cv_broadcast(&zev_condvar); 841 } 842 843 zev_update_blockflag(); 844 845 old_max = q->zq_max_queue_len; 846 q->zq_max_queue_len = qp.zev_max_queue_len; 847 if (q->zq_max_queue_len < old_max) 848 zev_queue_trim(); 849 if (q->zq_max_queue_len > old_max) 850 cv_broadcast(&zev_condvar); /* threads may be waiting */ 851 852 if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) && 853 (qp.zev_poll_wakeup_threshold <= q->zq_queue_len)) 854 pollwakeup(&q->zq_pollhead, POLLIN); 855 q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold; 856 857 zev_queue_release(q); 858 mutex_exit(&zev_mutex); 859 return 0; 860 } 861 862 static int 863 zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 864 { 865 zev_ioctl_get_queue_properties_t qp; 866 zev_queue_t *q; 867 int ret; 868 869 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 870 return EFAULT; 871 872 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 873 if (ret) 874 return ret; 875 876 mutex_enter(&zev_mutex); 877 qp.zev_max_queue_len = q->zq_max_queue_len; 878 qp.zev_flags = q->zq_flags; 879 qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold; 880 zev_queue_release(q); 881 mutex_exit(&zev_mutex); 882 883 if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0) 884 return EFAULT; 885 return 0; 886 } 887 888 static int 889 zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode) 890 { 891 zev_ioctl_add_queue_t aq; 892 zev_queue_t *new_q; 893 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 894 895 if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0) 896 return EFAULT; 897 898 if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 899 return EINVAL; 900 strncpy(name, aq.zev_name, aq.zev_namelen); 901 name[aq.zev_namelen] = '\0'; 902 903 return zev_queue_new(&new_q, req_q->zq_dip, name, 904 aq.zev_max_queue_len, aq.zev_flags); 905 } 906 907 static int 908 zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode) 909 { 910 zev_ioctl_remove_queue_t rq; 911 zev_queue_t *q; 912 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 913 int found = 0; 914 int i; 915 916 if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0) 917 return EFAULT; 918 919 if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 920 return EINVAL; 921 strncpy(name, rq.zev_queue_name.zev_name, 922 rq.zev_queue_name.zev_namelen); 923 name[rq.zev_queue_name.zev_namelen] = '\0'; 924 925 mutex_enter(&zev_mutex); 926 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 927 q = zev_queues[i - ZEV_MINOR_MIN]; 928 if (!q) 929 continue; 930 if (!strcmp(q->zq_name, name)) { 931 found = 1; 932 break; 933 } 934 } 935 if (!found) { 936 mutex_exit(&zev_mutex); 937 return ENOENT; 938 } 939 940 if (q->zq_busy) { 941 mutex_exit(&zev_mutex); 942 return EBUSY; 943 } 944 /* 945 * clear flags, so that persistent queues are removed aswell 946 * and the queue becomes non-blocking. 947 */ 948 q->zq_flags = 0; 949 if (q->zq_to_be_removed == B_FALSE) { 950 q->zq_to_be_removed = B_TRUE; 951 zev_queue_release(q); 952 } 953 /* some threads might be waiting for this queue to become writable */ 954 cv_broadcast(&zev_condvar); 955 956 mutex_exit(&zev_mutex); 957 return 0; 958 } 959 960 static int 961 zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode) 962 { 963 zev_ioctl_debug_info_t di; 964 uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0); 965 uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0); 966 967 zev_chksum_stats(&di.zev_chksum_cache_size, 968 &di.zev_chksum_cache_hits, 969 &di.zev_chksum_cache_misses); 970 di.zev_memory_allocated = mem_allocated - mem_freed; 971 if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0) 972 return EFAULT; 973 return 0; 974 } 975 976 static int 977 zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode) 978 { 979 zev_ioctl_get_queue_list_t gql; 980 zev_queue_t *q; 981 int i = 0; 982 int count = 0; 983 984 memset(&gql, 0, sizeof(gql)); 985 986 mutex_enter(&zev_mutex); 987 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 988 q = zev_queues[i - ZEV_MINOR_MIN]; 989 if (!q) 990 continue; 991 strncpy(gql.zev_queue_name[count].zev_name, 992 q->zq_name, ZEV_MAX_QUEUE_NAME_LEN); 993 gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name); 994 count++; 995 } 996 gql.zev_n_queues = count; 997 mutex_exit(&zev_mutex); 998 999 if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0) 1000 return EFAULT; 1001 return 0; 1002 } 1003 1004 static int 1005 zev_ioc_set_max_queue_len(zev_queue_t *req_q, intptr_t arg, int mode) 1006 { 1007 uint64_t len; 1008 int i; 1009 zev_queue_t *q; 1010 1011 if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) { 1012 return EFAULT; 1013 } 1014 if (len > ZEV_MAX_QUEUE_LEN) { 1015 return EINVAL; 1016 } 1017 mutex_enter(&zev_mutex); 1018 zev_statistics.zev_max_queue_len = len; 1019 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 1020 q = zev_queues[i - ZEV_MINOR_MIN]; 1021 if (!q) 1022 continue; 1023 if (q->zq_max_queue_len <= 1024 zev_statistics.zev_max_queue_len) 1025 continue; 1026 q->zq_max_queue_len = zev_statistics.zev_max_queue_len; 1027 } 1028 cv_broadcast(&zev_condvar); 1029 mutex_exit(&zev_mutex); 1030 return 0; 1031 } 1032 1033 static int 1034 zev_ioc_get_zev_version(intptr_t arg, int mode) 1035 { 1036 zev_ioctl_get_zev_version vi; 1037 vi.zev_major_version = ZEV_MAJOR_VERSION; 1038 vi.zev_minor_version = ZEV_MINOR_VERSION; 1039 if (ddi_copyout(&vi, (void *)arg, sizeof(vi), mode) != 0) 1040 return EFAULT; 1041 return 0; 1042 } 1043 1044 /* ARGSUSED */ 1045 static int 1046 zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1047 { 1048 zev_statistics_t zs; 1049 zev_ioctl_poolarg_t pa; 1050 zev_ioctl_mark_t mark; 1051 zev_mark_t *rec; 1052 int msg_size; 1053 zev_msg_t *msg; 1054 uint64_t mark_id; 1055 minor_t minor; 1056 zev_queue_t *req_q; 1057 int ret = 0; 1058 1059 minor = getminor(dev); 1060 mutex_enter(&zev_mutex); 1061 if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) { 1062 mutex_exit(&zev_mutex); 1063 return (ENXIO); 1064 } 1065 zev_queue_hold(req_q); 1066 mutex_exit(&zev_mutex); 1067 /* 1068 * all structures passed between kernel and userspace 1069 * are now compatible between 64 and 32 bit. Model 1070 * conversion can be ignored. 1071 */ 1072 switch (cmd) { 1073 case ZEV_IOC_GET_GLOBAL_STATISTICS: 1074 /* ddi_copyout() can take a long time. Better make 1075 a copy to be able to release the mutex faster. */ 1076 mutex_enter(&zev_mutex); 1077 (void) memcpy(&zs, &zev_statistics, sizeof(zs)); 1078 mutex_exit(&zev_mutex); 1079 if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0) 1080 ret = EFAULT; 1081 break; 1082 case ZEV_IOC_GET_QUEUE_STATISTICS: 1083 ret = zev_ioc_get_queue_statistics(req_q, arg, mode); 1084 break; 1085 case ZEV_IOC_MUTE_POOL: 1086 case ZEV_IOC_UNMUTE_POOL: 1087 if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) { 1088 ret = EFAULT; 1089 break; 1090 } 1091 if (pa.zev_poolname_len >=MAXPATHLEN) { 1092 ret = EINVAL; 1093 break; 1094 } 1095 pa.zev_poolname[pa.zev_poolname_len] = '\0'; 1096 if (cmd == ZEV_IOC_MUTE_POOL) { 1097 ret = zev_ioc_mute_pool(pa.zev_poolname); 1098 } else { 1099 ret = zev_ioc_unmute_pool(pa.zev_poolname); 1100 } 1101 break; 1102 case ZEV_IOC_SET_MAX_QUEUE_LEN: 1103 ret = zev_ioc_set_max_queue_len(req_q, arg, mode); 1104 break; 1105 case ZEV_IOC_GET_QUEUE_PROPERTIES: 1106 ret = zev_ioc_get_queue_properties(req_q, arg, mode); 1107 break; 1108 case ZEV_IOC_SET_QUEUE_PROPERTIES: 1109 ret = zev_ioc_set_queue_properties(req_q, arg, mode); 1110 break; 1111 case ZEV_IOC_MARK: 1112 if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) { 1113 ret = EFAULT; 1114 break; 1115 } 1116 /* prepare message */ 1117 msg_size = sizeof(*rec) + mark.zev_payload_len + 1; 1118 msg = zev_alloc(sizeof(*msg) + msg_size); 1119 msg->size = msg_size; 1120 rec = (zev_mark_t *)(msg + 1); 1121 rec->record_len = msg_size; 1122 rec->op = ZEV_OP_MARK; 1123 rec->op_time = ddi_get_time(); 1124 rec->guid = mark.zev_guid; 1125 rec->payload_len = mark.zev_payload_len; 1126 /* get payload */ 1127 if (ddi_copyin(((char *)arg) + sizeof(mark), 1128 ZEV_PAYLOAD(rec), 1129 mark.zev_payload_len, mode) != 0) { 1130 zev_free(msg, msg_size); 1131 ret = EFAULT; 1132 break; 1133 } 1134 *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0'; 1135 /* get mark id and queue message */ 1136 mutex_enter(&zev_mark_id_mutex); 1137 mark_id = zev_mark_id++; 1138 mutex_exit(&zev_mark_id_mutex); 1139 rec->mark_id = mark_id; 1140 zev_queue_message(ZEV_OP_MARK, msg); 1141 /* report mark id to userland, ignore errors */ 1142 mark.zev_mark_id = mark_id; 1143 ddi_copyout(&mark, (void *)arg, sizeof(mark), mode); 1144 break; 1145 case ZEV_IOC_ADD_QUEUE: 1146 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1147 ret = EACCES; 1148 break; 1149 } 1150 ret = zev_ioc_add_queue(req_q, arg, mode); 1151 break; 1152 case ZEV_IOC_REMOVE_QUEUE: 1153 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1154 ret = EACCES; 1155 break; 1156 } 1157 ret = zev_ioc_remove_queue(req_q, arg, mode); 1158 break; 1159 case ZEV_IOC_GET_DEBUG_INFO: 1160 ret = zev_ioc_get_debug_info(req_q, arg, mode); 1161 break; 1162 case ZEV_IOC_GET_QUEUE_LIST: 1163 ret = zev_ioc_get_queue_list(req_q, arg, mode); 1164 break; 1165 case ZEV_IOC_GET_FILE_SIGNATURES: 1166 ret = zev_ioc_get_signatures(arg, mode); 1167 break; 1168 case ZEV_IOC_GET_ZEV_VERSION: 1169 ret = zev_ioc_get_zev_version(arg, mode); 1170 break; 1171 default: 1172 /* generic "ioctl unknown" error */ 1173 ret = ENOTTY; 1174 } 1175 1176 mutex_enter(&zev_mutex); 1177 zev_queue_release(req_q); 1178 mutex_exit(&zev_mutex); 1179 if (ret) 1180 SET_ERROR(ret); 1181 return (ret); 1182 } 1183 1184 static int 1185 zev_chpoll(dev_t dev, short events, int anyyet, 1186 short *reventsp, struct pollhead **phpp) 1187 { 1188 int minor; 1189 short revent = 0; 1190 zev_queue_t *q; 1191 1192 /* use minor-specific queue context and it's pollhead */ 1193 minor = getminor(dev); 1194 if (minor == ZEV_CONTROL_DEVICE_MINOR) 1195 return (EINVAL); 1196 mutex_enter(&zev_mutex); 1197 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1198 mutex_exit(&zev_mutex); 1199 return (ENXIO); 1200 } 1201 revent = 0; 1202 if ((events & POLLIN)) { 1203 if (q->zq_oldest) 1204 revent |= POLLIN; 1205 } 1206 if (revent == 0) { 1207 if (!anyyet) { 1208 *phpp = &q->zq_pollhead; 1209 } 1210 } 1211 *reventsp = revent; 1212 mutex_exit(&zev_mutex); 1213 return (0); 1214 } 1215 1216 /* ARGSUSED */ 1217 static int 1218 zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p) 1219 { 1220 minor_t minor; 1221 offset_t off; 1222 int ret = 0; 1223 zev_msg_t *msg; 1224 char *data; 1225 zev_queue_t *q; 1226 1227 minor = getminor(dev); 1228 if (minor == ZEV_CONTROL_DEVICE_MINOR) 1229 return (EINVAL); 1230 1231 mutex_enter(&zev_mutex); 1232 q = ddi_get_soft_state(statep, minor); 1233 if (q == NULL) { 1234 mutex_exit(&zev_mutex); 1235 return (ENXIO); 1236 } 1237 off = uio_p->uio_loffset; 1238 msg = q->zq_oldest; 1239 while (msg == NULL) { 1240 if (!ddi_can_receive_sig()) { 1241 /* 1242 * read() shouldn't block because this thread 1243 * can't receive signals. (e.g., it might be 1244 * torn down by exit() right now.) 1245 */ 1246 mutex_exit(&zev_mutex); 1247 return 0; 1248 } 1249 if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) { 1250 /* signal received. */ 1251 mutex_exit(&zev_mutex); 1252 return EINTR; 1253 } 1254 msg = q->zq_oldest; 1255 } 1256 if (msg->size > uio_p->uio_resid) { 1257 mutex_exit(&zev_mutex); 1258 return E2BIG; 1259 } 1260 while (msg && uio_p->uio_resid >= msg->size) { 1261 data = (char *)(msg + 1); 1262 ret = uiomove(data, msg->size, UIO_READ, uio_p); 1263 if (ret != 0) { 1264 mutex_exit(&zev_mutex); 1265 cmn_err(CE_WARN, "zev: uiomove failed; messages lost"); 1266 uio_p->uio_loffset = off; 1267 return (ret); 1268 } 1269 q->zq_oldest = msg->next; 1270 q->zq_bytes_read += msg->size; 1271 q->zq_queue_len -= msg->size; 1272 q->zq_queue_messages--; 1273 msg->read++; 1274 msg = q->zq_oldest; 1275 } 1276 zev_queue_trim(); 1277 cv_broadcast(&zev_condvar); 1278 mutex_exit(&zev_mutex); 1279 uio_p->uio_loffset = off; 1280 return 0; 1281 } 1282 1283 /* ARGSUSED */ 1284 static int 1285 zev_close(dev_t dev, int flag, int otyp, cred_t *crepd) 1286 { 1287 zev_queue_t *q; 1288 int minor; 1289 1290 minor = getminor(dev); 1291 if (otyp != OTYP_CHR) 1292 return (EINVAL); 1293 mutex_enter(&zev_mutex); 1294 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1295 mutex_exit(&zev_mutex); 1296 return (ENXIO); 1297 } 1298 if (q->zq_busy != B_TRUE) { 1299 mutex_exit(&zev_mutex); 1300 return (EINVAL); 1301 } 1302 q->zq_busy = B_FALSE; 1303 if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0) 1304 zev_queue_release(q); 1305 mutex_exit(&zev_mutex); 1306 return (0); 1307 } 1308 1309 /* ARGSUSED */ 1310 static int 1311 zev_open(dev_t *devp, int flag, int otyp, cred_t *credp) 1312 { 1313 zev_queue_t *q; 1314 minor_t minor; 1315 1316 minor = getminor(*devp); 1317 if (otyp != OTYP_CHR) 1318 return (EINVAL); 1319 if (drv_priv(credp) != 0) 1320 return (EPERM); 1321 mutex_enter(&zev_mutex); 1322 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1323 mutex_exit(&zev_mutex); 1324 return (ENXIO); 1325 } 1326 if (minor == ZEV_CONTROL_DEVICE_MINOR) { 1327 /* control device may be used in parallel */ 1328 q->zq_busy = B_TRUE; 1329 mutex_exit(&zev_mutex); 1330 return 0; 1331 } 1332 if (q->zq_busy == B_TRUE) { 1333 mutex_exit(&zev_mutex); 1334 return (EBUSY); 1335 } 1336 q->zq_busy = B_TRUE; /* can only be opened exclusively */ 1337 mutex_exit(&zev_mutex); 1338 return (0); 1339 } 1340 1341 static struct cb_ops zev_cb_ops = { 1342 zev_open, /* open */ 1343 zev_close, /* close */ 1344 nodev, /* strategy */ 1345 nodev, /* print */ 1346 nodev, /* dump */ 1347 zev_read, /* read */ 1348 nodev, /* write */ 1349 zev_ioctl, /* ioctl */ 1350 nodev, /* devmap */ 1351 nodev, /* mmap */ 1352 nodev, /* segmap */ 1353 zev_chpoll, /* chpoll */ 1354 ddi_prop_op, /* prop_op */ 1355 NULL, /* streamtab */ 1356 D_MP | D_64BIT, /* cb_flag */ 1357 CB_REV, /* cb_rev */ 1358 nodev, /* aread */ 1359 nodev, /* awrite */ 1360 }; 1361 1362 static void 1363 zev_free_instance(dev_info_t *dip) 1364 { 1365 int instance; 1366 zev_queue_t *q; 1367 int i; 1368 1369 instance = ddi_get_instance(dip); 1370 if (instance != 0) { 1371 cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)", 1372 instance); 1373 return; 1374 } 1375 1376 ddi_remove_minor_node(dip, NULL); 1377 1378 /* stop pollwakeup thread */ 1379 zev_wakeup_thread_run = 0; 1380 if (zev_poll_wakeup_thread != NULL) { 1381 thread_join(zev_poll_wakeup_thread->t_did); 1382 zev_poll_wakeup_thread = NULL; 1383 } 1384 1385 mutex_enter(&zev_mutex); 1386 1387 /* remove "ctrl" dummy queue */ 1388 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1389 if (q) { 1390 ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR); 1391 ZEV_MEM_SUB(sizeof(zev_queue_t)); 1392 } 1393 1394 /* remove all other queues */ 1395 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 1396 q = zev_queues[i- ZEV_MINOR_MIN]; 1397 if (!q) 1398 continue; 1399 ASSERT(q->zq_refcnt == 1); 1400 zev_queue_release(q); 1401 } 1402 zev_queue_trim(); 1403 bzero(&zev_queues, sizeof(zev_queues)); 1404 1405 mutex_exit(&zev_mutex); 1406 1407 } 1408 1409 static int 1410 zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1411 { 1412 int instance; 1413 zev_queue_t *q; 1414 1415 /* called once per instance with DDI_DETACH, 1416 may be called to suspend */ 1417 switch (cmd) { 1418 case DDI_DETACH: 1419 /* instance busy? */ 1420 instance = ddi_get_instance(dip); 1421 if (instance != 0) { /* hardcoded in zev.conf */ 1422 /* this module only supports one instance. */ 1423 return (DDI_FAILURE); 1424 } 1425 1426 mutex_enter(&zev_mutex); 1427 if (!zev_attached) { 1428 mutex_exit(&zev_mutex); 1429 return (DDI_FAILURE); 1430 } 1431 1432 /* check "ctrl" queue to see if t is busy */ 1433 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1434 if (q == NULL) { 1435 mutex_exit(&zev_mutex); 1436 return (DDI_FAILURE); 1437 } 1438 if (q->zq_busy) { 1439 mutex_exit(&zev_mutex); 1440 return (DDI_FAILURE); 1441 } 1442 /* are there any queues? */ 1443 if (zev_queue_cnt > 0) { 1444 mutex_exit(&zev_mutex); 1445 return (DDI_FAILURE); 1446 } 1447 1448 zev_attached = B_FALSE; 1449 mutex_exit(&zev_mutex); 1450 1451 /* switch ZFS event callbacks back to default */ 1452 rw_enter(&rz_zev_rwlock, RW_WRITER); 1453 rz_zev_callbacks = rz_zev_default_callbacks; 1454 rz_zev_set_active(B_FALSE); 1455 rw_exit(&rz_zev_rwlock); 1456 1457 /* no thread is inside of the callbacks anymore. */ 1458 1459 /* free resources allocated for this instance */ 1460 zev_free_instance(dip); 1461 zev_chksum_fini(); 1462 #if 0 1463 cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64, 1464 zev_memory_allocated - zev_memory_freed); 1465 #endif 1466 return (DDI_SUCCESS); 1467 case DDI_SUSPEND: 1468 /* kernel must not suspend zev devices while ZFS is running */ 1469 return (DDI_FAILURE); 1470 default: 1471 return (DDI_FAILURE); 1472 } 1473 } 1474 1475 static int 1476 zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1477 { 1478 /* called once per instance with DDI_ATTACH, 1479 may be called to resume */ 1480 int instance; 1481 int error; 1482 zev_queue_t *q; 1483 switch (cmd) { 1484 case DDI_ATTACH: 1485 /* create instance state */ 1486 instance = ddi_get_instance(dip); 1487 if (instance != 0) { /* hardcoded in zev.conf */ 1488 /* this module only supports one instance. */ 1489 return (DDI_FAILURE); 1490 } 1491 1492 mutex_enter(&zev_mutex); 1493 if (zev_attached) { 1494 mutex_exit(&zev_mutex); 1495 return (DDI_FAILURE); 1496 } 1497 if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) != 1498 DDI_SUCCESS) { 1499 mutex_exit(&zev_mutex); 1500 return (DDI_FAILURE); 1501 } 1502 ZEV_MEM_ADD(sizeof(zev_queue_t)); 1503 zev_attached = B_TRUE; 1504 1505 /* init queue list */ 1506 bzero(&zev_queues, sizeof(zev_queues)); 1507 mutex_exit(&zev_mutex); 1508 1509 /* create a dummy queue for management of "ctrl" */ 1510 1511 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1512 q->zq_dip = dip; 1513 q->zq_refcnt = 1; 1514 q->zq_busy = B_FALSE; 1515 q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR; 1516 q->zq_flags = ZEV_FL_PERSISTENT; 1517 strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME); 1518 1519 /* create device node for "ctrl" */ 1520 if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME, 1521 S_IFCHR, ZEV_CONTROL_DEVICE_MINOR, 1522 DDI_PSEUDO, 0) == DDI_FAILURE) { 1523 goto fail; 1524 } 1525 1526 /* note: intentionally not adding ctrl queue to queue list. */ 1527 1528 /* default queue */ 1529 error = zev_queue_new(&q, dip, 1530 ZEV_DEFAULT_QUEUE_NAME, 1531 ZEV_MAX_QUEUE_LEN, 1532 ZEV_FL_BLOCK_WHILE_QUEUE_FULL| 1533 ZEV_FL_PERSISTENT); 1534 if (error) 1535 goto fail; 1536 1537 /* start pollwakeup thread */ 1538 zev_wakeup_thread_run = 1; 1539 zev_poll_wakeup_thread = thread_create(NULL, 0, 1540 zev_poll_wakeup_thread_main, NULL, 0, &p0, 1541 TS_RUN, minclsyspri); 1542 1543 ddi_report_dev(dip); 1544 1545 zev_chksum_init(); 1546 1547 /* switch ZFS event callbacks to zev module callbacks */ 1548 rw_enter(&rz_zev_rwlock, RW_WRITER); 1549 rz_zev_callbacks = &zev_callbacks; 1550 rz_zev_set_active(B_TRUE); 1551 rw_exit(&rz_zev_rwlock); 1552 1553 return (DDI_SUCCESS); 1554 case DDI_RESUME: 1555 /* suspendeding zev devices should never happen */ 1556 return (DDI_SUCCESS); 1557 default: 1558 return (DDI_FAILURE); 1559 } 1560 fail: 1561 cmn_err(CE_WARN, "zev: attach failed"); 1562 zev_free_instance(dip); 1563 mutex_enter(&zev_mutex); 1564 zev_attached = B_FALSE; 1565 mutex_exit(&zev_mutex); 1566 return (DDI_FAILURE); 1567 } 1568 1569 /* ARGSUSED */ 1570 static int 1571 zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp) 1572 { 1573 minor_t minor; 1574 zev_queue_t *q; 1575 1576 /* arg is dev_t */ 1577 minor = getminor((dev_t)arg); 1578 mutex_enter(&zev_mutex); 1579 q = ddi_get_soft_state(statep, minor); 1580 if (q == NULL) { 1581 *resultp = NULL; 1582 mutex_exit(&zev_mutex); 1583 return (DDI_FAILURE); 1584 } 1585 1586 switch (infocmd) { 1587 case DDI_INFO_DEVT2DEVINFO: 1588 *resultp = q->zq_dip; 1589 break; 1590 case DDI_INFO_DEVT2INSTANCE: 1591 *resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip); 1592 break; 1593 default: 1594 mutex_exit(&zev_mutex); 1595 return (DDI_FAILURE); 1596 } 1597 mutex_exit(&zev_mutex); 1598 return (DDI_SUCCESS); 1599 } 1600 1601 static struct dev_ops zev_dev_ops = { 1602 DEVO_REV, /* driver build revision */ 1603 0, /* driver reference count */ 1604 zev_getinfo, /* getinfo */ 1605 nulldev, /* identify (obsolete) */ 1606 nulldev, /* probe (search for devices) */ 1607 zev_attach, /* attach */ 1608 zev_detach, /* detach */ 1609 nodev, /* reset (obsolete, use quiesce) */ 1610 &zev_cb_ops, /* character and block device ops */ 1611 NULL, /* bus driver ops */ 1612 NULL, /* power management, not needed */ 1613 ddi_quiesce_not_needed, /* quiesce */ 1614 }; 1615 1616 static struct modldrv zev_modldrv = { 1617 &mod_driverops, /* all loadable modules use this */ 1618 "ZFS event provider, v" 1619 XSTRING(ZEV_MAJOR_VERSION) "." 1620 XSTRING(ZEV_MINOR_VERSION), 1621 /* driver name and version info */ 1622 &zev_dev_ops /* ops method pointers */ 1623 }; 1624 1625 static struct modlinkage zev_modlinkage = { 1626 MODREV_1, /* fixed value */ 1627 { 1628 &zev_modldrv, /* driver linkage structure */ 1629 NULL /* list terminator */ 1630 } 1631 }; 1632 1633 int 1634 _init(void) 1635 { 1636 int error; 1637 1638 if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0) 1639 return (error); 1640 zev_attached = B_FALSE; 1641 1642 zev_queue_head = NULL; 1643 zev_queue_tail = NULL; 1644 zev_queue_len = 0; 1645 zev_muted_pools_head = NULL; 1646 zev_memory_allocated = 0; 1647 zev_memory_freed = 0; 1648 zev_queue_cnt = 0; 1649 zev_have_blocking_queues = 1; 1650 1651 mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL); 1652 cv_init(&zev_condvar, NULL, CV_DRIVER, NULL); 1653 rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL); 1654 mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL); 1655 zev_mark_id = gethrtime(); 1656 mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL); 1657 zev_msg_sequence_number = gethrtime(); 1658 bzero(&zev_statistics, sizeof(zev_statistics)); 1659 bzero(&zev_pollhead, sizeof(zev_pollhead)); 1660 bzero(&zev_queues, sizeof(zev_queues)); 1661 zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN; 1662 if (zev_ioc_mute_pool("zg0")) { 1663 cmn_err(CE_WARN, "zev: could not init mute list"); 1664 goto FAIL; 1665 } 1666 1667 if ((error = mod_install(&zev_modlinkage)) != 0) { 1668 cmn_err(CE_WARN, "zev: could not install module"); 1669 goto FAIL; 1670 } 1671 1672 return (0); 1673 FAIL: 1674 /* free resources */ 1675 cmn_err(CE_WARN, "zev: _init failed"); 1676 mutex_destroy(&zev_mutex); 1677 ddi_soft_state_fini(&statep); 1678 return (error); 1679 } 1680 1681 int 1682 _info(struct modinfo *modinfop) 1683 { 1684 return (mod_info(&zev_modlinkage, modinfop)); 1685 } 1686 1687 int 1688 _fini(void) 1689 { 1690 int error = 0; 1691 zev_msg_t *msg; 1692 zev_pool_list_entry_t *pe, *npe; 1693 1694 mutex_enter(&zev_mutex); 1695 if (zev_attached == B_TRUE) { 1696 mutex_exit(&zev_mutex); 1697 return (SET_ERROR(EBUSY)); 1698 } 1699 if (zev_queue_cnt != 0) { 1700 /* should never happen */ 1701 mutex_exit(&zev_mutex); 1702 return (SET_ERROR(EBUSY)); 1703 } 1704 1705 /* 1706 * avoid deadlock if event list is full: make sure threads currently 1707 * blocking on the event list can append their event and then release 1708 * rz_zev_rwlock. Since there should be no queues left when we 1709 * reach this point we can simply empty the event list and then 1710 * wake everybody. 1711 */ 1712 while (zev_queue_head) { 1713 msg = zev_queue_head; 1714 zev_queue_head = msg->next; 1715 zev_free(msg, sizeof(*msg) + msg->size); 1716 } 1717 cv_broadcast(&zev_condvar); 1718 mutex_exit(&zev_mutex); 1719 1720 /* switch ZFS event callbacks back to default (again) */ 1721 rw_enter(&rz_zev_rwlock, RW_WRITER); 1722 rz_zev_callbacks = rz_zev_default_callbacks; 1723 rz_zev_set_active(B_FALSE); 1724 rw_exit(&rz_zev_rwlock); 1725 1726 /* no thread is inside of the callbacks anymore. Safe to remove. */ 1727 1728 /* unload module callbacks */ 1729 if ((error = mod_remove(&zev_modlinkage)) != 0) { 1730 cmn_err(CE_WARN, "mod_remove failed: %d", error); 1731 return (error); 1732 } 1733 1734 /* free resources */ 1735 mutex_enter(&zev_mutex); 1736 while (zev_queue_head) { 1737 msg = zev_queue_head; 1738 zev_queue_head = msg->next; 1739 zev_free(msg, sizeof(*msg) + msg->size); 1740 } 1741 mutex_exit(&zev_mutex); 1742 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 1743 pe = zev_muted_pools_head; 1744 while (pe) { 1745 npe = pe; 1746 pe = pe->next; 1747 zev_free(npe, sizeof(*npe)); 1748 } 1749 rw_exit(&zev_pool_list_rwlock); 1750 ddi_soft_state_fini(&statep); 1751 rw_destroy(&zev_pool_list_rwlock); 1752 cv_destroy(&zev_condvar); 1753 mutex_destroy(&zev_mutex); 1754 mutex_destroy(&zev_mark_id_mutex); 1755 mutex_destroy(&zev_queue_msg_mutex); 1756 1757 return (0); 1758 } 1759 1760