1 #include <sys/modctl.h> 2 #include <sys/ddi.h> 3 #include <sys/sunddi.h> 4 #include <sys/conf.h> 5 #include <sys/devops.h> 6 #include <sys/stat.h> 7 #include <sys/fs/zev.h> 8 #include <sys/zev_callbacks.h> 9 #include <sys/zev_checksums.h> 10 #include <sys/zfs_znode.h> 11 #include <sys/time.h> 12 #include <sys/sa.h> 13 #include <sys/zap.h> 14 #include <sys/time.h> 15 #include <sys/fs/dv_node.h> 16 17 #define OFFSETOF(s, m) ((size_t)(&(((s *)0)->m))) 18 19 #define XSTRING(x) STRING(x) 20 #define STRING(x) #x 21 22 #define ZEV_DEFAULT_QUEUE_NAME "beaver" 23 #define ZEV_CONTROL_DEVICE_MINOR 0 24 #define ZEV_MINOR_MIN (ZEV_CONTROL_DEVICE_MINOR + 1) 25 #define ZEV_MINOR_MAX (ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1) 26 27 typedef struct zev_queue { 28 char zq_name[ZEV_MAX_QUEUE_NAME_LEN+1]; 29 minor_t zq_minor_number; 30 dev_info_t *zq_dip; 31 struct pollhead zq_pollhead; 32 uint64_t zq_bytes_read; 33 uint64_t zq_events_read; 34 uint64_t zq_bytes_discarded; 35 uint64_t zq_events_discarded; 36 uint64_t zq_bytes_total; 37 uint64_t zq_events_total; 38 uint64_t zq_wakeup_threshold; 39 uint16_t zq_flags; 40 uint16_t zq_need_wakeup; 41 /* protected by zev_mutex */ 42 int zq_refcnt; 43 uint64_t zq_queue_len; 44 uint64_t zq_queue_messages; 45 uint64_t zq_max_queue_len; 46 zev_msg_t *zq_oldest; 47 boolean_t zq_busy; 48 boolean_t zq_to_be_removed; 49 zev_statistics_t zq_statistics; 50 kcondvar_t zq_condvar; 51 } zev_queue_t; 52 53 static void *statep; 54 struct pollhead zev_pollhead; 55 56 kmutex_t zev_mutex; 57 kcondvar_t zev_condvar; 58 kmutex_t zev_queue_msg_mutex; 59 krwlock_t zev_pool_list_rwlock; 60 static zev_statistics_t zev_statistics; 61 static boolean_t zev_attached; 62 static kmutex_t zev_mark_id_mutex; 63 static uint64_t zev_mark_id = 0; 64 65 static uint64_t zev_msg_sequence_number = 0; 66 static zev_queue_t *zev_queues[ZEV_MAX_QUEUES]; 67 static int zev_queue_cnt = 0; 68 static int zev_have_blocking_queues = 1; 69 70 uint64_t zev_memory_allocated = 0; 71 uint64_t 
zev_memory_freed = 0; 72 73 /* 74 * The longest potential message is from zev_zfs_mount() and 75 * contains the mountpoint, which might be close to MAXPATHLEN bytes long. 76 * 77 * Another candidate is zev_znode_rename_cb() and contains three inode 78 * numbers and two filenames of up to MAXNAMELEN bytes each. 79 */ 80 #define ZEV_MAX_MESSAGE_LEN 4096 81 82 static zev_msg_t *zev_queue_head = NULL; 83 static zev_msg_t *zev_queue_tail = NULL; 84 static uint64_t zev_queue_len = 0; 85 86 87 typedef struct zev_pool_list_entry { 88 struct zev_pool_list_entry *next; 89 char name[MAXPATHLEN]; 90 } zev_pool_list_entry_t; 91 92 static zev_pool_list_entry_t *zev_muted_pools_head = NULL; 93 94 static volatile int zev_wakeup_thread_run = 1; 95 static kthread_t *zev_poll_wakeup_thread = NULL; 96 97 void * 98 zev_alloc(ssize_t sz) 99 { 100 ZEV_MEM_ADD(sz); 101 return kmem_alloc(sz, KM_SLEEP); 102 } 103 104 void * 105 zev_zalloc(ssize_t sz) 106 { 107 ZEV_MEM_ADD(sz); 108 return kmem_zalloc(sz, KM_SLEEP); 109 } 110 111 void 112 zev_free(void *ptr, ssize_t sz) 113 { 114 ZEV_MEM_SUB(sz); \ 115 kmem_free(ptr, sz); 116 } 117 118 /* must be called with zev_mutex held */ 119 static void 120 zev_update_blockflag(void) 121 { 122 zev_queue_t *q; 123 int had_blocking_queues; 124 int i; 125 126 had_blocking_queues = zev_have_blocking_queues; 127 128 /* do we still have blocking queues? 
*/ 129 zev_have_blocking_queues = 0; 130 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 131 q = zev_queues[i - ZEV_MINOR_MIN]; 132 if (!q) 133 continue; 134 if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) { 135 zev_have_blocking_queues = 1; 136 break; 137 } 138 } 139 /* no blocking queues */ 140 if (had_blocking_queues) 141 cv_broadcast(&zev_condvar); 142 } 143 144 int 145 zev_queue_cmp(const void *a, const void *b) 146 { 147 const zev_queue_t *qa = a; 148 const zev_queue_t *qb = b; 149 if (qa->zq_minor_number > qb->zq_minor_number) 150 return 1; 151 if (qa->zq_minor_number < qb->zq_minor_number) 152 return -1; 153 return 0; 154 } 155 156 /* must be called with zev_mutex held */ 157 void 158 zev_queue_trim(void) 159 { 160 zev_msg_t *m; 161 uint64_t oldest_message; 162 zev_queue_t *q; 163 int i; 164 165 if (!zev_queue_tail) 166 return; 167 168 oldest_message = zev_queue_tail->seq + 1; /* does not exist, yet. */ 169 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 170 q = zev_queues[i - ZEV_MINOR_MIN]; 171 if (q == NULL) 172 continue; 173 if (!q->zq_oldest) 174 continue; 175 if (oldest_message > q->zq_oldest->seq) 176 oldest_message = q->zq_oldest->seq; 177 } 178 179 /* remove msgs between oldest_message and zev_queue_head */ 180 while(zev_queue_head && (oldest_message > zev_queue_head->seq)) { 181 m = zev_queue_head; 182 zev_queue_head = m->next; 183 if (zev_queue_head == NULL) { 184 zev_queue_tail = NULL; 185 } else { 186 zev_queue_head->prev = NULL; 187 } 188 if (m->read == 0) { 189 zev_statistics.zev_bytes_discarded += m->size; 190 zev_statistics.zev_cnt_discarded_events++; 191 } 192 zev_statistics.zev_queue_len -= m->size; 193 zev_queue_len--; 194 zev_free(m, sizeof(*m) + m->size); 195 } 196 } 197 198 /* must be called with zev_mutex held */ 199 static void 200 zev_queue_hold(zev_queue_t *q) 201 { 202 q->zq_refcnt++; 203 } 204 205 /* must be called with zev_mutex held */ 206 static void 207 zev_queue_release(zev_queue_t *q) 208 { 209 q->zq_refcnt--; 210 
if (q->zq_refcnt > 0) 211 return; 212 213 ASSERT(q->zq_busy == B_FALSE); 214 215 /* persistent queues will not be removed */ 216 if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0) 217 return; 218 219 /* remove queue from queue list */ 220 zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL; 221 222 /* discard messages that no queue references anymore */ 223 zev_queue_trim(); 224 225 cv_destroy(&q->zq_condvar); 226 ddi_remove_minor_node(q->zq_dip, q->zq_name); 227 devfs_clean(q->zq_dip, NULL, 0); 228 ddi_soft_state_free(statep, q->zq_minor_number); 229 ZEV_MEM_SUB(sizeof(zev_queue_t)); 230 zev_queue_cnt--; 231 zev_update_blockflag(); 232 } 233 234 int 235 zev_queue_new(zev_queue_t **queue, 236 dev_info_t *dip, 237 char *name, 238 uint64_t max_queue_len, 239 uint16_t flags) 240 { 241 zev_queue_t *q; 242 zev_queue_t *tmp; 243 zev_msg_t *msg; 244 int name_exists = 0; 245 minor_t minor; 246 char *p; 247 int i; 248 249 if (max_queue_len > ZEV_MAX_QUEUE_LEN) 250 return EINVAL; 251 if (max_queue_len == 0) 252 max_queue_len = ZEV_MAX_QUEUE_LEN; 253 if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME)) 254 return EINVAL; 255 for (p = name; *p; p++) { 256 if (*p >= 'a' && *p <= 'z') 257 continue; 258 if (*p >= '0' && *p <= '9') 259 continue; 260 if (*p == '.') 261 continue; 262 return EINVAL; 263 } 264 265 mutex_enter(&zev_mutex); 266 267 /* find free minor number.*/ 268 /* if this were a frequent operation we'd have a free-minor list */ 269 for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) { 270 tmp = zev_queues[minor - ZEV_MINOR_MIN]; 271 if (tmp == NULL) 272 break; 273 } 274 if (tmp) { 275 mutex_exit(&zev_mutex); 276 return ENOSPC; 277 } 278 279 if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) { 280 mutex_exit(&zev_mutex); 281 return ENOSPC; 282 } 283 ZEV_MEM_ADD(sizeof(zev_queue_t)); 284 285 q = ddi_get_soft_state(statep, minor); 286 memset(q, 0, sizeof(*q)); 287 strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN); 288 q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0'; 289 
q->zq_max_queue_len = max_queue_len; 290 q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN; 291 q->zq_flags = flags; 292 q->zq_refcnt = 1; 293 q->zq_dip = dip; 294 q->zq_minor_number = minor; 295 cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL); 296 297 /* insert into queue list */ 298 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 299 /* if this were a frequent operation we'd have a name tree */ 300 if (zev_queues[i - ZEV_MINOR_MIN] == NULL) 301 continue; 302 if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) { 303 name_exists = 1; 304 break; 305 } 306 } 307 if (name_exists) { 308 ddi_soft_state_free(statep, minor); 309 ZEV_MEM_SUB(sizeof(zev_queue_t)); 310 mutex_exit(&zev_mutex); 311 return EEXIST; 312 } 313 zev_queues[minor - ZEV_MINOR_MIN] = q; 314 zev_queue_cnt++; 315 316 /* calculate current queue len and find head and tail */ 317 if (!(q->zq_flags & ZEV_FL_INITIALLY_EMPTY)) { 318 q->zq_oldest = zev_queue_tail; 319 msg = zev_queue_tail; 320 while ((msg) && (q->zq_queue_len < q->zq_max_queue_len)) { 321 q->zq_queue_len += msg->size; 322 q->zq_queue_messages++; 323 q->zq_oldest = msg; 324 msg = msg->prev; 325 } 326 } 327 328 zev_update_blockflag(); 329 330 mutex_exit(&zev_mutex); 331 332 if (ddi_create_minor_node(dip, name, 333 S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 334 mutex_enter(&zev_mutex); 335 zev_queues[minor - ZEV_MINOR_MIN] = NULL; 336 zev_queue_cnt--; 337 ddi_soft_state_free(statep, minor); 338 ZEV_MEM_SUB(sizeof(zev_queue_t)); 339 zev_update_blockflag(); 340 mutex_exit(&zev_mutex); 341 return EFAULT; 342 } 343 344 *queue = q; 345 return 0; 346 } 347 348 /* 349 * poll() wakeup thread. Used to check periodically whether we have 350 * bytes left in the queue that have not yet been made into a 351 * pollwakeup() call. This is meant to insure a maximum waiting 352 * time until an event is presented as a poll wakeup, while at 353 * the same time not making every single event into a poll wakeup 354 * of it's own. 
355 */ 356 357 static void 358 zev_poll_wakeup(boolean_t flush_all) 359 { 360 zev_queue_t *q; 361 int i; 362 363 /* 364 * This loop works with hold() and release() because 365 * pollwakeup() requires us to release our locks before calling it. 366 * 367 * from pollwakeup(9F): 368 * 369 * "Driver defined locks should not be held across calls 370 * to this function." 371 */ 372 373 /* wake up threads for each individual queue */ 374 mutex_enter(&zev_mutex); 375 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 376 q = zev_queues[i - ZEV_MINOR_MIN]; 377 if (q == NULL) 378 continue; 379 if (!q->zq_busy) 380 continue; 381 if (!q->zq_queue_len) 382 continue; 383 if ((flush_all) || 384 (q->zq_queue_len > q->zq_wakeup_threshold)) { 385 zev_queue_hold(q); 386 mutex_exit(&zev_mutex); 387 pollwakeup(&q->zq_pollhead, POLLIN); 388 mutex_enter(&zev_mutex); 389 zev_queue_release(q); 390 } 391 } 392 mutex_exit(&zev_mutex); 393 } 394 395 static void 396 zev_poll_wakeup_thread_main(void) 397 { 398 while (zev_wakeup_thread_run) { 399 delay(drv_usectohz(100 * 1000)); /* sleep 100ms */ 400 401 zev_poll_wakeup(B_TRUE); 402 } 403 thread_exit(); 404 } 405 406 static int 407 zev_ioc_mute_pool(char *poolname) 408 { 409 zev_pool_list_entry_t *pe; 410 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 411 /* pool already muted? */ 412 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 413 if (!strcmp(pe->name, poolname)) { 414 rw_exit(&zev_pool_list_rwlock); 415 return EEXIST; 416 } 417 } 418 pe = zev_zalloc(sizeof(*pe)); 419 if (!pe) { 420 rw_exit(&zev_pool_list_rwlock); 421 return ENOMEM; 422 } 423 (void) strncpy(pe->name, poolname, sizeof(pe->name)); 424 pe->next = zev_muted_pools_head; 425 zev_muted_pools_head = pe; 426 rw_exit(&zev_pool_list_rwlock); 427 return (0); 428 } 429 430 static int 431 zev_ioc_unmute_pool(char *poolname) 432 { 433 zev_pool_list_entry_t *pe, *peprev; 434 435 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 436 /* pool muted? 
*/ 437 peprev = NULL; 438 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 439 if (!strcmp(pe->name, poolname)) 440 break; 441 peprev = pe; 442 } 443 if (pe) { 444 rw_exit(&zev_pool_list_rwlock); 445 return ENOENT; 446 } 447 448 if (peprev != NULL) { 449 peprev->next = pe->next; 450 } else { 451 zev_muted_pools_head = pe->next; 452 } 453 zev_free(pe, sizeof(*pe)); 454 rw_exit(&zev_pool_list_rwlock); 455 return (0); 456 } 457 458 int 459 zev_skip_pool(objset_t *os) 460 { 461 zev_pool_list_entry_t *pe; 462 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 463 rw_enter(&zev_pool_list_rwlock, RW_READER); 464 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 465 if (!strcmp(pe->name, dp->dp_spa->spa_name)) { 466 rw_exit(&zev_pool_list_rwlock); 467 return 1; 468 } 469 } 470 rw_exit(&zev_pool_list_rwlock); 471 return 0; 472 } 473 474 int 475 zev_skip_fs(zfsvfs_t *fs) 476 { 477 dsl_dir_t *d = fs->z_os->os_dsl_dataset->ds_dir; 478 dsl_dir_t *prev = NULL; 479 480 while (d && d != prev) { 481 if (strstr(d->dd_myname, "_root")) 482 return 0; 483 prev = d; 484 d = d->dd_parent; 485 } 486 return 1; 487 } 488 489 static void 490 zev_update_statistics(int op, zev_statistics_t *stat) 491 { 492 switch (op) { 493 case ZEV_OP_ERROR: 494 stat->zev_cnt_errors++; 495 break; 496 case ZEV_OP_MARK: 497 stat->zev_cnt_marks++; 498 break; 499 case ZEV_OP_ZFS_MOUNT: 500 stat->zev_cnt_zfs_mount++; 501 break; 502 case ZEV_OP_ZFS_UMOUNT: 503 stat->zev_cnt_zfs_umount++; 504 break; 505 case ZEV_OP_ZVOL_WRITE: 506 stat->zev_cnt_zvol_write++; 507 break; 508 case ZEV_OP_ZVOL_TRUNCATE: 509 stat->zev_cnt_zvol_truncate++; 510 break; 511 case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE: 512 stat->zev_cnt_znode_close_after_update++; 513 break; 514 case ZEV_OP_ZNODE_CREATE: 515 stat->zev_cnt_znode_create++; 516 break; 517 case ZEV_OP_ZNODE_REMOVE: 518 stat->zev_cnt_znode_remove++; 519 break; 520 case ZEV_OP_ZNODE_LINK: 521 stat->zev_cnt_znode_link++; 522 break; 523 case ZEV_OP_ZNODE_SYMLINK: 524 
stat->zev_cnt_znode_symlink++; 525 break; 526 case ZEV_OP_ZNODE_RENAME: 527 stat->zev_cnt_znode_rename++; 528 break; 529 case ZEV_OP_ZNODE_WRITE: 530 stat->zev_cnt_znode_write++; 531 break; 532 case ZEV_OP_ZNODE_TRUNCATE: 533 stat->zev_cnt_znode_truncate++; 534 break; 535 case ZEV_OP_ZNODE_SETATTR: 536 stat->zev_cnt_znode_setattr++; 537 break; 538 case ZEV_OP_ZNODE_ACL: 539 stat->zev_cnt_znode_acl++; 540 break; 541 } 542 } 543 544 void 545 zev_queue_message(int op, zev_msg_t *msg) 546 { 547 zev_queue_t *q; 548 int wakeup = 0; 549 zev_msg_t *m; 550 int i; 551 552 msg->next = NULL; 553 msg->prev = NULL; 554 msg->read = 0; 555 556 if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) { 557 zev_queue_error(op, "unknown op id encountered: %d", op); 558 zev_free(msg, sizeof(*msg) + msg->size); 559 return; 560 } 561 562 /* 563 * This mutex protects us agains race conditions when several 564 * threads want to queue a message and one or more queues are 565 * full: we release zev_mutex to wait for the queues to become 566 * less-than-full, but we don't know in which order the waiting 567 * threads will be awoken. If it's not the same order in which 568 * they went to sleep we might mark different messages as "newest" 569 * in different queues, and so we might have dupes or even 570 * skip messages. 571 */ 572 mutex_enter(&zev_queue_msg_mutex); 573 574 mutex_enter(&zev_mutex); 575 576 /* 577 * When the module is loaded, the default behavior ist to 578 * put all events into a queue and block if the queue is full. 579 * This is done even before the pseudo device is attached. 580 * This way, no events are lost. 581 * 582 * To discard events entirely the "beaver" queue, 583 * which never discards anything, has to be removed. 584 */ 585 586 if (zev_queue_cnt == 0) { 587 mutex_exit(&zev_mutex); 588 mutex_exit(&zev_queue_msg_mutex); 589 return; 590 } 591 592 /* put message into global queue */ 593 msg->seq = zev_msg_sequence_number++; 594 595 /* do we need to make room? 
*/ 596 again: 597 while (zev_statistics.zev_max_queue_len && 598 zev_statistics.zev_queue_len > zev_statistics.zev_max_queue_len) { 599 600 if (zev_have_blocking_queues) { 601 /* so we have blocking queues. are they full? */ 602 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 603 q = zev_queues[i - ZEV_MINOR_MIN]; 604 if (!q) 605 continue; 606 if ((q->zq_flags & 607 ZEV_FL_BLOCK_WHILE_QUEUE_FULL) == 0) 608 continue; 609 if (q->zq_queue_len && 610 q->zq_queue_len > q->zq_max_queue_len) { 611 /* block until queue's been shrunk. */ 612 cv_wait(&zev_condvar, &zev_mutex); 613 goto again; 614 } 615 } 616 } 617 618 /* discard events until this message fits into all queues */ 619 620 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 621 q = zev_queues[i - ZEV_MINOR_MIN]; 622 if (!q) 623 continue; 624 /* discard msgs until queue is small enough */ 625 while (q->zq_queue_len && 626 q->zq_queue_len > q->zq_max_queue_len) { 627 m = q->zq_oldest; 628 if (m == NULL) 629 break; 630 q->zq_events_discarded++; 631 q->zq_bytes_discarded += m->size; 632 q->zq_oldest = m->next; 633 q->zq_queue_len -= m->size; 634 q->zq_queue_messages--; 635 } 636 } 637 638 zev_queue_trim(); 639 ASSERT(zev_statistics.zev_queue_len == 0 || 640 zev_statistics.zev_queue_len <= 641 zev_statistics.zev_max_queue_len); 642 } 643 644 if (zev_queue_tail == NULL) { 645 zev_queue_head = zev_queue_tail = msg; 646 } else { 647 zev_queue_tail->next = msg; 648 msg->prev = zev_queue_tail; 649 zev_queue_tail = msg; 650 } 651 zev_queue_len++; 652 zev_statistics.zev_cnt_total_events++; 653 zev_statistics.zev_queue_len += msg->size; 654 655 /* update per-device queues */ 656 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 657 q = zev_queues[i - ZEV_MINOR_MIN]; 658 if (!q) 659 continue; 660 661 zev_queue_hold(q); 662 663 /* make sure queue has enough room */ 664 while (q->zq_max_queue_len && 665 q->zq_queue_len > q->zq_max_queue_len) { 666 667 if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) { 668 /* block until 
queue has been shrunk. */ 669 cv_wait(&zev_condvar, &zev_mutex); 670 } else { 671 /* discard msgs until queue is small enough */ 672 while (q->zq_queue_len > q->zq_max_queue_len) { 673 m = q->zq_oldest; 674 if (m == NULL) 675 break; 676 q->zq_events_discarded++; 677 q->zq_bytes_discarded += m->size; 678 q->zq_oldest = m->next; 679 q->zq_queue_len -= m->size; 680 q->zq_queue_messages--; 681 } 682 } 683 } 684 685 /* register new message at the end of the queue */ 686 q->zq_queue_len += msg->size; 687 q->zq_queue_messages++; 688 q->zq_bytes_total += msg->size; 689 q->zq_events_total++; 690 if (q->zq_oldest == NULL) 691 q->zq_oldest = msg; 692 693 zev_update_statistics(op, &q->zq_statistics); 694 695 if (q->zq_queue_len > q->zq_wakeup_threshold) 696 wakeup = 1; 697 if (q->zq_queue_len == msg->size) /* queue was empty */ 698 cv_broadcast(&q->zq_condvar); 699 700 zev_queue_release(q); 701 } 702 703 zev_queue_trim(); 704 705 zev_update_statistics(op, &zev_statistics); 706 mutex_exit(&zev_mutex); 707 mutex_exit(&zev_queue_msg_mutex); 708 709 /* one or more queues need a pollwakeup() */ 710 if (op == ZEV_OP_MARK) { 711 zev_poll_wakeup(B_TRUE); 712 } else if (wakeup) { 713 zev_poll_wakeup(B_FALSE); 714 } 715 716 return; 717 } 718 719 void 720 zev_queue_error(int op, char *fmt, ...) 
721 { 722 char buf[ZEV_MAX_MESSAGE_LEN]; 723 va_list ap; 724 int len; 725 zev_msg_t *msg = NULL; 726 zev_error_t *rec; 727 int msg_size; 728 729 va_start(ap, fmt); 730 len = vsnprintf(buf, sizeof(buf), fmt, ap); 731 va_end(ap); 732 if (len >= sizeof(buf)) { 733 cmn_err(CE_WARN, "zev: can't report error - " 734 "dropping event entirely."); 735 return; 736 } 737 738 msg_size = sizeof(*rec) + len + 1; 739 msg = zev_alloc(sizeof(*msg) + msg_size); 740 msg->size = msg_size; 741 rec = (zev_error_t *)(msg + 1); 742 rec->record_len = msg_size; 743 rec->op = ZEV_OP_ERROR; 744 rec->op_time = ddi_get_time(); 745 rec->guid = 0; 746 rec->failed_op = op; 747 rec->errstr_len = len; 748 (void) memcpy(ZEV_ERRSTR(rec), buf, len + 1); 749 750 zev_queue_message(ZEV_OP_ERROR, msg); 751 return; 752 } 753 754 static int 755 zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name) 756 { 757 char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1]; 758 zev_queue_t *q; 759 int i; 760 761 *out = NULL; 762 763 if (name->zev_namelen == 0) { 764 if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR) 765 return EINVAL; 766 zev_queue_hold(req_q); 767 *out = req_q; 768 return 0; 769 } 770 771 if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 772 return EINVAL; 773 strncpy(namebuf, name->zev_name, name->zev_namelen); 774 namebuf[name->zev_namelen] = '\0'; 775 776 mutex_enter(&zev_mutex); 777 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 778 q = zev_queues[i - ZEV_MINOR_MIN]; 779 if (!q) 780 continue; 781 if (!strcmp(q->zq_name, namebuf)) { 782 zev_queue_hold(q); 783 mutex_exit(&zev_mutex); 784 *out = q; 785 return 0; 786 } 787 } 788 mutex_exit(&zev_mutex); 789 return ENOENT; 790 } 791 792 static int 793 zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode) 794 { 795 zev_ioctl_get_queue_statistics_t gs; 796 zev_queue_t *q; 797 int ret; 798 799 if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0) 800 return EFAULT; 801 802 ret = zev_find_queue(&q, req_q, 
&gs.zev_queue_name); 803 if (ret) 804 return ret; 805 806 /* ddi_copyout() can take a long time. Better make 807 a copy to be able to release the mutex faster. */ 808 mutex_enter(&zev_mutex); 809 memcpy(&gs.zev_statistics, &q->zq_statistics,sizeof(gs.zev_statistics)); 810 gs.zev_statistics.zev_queue_len = q->zq_queue_len; 811 gs.zev_statistics.zev_bytes_read = q->zq_bytes_read; 812 gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded; 813 gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len; 814 gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded; 815 gs.zev_statistics.zev_cnt_total_events = q->zq_events_total; 816 zev_queue_release(q); 817 mutex_exit(&zev_mutex); 818 819 if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) 820 return EFAULT; 821 return 0; 822 } 823 824 static int 825 zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 826 { 827 zev_ioctl_set_queue_properties_t qp; 828 zev_queue_t *q; 829 uint64_t old_max; 830 uint64_t old_flags; 831 int ret; 832 833 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 834 return EFAULT; 835 if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN) 836 return EINVAL; 837 if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN) 838 return EINVAL; 839 840 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 841 if (ret) 842 return ret; 843 844 mutex_enter(&zev_mutex); 845 846 /* 847 * Note: if the PERSISTENT flag is cleared, and the queue is not busy, 848 * the queue should be removed by zev_queue_release() in zev_ioctl(). 
849 */ 850 old_flags = qp.zev_flags; 851 q->zq_flags = qp.zev_flags; 852 if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) && 853 (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) { 854 /* queue is no longer blocking - wake blocked threads */ 855 cv_broadcast(&zev_condvar); 856 } 857 858 zev_update_blockflag(); 859 860 old_max = q->zq_max_queue_len; 861 q->zq_max_queue_len = qp.zev_max_queue_len; 862 if (q->zq_max_queue_len < old_max) 863 zev_queue_trim(); 864 if (q->zq_max_queue_len > old_max) 865 cv_broadcast(&zev_condvar); /* threads may be waiting */ 866 867 if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) && 868 (qp.zev_poll_wakeup_threshold <= q->zq_queue_len)) 869 pollwakeup(&q->zq_pollhead, POLLIN); 870 q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold; 871 872 zev_queue_release(q); 873 mutex_exit(&zev_mutex); 874 return 0; 875 } 876 877 static int 878 zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 879 { 880 zev_ioctl_get_queue_properties_t qp; 881 zev_queue_t *q; 882 int ret; 883 884 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 885 return EFAULT; 886 887 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 888 if (ret) 889 return ret; 890 891 mutex_enter(&zev_mutex); 892 qp.zev_max_queue_len = q->zq_max_queue_len; 893 qp.zev_flags = q->zq_flags; 894 qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold; 895 zev_queue_release(q); 896 mutex_exit(&zev_mutex); 897 898 if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0) 899 return EFAULT; 900 return 0; 901 } 902 903 static int 904 zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode) 905 { 906 zev_ioctl_add_queue_t aq; 907 zev_queue_t *new_q; 908 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 909 910 if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0) 911 return EFAULT; 912 913 if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 914 return EINVAL; 915 strncpy(name, aq.zev_name, aq.zev_namelen); 916 name[aq.zev_namelen] = '\0'; 917 918 return 
zev_queue_new(&new_q, req_q->zq_dip, name, 919 aq.zev_max_queue_len, aq.zev_flags); 920 } 921 922 static int 923 zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode) 924 { 925 zev_ioctl_remove_queue_t rq; 926 zev_queue_t *q; 927 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 928 int found = 0; 929 int i; 930 931 if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0) 932 return EFAULT; 933 934 if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 935 return EINVAL; 936 strncpy(name, rq.zev_queue_name.zev_name, 937 rq.zev_queue_name.zev_namelen); 938 name[rq.zev_queue_name.zev_namelen] = '\0'; 939 940 mutex_enter(&zev_mutex); 941 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 942 q = zev_queues[i - ZEV_MINOR_MIN]; 943 if (!q) 944 continue; 945 if (!strcmp(q->zq_name, name)) { 946 found = 1; 947 break; 948 } 949 } 950 if (!found) { 951 mutex_exit(&zev_mutex); 952 return ENOENT; 953 } 954 955 if (q->zq_busy) { 956 mutex_exit(&zev_mutex); 957 return EBUSY; 958 } 959 /* 960 * clear flags, so that persistent queues are removed aswell 961 * and the queue becomes non-blocking. 
962 */ 963 q->zq_flags = 0; 964 if (q->zq_to_be_removed == B_FALSE) { 965 q->zq_to_be_removed = B_TRUE; 966 zev_queue_release(q); 967 } 968 /* some threads might be waiting for this queue to become writable */ 969 cv_broadcast(&zev_condvar); 970 971 mutex_exit(&zev_mutex); 972 return 0; 973 } 974 975 static int 976 zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode) 977 { 978 zev_ioctl_debug_info_t di; 979 uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0); 980 uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0); 981 982 zev_chksum_stats(&di.zev_chksum_cache_size, 983 &di.zev_chksum_cache_hits, 984 &di.zev_chksum_cache_misses); 985 di.zev_memory_allocated = mem_allocated - mem_freed; 986 if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0) 987 return EFAULT; 988 return 0; 989 } 990 991 static int 992 zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode) 993 { 994 zev_ioctl_get_queue_list_t gql; 995 zev_queue_t *q; 996 int i = 0; 997 int count = 0; 998 999 memset(&gql, 0, sizeof(gql)); 1000 1001 mutex_enter(&zev_mutex); 1002 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 1003 q = zev_queues[i - ZEV_MINOR_MIN]; 1004 if (!q) 1005 continue; 1006 strncpy(gql.zev_queue_name[count].zev_name, 1007 q->zq_name, ZEV_MAX_QUEUE_NAME_LEN); 1008 gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name); 1009 count++; 1010 } 1011 gql.zev_n_queues = count; 1012 mutex_exit(&zev_mutex); 1013 1014 if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0) 1015 return EFAULT; 1016 return 0; 1017 } 1018 1019 static int 1020 zev_ioc_set_max_queue_len(zev_queue_t *req_q, intptr_t arg, int mode) 1021 { 1022 uint64_t len; 1023 int i; 1024 zev_queue_t *q; 1025 1026 if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) { 1027 return EFAULT; 1028 } 1029 if (len > ZEV_MAX_QUEUE_LEN) { 1030 return EINVAL; 1031 } 1032 mutex_enter(&zev_mutex); 1033 zev_statistics.zev_max_queue_len = len; 1034 for (i = ZEV_MINOR_MIN; i <= 
ZEV_MINOR_MAX; i++) { 1035 q = zev_queues[i - ZEV_MINOR_MIN]; 1036 if (!q) 1037 continue; 1038 if (q->zq_max_queue_len <= 1039 zev_statistics.zev_max_queue_len) 1040 continue; 1041 q->zq_max_queue_len = zev_statistics.zev_max_queue_len; 1042 } 1043 cv_broadcast(&zev_condvar); 1044 mutex_exit(&zev_mutex); 1045 return 0; 1046 } 1047 1048 static int 1049 zev_ioc_get_zev_version(intptr_t arg, int mode) 1050 { 1051 zev_ioctl_get_zev_version vi; 1052 vi.zev_major_version = ZEV_MAJOR_VERSION; 1053 vi.zev_minor_version = ZEV_MINOR_VERSION; 1054 if (ddi_copyout(&vi, (void *)arg, sizeof(vi), mode) != 0) 1055 return EFAULT; 1056 return 0; 1057 } 1058 1059 /* ARGSUSED */ 1060 static int 1061 zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1062 { 1063 zev_statistics_t zs; 1064 zev_ioctl_poolarg_t pa; 1065 zev_ioctl_mark_t mark; 1066 zev_mark_t *rec; 1067 int msg_size; 1068 zev_msg_t *msg; 1069 uint64_t mark_id; 1070 minor_t minor; 1071 zev_queue_t *req_q; 1072 int ret = 0; 1073 1074 minor = getminor(dev); 1075 mutex_enter(&zev_mutex); 1076 if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) { 1077 mutex_exit(&zev_mutex); 1078 return (ENXIO); 1079 } 1080 zev_queue_hold(req_q); 1081 mutex_exit(&zev_mutex); 1082 /* 1083 * all structures passed between kernel and userspace 1084 * are now compatible between 64 and 32 bit. Model 1085 * conversion can be ignored. 1086 */ 1087 switch (cmd) { 1088 case ZEV_IOC_GET_GLOBAL_STATISTICS: 1089 /* ddi_copyout() can take a long time. Better make 1090 a copy to be able to release the mutex faster. 
*/ 1091 mutex_enter(&zev_mutex); 1092 (void) memcpy(&zs, &zev_statistics, sizeof(zs)); 1093 mutex_exit(&zev_mutex); 1094 if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0) 1095 ret = EFAULT; 1096 break; 1097 case ZEV_IOC_GET_QUEUE_STATISTICS: 1098 ret = zev_ioc_get_queue_statistics(req_q, arg, mode); 1099 break; 1100 case ZEV_IOC_MUTE_POOL: 1101 case ZEV_IOC_UNMUTE_POOL: 1102 if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) { 1103 ret = EFAULT; 1104 break; 1105 } 1106 if (pa.zev_poolname_len >=MAXPATHLEN) { 1107 ret = EINVAL; 1108 break; 1109 } 1110 pa.zev_poolname[pa.zev_poolname_len] = '\0'; 1111 if (cmd == ZEV_IOC_MUTE_POOL) { 1112 ret = zev_ioc_mute_pool(pa.zev_poolname); 1113 } else { 1114 ret = zev_ioc_unmute_pool(pa.zev_poolname); 1115 } 1116 break; 1117 case ZEV_IOC_SET_MAX_QUEUE_LEN: 1118 ret = zev_ioc_set_max_queue_len(req_q, arg, mode); 1119 break; 1120 case ZEV_IOC_GET_QUEUE_PROPERTIES: 1121 ret = zev_ioc_get_queue_properties(req_q, arg, mode); 1122 break; 1123 case ZEV_IOC_SET_QUEUE_PROPERTIES: 1124 ret = zev_ioc_set_queue_properties(req_q, arg, mode); 1125 break; 1126 case ZEV_IOC_MARK: 1127 if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) { 1128 ret = EFAULT; 1129 break; 1130 } 1131 /* prepare message */ 1132 msg_size = sizeof(*rec) + mark.zev_payload_len + 1; 1133 msg = zev_alloc(sizeof(*msg) + msg_size); 1134 msg->size = msg_size; 1135 rec = (zev_mark_t *)(msg + 1); 1136 rec->record_len = msg_size; 1137 rec->op = ZEV_OP_MARK; 1138 rec->op_time = ddi_get_time(); 1139 rec->guid = mark.zev_guid; 1140 rec->payload_len = mark.zev_payload_len; 1141 /* get payload */ 1142 if (ddi_copyin(((char *)arg) + sizeof(mark), 1143 ZEV_PAYLOAD(rec), 1144 mark.zev_payload_len, mode) != 0) { 1145 zev_free(msg, msg_size); 1146 ret = EFAULT; 1147 break; 1148 } 1149 *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0'; 1150 /* get mark id and queue message */ 1151 mutex_enter(&zev_mark_id_mutex); 1152 mark_id = zev_mark_id++; 1153 
mutex_exit(&zev_mark_id_mutex); 1154 rec->mark_id = mark_id; 1155 zev_queue_message(ZEV_OP_MARK, msg); 1156 /* report mark id to userland, ignore errors */ 1157 mark.zev_mark_id = mark_id; 1158 ddi_copyout(&mark, (void *)arg, sizeof(mark), mode); 1159 break; 1160 case ZEV_IOC_ADD_QUEUE: 1161 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1162 ret = EACCES; 1163 break; 1164 } 1165 ret = zev_ioc_add_queue(req_q, arg, mode); 1166 break; 1167 case ZEV_IOC_REMOVE_QUEUE: 1168 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1169 ret = EACCES; 1170 break; 1171 } 1172 ret = zev_ioc_remove_queue(req_q, arg, mode); 1173 break; 1174 case ZEV_IOC_GET_DEBUG_INFO: 1175 ret = zev_ioc_get_debug_info(req_q, arg, mode); 1176 break; 1177 case ZEV_IOC_GET_QUEUE_LIST: 1178 ret = zev_ioc_get_queue_list(req_q, arg, mode); 1179 break; 1180 case ZEV_IOC_GET_FILE_SIGNATURES: 1181 ret = zev_ioc_get_signatures(arg, mode); 1182 break; 1183 case ZEV_IOC_GET_ZEV_VERSION: 1184 ret = zev_ioc_get_zev_version(arg, mode); 1185 break; 1186 default: 1187 /* generic "ioctl unknown" error */ 1188 ret = ENOTTY; 1189 } 1190 1191 mutex_enter(&zev_mutex); 1192 zev_queue_release(req_q); 1193 mutex_exit(&zev_mutex); 1194 if (ret) 1195 SET_ERROR(ret); 1196 return (ret); 1197 } 1198 1199 static int 1200 zev_chpoll(dev_t dev, short events, int anyyet, 1201 short *reventsp, struct pollhead **phpp) 1202 { 1203 int minor; 1204 short revent = 0; 1205 zev_queue_t *q; 1206 1207 /* use minor-specific queue context and it's pollhead */ 1208 minor = getminor(dev); 1209 if (minor == ZEV_CONTROL_DEVICE_MINOR) 1210 return (EINVAL); 1211 mutex_enter(&zev_mutex); 1212 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1213 mutex_exit(&zev_mutex); 1214 return (ENXIO); 1215 } 1216 revent = 0; 1217 if ((events & POLLIN)) { 1218 if (q->zq_oldest) 1219 revent |= POLLIN; 1220 } 1221 if (revent == 0) { 1222 if (!anyyet) { 1223 *phpp = &q->zq_pollhead; 1224 } 1225 } 1226 *reventsp = revent; 1227 mutex_exit(&zev_mutex); 1228 return (0); 1229 } 
/* ARGSUSED */
/*
 * read(9E) entry point for queue minors.
 *
 * Blocks (interruptibly) until the queue has at least one message, then
 * copies out as many *whole* messages as fit into the caller's buffer.
 * Returns E2BIG if the first pending message alone is larger than the
 * buffer, EINTR if a signal interrupted the wait, ENXIO for a stale
 * minor, EINVAL for the control device (which carries no event stream).
 * The file offset is restored afterwards: this is a message stream, not
 * a seekable device, so offsets are meaningless here.
 */
static int
zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p)
{
	minor_t minor;
	offset_t off;
	int ret = 0;
	zev_msg_t *msg;
	char *data;
	zev_queue_t *q;

	minor = getminor(dev);
	if (minor == ZEV_CONTROL_DEVICE_MINOR)
		return (EINVAL);

	mutex_enter(&zev_mutex);
	q = ddi_get_soft_state(statep, minor);
	if (q == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	/* remember the offset so it can be restored after uiomove() */
	off = uio_p->uio_loffset;
	msg = q->zq_oldest;
	while (msg == NULL) {
		if (!ddi_can_receive_sig()) {
			/*
			 * read() shouldn't block because this thread
			 * can't receive signals. (e.g., it might be
			 * torn down by exit() right now.)
			 */
			mutex_exit(&zev_mutex);
			return 0;
		}
		if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) {
			/* signal received. */
			mutex_exit(&zev_mutex);
			return EINTR;
		}
		msg = q->zq_oldest;
	}
	/* never split a message: the first one must fit entirely */
	if (msg->size > uio_p->uio_resid) {
		mutex_exit(&zev_mutex);
		return E2BIG;
	}
	while (msg && uio_p->uio_resid >= msg->size) {
		data = (char *)(msg + 1);
		ret = uiomove(data, msg->size, UIO_READ, uio_p);
		if (ret != 0) {
			mutex_exit(&zev_mutex);
			cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
			uio_p->uio_loffset = off;
			return (ret);
		}
		/* advance the per-queue cursor and consumption counters */
		q->zq_oldest = msg->next;
		q->zq_bytes_read += msg->size;
		q->zq_queue_len -= msg->size;
		q->zq_queue_messages--;
		msg->read++;
		msg = q->zq_oldest;
	}
	/* fully-consumed messages may now be freeable from the global list */
	zev_queue_trim();
	/* wake producers blocked on a full queue */
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);
	uio_p->uio_loffset = off;
	return 0;
}

/* ARGSUSED */
/*
 * close(9E) entry point: drop the exclusive-open marker.  Non-persistent
 * queues lose their open-reference here and may be torn down.
 */
static int
zev_close(dev_t dev, int flag, int otyp, cred_t *crepd)
{
	zev_queue_t *q;
	int minor;

	minor = getminor(dev);
	if (otyp != OTYP_CHR)
		return (EINVAL);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep,
minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	/* closing a device that was never opened is a caller bug */
	if (q->zq_busy != B_TRUE) {
		mutex_exit(&zev_mutex);
		return (EINVAL);
	}
	q->zq_busy = B_FALSE;
	/* non-persistent queues are destroyed on last close */
	if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0)
		zev_queue_release(q);
	mutex_exit(&zev_mutex);
	return (0);
}

/* ARGSUSED */
/*
 * open(9E) entry point.  Requires privilege (drv_priv).  The control
 * device may be opened by many processes in parallel; every queue minor
 * is exclusive-open and returns EBUSY while held.
 */
static int
zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	zev_queue_t *q;
	minor_t minor;

	minor = getminor(*devp);
	if (otyp != OTYP_CHR)
		return (EINVAL);
	if (drv_priv(credp) != 0)
		return (EPERM);
	mutex_enter(&zev_mutex);
	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	if (minor == ZEV_CONTROL_DEVICE_MINOR) {
		/* control device may be used in parallel */
		q->zq_busy = B_TRUE;
		mutex_exit(&zev_mutex);
		return 0;
	}
	if (q->zq_busy == B_TRUE) {
		mutex_exit(&zev_mutex);
		return (EBUSY);
	}
	q->zq_busy = B_TRUE; /* can only be opened exclusively */
	mutex_exit(&zev_mutex);
	return (0);
}

/* character-device entry points for all zev minors */
static struct cb_ops zev_cb_ops = {
	zev_open,		/* open */
	zev_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	zev_read,		/* read */
	nodev,			/* write */
	zev_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	zev_chpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_MP | D_64BIT,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* aread */
	nodev,			/* awrite */
};

/*
 * Tear down everything zev_attach() created for instance 0: minor nodes,
 * the pollwakeup thread, the "ctrl" dummy queue and all event queues.
 */
static void
zev_free_instance(dev_info_t *dip)
{
	int instance;
	zev_queue_t *q;
	int i;

	instance = ddi_get_instance(dip);
	if (instance != 0) {
		/* only instance 0 exists (hardcoded in zev.conf) */
		cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)",
		    instance);
		return;
	}

	ddi_remove_minor_node(dip,
NULL); 1392 devfs_clean(q->zq_dip, NULL, 0); 1393 1394 /* stop pollwakeup thread */ 1395 zev_wakeup_thread_run = 0; 1396 if (zev_poll_wakeup_thread != NULL) { 1397 thread_join(zev_poll_wakeup_thread->t_did); 1398 zev_poll_wakeup_thread = NULL; 1399 } 1400 1401 mutex_enter(&zev_mutex); 1402 1403 /* remove "ctrl" dummy queue */ 1404 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1405 if (q) { 1406 ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR); 1407 ZEV_MEM_SUB(sizeof(zev_queue_t)); 1408 } 1409 1410 /* remove all other queues */ 1411 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 1412 q = zev_queues[i- ZEV_MINOR_MIN]; 1413 if (!q) 1414 continue; 1415 ASSERT(q->zq_refcnt == 1); 1416 zev_queue_release(q); 1417 } 1418 zev_queue_trim(); 1419 bzero(&zev_queues, sizeof(zev_queues)); 1420 1421 mutex_exit(&zev_mutex); 1422 1423 } 1424 1425 static int 1426 zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1427 { 1428 int instance; 1429 zev_queue_t *q; 1430 1431 /* called once per instance with DDI_DETACH, 1432 may be called to suspend */ 1433 switch (cmd) { 1434 case DDI_DETACH: 1435 /* instance busy? */ 1436 instance = ddi_get_instance(dip); 1437 if (instance != 0) { /* hardcoded in zev.conf */ 1438 /* this module only supports one instance. */ 1439 return (DDI_FAILURE); 1440 } 1441 1442 mutex_enter(&zev_mutex); 1443 if (!zev_attached) { 1444 mutex_exit(&zev_mutex); 1445 return (DDI_FAILURE); 1446 } 1447 1448 /* check "ctrl" queue to see if t is busy */ 1449 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1450 if (q == NULL) { 1451 mutex_exit(&zev_mutex); 1452 return (DDI_FAILURE); 1453 } 1454 if (q->zq_busy) { 1455 mutex_exit(&zev_mutex); 1456 return (DDI_FAILURE); 1457 } 1458 /* are there any queues? 
*/
		if (zev_queue_cnt > 0) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		zev_attached = B_FALSE;
		mutex_exit(&zev_mutex);

		/* switch ZFS event callbacks back to default */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = rz_zev_default_callbacks;
		rz_zev_set_active(B_FALSE);
		rw_exit(&rz_zev_rwlock);

		/* no thread is inside of the callbacks anymore. */

		/* free resources allocated for this instance */
		zev_free_instance(dip);
		zev_chksum_fini();
#if 0
		cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64,
		    zev_memory_allocated - zev_memory_freed);
#endif
		return (DDI_SUCCESS);
	case DDI_SUSPEND:
		/* kernel must not suspend zev devices while ZFS is running */
		return (DDI_FAILURE);
	default:
		return (DDI_FAILURE);
	}
}

/*
 * attach(9E) entry point.  Called once per instance with DDI_ATTACH;
 * may also be called with DDI_RESUME.  Sets up the soft state for the
 * "ctrl" control device, the default event queue, the pollwakeup thread,
 * and finally switches the ZFS event callbacks over to this module.
 */
static int
zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance;
	int error;
	zev_queue_t *q;

	switch (cmd) {
	case DDI_ATTACH:
		/* create instance state */
		instance = ddi_get_instance(dip);
		if (instance != 0) {	/* hardcoded in zev.conf */
			/* this module only supports one instance.
*/
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) !=
		    DDI_SUCCESS) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		ZEV_MEM_ADD(sizeof(zev_queue_t));
		zev_attached = B_TRUE;

		/* init queue list */
		bzero(&zev_queues, sizeof(zev_queues));
		mutex_exit(&zev_mutex);

		/* create a dummy queue for management of "ctrl" */

		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		q->zq_dip = dip;
		q->zq_refcnt = 1;
		q->zq_busy = B_FALSE;
		q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR;
		q->zq_flags = ZEV_FL_PERSISTENT;
		/* fixed constant name; fits zq_name by construction */
		strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME);

		/* create device node for "ctrl" */
		if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME,
		    S_IFCHR, ZEV_CONTROL_DEVICE_MINOR,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto fail;
		}

		/* note: intentionally not adding ctrl queue to queue list.
*/

		/* default queue */
		error = zev_queue_new(&q, dip,
		    ZEV_DEFAULT_QUEUE_NAME,
		    ZEV_MAX_QUEUE_LEN,
		    ZEV_FL_BLOCK_WHILE_QUEUE_FULL |
		    ZEV_FL_PERSISTENT);
		if (error)
			goto fail;

		/* start pollwakeup thread */
		zev_wakeup_thread_run = 1;
		zev_poll_wakeup_thread = thread_create(NULL, 0,
		    zev_poll_wakeup_thread_main, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

		ddi_report_dev(dip);

		zev_chksum_init();

		/* switch ZFS event callbacks to zev module callbacks */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = &zev_callbacks;
		rz_zev_set_active(B_TRUE);
		rw_exit(&rz_zev_rwlock);

		return (DDI_SUCCESS);
	case DDI_RESUME:
		/* suspending zev devices should never happen */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
fail:
	/* undo partial attach: free instance state, clear attached flag */
	cmn_err(CE_WARN, "zev: attach failed");
	zev_free_instance(dip);
	mutex_enter(&zev_mutex);
	zev_attached = B_FALSE;
	mutex_exit(&zev_mutex);
	return (DDI_FAILURE);
}

/* ARGSUSED */
/*
 * getinfo(9E) entry point: translate a dev_t into its dev_info pointer
 * or instance number via the minor's queue soft state.
 */
static int
zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
{
	minor_t minor;
	zev_queue_t *q;

	/* arg is dev_t */
	minor = getminor((dev_t)arg);
	mutex_enter(&zev_mutex);
	q = ddi_get_soft_state(statep, minor);
	if (q == NULL) {
		*resultp = NULL;
		mutex_exit(&zev_mutex);
		return (DDI_FAILURE);
	}

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*resultp = q->zq_dip;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip);
		break;
	default:
		mutex_exit(&zev_mutex);
		return (DDI_FAILURE);
	}
	mutex_exit(&zev_mutex);
	return (DDI_SUCCESS);
}

/* device-operations table wired into the module linkage below */
static struct dev_ops zev_dev_ops = {
	DEVO_REV,		/* driver build revision */
	0,			/* driver reference count */
	zev_getinfo,		/*
getinfo */
	nulldev,		/* identify (obsolete) */
	nulldev,		/* probe (search for devices) */
	zev_attach,		/* attach */
	zev_detach,		/* detach */
	nodev,			/* reset (obsolete, use quiesce) */
	&zev_cb_ops,		/* character and block device ops */
	NULL,			/* bus driver ops */
	NULL,			/* power management, not needed */
	ddi_quiesce_not_needed,	/* quiesce */
};

static struct modldrv zev_modldrv = {
	&mod_driverops,		/* all loadable modules use this */
	"ZFS event provider, v"
	    XSTRING(ZEV_MAJOR_VERSION) "."
	    XSTRING(ZEV_MINOR_VERSION),
				/* driver name and version info */
	&zev_dev_ops		/* ops method pointers */
};

static struct modlinkage zev_modlinkage = {
	MODREV_1,		/* fixed value */
	{
		&zev_modldrv,	/* driver linkage structure */
		NULL		/* list terminator */
	}
};

/*
 * _init(9E): module load hook.  Initializes soft-state handling, all
 * global locks/condvars and statistics, seeds the mark-id and message
 * sequence counters from the high-resolution clock, sets up the default
 * pool mute list, and registers the module with the kernel.
 */
int
_init(void)
{
	int error;

	if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0)
		return (error);
	zev_attached = B_FALSE;

	zev_queue_head = NULL;
	zev_queue_tail = NULL;
	zev_queue_len = 0;
	zev_muted_pools_head = NULL;
	zev_memory_allocated = 0;
	zev_memory_freed = 0;
	zev_queue_cnt = 0;
	zev_have_blocking_queues = 1;

	mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&zev_condvar, NULL, CV_DRIVER, NULL);
	rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL);
	mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL);
	/* non-zero, monotonic-ish seed so ids differ across reloads */
	zev_mark_id = gethrtime();
	mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL);
	zev_msg_sequence_number = gethrtime();
	bzero(&zev_statistics, sizeof(zev_statistics));
	bzero(&zev_pollhead, sizeof(zev_pollhead));
	bzero(&zev_queues, sizeof(zev_queues));
	zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN;
	/* "zg0" is muted by default; presumably a site-local pool name */
	if (zev_ioc_mute_pool("zg0")) {
		cmn_err(CE_WARN, "zev: could not init mute list");
		goto FAIL;
	}
1682 1683 if ((error = mod_install(&zev_modlinkage)) != 0) { 1684 cmn_err(CE_WARN, "zev: could not install module"); 1685 goto FAIL; 1686 } 1687 1688 return (0); 1689 FAIL: 1690 /* free resources */ 1691 cmn_err(CE_WARN, "zev: _init failed"); 1692 mutex_destroy(&zev_mutex); 1693 ddi_soft_state_fini(&statep); 1694 return (error); 1695 } 1696 1697 int 1698 _info(struct modinfo *modinfop) 1699 { 1700 return (mod_info(&zev_modlinkage, modinfop)); 1701 } 1702 1703 int 1704 _fini(void) 1705 { 1706 int error = 0; 1707 zev_msg_t *msg; 1708 zev_pool_list_entry_t *pe, *npe; 1709 1710 mutex_enter(&zev_mutex); 1711 if (zev_attached == B_TRUE) { 1712 mutex_exit(&zev_mutex); 1713 return (SET_ERROR(EBUSY)); 1714 } 1715 if (zev_queue_cnt != 0) { 1716 /* should never happen */ 1717 mutex_exit(&zev_mutex); 1718 return (SET_ERROR(EBUSY)); 1719 } 1720 1721 /* 1722 * avoid deadlock if event list is full: make sure threads currently 1723 * blocking on the event list can append their event and then release 1724 * rz_zev_rwlock. Since there should be no queues left when we 1725 * reach this point we can simply empty the event list and then 1726 * wake everybody. 1727 */ 1728 while (zev_queue_head) { 1729 msg = zev_queue_head; 1730 zev_queue_head = msg->next; 1731 zev_free(msg, sizeof(*msg) + msg->size); 1732 } 1733 cv_broadcast(&zev_condvar); 1734 mutex_exit(&zev_mutex); 1735 1736 /* switch ZFS event callbacks back to default (again) */ 1737 rw_enter(&rz_zev_rwlock, RW_WRITER); 1738 rz_zev_callbacks = rz_zev_default_callbacks; 1739 rz_zev_set_active(B_FALSE); 1740 rw_exit(&rz_zev_rwlock); 1741 1742 /* no thread is inside of the callbacks anymore. Safe to remove. 
*/

	/* unload module callbacks */
	if ((error = mod_remove(&zev_modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_remove failed: %d", error);
		return (error);
	}

	/* free resources */
	/* drain any events that raced in between the two list flushes */
	mutex_enter(&zev_mutex);
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		zev_free(msg, sizeof(*msg) + msg->size);
	}
	mutex_exit(&zev_mutex);
	/* free the muted-pool list; _init() resets the head on reload */
	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	pe = zev_muted_pools_head;
	while (pe) {
		npe = pe;
		pe = pe->next;
		zev_free(npe, sizeof(*npe));
	}
	rw_exit(&zev_pool_list_rwlock);
	ddi_soft_state_fini(&statep);
	rw_destroy(&zev_pool_list_rwlock);
	cv_destroy(&zev_condvar);
	mutex_destroy(&zev_mutex);
	mutex_destroy(&zev_mark_id_mutex);
	mutex_destroy(&zev_queue_msg_mutex);

	return (0);
}