#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/stat.h>
#include <sys/fs/zev.h>
#include <sys/zev_callbacks.h>
#include <sys/zev_checksums.h>
#include <sys/zfs_znode.h>
#include <sys/time.h>
#include <sys/sa.h>
#include <sys/zap.h>
#include <sys/fs/dv_node.h>

#define OFFSETOF(s, m)  ((size_t)(&(((s *)0)->m)))

#define XSTRING(x)      STRING(x)
#define STRING(x)       #x

#define ZEV_DEFAULT_QUEUE_NAME          "beaver"
#define ZEV_CONTROL_DEVICE_MINOR        0
#define ZEV_MINOR_MIN                   (ZEV_CONTROL_DEVICE_MINOR + 1)
#define ZEV_MINOR_MAX                   (ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1)

typedef struct zev_queue {
        char                    zq_name[ZEV_MAX_QUEUE_NAME_LEN+1];
        minor_t                 zq_minor_number;
        dev_info_t              *zq_dip;
        struct pollhead         zq_pollhead;
        uint64_t                zq_bytes_read;
        uint64_t                zq_events_read;
        uint64_t                zq_bytes_discarded;
        uint64_t                zq_events_discarded;
        uint64_t                zq_bytes_total;
        uint64_t                zq_events_total;
        uint64_t                zq_wakeup_threshold;
        uint16_t                zq_flags;
        uint16_t                zq_need_wakeup;
        /* protected by zev_mutex */
        int                     zq_refcnt;
        uint64_t                zq_queue_len;
        uint64_t                zq_queue_messages;
        uint64_t                zq_max_queue_len;
        zev_msg_t               *zq_oldest;
        boolean_t               zq_busy;
        boolean_t               zq_to_be_removed;
        zev_statistics_t        zq_statistics;
        kcondvar_t              zq_condvar;
} zev_queue_t;

static void *statep;
struct pollhead zev_pollhead;

kmutex_t zev_mutex;
kcondvar_t zev_condvar;
kmutex_t zev_queue_msg_mutex;
krwlock_t zev_pool_list_rwlock;
static zev_statistics_t zev_statistics;
static boolean_t zev_attached;
static kmutex_t zev_mark_id_mutex;
static uint64_t zev_mark_id = 0;

static uint64_t zev_msg_sequence_number = 0;
static zev_queue_t *zev_queues[ZEV_MAX_QUEUES];
static int zev_queue_cnt = 0;
static int zev_have_blocking_queues = 1;

uint64_t zev_memory_allocated = 0;
uint64_t zev_memory_freed = 0;

/*
 * The longest potential message is from zev_zfs_mount() and
 * contains the mountpoint, which might be close to MAXPATHLEN bytes long.
 *
 * Another candidate is from zev_znode_rename_cb() and contains three inode
 * numbers and two filenames of up to MAXNAMELEN bytes each.
 */
#define ZEV_MAX_MESSAGE_LEN     4096

static zev_msg_t *zev_queue_head = NULL;
static zev_msg_t *zev_queue_tail = NULL;
static uint64_t zev_queue_len = 0;

typedef struct zev_pool_list_entry {
        struct zev_pool_list_entry      *next;
        char                            name[MAXPATHLEN];
} zev_pool_list_entry_t;

static zev_pool_list_entry_t *zev_muted_pools_head = NULL;

static volatile int zev_wakeup_thread_run = 1;
static kthread_t *zev_poll_wakeup_thread = NULL;

void *
zev_alloc(ssize_t sz)
{
        ZEV_MEM_ADD(sz);
        return kmem_alloc(sz, KM_SLEEP);
}

void *
zev_zalloc(ssize_t sz)
{
        ZEV_MEM_ADD(sz);
        return kmem_zalloc(sz, KM_SLEEP);
}

void
zev_free(void *ptr, ssize_t sz)
{
        ZEV_MEM_SUB(sz);
        kmem_free(ptr, sz);
}

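/*
 * Note: ZEV_MEM_ADD() and ZEV_MEM_SUB() are defined in a zev header, not
 * in this file.  Judging by the lock-free reads via atomic_add_64_nv() in
 * zev_ioc_get_debug_info() below, they are presumably atomic counter
 * updates along the lines of this sketch (an assumption, not verified
 * against the header):
 *
 *      #define ZEV_MEM_ADD(sz) atomic_add_64(&zev_memory_allocated, sz)
 *      #define ZEV_MEM_SUB(sz) atomic_add_64(&zev_memory_freed, sz)
 *
 * This keeps the allocation accounting consistent without requiring
 * zev_mutex in the allocation wrappers above.
 */
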
/* must be called with zev_mutex held */
static void
zev_update_blockflag(void)
{
        zev_queue_t *q;
        int had_blocking_queues;
        int i;

        had_blocking_queues = zev_have_blocking_queues;

        /* do we still have blocking queues? */
        zev_have_blocking_queues = 0;
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
                        zev_have_blocking_queues = 1;
                        break;
                }
        }
        /* if we had blocking queues, waiters may now be able to proceed */
        if (had_blocking_queues)
                cv_broadcast(&zev_condvar);
}

/* order queues by minor number; avl_create()-style comparator */
int
zev_queue_cmp(const void *a, const void *b)
{
        const zev_queue_t *qa = a;
        const zev_queue_t *qb = b;
        if (qa->zq_minor_number > qb->zq_minor_number)
                return 1;
        if (qa->zq_minor_number < qb->zq_minor_number)
                return -1;
        return 0;
}

/* must be called with zev_mutex held */
void
zev_queue_trim(void)
{
        zev_msg_t *m;
        uint64_t oldest_message;
        zev_queue_t *q;
        int i;

        if (!zev_queue_tail)
                return;

        oldest_message = zev_queue_tail->seq + 1; /* does not exist, yet. */
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (q == NULL)
                        continue;
                if (!q->zq_oldest)
                        continue;
                if (oldest_message > q->zq_oldest->seq)
                        oldest_message = q->zq_oldest->seq;
        }

        /* remove msgs between oldest_message and zev_queue_head */
        while (zev_queue_head && (oldest_message > zev_queue_head->seq)) {
                m = zev_queue_head;
                zev_queue_head = m->next;
                if (zev_queue_head == NULL) {
                        zev_queue_tail = NULL;
                } else {
                        zev_queue_head->prev = NULL;
                }
                if (m->read == 0) {
                        zev_statistics.zev_bytes_discarded += m->size;
                        zev_statistics.zev_cnt_discarded_events++;
                }
                zev_statistics.zev_queue_len -= m->size;
                zev_queue_len--;
                zev_free(m, sizeof(*m) + m->size);
        }
}

/* must be called with zev_mutex held */
static void
zev_queue_hold(zev_queue_t *q)
{
        q->zq_refcnt++;
}

/* must be called with zev_mutex held */
static void
zev_queue_release(zev_queue_t *q)
{
        q->zq_refcnt--;
        if (q->zq_refcnt > 0)
                return;

        ASSERT(q->zq_busy == B_FALSE);

        /* persistent queues will not be removed */
        if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0)
                return;

        /* remove queue from queue list */
        zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL;

        /* discard messages that no queue references anymore */
        zev_queue_trim();

        cv_destroy(&q->zq_condvar);
        ddi_remove_minor_node(q->zq_dip, q->zq_name);
        devfs_clean(q->zq_dip, NULL, 0);
        ddi_soft_state_free(statep, q->zq_minor_number);
        ZEV_MEM_SUB(sizeof(zev_queue_t));
        zev_queue_cnt--;
        zev_update_blockflag();
}

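/*
 * Queue lifetime follows a simple reference-count protocol: every code
 * path that uses a queue outside of zev_mutex takes a hold first and
 * releases it when done.  This pattern appears in zev_poll_wakeup()
 * below, because pollwakeup(9F) must not be called with driver locks
 * held:
 *
 *      mutex_enter(&zev_mutex);
 *      zev_queue_hold(q);
 *      mutex_exit(&zev_mutex);
 *      pollwakeup(&q->zq_pollhead, POLLIN);
 *      mutex_enter(&zev_mutex);
 *      zev_queue_release(q);
 *      mutex_exit(&zev_mutex);
 *
 * The final release tears the queue down unless ZEV_FL_PERSISTENT is set.
 */
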
int
zev_queue_new(zev_queue_t **queue,
              dev_info_t *dip,
              char *name,
              uint64_t max_queue_len,
              uint16_t flags)
{
        zev_queue_t *q;
        zev_queue_t *tmp;
        zev_msg_t *msg;
        int name_exists = 0;
        minor_t minor;
        char *p;
        int i;

        if (max_queue_len > ZEV_MAX_QUEUE_LEN)
                return EINVAL;
        if (max_queue_len == 0)
                max_queue_len = ZEV_MAX_QUEUE_LEN;
        if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME))
                return EINVAL;
        /* queue names are restricted to [a-z0-9.] */
        for (p = name; *p; p++) {
                if (*p >= 'a' && *p <= 'z')
                        continue;
                if (*p >= '0' && *p <= '9')
                        continue;
                if (*p == '.')
                        continue;
                return EINVAL;
        }

        mutex_enter(&zev_mutex);

        /* find a free minor number. */
        /* if this were a frequent operation we'd have a free-minor list */
        for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) {
                tmp = zev_queues[minor - ZEV_MINOR_MIN];
                if (tmp == NULL)
                        break;
        }
        if (tmp) {
                mutex_exit(&zev_mutex);
                return ENOSPC;
        }

        if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) {
                mutex_exit(&zev_mutex);
                return ENOSPC;
        }
        ZEV_MEM_ADD(sizeof(zev_queue_t));

        q = ddi_get_soft_state(statep, minor);
        memset(q, 0, sizeof(*q));
        strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN);
        q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0';
        q->zq_max_queue_len = max_queue_len;
        q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN;
        q->zq_flags = flags;
        q->zq_refcnt = 1;
        q->zq_dip = dip;
        q->zq_minor_number = minor;
        cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL);

        /* insert into queue list */
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                /* if this were a frequent operation we'd have a name tree */
                if (zev_queues[i - ZEV_MINOR_MIN] == NULL)
                        continue;
                if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) {
                        name_exists = 1;
                        break;
                }
        }
        if (name_exists) {
                ddi_soft_state_free(statep, minor);
                ZEV_MEM_SUB(sizeof(zev_queue_t));
                mutex_exit(&zev_mutex);
                return EEXIST;
        }
        zev_queues[minor - ZEV_MINOR_MIN] = q;
        zev_queue_cnt++;

        /* calculate current queue len and find head and tail */
        if (!(q->zq_flags & ZEV_FL_INITIALLY_EMPTY)) {
                q->zq_oldest = zev_queue_tail;
                msg = zev_queue_tail;
                while ((msg) && (q->zq_queue_len < q->zq_max_queue_len)) {
                        q->zq_queue_len += msg->size;
                        q->zq_queue_messages++;
                        q->zq_oldest = msg;
                        msg = msg->prev;
                }
        }

        zev_update_blockflag();

        mutex_exit(&zev_mutex);

        if (ddi_create_minor_node(dip, name,
            S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
                mutex_enter(&zev_mutex);
                zev_queues[minor - ZEV_MINOR_MIN] = NULL;
                zev_queue_cnt--;
                ddi_soft_state_free(statep, minor);
                ZEV_MEM_SUB(sizeof(zev_queue_t));
                zev_update_blockflag();
                mutex_exit(&zev_mutex);
                return EFAULT;
        }

        *queue = q;
        return 0;
}

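/*
 * Typical in-kernel use, as done for the default queue in zev_attach()
 * below: create a persistent, blocking queue that starts out holding the
 * complete backlog of events.
 *
 *      zev_queue_t *q;
 *      int error;
 *
 *      error = zev_queue_new(&q, dip, "myqueue", ZEV_MAX_QUEUE_LEN,
 *          ZEV_FL_BLOCK_WHILE_QUEUE_FULL | ZEV_FL_PERSISTENT);
 *
 * Pass ZEV_FL_INITIALLY_EMPTY instead to receive only events that arrive
 * after queue creation.  ("myqueue" is a made-up example name.)
 */
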
/*
 * poll() wakeup thread. Used to check periodically whether we have
 * bytes left in the queue that have not yet been turned into a
 * pollwakeup() call. This is meant to ensure a maximum waiting
 * time until an event is presented as a poll wakeup, while at
 * the same time not making every single event into a poll wakeup
 * of its own.
 */

static void
zev_poll_wakeup(boolean_t flush_all)
{
        zev_queue_t *q;
        int i;

        /*
         * This loop works with hold() and release() because
         * pollwakeup() requires us to release our locks before calling it.
         *
         * from pollwakeup(9F):
         *
         *   "Driver defined locks should not be held across calls
         *   to this function."
         */

        /* wake up threads for each individual queue */
        mutex_enter(&zev_mutex);
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (q == NULL)
                        continue;
                if (!q->zq_busy)
                        continue;
                if (!q->zq_queue_len)
                        continue;
                if ((flush_all) ||
                    (q->zq_queue_len > q->zq_wakeup_threshold)) {
                        zev_queue_hold(q);
                        mutex_exit(&zev_mutex);
                        pollwakeup(&q->zq_pollhead, POLLIN);
                        mutex_enter(&zev_mutex);
                        zev_queue_release(q);
                }
        }
        mutex_exit(&zev_mutex);
}

static void
zev_poll_wakeup_thread_main(void)
{
        while (zev_wakeup_thread_run) {
                delay(drv_usectohz(100 * 1000)); /* sleep 100ms */

                zev_poll_wakeup(B_TRUE);
        }
        thread_exit();
}

static int
zev_ioc_mute_pool(char *poolname)
{
        zev_pool_list_entry_t *pe;
        rw_enter(&zev_pool_list_rwlock, RW_WRITER);
        /* pool already muted? */
        for (pe = zev_muted_pools_head; pe; pe = pe->next) {
                if (!strcmp(pe->name, poolname)) {
                        rw_exit(&zev_pool_list_rwlock);
                        return EEXIST;
                }
        }
        pe = zev_zalloc(sizeof(*pe));
        if (!pe) {
                rw_exit(&zev_pool_list_rwlock);
                return ENOMEM;
        }
        (void) strncpy(pe->name, poolname, sizeof(pe->name));
        pe->next = zev_muted_pools_head;
        zev_muted_pools_head = pe;
        rw_exit(&zev_pool_list_rwlock);
        return (0);
}

static int
zev_ioc_unmute_pool(char *poolname)
{
        zev_pool_list_entry_t *pe, *peprev;

        rw_enter(&zev_pool_list_rwlock, RW_WRITER);
        /* pool muted? */
        peprev = NULL;
        for (pe = zev_muted_pools_head; pe; pe = pe->next) {
                if (!strcmp(pe->name, poolname))
                        break;
                peprev = pe;
        }
        if (pe == NULL) {
                rw_exit(&zev_pool_list_rwlock);
                return ENOENT;
        }

        if (peprev != NULL) {
                peprev->next = pe->next;
        } else {
                zev_muted_pools_head = pe->next;
        }
        zev_free(pe, sizeof(*pe));
        rw_exit(&zev_pool_list_rwlock);
        return (0);
}

int
zev_skip_pool(objset_t *os)
{
        zev_pool_list_entry_t *pe;
        dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
        rw_enter(&zev_pool_list_rwlock, RW_READER);
        for (pe = zev_muted_pools_head; pe; pe = pe->next) {
                if (!strcmp(pe->name, dp->dp_spa->spa_name)) {
                        rw_exit(&zev_pool_list_rwlock);
                        return 1;
                }
        }
        rw_exit(&zev_pool_list_rwlock);
        return 0;
}

int
zev_skip_fs(zfsvfs_t *fs)
{
        dsl_dir_t *d = fs->z_os->os_dsl_dataset->ds_dir;
        dsl_dir_t *prev = NULL;

        while (d && d != prev) {
                if (strstr(d->dd_myname, "_root"))
                        return 0;
                prev = d;
                d = d->dd_parent;
        }
        return 1;
}

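/*
 * Note on zev_skip_fs(): a filesystem is only reported if one of the
 * dataset components on its path to the pool root has "_root" in its
 * name; everything else is skipped.  This looks like a site-specific
 * naming convention (e.g. a dataset layout like pool/xy_root/data)
 * rather than a general ZFS rule - the example layout here is an
 * assumption, not taken from this source.
 */
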
static void
zev_update_statistics(int op, zev_statistics_t *stat)
{
        switch (op) {
        case ZEV_OP_ERROR:
                stat->zev_cnt_errors++;
                break;
        case ZEV_OP_MARK:
                stat->zev_cnt_marks++;
                break;
        case ZEV_OP_ZFS_MOUNT:
                stat->zev_cnt_zfs_mount++;
                break;
        case ZEV_OP_ZFS_UMOUNT:
                stat->zev_cnt_zfs_umount++;
                break;
        case ZEV_OP_ZVOL_WRITE:
                stat->zev_cnt_zvol_write++;
                break;
        case ZEV_OP_ZVOL_TRUNCATE:
                stat->zev_cnt_zvol_truncate++;
                break;
        case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE:
                stat->zev_cnt_znode_close_after_update++;
                break;
        case ZEV_OP_ZNODE_CREATE:
                stat->zev_cnt_znode_create++;
                break;
        case ZEV_OP_ZNODE_REMOVE:
                stat->zev_cnt_znode_remove++;
                break;
        case ZEV_OP_ZNODE_LINK:
                stat->zev_cnt_znode_link++;
                break;
        case ZEV_OP_ZNODE_SYMLINK:
                stat->zev_cnt_znode_symlink++;
                break;
        case ZEV_OP_ZNODE_RENAME:
                stat->zev_cnt_znode_rename++;
                break;
        case ZEV_OP_ZNODE_WRITE:
                stat->zev_cnt_znode_write++;
                break;
        case ZEV_OP_ZNODE_TRUNCATE:
                stat->zev_cnt_znode_truncate++;
                break;
        case ZEV_OP_ZNODE_SETATTR:
                stat->zev_cnt_znode_setattr++;
                break;
        case ZEV_OP_ZNODE_ACL:
                stat->zev_cnt_znode_acl++;
                break;
        }
}

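/*
 * Event messages are allocated as a single buffer: a zev_msg_t header
 * immediately followed by the operation-specific record.  All producers
 * in this file follow the same pattern, e.g. in zev_queue_error():
 *
 *      msg = zev_alloc(sizeof(*msg) + msg_size);
 *      msg->size = msg_size;
 *      rec = (zev_error_t *)(msg + 1);     /+ record starts after header +/
 *
 * and the whole buffer is later released with
 * zev_free(msg, sizeof(*msg) + msg->size).
 */
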
void
zev_queue_message(int op, zev_msg_t *msg)
{
        zev_queue_t *q;
        int wakeup = 0;
        zev_msg_t *m;
        int i;

        msg->next = NULL;
        msg->prev = NULL;
        msg->read = 0;

        if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
                zev_queue_error(op, "unknown op id encountered: %d", op);
                zev_free(msg, sizeof(*msg) + msg->size);
                return;
        }

        /*
         * This mutex protects us against race conditions when several
         * threads want to queue a message and one or more queues are
         * full: we release zev_mutex to wait for the queues to become
         * less-than-full, but we don't know in which order the waiting
         * threads will be awoken. If it's not the same order in which
         * they went to sleep we might mark different messages as "newest"
         * in different queues, and so we might have dupes or even
         * skip messages.
         */
        mutex_enter(&zev_queue_msg_mutex);

        mutex_enter(&zev_mutex);

        /*
         * When the module is loaded, the default behavior is to
         * put all events into a queue and block if the queue is full.
         * This is done even before the pseudo device is attached.
         * This way, no events are lost.
         *
         * To discard events entirely, the "beaver" queue, which never
         * discards anything, has to be removed first.
         */

        if (zev_queue_cnt == 0) {
                /* no queues left: drop (and free) the event */
                zev_free(msg, sizeof(*msg) + msg->size);
                mutex_exit(&zev_mutex);
                mutex_exit(&zev_queue_msg_mutex);
                return;
        }

        /* put message into global queue */
        msg->seq = zev_msg_sequence_number++;

        /* do we need to make room? */
again:
        while (zev_statistics.zev_max_queue_len &&
            zev_statistics.zev_queue_len > zev_statistics.zev_max_queue_len) {

                if (zev_have_blocking_queues) {
                        /* so we have blocking queues. are they full? */
                        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                                q = zev_queues[i - ZEV_MINOR_MIN];
                                if (!q)
                                        continue;
                                if ((q->zq_flags &
                                    ZEV_FL_BLOCK_WHILE_QUEUE_FULL) == 0)
                                        continue;
                                if (q->zq_queue_len &&
                                    q->zq_queue_len > q->zq_max_queue_len) {
                                        /* block until queue's been shrunk. */
                                        cv_wait(&zev_condvar, &zev_mutex);
                                        goto again;
                                }
                        }
                }

                /* discard events until this message fits into all queues */

                for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                        q = zev_queues[i - ZEV_MINOR_MIN];
                        if (!q)
                                continue;
                        /* discard msgs until queue is small enough */
                        while (q->zq_queue_len &&
                            q->zq_queue_len > q->zq_max_queue_len) {
                                m = q->zq_oldest;
                                if (m == NULL)
                                        break;
                                q->zq_events_discarded++;
                                q->zq_bytes_discarded += m->size;
                                q->zq_oldest = m->next;
                                q->zq_queue_len -= m->size;
                                q->zq_queue_messages--;
                        }
                }

                zev_queue_trim();
                ASSERT(zev_statistics.zev_queue_len == 0 ||
                    zev_statistics.zev_queue_len <=
                    zev_statistics.zev_max_queue_len);
        }

        if (zev_queue_tail == NULL) {
                zev_queue_head = zev_queue_tail = msg;
        } else {
                zev_queue_tail->next = msg;
                msg->prev = zev_queue_tail;
                zev_queue_tail = msg;
        }
        zev_queue_len++;
        zev_statistics.zev_cnt_total_events++;
        zev_statistics.zev_queue_len += msg->size;

        /* update per-device queues */
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;

                zev_queue_hold(q);

                /* make sure queue has enough room */
                while (q->zq_max_queue_len &&
                    q->zq_queue_len > q->zq_max_queue_len) {

                        if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
                                /* block until queue has been shrunk. */
                                cv_wait(&zev_condvar, &zev_mutex);
                        } else {
                                /* discard msgs until queue is small enough */
                                while (q->zq_queue_len > q->zq_max_queue_len) {
                                        m = q->zq_oldest;
                                        if (m == NULL)
                                                break;
                                        q->zq_events_discarded++;
                                        q->zq_bytes_discarded += m->size;
                                        q->zq_oldest = m->next;
                                        q->zq_queue_len -= m->size;
                                        q->zq_queue_messages--;
                                }
                        }
                }

                /* register new message at the end of the queue */
                q->zq_queue_len += msg->size;
                q->zq_queue_messages++;
                q->zq_bytes_total += msg->size;
                q->zq_events_total++;
                if (q->zq_oldest == NULL)
                        q->zq_oldest = msg;

                zev_update_statistics(op, &q->zq_statistics);

                if (q->zq_queue_len > q->zq_wakeup_threshold)
                        wakeup = 1;
                if (q->zq_queue_len == msg->size) /* queue was empty */
                        cv_broadcast(&q->zq_condvar);

                zev_queue_release(q);
        }

        zev_queue_trim();

        zev_update_statistics(op, &zev_statistics);
        mutex_exit(&zev_mutex);
        mutex_exit(&zev_queue_msg_mutex);

        /* one or more queues need a pollwakeup() */
        if (op == ZEV_OP_MARK) {
                zev_poll_wakeup(B_TRUE);
        } else if (wakeup) {
                zev_poll_wakeup(B_FALSE);
        }

        return;
}

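/*
 * Consumer wakeup happens on two paths: a cv_broadcast() on the queue's
 * condvar when the queue transitions from empty to non-empty (this wakes
 * blocking zev_read() calls), and pollwakeup() for poll()/chpoll()
 * consumers - immediately for mark events or once the wakeup threshold
 * is exceeded, otherwise at the latest via the 100ms wakeup thread above.
 */
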
void
zev_queue_error(int op, char *fmt, ...)
{
        char buf[ZEV_MAX_MESSAGE_LEN];
        va_list ap;
        int len;
        zev_msg_t *msg = NULL;
        zev_error_t *rec;
        int msg_size;

        va_start(ap, fmt);
        len = vsnprintf(buf, sizeof(buf), fmt, ap);
        va_end(ap);
        if (len >= sizeof(buf)) {
                cmn_err(CE_WARN, "zev: can't report error - "
                    "dropping event entirely.");
                return;
        }

        msg_size = sizeof(*rec) + len + 1;
        msg = zev_alloc(sizeof(*msg) + msg_size);
        msg->size = msg_size;
        rec = (zev_error_t *)(msg + 1);
        rec->record_len = msg_size;
        rec->op = ZEV_OP_ERROR;
        rec->op_time = ddi_get_time();
        rec->guid = 0;
        rec->failed_op = op;
        rec->errstr_len = len;
        (void) memcpy(ZEV_ERRSTR(rec), buf, len + 1);

        zev_queue_message(ZEV_OP_ERROR, msg);
        return;
}

static int
zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name)
{
        char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1];
        zev_queue_t *q;
        int i;

        *out = NULL;

        /* an empty name means "the queue this request came in on" */
        if (name->zev_namelen == 0) {
                if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR)
                        return EINVAL;
                mutex_enter(&zev_mutex);
                zev_queue_hold(req_q);
                mutex_exit(&zev_mutex);
                *out = req_q;
                return 0;
        }

        if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
                return EINVAL;
        strncpy(namebuf, name->zev_name, name->zev_namelen);
        namebuf[name->zev_namelen] = '\0';

        mutex_enter(&zev_mutex);
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                if (!strcmp(q->zq_name, namebuf)) {
                        zev_queue_hold(q);
                        mutex_exit(&zev_mutex);
                        *out = q;
                        return 0;
                }
        }
        mutex_exit(&zev_mutex);
        return ENOENT;
}

static int
zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_get_queue_statistics_t gs;
        zev_queue_t *q;
        int ret;

        if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
                return EFAULT;

        ret = zev_find_queue(&q, req_q, &gs.zev_queue_name);
        if (ret)
                return ret;

        /*
         * ddi_copyout() can take a long time.  Better make a copy to
         * be able to release the mutex faster.
         */
        mutex_enter(&zev_mutex);
        memcpy(&gs.zev_statistics, &q->zq_statistics,
            sizeof(gs.zev_statistics));
        gs.zev_statistics.zev_queue_len = q->zq_queue_len;
        gs.zev_statistics.zev_bytes_read = q->zq_bytes_read;
        gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded;
        gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len;
        gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded;
        gs.zev_statistics.zev_cnt_total_events = q->zq_events_total;
        zev_queue_release(q);
        mutex_exit(&zev_mutex);

        if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0)
                return EFAULT;
        return 0;
}

static int
zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_set_queue_properties_t qp;
        zev_queue_t *q;
        uint64_t old_max;
        uint64_t old_flags;
        int ret;

        if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
                return EFAULT;
        if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN)
                return EINVAL;
        if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN)
                return EINVAL;

        ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
        if (ret)
                return ret;

        mutex_enter(&zev_mutex);

        /*
         * Note: if the PERSISTENT flag is cleared, and the queue is not busy,
         * the queue should be removed by zev_queue_release() in zev_ioctl().
         */
        old_flags = q->zq_flags;
        q->zq_flags = qp.zev_flags;
        if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) &&
            (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) {
                /* queue is no longer blocking - wake blocked threads */
                cv_broadcast(&zev_condvar);
        }

        zev_update_blockflag();

        old_max = q->zq_max_queue_len;
        q->zq_max_queue_len = qp.zev_max_queue_len;
        if (q->zq_max_queue_len < old_max)
                zev_queue_trim();
        if (q->zq_max_queue_len > old_max)
                cv_broadcast(&zev_condvar); /* threads may be waiting */

        if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) &&
            (qp.zev_poll_wakeup_threshold <= q->zq_queue_len)) {
                /*
                 * pollwakeup(9F) must not be called with driver locks
                 * held; the hold from zev_find_queue() keeps q alive.
                 */
                mutex_exit(&zev_mutex);
                pollwakeup(&q->zq_pollhead, POLLIN);
                mutex_enter(&zev_mutex);
        }
        q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold;

        zev_queue_release(q);
        mutex_exit(&zev_mutex);
        return 0;
}

static int
zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_get_queue_properties_t qp;
        zev_queue_t *q;
        int ret;

        if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
                return EFAULT;

        ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
        if (ret)
                return ret;

        mutex_enter(&zev_mutex);
        qp.zev_max_queue_len = q->zq_max_queue_len;
        qp.zev_flags = q->zq_flags;
        qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold;
        zev_queue_release(q);
        mutex_exit(&zev_mutex);

        if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0)
                return EFAULT;
        return 0;
}

static int
zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_add_queue_t aq;
        zev_queue_t *new_q;
        char name[ZEV_MAX_QUEUE_NAME_LEN+1];

        if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0)
                return EFAULT;

        if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
                return EINVAL;
        strncpy(name, aq.zev_name, aq.zev_namelen);
        name[aq.zev_namelen] = '\0';

        return zev_queue_new(&new_q, req_q->zq_dip, name,
            aq.zev_max_queue_len, aq.zev_flags);
}

static int
zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_remove_queue_t rq;
        zev_queue_t *q;
        char name[ZEV_MAX_QUEUE_NAME_LEN+1];
        int found = 0;
        int i;

        if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0)
                return EFAULT;

        if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
                return EINVAL;
        strncpy(name, rq.zev_queue_name.zev_name,
            rq.zev_queue_name.zev_namelen);
        name[rq.zev_queue_name.zev_namelen] = '\0';

        mutex_enter(&zev_mutex);
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                if (!strcmp(q->zq_name, name)) {
                        found = 1;
                        break;
                }
        }
        if (!found) {
                mutex_exit(&zev_mutex);
                return ENOENT;
        }

        if (q->zq_busy) {
                mutex_exit(&zev_mutex);
                return EBUSY;
        }
        /*
         * clear flags, so that persistent queues are removed as well
         * and the queue becomes non-blocking.
         */
        q->zq_flags = 0;
        if (q->zq_to_be_removed == B_FALSE) {
                q->zq_to_be_removed = B_TRUE;
                zev_queue_release(q);
        }
        /* some threads might be waiting for this queue to become writable */
        cv_broadcast(&zev_condvar);

        mutex_exit(&zev_mutex);
        return 0;
}

static int
zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_debug_info_t di;
        uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0);
        uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0);

        zev_chksum_stats(&di.zev_chksum_cache_size,
            &di.zev_chksum_cache_hits,
            &di.zev_chksum_cache_misses);
        di.zev_memory_allocated = mem_allocated - mem_freed;
        if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0)
                return EFAULT;
        return 0;
}

static int
zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode)
{
        zev_ioctl_get_queue_list_t gql;
        zev_queue_t *q;
        int i = 0;
        int count = 0;

        memset(&gql, 0, sizeof(gql));

        mutex_enter(&zev_mutex);
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                strncpy(gql.zev_queue_name[count].zev_name,
                    q->zq_name, ZEV_MAX_QUEUE_NAME_LEN);
                gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name);
                count++;
        }
        gql.zev_n_queues = count;
        mutex_exit(&zev_mutex);

        if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0)
                return EFAULT;
        return 0;
}

static int
zev_ioc_set_max_queue_len(zev_queue_t *req_q, intptr_t arg, int mode)
{
        uint64_t len;
        int i;
        zev_queue_t *q;

        if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) {
                return EFAULT;
        }
        if (len > ZEV_MAX_QUEUE_LEN) {
                return EINVAL;
        }
        mutex_enter(&zev_mutex);
        zev_statistics.zev_max_queue_len = len;
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                if (q->zq_max_queue_len <=
                    zev_statistics.zev_max_queue_len)
                        continue;
                q->zq_max_queue_len = zev_statistics.zev_max_queue_len;
        }
        cv_broadcast(&zev_condvar);
        mutex_exit(&zev_mutex);
        return 0;
}

static int
zev_ioc_get_zev_version(intptr_t arg, int mode)
{
        zev_ioctl_get_zev_version vi;
        vi.zev_major_version = ZEV_MAJOR_VERSION;
        vi.zev_minor_version = ZEV_MINOR_VERSION;
        if (ddi_copyout(&vi, (void *)arg, sizeof(vi), mode) != 0)
                return EFAULT;
        return 0;
}

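/*
 * Hypothetical userland usage sketch (not part of this module; the device
 * paths are an assumption, and the ioctl structure layouts come from the
 * zev headers):
 *
 *      int ctrl = open("/dev/zev/ctrl", O_RDONLY);     // control device
 *      zev_ioctl_add_queue_t aq = { 0 };
 *      strcpy(aq.zev_name, "myqueue");                 // example name
 *      aq.zev_namelen = strlen("myqueue");
 *      aq.zev_flags = ZEV_FL_INITIALLY_EMPTY;
 *      ioctl(ctrl, ZEV_IOC_ADD_QUEUE, &aq);
 *
 *      int q = open("/dev/zev/myqueue", O_RDONLY);     // exclusive open
 *      char buf[ZEV_MAX_MESSAGE_LEN];
 *      read(q, buf, sizeof(buf));      // blocks until an event arrives
 */
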
/* ARGSUSED */
static int
zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
        zev_statistics_t zs;
        zev_ioctl_poolarg_t pa;
        zev_ioctl_mark_t mark;
        zev_mark_t *rec;
        int msg_size;
        zev_msg_t *msg;
        uint64_t mark_id;
        minor_t minor;
        zev_queue_t *req_q;
        int ret = 0;

        minor = getminor(dev);
        mutex_enter(&zev_mutex);
        if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        zev_queue_hold(req_q);
        mutex_exit(&zev_mutex);
        /*
         * all structures passed between kernel and userspace
         * are now compatible between 64 and 32 bit. Model
         * conversion can be ignored.
         */
        switch (cmd) {
        case ZEV_IOC_GET_GLOBAL_STATISTICS:
                /*
                 * ddi_copyout() can take a long time.  Better make a copy
                 * to be able to release the mutex faster.
                 */
                mutex_enter(&zev_mutex);
                (void) memcpy(&zs, &zev_statistics, sizeof(zs));
                mutex_exit(&zev_mutex);
                if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0)
                        ret = EFAULT;
                break;
        case ZEV_IOC_GET_QUEUE_STATISTICS:
                ret = zev_ioc_get_queue_statistics(req_q, arg, mode);
                break;
        case ZEV_IOC_MUTE_POOL:
        case ZEV_IOC_UNMUTE_POOL:
                if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) {
                        ret = EFAULT;
                        break;
                }
                if (pa.zev_poolname_len >= MAXPATHLEN) {
                        ret = EINVAL;
                        break;
                }
                pa.zev_poolname[pa.zev_poolname_len] = '\0';
                if (cmd == ZEV_IOC_MUTE_POOL) {
                        ret = zev_ioc_mute_pool(pa.zev_poolname);
                } else {
                        ret = zev_ioc_unmute_pool(pa.zev_poolname);
                }
                break;
        case ZEV_IOC_SET_MAX_QUEUE_LEN:
                ret = zev_ioc_set_max_queue_len(req_q, arg, mode);
                break;
        case ZEV_IOC_GET_QUEUE_PROPERTIES:
                ret = zev_ioc_get_queue_properties(req_q, arg, mode);
                break;
        case ZEV_IOC_SET_QUEUE_PROPERTIES:
                ret = zev_ioc_set_queue_properties(req_q, arg, mode);
                break;
        case ZEV_IOC_MARK:
                if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) {
                        ret = EFAULT;
                        break;
                }
                /* prepare message */
                msg_size = sizeof(*rec) + mark.zev_payload_len + 1;
                msg = zev_alloc(sizeof(*msg) + msg_size);
                msg->size = msg_size;
                rec = (zev_mark_t *)(msg + 1);
                rec->record_len = msg_size;
                rec->op = ZEV_OP_MARK;
                rec->op_time = ddi_get_time();
                rec->guid = mark.zev_guid;
                rec->payload_len = mark.zev_payload_len;
                /* get payload */
                if (ddi_copyin(((char *)arg) + sizeof(mark),
                    ZEV_PAYLOAD(rec),
                    mark.zev_payload_len, mode) != 0) {
                        /* free header and record, as allocated above */
                        zev_free(msg, sizeof(*msg) + msg_size);
                        ret = EFAULT;
                        break;
                }
                *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0';
                /* get mark id and queue message */
                mutex_enter(&zev_mark_id_mutex);
                mark_id = zev_mark_id++;
                mutex_exit(&zev_mark_id_mutex);
                rec->mark_id = mark_id;
                zev_queue_message(ZEV_OP_MARK, msg);
                /* report mark id to userland, ignore errors */
                mark.zev_mark_id = mark_id;
                ddi_copyout(&mark, (void *)arg, sizeof(mark), mode);
                break;
        case ZEV_IOC_ADD_QUEUE:
                if (minor != ZEV_CONTROL_DEVICE_MINOR) {
                        ret = EACCES;
                        break;
                }
                ret = zev_ioc_add_queue(req_q, arg, mode);
                break;
        case ZEV_IOC_REMOVE_QUEUE:
                if (minor != ZEV_CONTROL_DEVICE_MINOR) {
                        ret = EACCES;
                        break;
                }
                ret = zev_ioc_remove_queue(req_q, arg, mode);
                break;
        case ZEV_IOC_GET_DEBUG_INFO:
                ret = zev_ioc_get_debug_info(req_q, arg, mode);
                break;
        case ZEV_IOC_GET_QUEUE_LIST:
                ret = zev_ioc_get_queue_list(req_q, arg, mode);
                break;
        case ZEV_IOC_GET_FILE_SIGNATURES:
                ret = zev_ioc_get_signatures(arg, mode);
                break;
        case ZEV_IOC_GET_ZEV_VERSION:
                ret = zev_ioc_get_zev_version(arg, mode);
                break;
        default:
                /* generic "ioctl unknown" error */
                ret = ENOTTY;
        }

        mutex_enter(&zev_mutex);
        zev_queue_release(req_q);
        mutex_exit(&zev_mutex);
        if (ret)
                ret = SET_ERROR(ret);
        return (ret);
}

static int
zev_chpoll(dev_t dev, short events, int anyyet,
    short *reventsp, struct pollhead **phpp)
{
        int minor;
        short revent = 0;
        zev_queue_t *q;

        /* use the minor-specific queue context and its pollhead */
        minor = getminor(dev);
        if (minor == ZEV_CONTROL_DEVICE_MINOR)
                return (EINVAL);
        mutex_enter(&zev_mutex);
        if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        revent = 0;
        if ((events & POLLIN)) {
                if (q->zq_oldest)
                        revent |= POLLIN;
        }
        if (revent == 0) {
                if (!anyyet) {
                        *phpp = &q->zq_pollhead;
                }
        }
        *reventsp = revent;
        mutex_exit(&zev_mutex);
        return (0);
}

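/*
 * Note: zev_chpoll() implements the classic chpoll(9E) contract - it only
 * hands out the pollhead when no requested event is pending and no other
 * pollhead has been supplied yet (anyyet == 0).  The wakeup side is the
 * pollwakeup(POLLIN) calls issued from zev_poll_wakeup() and
 * zev_ioc_set_queue_properties().
 */
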
/* ARGSUSED */
static int
zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p)
{
        minor_t minor;
        offset_t off;
        int ret = 0;
        zev_msg_t *msg;
        char *data;
        zev_queue_t *q;

        minor = getminor(dev);
        if (minor == ZEV_CONTROL_DEVICE_MINOR)
                return (EINVAL);

        mutex_enter(&zev_mutex);
        q = ddi_get_soft_state(statep, minor);
        if (q == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        off = uio_p->uio_loffset;
        msg = q->zq_oldest;
        while (msg == NULL) {
                if (!ddi_can_receive_sig()) {
                        /*
                         * read() shouldn't block because this thread
                         * can't receive signals. (e.g., it might be
                         * torn down by exit() right now.)
                         */
                        mutex_exit(&zev_mutex);
                        return 0;
                }
                if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) {
                        /* signal received. */
                        mutex_exit(&zev_mutex);
                        return EINTR;
                }
                msg = q->zq_oldest;
        }
        if (msg->size > uio_p->uio_resid) {
                mutex_exit(&zev_mutex);
                return E2BIG;
        }
        while (msg && uio_p->uio_resid >= msg->size) {
                data = (char *)(msg + 1);
                ret = uiomove(data, msg->size, UIO_READ, uio_p);
                if (ret != 0) {
                        mutex_exit(&zev_mutex);
                        cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
                        uio_p->uio_loffset = off;
                        return (ret);
                }
                q->zq_oldest = msg->next;
                q->zq_bytes_read += msg->size;
                q->zq_queue_len -= msg->size;
                q->zq_queue_messages--;
                msg->read++;
                msg = q->zq_oldest;
        }
        zev_queue_trim();
        cv_broadcast(&zev_condvar);
        mutex_exit(&zev_mutex);
        uio_p->uio_loffset = off;
        return 0;
}

/* ARGSUSED */
static int
zev_close(dev_t dev, int flag, int otyp, cred_t *crepd)
{
        zev_queue_t *q;
        int minor;

        minor = getminor(dev);
        if (otyp != OTYP_CHR)
                return (EINVAL);
        mutex_enter(&zev_mutex);
        if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        if (q->zq_busy != B_TRUE) {
                mutex_exit(&zev_mutex);
                return (EINVAL);
        }
        q->zq_busy = B_FALSE;
        if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0)
                zev_queue_release(q);
        mutex_exit(&zev_mutex);
        return (0);
}

/* ARGSUSED */
static int
zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
        zev_queue_t *q;
        minor_t minor;

        minor = getminor(*devp);
        if (otyp != OTYP_CHR)
                return (EINVAL);
        if (drv_priv(credp) != 0)
                return (EPERM);
        mutex_enter(&zev_mutex);
        if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
                mutex_exit(&zev_mutex);
                return (ENXIO);
        }
        if (minor == ZEV_CONTROL_DEVICE_MINOR) {
                /* control device may be used in parallel */
                q->zq_busy = B_TRUE;
                mutex_exit(&zev_mutex);
                return 0;
        }
        if (q->zq_busy == B_TRUE) {
                mutex_exit(&zev_mutex);
                return (EBUSY);
        }
        q->zq_busy = B_TRUE; /* can only be opened exclusively */
        mutex_exit(&zev_mutex);
        return (0);
}

static struct cb_ops zev_cb_ops = {
        zev_open,               /* open */
        zev_close,              /* close */
        nodev,                  /* strategy */
        nodev,                  /* print */
        nodev,                  /* dump */
        zev_read,               /* read */
        nodev,                  /* write */
        zev_ioctl,              /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        zev_chpoll,             /* chpoll */
        ddi_prop_op,            /* prop_op */
        NULL,                   /* streamtab */
        D_MP | D_64BIT,         /* cb_flag */
        CB_REV,                 /* cb_rev */
        nodev,                  /* aread */
        nodev,                  /* awrite */
};

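/*
 * The event stream is strictly kernel-to-userland: write() is nodev, so
 * commands flow in via ioctl() only.  D_MP declares the driver MT-safe;
 * all entry points rely on zev_mutex (plus the pool list rwlock) for
 * their serialization.
 */
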
static void
zev_free_instance(dev_info_t *dip)
{
        int instance;
        zev_queue_t *q;
        int i;

        instance = ddi_get_instance(dip);
        if (instance != 0) {
                cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)",
                    instance);
                return;
        }

        ddi_remove_minor_node(dip, NULL);
        devfs_clean(dip, NULL, 0);

        /* stop pollwakeup thread */
        zev_wakeup_thread_run = 0;
        if (zev_poll_wakeup_thread != NULL) {
                thread_join(zev_poll_wakeup_thread->t_did);
                zev_poll_wakeup_thread = NULL;
        }

        mutex_enter(&zev_mutex);

        /* remove "ctrl" dummy queue */
        q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
        if (q) {
                ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR);
                ZEV_MEM_SUB(sizeof(zev_queue_t));
        }

        /* remove all other queues */
        for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
                q = zev_queues[i - ZEV_MINOR_MIN];
                if (!q)
                        continue;
                ASSERT(q->zq_refcnt == 1);
                zev_queue_release(q);
        }
        zev_queue_trim();
        bzero(&zev_queues, sizeof(zev_queues));

        mutex_exit(&zev_mutex);
}

static int
zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        int instance;
        zev_queue_t *q;

        /*
         * called once per instance with DDI_DETACH,
         * may be called to suspend
         */
        switch (cmd) {
        case DDI_DETACH:
                /* instance busy? */
                instance = ddi_get_instance(dip);
                if (instance != 0) { /* hardcoded in zev.conf */
                        /* this module only supports one instance. */
                        return (DDI_FAILURE);
                }

                mutex_enter(&zev_mutex);
                if (!zev_attached) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }

                /* check the "ctrl" queue to see if it is busy */
                q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
                if (q == NULL) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }
                if (q->zq_busy) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }
                /* are there any queues left? */
                if (zev_queue_cnt > 0) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }

                zev_attached = B_FALSE;
                mutex_exit(&zev_mutex);

                /* switch ZFS event callbacks back to default */
                rw_enter(&rz_zev_rwlock, RW_WRITER);
                rz_zev_callbacks = rz_zev_default_callbacks;
                rz_zev_set_active(B_FALSE);
                rw_exit(&rz_zev_rwlock);

                /* no thread is inside of the callbacks anymore. */

                /* free resources allocated for this instance */
                zev_free_instance(dip);
                zev_chksum_fini();
#if 0
                cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64,
                    zev_memory_allocated - zev_memory_freed);
#endif
                return (DDI_SUCCESS);
        case DDI_SUSPEND:
                /* kernel must not suspend zev devices while ZFS is running */
                return (DDI_FAILURE);
        default:
                return (DDI_FAILURE);
        }
}

static int
zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        /*
         * called once per instance with DDI_ATTACH,
         * may be called to resume
         */
        int instance;
        int error;
        zev_queue_t *q;
        switch (cmd) {
        case DDI_ATTACH:
                /* create instance state */
                instance = ddi_get_instance(dip);
                if (instance != 0) { /* hardcoded in zev.conf */
                        /* this module only supports one instance. */
                        return (DDI_FAILURE);
                }

                mutex_enter(&zev_mutex);
                if (zev_attached) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }
                if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) !=
                    DDI_SUCCESS) {
                        mutex_exit(&zev_mutex);
                        return (DDI_FAILURE);
                }
                ZEV_MEM_ADD(sizeof(zev_queue_t));
                zev_attached = B_TRUE;

                /* init queue list */
                bzero(&zev_queues, sizeof(zev_queues));
                mutex_exit(&zev_mutex);

                /* create a dummy queue for management of "ctrl" */
                q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
                q->zq_dip = dip;
                q->zq_refcnt = 1;
                q->zq_busy = B_FALSE;
                q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR;
                q->zq_flags = ZEV_FL_PERSISTENT;
                strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME);

                /* create device node for "ctrl" */
                if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME,
                    S_IFCHR, ZEV_CONTROL_DEVICE_MINOR,
                    DDI_PSEUDO, 0) == DDI_FAILURE) {
                        goto fail;
                }

                /* note: intentionally not adding ctrl queue to queue list. */

                /* default queue */
                error = zev_queue_new(&q, dip,
                    ZEV_DEFAULT_QUEUE_NAME,
                    ZEV_MAX_QUEUE_LEN,
                    ZEV_FL_BLOCK_WHILE_QUEUE_FULL |
                    ZEV_FL_PERSISTENT);
                if (error)
                        goto fail;

                /* start pollwakeup thread */
                zev_wakeup_thread_run = 1;
                zev_poll_wakeup_thread = thread_create(NULL, 0,
                    zev_poll_wakeup_thread_main, NULL, 0, &p0,
                    TS_RUN, minclsyspri);

                ddi_report_dev(dip);

                zev_chksum_init();

                /* switch ZFS event callbacks to zev module callbacks */
                rw_enter(&rz_zev_rwlock, RW_WRITER);
                rz_zev_callbacks = &zev_callbacks;
                rz_zev_set_active(B_TRUE);
                rw_exit(&rz_zev_rwlock);

                return (DDI_SUCCESS);
        case DDI_RESUME:
                /*
                 * suspending zev devices is not supported, so a resume
                 * should never happen
                 */
                return (DDI_SUCCESS);
        default:
                return (DDI_FAILURE);
        }
fail:
        cmn_err(CE_WARN, "zev: attach failed");
        zev_free_instance(dip);
        mutex_enter(&zev_mutex);
        zev_attached = B_FALSE;
        mutex_exit(&zev_mutex);
        return (DDI_FAILURE);
}

/* ARGSUSED */
static int
zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
{
        minor_t minor;
        zev_queue_t *q;

        /* arg is dev_t */
        minor = getminor((dev_t)arg);
        mutex_enter(&zev_mutex);
        q = ddi_get_soft_state(statep, minor);
        if (q == NULL) {
                *resultp = NULL;
                mutex_exit(&zev_mutex);
                return (DDI_FAILURE);
        }

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *resultp = q->zq_dip;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip);
                break;
        default:
                mutex_exit(&zev_mutex);
                return (DDI_FAILURE);
        }
        mutex_exit(&zev_mutex);
        return (DDI_SUCCESS);
}

static struct dev_ops zev_dev_ops = {
        DEVO_REV,                       /* driver build revision */
        0,                              /* driver reference count */
        zev_getinfo,                    /* getinfo */
        nulldev,                        /* identify (obsolete) */
        nulldev,                        /* probe (search for devices) */
        zev_attach,                     /* attach */
        zev_detach,                     /* detach */
        nodev,                          /* reset (obsolete, use quiesce) */
        &zev_cb_ops,                    /* character and block device ops */
        NULL,                           /* bus driver ops */
        NULL,                           /* power management, not needed */
        ddi_quiesce_not_needed,         /* quiesce */
};

static struct modldrv zev_modldrv = {
        &mod_driverops,         /* all loadable modules use this */
        "ZFS event provider, v"
            XSTRING(ZEV_MAJOR_VERSION) "."
            XSTRING(ZEV_MINOR_VERSION), /* driver name and version info */
        &zev_dev_ops            /* ops method pointers */
};

static struct modlinkage zev_modlinkage = {
        MODREV_1,               /* fixed value */
        {
                &zev_modldrv,   /* driver linkage structure */
                NULL            /* list terminator */
        }
};

int
_init(void)
{
        int error;

        if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0)
                return (error);
        zev_attached = B_FALSE;

        zev_queue_head = NULL;
        zev_queue_tail = NULL;
        zev_queue_len = 0;
        zev_muted_pools_head = NULL;
        zev_memory_allocated = 0;
        zev_memory_freed = 0;
        zev_queue_cnt = 0;
        zev_have_blocking_queues = 1;

        mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL);
        cv_init(&zev_condvar, NULL, CV_DRIVER, NULL);
        rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL);
        mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL);
        zev_mark_id = gethrtime();
        mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL);
        zev_msg_sequence_number = gethrtime();
        bzero(&zev_statistics, sizeof(zev_statistics));
        bzero(&zev_pollhead, sizeof(zev_pollhead));
        bzero(&zev_queues, sizeof(zev_queues));
        zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN;
        if ((error = zev_ioc_mute_pool("zg0")) != 0) {
                cmn_err(CE_WARN, "zev: could not init mute list");
                goto FAIL;
        }

        if ((error = mod_install(&zev_modlinkage)) != 0) {
                cmn_err(CE_WARN, "zev: could not install module");
                goto FAIL;
        }

        return (0);
FAIL:
        /* free resources */
        cmn_err(CE_WARN, "zev: _init failed");
        mutex_destroy(&zev_mutex);
        cv_destroy(&zev_condvar);
        rw_destroy(&zev_pool_list_rwlock);
        mutex_destroy(&zev_mark_id_mutex);
        mutex_destroy(&zev_queue_msg_mutex);
        ddi_soft_state_fini(&statep);
        return (error);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&zev_modlinkage, modinfop));
}

int
_fini(void)
{
        int error = 0;
        zev_msg_t *msg;
        zev_pool_list_entry_t *pe, *npe;

        mutex_enter(&zev_mutex);
        if (zev_attached == B_TRUE) {
                mutex_exit(&zev_mutex);
                return (SET_ERROR(EBUSY));
        }
        if (zev_queue_cnt != 0) {
                /* should never happen */
                mutex_exit(&zev_mutex);
                return (SET_ERROR(EBUSY));
        }

        /*
         * avoid deadlock if event list is full: make sure threads currently
         * blocking on the event list can append their event and then release
         * rz_zev_rwlock. Since there should be no queues left when we
         * reach this point we can simply empty the event list and then
         * wake everybody.
         */
        while (zev_queue_head) {
                msg = zev_queue_head;
                zev_queue_head = msg->next;
                zev_free(msg, sizeof(*msg) + msg->size);
        }
        cv_broadcast(&zev_condvar);
        mutex_exit(&zev_mutex);

        /* switch ZFS event callbacks back to default (again) */
        rw_enter(&rz_zev_rwlock, RW_WRITER);
        rz_zev_callbacks = rz_zev_default_callbacks;
        rz_zev_set_active(B_FALSE);
        rw_exit(&rz_zev_rwlock);

        /* no thread is inside of the callbacks anymore. Safe to remove. */

        /* unload module callbacks */
        if ((error = mod_remove(&zev_modlinkage)) != 0) {
                cmn_err(CE_WARN, "mod_remove failed: %d", error);
                return (error);
        }

        /* free resources */
        mutex_enter(&zev_mutex);
        while (zev_queue_head) {
                msg = zev_queue_head;
                zev_queue_head = msg->next;
                zev_free(msg, sizeof(*msg) + msg->size);
        }
        mutex_exit(&zev_mutex);
        rw_enter(&zev_pool_list_rwlock, RW_WRITER);
        pe = zev_muted_pools_head;
        while (pe) {
                npe = pe;
                pe = pe->next;
                zev_free(npe, sizeof(*npe));
        }
        rw_exit(&zev_pool_list_rwlock);
        ddi_soft_state_fini(&statep);
        rw_destroy(&zev_pool_list_rwlock);
        cv_destroy(&zev_condvar);
        mutex_destroy(&zev_mutex);
        mutex_destroy(&zev_mark_id_mutex);
        mutex_destroy(&zev_queue_msg_mutex);

        return (0);
}
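
/*
 * For reference: the driver expects instance 0 to be hardcoded in
 * zev.conf (see zev_attach()/zev_detach() above).  A minimal driver.conf
 * entry would presumably look like this (a sketch, not taken from this
 * source tree):
 *
 *      name="zev" parent="pseudo" instance=0;
 */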