1 #include <sys/modctl.h> 2 #include <sys/ddi.h> 3 #include <sys/sunddi.h> 4 #include <sys/conf.h> 5 #include <sys/devops.h> 6 #include <sys/stat.h> 7 #include <sys/fs/zev.h> 8 #include <sys/zev_callbacks.h> 9 #include <sys/zev_checksums.h> 10 #include <sys/zfs_znode.h> 11 #include <sys/time.h> 12 #include <sys/sa.h> 13 #include <sys/zap.h> 14 #include <sys/time.h> 15 #include <sys/fs/dv_node.h> 16 17 #define OFFSETOF(s, m) ((size_t)(&(((s *)0)->m))) 18 19 #define XSTRING(x) STRING(x) 20 #define STRING(x) #x 21 22 #define ZEV_DEFAULT_QUEUE_NAME "beaver" 23 #define ZEV_CONTROL_DEVICE_MINOR 0 24 #define ZEV_MINOR_MIN (ZEV_CONTROL_DEVICE_MINOR + 1) 25 #define ZEV_MINOR_MAX (ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1) 26 27 typedef struct zev_queue { 28 char zq_name[ZEV_MAX_QUEUE_NAME_LEN+1]; 29 minor_t zq_minor_number; 30 dev_info_t *zq_dip; 31 struct pollhead zq_pollhead; 32 uint64_t zq_bytes_read; 33 uint64_t zq_events_read; 34 uint64_t zq_bytes_discarded; 35 uint64_t zq_events_discarded; 36 uint64_t zq_bytes_total; 37 uint64_t zq_events_total; 38 uint64_t zq_wakeup_threshold; 39 uint16_t zq_flags; 40 uint16_t zq_need_wakeup; 41 /* protected by zev_mutex */ 42 int zq_refcnt; 43 uint64_t zq_queue_len; 44 uint64_t zq_queue_messages; 45 uint64_t zq_max_queue_len; 46 zev_msg_t *zq_oldest; 47 boolean_t zq_busy; 48 boolean_t zq_to_be_removed; 49 zev_statistics_t zq_statistics; 50 kcondvar_t zq_condvar; 51 } zev_queue_t; 52 53 static void *statep; 54 struct pollhead zev_pollhead; 55 56 kmutex_t zev_mutex; 57 kcondvar_t zev_condvar; 58 kmutex_t zev_queue_msg_mutex; 59 krwlock_t zev_pool_list_rwlock; 60 static zev_statistics_t zev_statistics; 61 static boolean_t zev_attached; 62 static kmutex_t zev_mark_id_mutex; 63 static uint64_t zev_mark_id = 0; 64 65 static uint64_t zev_msg_sequence_number = 0; 66 static zev_queue_t *zev_queues[ZEV_MAX_QUEUES]; 67 static int zev_queue_cnt = 0; 68 static int zev_have_blocking_queues = 1; 69 70 uint64_t zev_memory_allocated = 0; 71 uint64_t 
zev_memory_freed = 0; 72 73 /* 74 * The longest potential message is from zev_zfs_mount() and 75 * contains the mountpoint, which might be close to MAXPATHLEN bytes long. 76 * 77 * Another candidate is zev_znode_rename_cb() and contains three inode 78 * numbers and two filenames of up to MAXNAMELEN bytes each. 79 */ 80 #define ZEV_MAX_MESSAGE_LEN 4096 81 82 static zev_msg_t *zev_queue_head = NULL; 83 static zev_msg_t *zev_queue_tail = NULL; 84 static uint64_t zev_queue_len = 0; 85 86 87 typedef struct zev_pool_list_entry { 88 struct zev_pool_list_entry *next; 89 char name[MAXPATHLEN]; 90 } zev_pool_list_entry_t; 91 92 static zev_pool_list_entry_t *zev_muted_pools_head = NULL; 93 94 static volatile int zev_wakeup_thread_run = 1; 95 static kthread_t *zev_poll_wakeup_thread = NULL; 96 97 void * 98 zev_alloc(ssize_t sz) 99 { 100 ZEV_MEM_ADD(sz); 101 return kmem_alloc(sz, KM_SLEEP); 102 } 103 104 void * 105 zev_zalloc(ssize_t sz) 106 { 107 ZEV_MEM_ADD(sz); 108 return kmem_zalloc(sz, KM_SLEEP); 109 } 110 111 void 112 zev_free(void *ptr, ssize_t sz) 113 { 114 ZEV_MEM_SUB(sz); \ 115 kmem_free(ptr, sz); 116 } 117 118 /* must be called with zev_mutex held */ 119 static void 120 zev_update_blockflag(void) 121 { 122 zev_queue_t *q; 123 int had_blocking_queues; 124 int i; 125 126 had_blocking_queues = zev_have_blocking_queues; 127 128 /* do we still have blocking queues? 
*/ 129 zev_have_blocking_queues = 0; 130 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 131 q = zev_queues[i - ZEV_MINOR_MIN]; 132 if (!q) 133 continue; 134 if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) { 135 zev_have_blocking_queues = 1; 136 break; 137 } 138 } 139 /* no blocking queues */ 140 if (had_blocking_queues) 141 cv_broadcast(&zev_condvar); 142 } 143 144 int 145 zev_queue_cmp(const void *a, const void *b) 146 { 147 const zev_queue_t *qa = a; 148 const zev_queue_t *qb = b; 149 if (qa->zq_minor_number > qb->zq_minor_number) 150 return 1; 151 if (qa->zq_minor_number < qb->zq_minor_number) 152 return -1; 153 return 0; 154 } 155 156 /* must be called with zev_mutex held */ 157 void 158 zev_queue_trim(void) 159 { 160 zev_msg_t *m; 161 uint64_t oldest_message; 162 zev_queue_t *q; 163 int i; 164 165 if (!zev_queue_tail) 166 return; 167 168 oldest_message = zev_queue_tail->seq + 1; /* does not exist, yet. */ 169 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 170 q = zev_queues[i - ZEV_MINOR_MIN]; 171 if (q == NULL) 172 continue; 173 if (!q->zq_oldest) 174 continue; 175 if (oldest_message > q->zq_oldest->seq) 176 oldest_message = q->zq_oldest->seq; 177 } 178 179 /* remove msgs between oldest_message and zev_queue_head */ 180 while(zev_queue_head && (oldest_message > zev_queue_head->seq)) { 181 m = zev_queue_head; 182 zev_queue_head = m->next; 183 if (zev_queue_head == NULL) { 184 zev_queue_tail = NULL; 185 } else { 186 zev_queue_head->prev = NULL; 187 } 188 if (m->read == 0) { 189 zev_statistics.zev_bytes_discarded += m->size; 190 zev_statistics.zev_cnt_discarded_events++; 191 } 192 zev_statistics.zev_queue_len -= m->size; 193 zev_queue_len--; 194 zev_free(m, sizeof(*m) + m->size); 195 } 196 } 197 198 /* must be called with zev_mutex held */ 199 static void 200 zev_queue_hold(zev_queue_t *q) 201 { 202 q->zq_refcnt++; 203 } 204 205 /* must be called with zev_mutex held */ 206 static void 207 zev_queue_release(zev_queue_t *q) 208 { 209 q->zq_refcnt--; 210 
if (q->zq_refcnt > 0) 211 return; 212 213 ASSERT(q->zq_busy == B_FALSE); 214 215 /* persistent queues will not be removed */ 216 if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0) 217 return; 218 219 /* remove queue from queue list */ 220 zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL; 221 222 /* discard messages that no queue references anymore */ 223 zev_queue_trim(); 224 225 cv_destroy(&q->zq_condvar); 226 ddi_remove_minor_node(q->zq_dip, q->zq_name); 227 devfs_clean(q->zq_dip, NULL, 0); 228 ddi_soft_state_free(statep, q->zq_minor_number); 229 ZEV_MEM_SUB(sizeof(zev_queue_t)); 230 zev_queue_cnt--; 231 zev_update_blockflag(); 232 } 233 234 int 235 zev_queue_new(zev_queue_t **queue, 236 dev_info_t *dip, 237 char *name, 238 uint64_t max_queue_len, 239 uint16_t flags) 240 { 241 zev_queue_t *q; 242 zev_queue_t *tmp; 243 zev_msg_t *msg; 244 int name_exists = 0; 245 minor_t minor; 246 char *p; 247 int i; 248 249 if (max_queue_len > ZEV_MAX_QUEUE_LEN) 250 return EINVAL; 251 if (max_queue_len == 0) 252 max_queue_len = ZEV_MAX_QUEUE_LEN; 253 if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME)) 254 return EINVAL; 255 for (p = name; *p; p++) { 256 if (*p >= 'a' && *p <= 'z') 257 continue; 258 if (*p >= '0' && *p <= '9') 259 continue; 260 if (*p == '.') 261 continue; 262 return EINVAL; 263 } 264 265 mutex_enter(&zev_mutex); 266 267 /* find free minor number.*/ 268 /* if this were a frequent operation we'd have a free-minor list */ 269 for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) { 270 tmp = zev_queues[minor - ZEV_MINOR_MIN]; 271 if (tmp == NULL) 272 break; 273 } 274 if (tmp) { 275 mutex_exit(&zev_mutex); 276 return ENOSPC; 277 } 278 279 if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) { 280 mutex_exit(&zev_mutex); 281 return ENOSPC; 282 } 283 ZEV_MEM_ADD(sizeof(zev_queue_t)); 284 285 q = ddi_get_soft_state(statep, minor); 286 memset(q, 0, sizeof(*q)); 287 strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN); 288 q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0'; 289 
q->zq_max_queue_len = max_queue_len; 290 q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN; 291 q->zq_flags = flags; 292 q->zq_refcnt = 1; 293 q->zq_dip = dip; 294 q->zq_minor_number = minor; 295 cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL); 296 297 /* insert into queue list */ 298 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 299 /* if this were a frequent operation we'd have a name tree */ 300 if (zev_queues[i - ZEV_MINOR_MIN] == NULL) 301 continue; 302 if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) { 303 name_exists = 1; 304 break; 305 } 306 } 307 if (name_exists) { 308 ddi_soft_state_free(statep, minor); 309 ZEV_MEM_SUB(sizeof(zev_queue_t)); 310 mutex_exit(&zev_mutex); 311 return EEXIST; 312 } 313 zev_queues[minor - ZEV_MINOR_MIN] = q; 314 zev_queue_cnt++; 315 316 /* calculate current queue len and find head and tail */ 317 if (!(q->zq_flags & ZEV_FL_INITIALLY_EMPTY)) { 318 q->zq_oldest = zev_queue_tail; 319 msg = zev_queue_tail; 320 while ((msg) && (q->zq_queue_len < q->zq_max_queue_len)) { 321 q->zq_queue_len += msg->size; 322 q->zq_queue_messages++; 323 q->zq_oldest = msg; 324 msg = msg->prev; 325 } 326 } 327 328 zev_update_blockflag(); 329 330 mutex_exit(&zev_mutex); 331 332 if (ddi_create_minor_node(dip, name, 333 S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) { 334 mutex_enter(&zev_mutex); 335 zev_queues[minor - ZEV_MINOR_MIN] = NULL; 336 zev_queue_cnt--; 337 ddi_soft_state_free(statep, minor); 338 ZEV_MEM_SUB(sizeof(zev_queue_t)); 339 zev_update_blockflag(); 340 mutex_exit(&zev_mutex); 341 return EFAULT; 342 } 343 344 *queue = q; 345 return 0; 346 } 347 348 /* 349 * poll() wakeup thread. Used to check periodically whether we have 350 * bytes left in the queue that have not yet been made into a 351 * pollwakeup() call. This is meant to insure a maximum waiting 352 * time until an event is presented as a poll wakeup, while at 353 * the same time not making every single event into a poll wakeup 354 * of it's own. 
355 */ 356 357 static void 358 zev_poll_wakeup(boolean_t flush_all) 359 { 360 zev_queue_t *q; 361 int i; 362 363 /* 364 * This loop works with hold() and release() because 365 * pollwakeup() requires us to release our locks before calling it. 366 * 367 * from pollwakeup(9F): 368 * 369 * "Driver defined locks should not be held across calls 370 * to this function." 371 */ 372 373 /* wake up threads for each individual queue */ 374 mutex_enter(&zev_mutex); 375 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 376 q = zev_queues[i - ZEV_MINOR_MIN]; 377 if (q == NULL) 378 continue; 379 if (!q->zq_busy) 380 continue; 381 if (!q->zq_queue_len) 382 continue; 383 if ((flush_all) || 384 (q->zq_queue_len > q->zq_wakeup_threshold)) { 385 zev_queue_hold(q); 386 mutex_exit(&zev_mutex); 387 pollwakeup(&q->zq_pollhead, POLLIN); 388 mutex_enter(&zev_mutex); 389 zev_queue_release(q); 390 } 391 } 392 mutex_exit(&zev_mutex); 393 } 394 395 static void 396 zev_poll_wakeup_thread_main(void) 397 { 398 while (zev_wakeup_thread_run) { 399 delay(drv_usectohz(100 * 1000)); /* sleep 100ms */ 400 401 zev_poll_wakeup(B_TRUE); 402 } 403 thread_exit(); 404 } 405 406 static int 407 zev_ioc_mute_pool(char *poolname) 408 { 409 zev_pool_list_entry_t *pe; 410 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 411 /* pool already muted? */ 412 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 413 if (!strcmp(pe->name, poolname)) { 414 rw_exit(&zev_pool_list_rwlock); 415 return EEXIST; 416 } 417 } 418 pe = zev_zalloc(sizeof(*pe)); 419 if (!pe) { 420 rw_exit(&zev_pool_list_rwlock); 421 return ENOMEM; 422 } 423 (void) strncpy(pe->name, poolname, sizeof(pe->name)); 424 pe->next = zev_muted_pools_head; 425 zev_muted_pools_head = pe; 426 rw_exit(&zev_pool_list_rwlock); 427 return (0); 428 } 429 430 static int 431 zev_ioc_unmute_pool(char *poolname) 432 { 433 zev_pool_list_entry_t *pe, *peprev; 434 435 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 436 /* pool muted? 
*/ 437 peprev = NULL; 438 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 439 if (!strcmp(pe->name, poolname)) 440 break; 441 peprev = pe; 442 } 443 if (pe) { 444 rw_exit(&zev_pool_list_rwlock); 445 return ENOENT; 446 } 447 448 if (peprev != NULL) { 449 peprev->next = pe->next; 450 } else { 451 zev_muted_pools_head = pe->next; 452 } 453 zev_free(pe, sizeof(*pe)); 454 rw_exit(&zev_pool_list_rwlock); 455 return (0); 456 } 457 458 int 459 zev_skip_pool(objset_t *os) 460 { 461 zev_pool_list_entry_t *pe; 462 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool; 463 rw_enter(&zev_pool_list_rwlock, RW_READER); 464 for (pe=zev_muted_pools_head; pe; pe=pe->next) { 465 if (!strcmp(pe->name, dp->dp_spa->spa_name)) { 466 rw_exit(&zev_pool_list_rwlock); 467 return 1; 468 } 469 } 470 rw_exit(&zev_pool_list_rwlock); 471 return 0; 472 } 473 474 int 475 zev_skip_fs(zfsvfs_t *fs) 476 { 477 dsl_dir_t *d = fs->z_os->os_dsl_dataset->ds_dir; 478 dsl_dir_t *prev = NULL; 479 480 while (d && d != prev) { 481 if (strstr(d->dd_myname, "_root")) 482 return 0; 483 prev = d; 484 d = d->dd_parent; 485 } 486 return 1; 487 } 488 489 static void 490 zev_update_statistics(int op, zev_statistics_t *stat) 491 { 492 switch (op) { 493 case ZEV_OP_ERROR: 494 stat->zev_cnt_errors++; 495 break; 496 case ZEV_OP_MARK: 497 stat->zev_cnt_marks++; 498 break; 499 case ZEV_OP_ZFS_MOUNT: 500 stat->zev_cnt_zfs_mount++; 501 break; 502 case ZEV_OP_ZFS_UMOUNT: 503 stat->zev_cnt_zfs_umount++; 504 break; 505 case ZEV_OP_ZVOL_WRITE: 506 stat->zev_cnt_zvol_write++; 507 break; 508 case ZEV_OP_ZVOL_TRUNCATE: 509 stat->zev_cnt_zvol_truncate++; 510 break; 511 case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE: 512 stat->zev_cnt_znode_close_after_update++; 513 break; 514 case ZEV_OP_ZNODE_CREATE: 515 stat->zev_cnt_znode_create++; 516 break; 517 case ZEV_OP_ZNODE_REMOVE: 518 stat->zev_cnt_znode_remove++; 519 break; 520 case ZEV_OP_ZNODE_LINK: 521 stat->zev_cnt_znode_link++; 522 break; 523 case ZEV_OP_ZNODE_SYMLINK: 524 
		stat->zev_cnt_znode_symlink++;
		break;
	case ZEV_OP_ZNODE_RENAME:
		stat->zev_cnt_znode_rename++;
		break;
	case ZEV_OP_ZNODE_WRITE:
		stat->zev_cnt_znode_write++;
		break;
	case ZEV_OP_ZNODE_TRUNCATE:
		stat->zev_cnt_znode_truncate++;
		break;
	case ZEV_OP_ZNODE_SETATTR:
		stat->zev_cnt_znode_setattr++;
		break;
	case ZEV_OP_ZNODE_ACL:
		stat->zev_cnt_znode_acl++;
		break;
	}
}

/*
 * Append "msg" (allocated by the caller via zev_alloc(sizeof(*msg) +
 * msg->size)) to the global message list and register it with every
 * per-minor queue.  Takes ownership of "msg": it is freed here on an
 * invalid op, and otherwise freed later by zev_queue_trim().  May block
 * while queues with ZEV_FL_BLOCK_WHILE_QUEUE_FULL are full.
 */
void
zev_queue_message(int op, zev_msg_t *msg)
{
	zev_queue_t *q;
	int wakeup = 0;
	zev_msg_t *m;
	int i;

	msg->next = NULL;
	msg->prev = NULL;
	msg->read = 0;

	if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
		zev_queue_error(op, "unknown op id encountered: %d", op);
		zev_free(msg, sizeof(*msg) + msg->size);
		return;
	}

	/*
	 * This mutex protects us against race conditions when several
	 * threads want to queue a message and one or more queues are
	 * full: we release zev_mutex to wait for the queues to become
	 * less-than-full, but we don't know in which order the waiting
	 * threads will be awoken. If it's not the same order in which
	 * they went to sleep we might mark different messages as "newest"
	 * in different queues, and so we might have dupes or even
	 * skip messages.
	 */
	mutex_enter(&zev_queue_msg_mutex);

	mutex_enter(&zev_mutex);

	/*
	 * When the module is loaded, the default behavior is to
	 * put all events into a queue and block if the queue is full.
	 * This is done even before the pseudo device is attached.
	 * This way, no events are lost.
	 *
	 * To discard events entirely the "beaver" queue,
	 * which never discards anything, has to be removed.
	 */

	if (zev_queue_cnt == 0) {
		mutex_exit(&zev_mutex);
		mutex_exit(&zev_queue_msg_mutex);
		/* NOTE(review): "msg" appears to be leaked here -- confirm */
		return;
	}

	/* put message into global queue */
	msg->seq = zev_msg_sequence_number++;

	/* do we need to make room? */
	while (zev_statistics.zev_max_queue_len &&
	    zev_statistics.zev_queue_len > zev_statistics.zev_max_queue_len) {

		if (zev_have_blocking_queues) {
			/* queue full. block until it's been shrunk. */
			cv_wait(&zev_condvar, &zev_mutex);
			continue;
		}

		/* discard events until this message fits into all queues */

		for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
			q = zev_queues[i - ZEV_MINOR_MIN];
			if (!q)
				continue;
			/* discard msgs until queue is small enough */
			while (q->zq_queue_len &&
			    q->zq_queue_len > q->zq_max_queue_len) {
				m = q->zq_oldest;
				if (m == NULL)
					break;
				q->zq_events_discarded++;
				q->zq_bytes_discarded += m->size;
				q->zq_oldest = m->next;
				q->zq_queue_len -= m->size;
				q->zq_queue_messages--;
			}
		}

		zev_queue_trim();
		ASSERT(zev_statistics.zev_queue_len == 0 ||
		    zev_statistics.zev_queue_len <=
		    zev_statistics.zev_max_queue_len);
	}

	/* link message at the tail of the global doubly-linked list */
	if (zev_queue_tail == NULL) {
		zev_queue_head = zev_queue_tail = msg;
	} else {
		zev_queue_tail->next = msg;
		msg->prev = zev_queue_tail;
		zev_queue_tail = msg;
	}
	zev_queue_len++;
	zev_statistics.zev_cnt_total_events++;
	zev_statistics.zev_queue_len += msg->size;

	/* update per-device queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;

		zev_queue_hold(q);

		/* make sure queue has enough room */
		while (q->zq_max_queue_len &&
		    q->zq_queue_len > q->zq_max_queue_len) {

			if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
				/* block until queue has been shrunk. */
				cv_wait(&zev_condvar, &zev_mutex);
			} else {
				/* discard msgs until queue is small enough */
				while (q->zq_queue_len > q->zq_max_queue_len) {
					m = q->zq_oldest;
					if (m == NULL)
						break;
					q->zq_events_discarded++;
					q->zq_bytes_discarded += m->size;
					q->zq_oldest = m->next;
					q->zq_queue_len -= m->size;
					q->zq_queue_messages--;
				}
			}
		}

		/* register new message at the end of the queue */
		q->zq_queue_len += msg->size;
		q->zq_queue_messages++;
		q->zq_bytes_total += msg->size;
		q->zq_events_total++;
		if (q->zq_oldest == NULL)
			q->zq_oldest = msg;

		zev_update_statistics(op, &q->zq_statistics);

		if (q->zq_queue_len > q->zq_wakeup_threshold)
			wakeup = 1;
		if (q->zq_queue_len == msg->size) /* queue was empty */
			cv_broadcast(&q->zq_condvar);

		zev_queue_release(q);
	}

	zev_queue_trim();

	zev_update_statistics(op, &zev_statistics);
	mutex_exit(&zev_mutex);
	mutex_exit(&zev_queue_msg_mutex);

	/* one or more queues need a pollwakeup() */
	if (op == ZEV_OP_MARK) {
		zev_poll_wakeup(B_TRUE);
	} else if (wakeup) {
		zev_poll_wakeup(B_FALSE);
	}

	return;
}

/*
 * Format an error message and queue it as a ZEV_OP_ERROR event.
 * "op" is the operation that failed.
 */
void
zev_queue_error(int op, char *fmt, ...)
708 { 709 char buf[ZEV_MAX_MESSAGE_LEN]; 710 va_list ap; 711 int len; 712 zev_msg_t *msg = NULL; 713 zev_error_t *rec; 714 int msg_size; 715 716 va_start(ap, fmt); 717 len = vsnprintf(buf, sizeof(buf), fmt, ap); 718 va_end(ap); 719 if (len >= sizeof(buf)) { 720 cmn_err(CE_WARN, "zev: can't report error - " 721 "dropping event entirely."); 722 return; 723 } 724 725 msg_size = sizeof(*rec) + len + 1; 726 msg = zev_alloc(sizeof(*msg) + msg_size); 727 msg->size = msg_size; 728 rec = (zev_error_t *)(msg + 1); 729 rec->record_len = msg_size; 730 rec->op = ZEV_OP_ERROR; 731 rec->op_time = ddi_get_time(); 732 rec->guid = 0; 733 rec->failed_op = op; 734 rec->errstr_len = len; 735 (void) memcpy(ZEV_ERRSTR(rec), buf, len + 1); 736 737 zev_queue_message(ZEV_OP_ERROR, msg); 738 return; 739 } 740 741 static int 742 zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name) 743 { 744 char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1]; 745 zev_queue_t *q; 746 int i; 747 748 *out = NULL; 749 750 if (name->zev_namelen == 0) { 751 if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR) 752 return EINVAL; 753 zev_queue_hold(req_q); 754 *out = req_q; 755 return 0; 756 } 757 758 if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 759 return EINVAL; 760 strncpy(namebuf, name->zev_name, name->zev_namelen); 761 namebuf[name->zev_namelen] = '\0'; 762 763 mutex_enter(&zev_mutex); 764 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 765 q = zev_queues[i - ZEV_MINOR_MIN]; 766 if (!q) 767 continue; 768 if (!strcmp(q->zq_name, namebuf)) { 769 zev_queue_hold(q); 770 mutex_exit(&zev_mutex); 771 *out = q; 772 return 0; 773 } 774 } 775 mutex_exit(&zev_mutex); 776 return ENOENT; 777 } 778 779 static int 780 zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode) 781 { 782 zev_ioctl_get_queue_statistics_t gs; 783 zev_queue_t *q; 784 int ret; 785 786 if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0) 787 return EFAULT; 788 789 ret = zev_find_queue(&q, req_q, 
&gs.zev_queue_name); 790 if (ret) 791 return ret; 792 793 /* ddi_copyout() can take a long time. Better make 794 a copy to be able to release the mutex faster. */ 795 mutex_enter(&zev_mutex); 796 memcpy(&gs.zev_statistics, &q->zq_statistics,sizeof(gs.zev_statistics)); 797 gs.zev_statistics.zev_queue_len = q->zq_queue_len; 798 gs.zev_statistics.zev_bytes_read = q->zq_bytes_read; 799 gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded; 800 gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len; 801 gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded; 802 gs.zev_statistics.zev_cnt_total_events = q->zq_events_total; 803 zev_queue_release(q); 804 mutex_exit(&zev_mutex); 805 806 if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0) 807 return EFAULT; 808 return 0; 809 } 810 811 static int 812 zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 813 { 814 zev_ioctl_set_queue_properties_t qp; 815 zev_queue_t *q; 816 uint64_t old_max; 817 uint64_t old_flags; 818 int ret; 819 820 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 821 return EFAULT; 822 if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN) 823 return EINVAL; 824 if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN) 825 return EINVAL; 826 827 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 828 if (ret) 829 return ret; 830 831 mutex_enter(&zev_mutex); 832 833 /* 834 * Note: if the PERSISTENT flag is cleared, and the queue is not busy, 835 * the queue should be removed by zev_queue_release() in zev_ioctl(). 
836 */ 837 old_flags = qp.zev_flags; 838 q->zq_flags = qp.zev_flags; 839 if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) && 840 (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) { 841 /* queue is no longer blocking - wake blocked threads */ 842 cv_broadcast(&zev_condvar); 843 } 844 845 zev_update_blockflag(); 846 847 old_max = q->zq_max_queue_len; 848 q->zq_max_queue_len = qp.zev_max_queue_len; 849 if (q->zq_max_queue_len < old_max) 850 zev_queue_trim(); 851 if (q->zq_max_queue_len > old_max) 852 cv_broadcast(&zev_condvar); /* threads may be waiting */ 853 854 if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) && 855 (qp.zev_poll_wakeup_threshold <= q->zq_queue_len)) 856 pollwakeup(&q->zq_pollhead, POLLIN); 857 q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold; 858 859 zev_queue_release(q); 860 mutex_exit(&zev_mutex); 861 return 0; 862 } 863 864 static int 865 zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode) 866 { 867 zev_ioctl_get_queue_properties_t qp; 868 zev_queue_t *q; 869 int ret; 870 871 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0) 872 return EFAULT; 873 874 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name); 875 if (ret) 876 return ret; 877 878 mutex_enter(&zev_mutex); 879 qp.zev_max_queue_len = q->zq_max_queue_len; 880 qp.zev_flags = q->zq_flags; 881 qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold; 882 zev_queue_release(q); 883 mutex_exit(&zev_mutex); 884 885 if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0) 886 return EFAULT; 887 return 0; 888 } 889 890 static int 891 zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode) 892 { 893 zev_ioctl_add_queue_t aq; 894 zev_queue_t *new_q; 895 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 896 897 if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0) 898 return EFAULT; 899 900 if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 901 return EINVAL; 902 strncpy(name, aq.zev_name, aq.zev_namelen); 903 name[aq.zev_namelen] = '\0'; 904 905 return 
zev_queue_new(&new_q, req_q->zq_dip, name, 906 aq.zev_max_queue_len, aq.zev_flags); 907 } 908 909 static int 910 zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode) 911 { 912 zev_ioctl_remove_queue_t rq; 913 zev_queue_t *q; 914 char name[ZEV_MAX_QUEUE_NAME_LEN+1]; 915 int found = 0; 916 int i; 917 918 if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0) 919 return EFAULT; 920 921 if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN) 922 return EINVAL; 923 strncpy(name, rq.zev_queue_name.zev_name, 924 rq.zev_queue_name.zev_namelen); 925 name[rq.zev_queue_name.zev_namelen] = '\0'; 926 927 mutex_enter(&zev_mutex); 928 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 929 q = zev_queues[i - ZEV_MINOR_MIN]; 930 if (!q) 931 continue; 932 if (!strcmp(q->zq_name, name)) { 933 found = 1; 934 break; 935 } 936 } 937 if (!found) { 938 mutex_exit(&zev_mutex); 939 return ENOENT; 940 } 941 942 if (q->zq_busy) { 943 mutex_exit(&zev_mutex); 944 return EBUSY; 945 } 946 /* 947 * clear flags, so that persistent queues are removed aswell 948 * and the queue becomes non-blocking. 
949 */ 950 q->zq_flags = 0; 951 if (q->zq_to_be_removed == B_FALSE) { 952 q->zq_to_be_removed = B_TRUE; 953 zev_queue_release(q); 954 } 955 /* some threads might be waiting for this queue to become writable */ 956 cv_broadcast(&zev_condvar); 957 958 mutex_exit(&zev_mutex); 959 return 0; 960 } 961 962 static int 963 zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode) 964 { 965 zev_ioctl_debug_info_t di; 966 uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0); 967 uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0); 968 969 zev_chksum_stats(&di.zev_chksum_cache_size, 970 &di.zev_chksum_cache_hits, 971 &di.zev_chksum_cache_misses); 972 di.zev_memory_allocated = mem_allocated - mem_freed; 973 if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0) 974 return EFAULT; 975 return 0; 976 } 977 978 static int 979 zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode) 980 { 981 zev_ioctl_get_queue_list_t gql; 982 zev_queue_t *q; 983 int i = 0; 984 int count = 0; 985 986 memset(&gql, 0, sizeof(gql)); 987 988 mutex_enter(&zev_mutex); 989 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 990 q = zev_queues[i - ZEV_MINOR_MIN]; 991 if (!q) 992 continue; 993 strncpy(gql.zev_queue_name[count].zev_name, 994 q->zq_name, ZEV_MAX_QUEUE_NAME_LEN); 995 gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name); 996 count++; 997 } 998 gql.zev_n_queues = count; 999 mutex_exit(&zev_mutex); 1000 1001 if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0) 1002 return EFAULT; 1003 return 0; 1004 } 1005 1006 static int 1007 zev_ioc_set_max_queue_len(zev_queue_t *req_q, intptr_t arg, int mode) 1008 { 1009 uint64_t len; 1010 int i; 1011 zev_queue_t *q; 1012 1013 if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) { 1014 return EFAULT; 1015 } 1016 if (len > ZEV_MAX_QUEUE_LEN) { 1017 return EINVAL; 1018 } 1019 mutex_enter(&zev_mutex); 1020 zev_statistics.zev_max_queue_len = len; 1021 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 
1022 q = zev_queues[i - ZEV_MINOR_MIN]; 1023 if (!q) 1024 continue; 1025 if (q->zq_max_queue_len <= 1026 zev_statistics.zev_max_queue_len) 1027 continue; 1028 q->zq_max_queue_len = zev_statistics.zev_max_queue_len; 1029 } 1030 cv_broadcast(&zev_condvar); 1031 mutex_exit(&zev_mutex); 1032 return 0; 1033 } 1034 1035 static int 1036 zev_ioc_get_zev_version(intptr_t arg, int mode) 1037 { 1038 zev_ioctl_get_zev_version vi; 1039 vi.zev_major_version = ZEV_MAJOR_VERSION; 1040 vi.zev_minor_version = ZEV_MINOR_VERSION; 1041 if (ddi_copyout(&vi, (void *)arg, sizeof(vi), mode) != 0) 1042 return EFAULT; 1043 return 0; 1044 } 1045 1046 /* ARGSUSED */ 1047 static int 1048 zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1049 { 1050 zev_statistics_t zs; 1051 zev_ioctl_poolarg_t pa; 1052 zev_ioctl_mark_t mark; 1053 zev_mark_t *rec; 1054 int msg_size; 1055 zev_msg_t *msg; 1056 uint64_t mark_id; 1057 minor_t minor; 1058 zev_queue_t *req_q; 1059 int ret = 0; 1060 1061 minor = getminor(dev); 1062 mutex_enter(&zev_mutex); 1063 if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) { 1064 mutex_exit(&zev_mutex); 1065 return (ENXIO); 1066 } 1067 zev_queue_hold(req_q); 1068 mutex_exit(&zev_mutex); 1069 /* 1070 * all structures passed between kernel and userspace 1071 * are now compatible between 64 and 32 bit. Model 1072 * conversion can be ignored. 1073 */ 1074 switch (cmd) { 1075 case ZEV_IOC_GET_GLOBAL_STATISTICS: 1076 /* ddi_copyout() can take a long time. Better make 1077 a copy to be able to release the mutex faster. 
*/ 1078 mutex_enter(&zev_mutex); 1079 (void) memcpy(&zs, &zev_statistics, sizeof(zs)); 1080 mutex_exit(&zev_mutex); 1081 if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0) 1082 ret = EFAULT; 1083 break; 1084 case ZEV_IOC_GET_QUEUE_STATISTICS: 1085 ret = zev_ioc_get_queue_statistics(req_q, arg, mode); 1086 break; 1087 case ZEV_IOC_MUTE_POOL: 1088 case ZEV_IOC_UNMUTE_POOL: 1089 if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) { 1090 ret = EFAULT; 1091 break; 1092 } 1093 if (pa.zev_poolname_len >=MAXPATHLEN) { 1094 ret = EINVAL; 1095 break; 1096 } 1097 pa.zev_poolname[pa.zev_poolname_len] = '\0'; 1098 if (cmd == ZEV_IOC_MUTE_POOL) { 1099 ret = zev_ioc_mute_pool(pa.zev_poolname); 1100 } else { 1101 ret = zev_ioc_unmute_pool(pa.zev_poolname); 1102 } 1103 break; 1104 case ZEV_IOC_SET_MAX_QUEUE_LEN: 1105 ret = zev_ioc_set_max_queue_len(req_q, arg, mode); 1106 break; 1107 case ZEV_IOC_GET_QUEUE_PROPERTIES: 1108 ret = zev_ioc_get_queue_properties(req_q, arg, mode); 1109 break; 1110 case ZEV_IOC_SET_QUEUE_PROPERTIES: 1111 ret = zev_ioc_set_queue_properties(req_q, arg, mode); 1112 break; 1113 case ZEV_IOC_MARK: 1114 if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) { 1115 ret = EFAULT; 1116 break; 1117 } 1118 /* prepare message */ 1119 msg_size = sizeof(*rec) + mark.zev_payload_len + 1; 1120 msg = zev_alloc(sizeof(*msg) + msg_size); 1121 msg->size = msg_size; 1122 rec = (zev_mark_t *)(msg + 1); 1123 rec->record_len = msg_size; 1124 rec->op = ZEV_OP_MARK; 1125 rec->op_time = ddi_get_time(); 1126 rec->guid = mark.zev_guid; 1127 rec->payload_len = mark.zev_payload_len; 1128 /* get payload */ 1129 if (ddi_copyin(((char *)arg) + sizeof(mark), 1130 ZEV_PAYLOAD(rec), 1131 mark.zev_payload_len, mode) != 0) { 1132 zev_free(msg, msg_size); 1133 ret = EFAULT; 1134 break; 1135 } 1136 *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0'; 1137 /* get mark id and queue message */ 1138 mutex_enter(&zev_mark_id_mutex); 1139 mark_id = zev_mark_id++; 1140 
mutex_exit(&zev_mark_id_mutex); 1141 rec->mark_id = mark_id; 1142 zev_queue_message(ZEV_OP_MARK, msg); 1143 /* report mark id to userland, ignore errors */ 1144 mark.zev_mark_id = mark_id; 1145 ddi_copyout(&mark, (void *)arg, sizeof(mark), mode); 1146 break; 1147 case ZEV_IOC_ADD_QUEUE: 1148 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1149 ret = EACCES; 1150 break; 1151 } 1152 ret = zev_ioc_add_queue(req_q, arg, mode); 1153 break; 1154 case ZEV_IOC_REMOVE_QUEUE: 1155 if (minor != ZEV_CONTROL_DEVICE_MINOR) { 1156 ret = EACCES; 1157 break; 1158 } 1159 ret = zev_ioc_remove_queue(req_q, arg, mode); 1160 break; 1161 case ZEV_IOC_GET_DEBUG_INFO: 1162 ret = zev_ioc_get_debug_info(req_q, arg, mode); 1163 break; 1164 case ZEV_IOC_GET_QUEUE_LIST: 1165 ret = zev_ioc_get_queue_list(req_q, arg, mode); 1166 break; 1167 case ZEV_IOC_GET_FILE_SIGNATURES: 1168 ret = zev_ioc_get_signatures(arg, mode); 1169 break; 1170 case ZEV_IOC_GET_ZEV_VERSION: 1171 ret = zev_ioc_get_zev_version(arg, mode); 1172 break; 1173 default: 1174 /* generic "ioctl unknown" error */ 1175 ret = ENOTTY; 1176 } 1177 1178 mutex_enter(&zev_mutex); 1179 zev_queue_release(req_q); 1180 mutex_exit(&zev_mutex); 1181 if (ret) 1182 SET_ERROR(ret); 1183 return (ret); 1184 } 1185 1186 static int 1187 zev_chpoll(dev_t dev, short events, int anyyet, 1188 short *reventsp, struct pollhead **phpp) 1189 { 1190 int minor; 1191 short revent = 0; 1192 zev_queue_t *q; 1193 1194 /* use minor-specific queue context and it's pollhead */ 1195 minor = getminor(dev); 1196 if (minor == ZEV_CONTROL_DEVICE_MINOR) 1197 return (EINVAL); 1198 mutex_enter(&zev_mutex); 1199 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1200 mutex_exit(&zev_mutex); 1201 return (ENXIO); 1202 } 1203 revent = 0; 1204 if ((events & POLLIN)) { 1205 if (q->zq_oldest) 1206 revent |= POLLIN; 1207 } 1208 if (revent == 0) { 1209 if (!anyyet) { 1210 *phpp = &q->zq_pollhead; 1211 } 1212 } 1213 *reventsp = revent; 1214 mutex_exit(&zev_mutex); 1215 return (0); 1216 } 
1217 1218 /* ARGSUSED */ 1219 static int 1220 zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p) 1221 { 1222 minor_t minor; 1223 offset_t off; 1224 int ret = 0; 1225 zev_msg_t *msg; 1226 char *data; 1227 zev_queue_t *q; 1228 1229 minor = getminor(dev); 1230 if (minor == ZEV_CONTROL_DEVICE_MINOR) 1231 return (EINVAL); 1232 1233 mutex_enter(&zev_mutex); 1234 q = ddi_get_soft_state(statep, minor); 1235 if (q == NULL) { 1236 mutex_exit(&zev_mutex); 1237 return (ENXIO); 1238 } 1239 off = uio_p->uio_loffset; 1240 msg = q->zq_oldest; 1241 while (msg == NULL) { 1242 if (!ddi_can_receive_sig()) { 1243 /* 1244 * read() shouldn't block because this thread 1245 * can't receive signals. (e.g., it might be 1246 * torn down by exit() right now.) 1247 */ 1248 mutex_exit(&zev_mutex); 1249 return 0; 1250 } 1251 if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) { 1252 /* signal received. */ 1253 mutex_exit(&zev_mutex); 1254 return EINTR; 1255 } 1256 msg = q->zq_oldest; 1257 } 1258 if (msg->size > uio_p->uio_resid) { 1259 mutex_exit(&zev_mutex); 1260 return E2BIG; 1261 } 1262 while (msg && uio_p->uio_resid >= msg->size) { 1263 data = (char *)(msg + 1); 1264 ret = uiomove(data, msg->size, UIO_READ, uio_p); 1265 if (ret != 0) { 1266 mutex_exit(&zev_mutex); 1267 cmn_err(CE_WARN, "zev: uiomove failed; messages lost"); 1268 uio_p->uio_loffset = off; 1269 return (ret); 1270 } 1271 q->zq_oldest = msg->next; 1272 q->zq_bytes_read += msg->size; 1273 q->zq_queue_len -= msg->size; 1274 q->zq_queue_messages--; 1275 msg->read++; 1276 msg = q->zq_oldest; 1277 } 1278 zev_queue_trim(); 1279 cv_broadcast(&zev_condvar); 1280 mutex_exit(&zev_mutex); 1281 uio_p->uio_loffset = off; 1282 return 0; 1283 } 1284 1285 /* ARGSUSED */ 1286 static int 1287 zev_close(dev_t dev, int flag, int otyp, cred_t *crepd) 1288 { 1289 zev_queue_t *q; 1290 int minor; 1291 1292 minor = getminor(dev); 1293 if (otyp != OTYP_CHR) 1294 return (EINVAL); 1295 mutex_enter(&zev_mutex); 1296 if ((q = ddi_get_soft_state(statep, 
minor)) == NULL) { 1297 mutex_exit(&zev_mutex); 1298 return (ENXIO); 1299 } 1300 if (q->zq_busy != B_TRUE) { 1301 mutex_exit(&zev_mutex); 1302 return (EINVAL); 1303 } 1304 q->zq_busy = B_FALSE; 1305 if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0) 1306 zev_queue_release(q); 1307 mutex_exit(&zev_mutex); 1308 return (0); 1309 } 1310 1311 /* ARGSUSED */ 1312 static int 1313 zev_open(dev_t *devp, int flag, int otyp, cred_t *credp) 1314 { 1315 zev_queue_t *q; 1316 minor_t minor; 1317 1318 minor = getminor(*devp); 1319 if (otyp != OTYP_CHR) 1320 return (EINVAL); 1321 if (drv_priv(credp) != 0) 1322 return (EPERM); 1323 mutex_enter(&zev_mutex); 1324 if ((q = ddi_get_soft_state(statep, minor)) == NULL) { 1325 mutex_exit(&zev_mutex); 1326 return (ENXIO); 1327 } 1328 if (minor == ZEV_CONTROL_DEVICE_MINOR) { 1329 /* control device may be used in parallel */ 1330 q->zq_busy = B_TRUE; 1331 mutex_exit(&zev_mutex); 1332 return 0; 1333 } 1334 if (q->zq_busy == B_TRUE) { 1335 mutex_exit(&zev_mutex); 1336 return (EBUSY); 1337 } 1338 q->zq_busy = B_TRUE; /* can only be opened exclusively */ 1339 mutex_exit(&zev_mutex); 1340 return (0); 1341 } 1342 1343 static struct cb_ops zev_cb_ops = { 1344 zev_open, /* open */ 1345 zev_close, /* close */ 1346 nodev, /* strategy */ 1347 nodev, /* print */ 1348 nodev, /* dump */ 1349 zev_read, /* read */ 1350 nodev, /* write */ 1351 zev_ioctl, /* ioctl */ 1352 nodev, /* devmap */ 1353 nodev, /* mmap */ 1354 nodev, /* segmap */ 1355 zev_chpoll, /* chpoll */ 1356 ddi_prop_op, /* prop_op */ 1357 NULL, /* streamtab */ 1358 D_MP | D_64BIT, /* cb_flag */ 1359 CB_REV, /* cb_rev */ 1360 nodev, /* aread */ 1361 nodev, /* awrite */ 1362 }; 1363 1364 static void 1365 zev_free_instance(dev_info_t *dip) 1366 { 1367 int instance; 1368 zev_queue_t *q; 1369 int i; 1370 1371 instance = ddi_get_instance(dip); 1372 if (instance != 0) { 1373 cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)", 1374 instance); 1375 return; 1376 } 1377 1378 ddi_remove_minor_node(dip, 
NULL); 1379 devfs_clean(q->zq_dip, NULL, 0); 1380 1381 /* stop pollwakeup thread */ 1382 zev_wakeup_thread_run = 0; 1383 if (zev_poll_wakeup_thread != NULL) { 1384 thread_join(zev_poll_wakeup_thread->t_did); 1385 zev_poll_wakeup_thread = NULL; 1386 } 1387 1388 mutex_enter(&zev_mutex); 1389 1390 /* remove "ctrl" dummy queue */ 1391 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1392 if (q) { 1393 ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR); 1394 ZEV_MEM_SUB(sizeof(zev_queue_t)); 1395 } 1396 1397 /* remove all other queues */ 1398 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) { 1399 q = zev_queues[i- ZEV_MINOR_MIN]; 1400 if (!q) 1401 continue; 1402 ASSERT(q->zq_refcnt == 1); 1403 zev_queue_release(q); 1404 } 1405 zev_queue_trim(); 1406 bzero(&zev_queues, sizeof(zev_queues)); 1407 1408 mutex_exit(&zev_mutex); 1409 1410 } 1411 1412 static int 1413 zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 1414 { 1415 int instance; 1416 zev_queue_t *q; 1417 1418 /* called once per instance with DDI_DETACH, 1419 may be called to suspend */ 1420 switch (cmd) { 1421 case DDI_DETACH: 1422 /* instance busy? */ 1423 instance = ddi_get_instance(dip); 1424 if (instance != 0) { /* hardcoded in zev.conf */ 1425 /* this module only supports one instance. */ 1426 return (DDI_FAILURE); 1427 } 1428 1429 mutex_enter(&zev_mutex); 1430 if (!zev_attached) { 1431 mutex_exit(&zev_mutex); 1432 return (DDI_FAILURE); 1433 } 1434 1435 /* check "ctrl" queue to see if t is busy */ 1436 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1437 if (q == NULL) { 1438 mutex_exit(&zev_mutex); 1439 return (DDI_FAILURE); 1440 } 1441 if (q->zq_busy) { 1442 mutex_exit(&zev_mutex); 1443 return (DDI_FAILURE); 1444 } 1445 /* are there any queues? 
*/ 1446 if (zev_queue_cnt > 0) { 1447 mutex_exit(&zev_mutex); 1448 return (DDI_FAILURE); 1449 } 1450 1451 zev_attached = B_FALSE; 1452 mutex_exit(&zev_mutex); 1453 1454 /* switch ZFS event callbacks back to default */ 1455 rw_enter(&rz_zev_rwlock, RW_WRITER); 1456 rz_zev_callbacks = rz_zev_default_callbacks; 1457 rz_zev_set_active(B_FALSE); 1458 rw_exit(&rz_zev_rwlock); 1459 1460 /* no thread is inside of the callbacks anymore. */ 1461 1462 /* free resources allocated for this instance */ 1463 zev_free_instance(dip); 1464 zev_chksum_fini(); 1465 #if 0 1466 cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64, 1467 zev_memory_allocated - zev_memory_freed); 1468 #endif 1469 return (DDI_SUCCESS); 1470 case DDI_SUSPEND: 1471 /* kernel must not suspend zev devices while ZFS is running */ 1472 return (DDI_FAILURE); 1473 default: 1474 return (DDI_FAILURE); 1475 } 1476 } 1477 1478 static int 1479 zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 1480 { 1481 /* called once per instance with DDI_ATTACH, 1482 may be called to resume */ 1483 int instance; 1484 int error; 1485 zev_queue_t *q; 1486 switch (cmd) { 1487 case DDI_ATTACH: 1488 /* create instance state */ 1489 instance = ddi_get_instance(dip); 1490 if (instance != 0) { /* hardcoded in zev.conf */ 1491 /* this module only supports one instance. 
*/ 1492 return (DDI_FAILURE); 1493 } 1494 1495 mutex_enter(&zev_mutex); 1496 if (zev_attached) { 1497 mutex_exit(&zev_mutex); 1498 return (DDI_FAILURE); 1499 } 1500 if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) != 1501 DDI_SUCCESS) { 1502 mutex_exit(&zev_mutex); 1503 return (DDI_FAILURE); 1504 } 1505 ZEV_MEM_ADD(sizeof(zev_queue_t)); 1506 zev_attached = B_TRUE; 1507 1508 /* init queue list */ 1509 bzero(&zev_queues, sizeof(zev_queues)); 1510 mutex_exit(&zev_mutex); 1511 1512 /* create a dummy queue for management of "ctrl" */ 1513 1514 q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR); 1515 q->zq_dip = dip; 1516 q->zq_refcnt = 1; 1517 q->zq_busy = B_FALSE; 1518 q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR; 1519 q->zq_flags = ZEV_FL_PERSISTENT; 1520 strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME); 1521 1522 /* create device node for "ctrl" */ 1523 if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME, 1524 S_IFCHR, ZEV_CONTROL_DEVICE_MINOR, 1525 DDI_PSEUDO, 0) == DDI_FAILURE) { 1526 goto fail; 1527 } 1528 1529 /* note: intentionally not adding ctrl queue to queue list. 
*/ 1530 1531 /* default queue */ 1532 error = zev_queue_new(&q, dip, 1533 ZEV_DEFAULT_QUEUE_NAME, 1534 ZEV_MAX_QUEUE_LEN, 1535 ZEV_FL_BLOCK_WHILE_QUEUE_FULL| 1536 ZEV_FL_PERSISTENT); 1537 if (error) 1538 goto fail; 1539 1540 /* start pollwakeup thread */ 1541 zev_wakeup_thread_run = 1; 1542 zev_poll_wakeup_thread = thread_create(NULL, 0, 1543 zev_poll_wakeup_thread_main, NULL, 0, &p0, 1544 TS_RUN, minclsyspri); 1545 1546 ddi_report_dev(dip); 1547 1548 zev_chksum_init(); 1549 1550 /* switch ZFS event callbacks to zev module callbacks */ 1551 rw_enter(&rz_zev_rwlock, RW_WRITER); 1552 rz_zev_callbacks = &zev_callbacks; 1553 rz_zev_set_active(B_TRUE); 1554 rw_exit(&rz_zev_rwlock); 1555 1556 return (DDI_SUCCESS); 1557 case DDI_RESUME: 1558 /* suspendeding zev devices should never happen */ 1559 return (DDI_SUCCESS); 1560 default: 1561 return (DDI_FAILURE); 1562 } 1563 fail: 1564 cmn_err(CE_WARN, "zev: attach failed"); 1565 zev_free_instance(dip); 1566 mutex_enter(&zev_mutex); 1567 zev_attached = B_FALSE; 1568 mutex_exit(&zev_mutex); 1569 return (DDI_FAILURE); 1570 } 1571 1572 /* ARGSUSED */ 1573 static int 1574 zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp) 1575 { 1576 minor_t minor; 1577 zev_queue_t *q; 1578 1579 /* arg is dev_t */ 1580 minor = getminor((dev_t)arg); 1581 mutex_enter(&zev_mutex); 1582 q = ddi_get_soft_state(statep, minor); 1583 if (q == NULL) { 1584 *resultp = NULL; 1585 mutex_exit(&zev_mutex); 1586 return (DDI_FAILURE); 1587 } 1588 1589 switch (infocmd) { 1590 case DDI_INFO_DEVT2DEVINFO: 1591 *resultp = q->zq_dip; 1592 break; 1593 case DDI_INFO_DEVT2INSTANCE: 1594 *resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip); 1595 break; 1596 default: 1597 mutex_exit(&zev_mutex); 1598 return (DDI_FAILURE); 1599 } 1600 mutex_exit(&zev_mutex); 1601 return (DDI_SUCCESS); 1602 } 1603 1604 static struct dev_ops zev_dev_ops = { 1605 DEVO_REV, /* driver build revision */ 1606 0, /* driver reference count */ 1607 zev_getinfo, /* 
getinfo */ 1608 nulldev, /* identify (obsolete) */ 1609 nulldev, /* probe (search for devices) */ 1610 zev_attach, /* attach */ 1611 zev_detach, /* detach */ 1612 nodev, /* reset (obsolete, use quiesce) */ 1613 &zev_cb_ops, /* character and block device ops */ 1614 NULL, /* bus driver ops */ 1615 NULL, /* power management, not needed */ 1616 ddi_quiesce_not_needed, /* quiesce */ 1617 }; 1618 1619 static struct modldrv zev_modldrv = { 1620 &mod_driverops, /* all loadable modules use this */ 1621 "ZFS event provider, v" 1622 XSTRING(ZEV_MAJOR_VERSION) "." 1623 XSTRING(ZEV_MINOR_VERSION), 1624 /* driver name and version info */ 1625 &zev_dev_ops /* ops method pointers */ 1626 }; 1627 1628 static struct modlinkage zev_modlinkage = { 1629 MODREV_1, /* fixed value */ 1630 { 1631 &zev_modldrv, /* driver linkage structure */ 1632 NULL /* list terminator */ 1633 } 1634 }; 1635 1636 int 1637 _init(void) 1638 { 1639 int error; 1640 1641 if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0) 1642 return (error); 1643 zev_attached = B_FALSE; 1644 1645 zev_queue_head = NULL; 1646 zev_queue_tail = NULL; 1647 zev_queue_len = 0; 1648 zev_muted_pools_head = NULL; 1649 zev_memory_allocated = 0; 1650 zev_memory_freed = 0; 1651 zev_queue_cnt = 0; 1652 zev_have_blocking_queues = 1; 1653 1654 mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL); 1655 cv_init(&zev_condvar, NULL, CV_DRIVER, NULL); 1656 rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL); 1657 mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL); 1658 zev_mark_id = gethrtime(); 1659 mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL); 1660 zev_msg_sequence_number = gethrtime(); 1661 bzero(&zev_statistics, sizeof(zev_statistics)); 1662 bzero(&zev_pollhead, sizeof(zev_pollhead)); 1663 bzero(&zev_queues, sizeof(zev_queues)); 1664 zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN; 1665 if (zev_ioc_mute_pool("zg0")) { 1666 cmn_err(CE_WARN, "zev: could not init mute list"); 1667 goto FAIL; 1668 } 
1669 1670 if ((error = mod_install(&zev_modlinkage)) != 0) { 1671 cmn_err(CE_WARN, "zev: could not install module"); 1672 goto FAIL; 1673 } 1674 1675 return (0); 1676 FAIL: 1677 /* free resources */ 1678 cmn_err(CE_WARN, "zev: _init failed"); 1679 mutex_destroy(&zev_mutex); 1680 ddi_soft_state_fini(&statep); 1681 return (error); 1682 } 1683 1684 int 1685 _info(struct modinfo *modinfop) 1686 { 1687 return (mod_info(&zev_modlinkage, modinfop)); 1688 } 1689 1690 int 1691 _fini(void) 1692 { 1693 int error = 0; 1694 zev_msg_t *msg; 1695 zev_pool_list_entry_t *pe, *npe; 1696 1697 mutex_enter(&zev_mutex); 1698 if (zev_attached == B_TRUE) { 1699 mutex_exit(&zev_mutex); 1700 return (SET_ERROR(EBUSY)); 1701 } 1702 if (zev_queue_cnt != 0) { 1703 /* should never happen */ 1704 mutex_exit(&zev_mutex); 1705 return (SET_ERROR(EBUSY)); 1706 } 1707 1708 /* 1709 * avoid deadlock if event list is full: make sure threads currently 1710 * blocking on the event list can append their event and then release 1711 * rz_zev_rwlock. Since there should be no queues left when we 1712 * reach this point we can simply empty the event list and then 1713 * wake everybody. 1714 */ 1715 while (zev_queue_head) { 1716 msg = zev_queue_head; 1717 zev_queue_head = msg->next; 1718 zev_free(msg, sizeof(*msg) + msg->size); 1719 } 1720 cv_broadcast(&zev_condvar); 1721 mutex_exit(&zev_mutex); 1722 1723 /* switch ZFS event callbacks back to default (again) */ 1724 rw_enter(&rz_zev_rwlock, RW_WRITER); 1725 rz_zev_callbacks = rz_zev_default_callbacks; 1726 rz_zev_set_active(B_FALSE); 1727 rw_exit(&rz_zev_rwlock); 1728 1729 /* no thread is inside of the callbacks anymore. Safe to remove. 
*/ 1730 1731 /* unload module callbacks */ 1732 if ((error = mod_remove(&zev_modlinkage)) != 0) { 1733 cmn_err(CE_WARN, "mod_remove failed: %d", error); 1734 return (error); 1735 } 1736 1737 /* free resources */ 1738 mutex_enter(&zev_mutex); 1739 while (zev_queue_head) { 1740 msg = zev_queue_head; 1741 zev_queue_head = msg->next; 1742 zev_free(msg, sizeof(*msg) + msg->size); 1743 } 1744 mutex_exit(&zev_mutex); 1745 rw_enter(&zev_pool_list_rwlock, RW_WRITER); 1746 pe = zev_muted_pools_head; 1747 while (pe) { 1748 npe = pe; 1749 pe = pe->next; 1750 zev_free(npe, sizeof(*npe)); 1751 } 1752 rw_exit(&zev_pool_list_rwlock); 1753 ddi_soft_state_fini(&statep); 1754 rw_destroy(&zev_pool_list_rwlock); 1755 cv_destroy(&zev_condvar); 1756 mutex_destroy(&zev_mutex); 1757 mutex_destroy(&zev_mark_id_mutex); 1758 mutex_destroy(&zev_queue_msg_mutex); 1759 1760 return (0); 1761 } 1762 1763