1 #include <sys/modctl.h>
2 #include <sys/ddi.h>
3 #include <sys/sunddi.h>
4 #include <sys/conf.h>
5 #include <sys/devops.h>
6 #include <sys/stat.h>
7 #include <sys/fs/zev.h>
8 #include <sys/zev_callbacks.h>
9 #include <sys/zev_checksums.h>
10 #include <sys/zfs_znode.h>
11 #include <sys/time.h>
12 #include <sys/sa.h>
13 #include <sys/zap.h>
14 #include <sys/time.h>
15 #include <sys/fs/dv_node.h>
16
/*
 * Member-offset macro; equivalent to the standard offsetof().
 * NOTE(review): consider using offsetof() from <sys/stddef.h> if available
 * in this build environment.
 */
#define OFFSETOF(s, m) ((size_t)(&(((s *)0)->m)))

/* two-step stringification so that macro arguments are expanded first */
#define XSTRING(x) STRING(x)
#define STRING(x) #x

/* name of the default persistent event queue created at attach time */
#define ZEV_DEFAULT_QUEUE_NAME "beaver"
/* fixed device minors for the control node and the tmp-queue node */
#define ZEV_CONTROL_DEVICE_MINOR 0
#define ZEV_TMPQUEUE_DEVICE_MINOR 1
/* remaining minor numbers are available for event queues */
#define ZEV_MINOR_MIN (ZEV_TMPQUEUE_DEVICE_MINOR + 1)
#define ZEV_MINOR_MAX (ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1)
27
/*
 * Per-queue state.  One zev_queue_t exists per event-queue device minor;
 * the global zev_queues[] array indexes them by (minor - ZEV_MINOR_MIN).
 */
typedef struct zev_queue {
	char		zq_name[ZEV_MAX_QUEUE_NAME_LEN+1]; /* minor node name */
	minor_t		zq_minor_number;	/* device minor of this queue */
	dev_info_t	*zq_dip;		/* devinfo the minor node hangs off */
	struct pollhead	zq_pollhead;		/* per-queue poll(2) wakeup head */
	uint64_t	zq_bytes_read;		/* statistics counters */
	uint64_t	zq_events_read;
	uint64_t	zq_bytes_discarded;
	uint64_t	zq_events_discarded;
	uint64_t	zq_bytes_total;
	uint64_t	zq_events_total;
	uint64_t	zq_wakeup_threshold;	/* queue len that triggers pollwakeup */
	uint16_t	zq_flags;		/* ZEV_FL_* behavior flags */
	uint16_t	zq_need_wakeup;
	/* protected by zev_mutex */
	int		zq_refcnt;		/* queue freed when this drops to 0 */
	uint64_t	zq_queue_len;		/* bytes currently queued for this minor */
	uint64_t	zq_queue_messages;	/* message count for this minor */
	uint64_t	zq_max_queue_len;	/* discard/block limit, bytes */
	zev_msg_t	*zq_oldest;		/* oldest unconsumed msg in global list */
	boolean_t	zq_busy;		/* device currently open */
	boolean_t	zq_to_be_removed;	/* removal requested via ioctl */
	zev_statistics_t zq_statistics;		/* per-queue event statistics */
	kcondvar_t	zq_condvar;		/* signalled when queue becomes non-empty */
} zev_queue_t;
53
static void *statep;			/* soft-state handle, minor -> zev_queue_t */
struct pollhead zev_pollhead;		/* driver-global pollhead */

kmutex_t zev_mutex;			/* protects queues, global msg list, stats */
kcondvar_t zev_condvar;			/* signalled when a full queue shrinks */
kmutex_t zev_queue_msg_mutex;		/* serializes writers (see zev_queue_message) */
krwlock_t zev_pool_list_rwlock;		/* protects the muted-pools list */
static zev_statistics_t zev_statistics;	/* driver-global statistics */
static boolean_t zev_attached;		/* set once attach(9E) completed */
static kmutex_t zev_mark_id_mutex;	/* protects zev_mark_id */
static uint64_t zev_mark_id = 0;	/* monotonically increasing mark id */

static uint64_t zev_msg_sequence_number = 0;	/* per-message sequence counter */
static zev_queue_t *zev_queues[ZEV_MAX_QUEUES];	/* queues, indexed by minor offset */
static int zev_queue_cnt = 0;			/* number of live queues */
static int zev_have_blocking_queues = 1;	/* any queue with BLOCK_WHILE_FULL? */
static int zev_tmpqueue_num = 0;		/* counter for tmp queue names */

uint64_t zev_memory_allocated = 0;	/* memory accounting (see ZEV_MEM_*) */
uint64_t zev_memory_freed = 0;

/*
 * The longest potential message is from zev_zfs_mount() and
 * contains the mountpoint, which might be close to MAXPATHLEN bytes long.
 *
 * Another candidate is zev_znode_rename_cb() and contains three inode
 * numbers and two filenames of up to MAXNAMELEN bytes each.
 */
#define ZEV_MAX_MESSAGE_LEN 4096

/* global message list; queues reference into it via zq_oldest */
static zev_msg_t *zev_queue_head = NULL;	/* oldest message */
static zev_msg_t *zev_queue_tail = NULL;	/* newest message */
static uint64_t zev_queue_len = 0;		/* number of messages in the list */


/* singly-linked list node for muted pool names */
typedef struct zev_pool_list_entry {
	struct zev_pool_list_entry	*next;
	char				name[MAXPATHLEN];
} zev_pool_list_entry_t;

static zev_pool_list_entry_t *zev_muted_pools_head = NULL;

static volatile int zev_wakeup_thread_run = 1;	/* cleared to stop wakeup thread */
static kthread_t *zev_poll_wakeup_thread = NULL;
98
99 void *
zev_alloc(ssize_t sz)100 zev_alloc(ssize_t sz)
101 {
102 ZEV_MEM_ADD(sz);
103 return kmem_alloc(sz, KM_SLEEP);
104 }
105
106 void *
zev_zalloc(ssize_t sz)107 zev_zalloc(ssize_t sz)
108 {
109 ZEV_MEM_ADD(sz);
110 return kmem_zalloc(sz, KM_SLEEP);
111 }
112
113 void
zev_free(void * ptr,ssize_t sz)114 zev_free(void *ptr, ssize_t sz)
115 {
116 ZEV_MEM_SUB(sz); \
117 kmem_free(ptr, sz);
118 }
119
120 /* must be called with zev_mutex held */
121 static void
zev_update_blockflag(void)122 zev_update_blockflag(void)
123 {
124 zev_queue_t *q;
125 int had_blocking_queues;
126 int i;
127
128 had_blocking_queues = zev_have_blocking_queues;
129
130 /* do we still have blocking queues? */
131 zev_have_blocking_queues = 0;
132 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
133 q = zev_queues[i - ZEV_MINOR_MIN];
134 if (!q)
135 continue;
136 if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
137 zev_have_blocking_queues = 1;
138 break;
139 }
140 }
141 /* no blocking queues */
142 if (had_blocking_queues)
143 cv_broadcast(&zev_condvar);
144 }
145
146 int
zev_queue_cmp(const void * a,const void * b)147 zev_queue_cmp(const void *a, const void *b)
148 {
149 const zev_queue_t *qa = a;
150 const zev_queue_t *qb = b;
151 if (qa->zq_minor_number > qb->zq_minor_number)
152 return 1;
153 if (qa->zq_minor_number < qb->zq_minor_number)
154 return -1;
155 return 0;
156 }
157
/*
 * Drop messages from the head of the global message list that no queue
 * references anymore, i.e. messages older than the oldest zq_oldest
 * pointer of any live queue.  Discard statistics are only counted for
 * messages that were never read by any consumer.
 *
 * must be called with zev_mutex held
 */
void
zev_queue_trim(void)
{
	zev_msg_t *m;
	uint64_t oldest_message;
	zev_queue_t *q;
	int i;

	if (!zev_queue_tail)
		return;

	/* start with a sequence number newer than every queued message */
	oldest_message = zev_queue_tail->seq + 1; /* does not exist, yet. */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_oldest)
			continue;
		if (oldest_message > q->zq_oldest->seq)
			oldest_message = q->zq_oldest->seq;
	}

	/* remove msgs between oldest_message and zev_queue_head */
	while(zev_queue_head && (oldest_message > zev_queue_head->seq)) {
		m = zev_queue_head;
		zev_queue_head = m->next;
		if (zev_queue_head == NULL) {
			/* list is now empty; tail must follow head */
			zev_queue_tail = NULL;
		} else {
			zev_queue_head->prev = NULL;
		}
		if (m->read == 0) {
			/* message expired without ever being consumed */
			zev_statistics.zev_bytes_discarded += m->size;
			zev_statistics.zev_cnt_discarded_events++;
		}
		zev_statistics.zev_queue_len -= m->size;
		zev_queue_len--;
		/* msg header and payload were allocated as one chunk */
		zev_free(m, sizeof(*m) + m->size);
	}
}
199
/*
 * Take a reference on a queue, preventing zev_queue_release() from
 * tearing it down while in use.
 *
 * must be called with zev_mutex held
 */
static void
zev_queue_hold(zev_queue_t *q)
{
	q->zq_refcnt++;
}
206
/*
 * Drop a reference on a queue.  When the last reference goes away and
 * the queue is not persistent, tear it down: unlink it from the queue
 * array, trim now-unreferenced messages, destroy the condvar, remove
 * the minor node and free the soft state.
 *
 * must be called with zev_mutex held
 */
static void
zev_queue_release(zev_queue_t *q)
{
	q->zq_refcnt--;
	if (q->zq_refcnt > 0)
		return;

	ASSERT(q->zq_busy == B_FALSE);

	/* persistent queues will not be removed */
	if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0)
		return;

	/* remove queue from queue list */
	zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL;

	/* discard messages that no queue references anymore */
	zev_queue_trim();

	cv_destroy(&q->zq_condvar);
	ddi_remove_minor_node(q->zq_dip, q->zq_name);
	/* flush stale /devices entries for the removed minor node */
	devfs_clean(ddi_root_node() ? ddi_root_node() : q->zq_dip,
	    NULL, DV_CLEAN_FORCE);
	ddi_soft_state_free(statep, q->zq_minor_number);
	ZEV_MEM_SUB(sizeof(zev_queue_t));
	zev_queue_cnt--;
	zev_update_blockflag();
}
236
237 int
zev_queue_new(zev_queue_t ** queue,dev_info_t * dip,char * name,uint64_t max_queue_len,uint16_t flags)238 zev_queue_new(zev_queue_t **queue,
239 dev_info_t *dip,
240 char *name,
241 uint64_t max_queue_len,
242 uint16_t flags)
243 {
244 zev_queue_t *q;
245 zev_queue_t *tmp;
246 zev_msg_t *msg;
247 int name_exists = 0;
248 minor_t minor;
249 char *p;
250 int i;
251
252 if (max_queue_len > ZEV_MAX_QUEUE_LEN)
253 return EINVAL;
254 if (max_queue_len == 0)
255 max_queue_len = ZEV_MAX_QUEUE_LEN;
256 if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME))
257 return EINVAL;
258 for (p = name; *p; p++) {
259 if (*p >= 'a' && *p <= 'z')
260 continue;
261 if (*p >= '0' && *p <= '9')
262 continue;
263 if (*p == '.')
264 continue;
265 return EINVAL;
266 }
267
268 mutex_enter(&zev_mutex);
269
270 /* find free minor number.*/
271 /* if this were a frequent operation we'd have a free-minor list */
272 for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) {
273 tmp = zev_queues[minor - ZEV_MINOR_MIN];
274 if (tmp == NULL)
275 break;
276 }
277 if (tmp) {
278 mutex_exit(&zev_mutex);
279 return ENOSPC;
280 }
281
282 if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) {
283 mutex_exit(&zev_mutex);
284 return ENOSPC;
285 }
286 ZEV_MEM_ADD(sizeof(zev_queue_t));
287
288 q = ddi_get_soft_state(statep, minor);
289 memset(q, 0, sizeof(*q));
290 strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN);
291 q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0';
292 q->zq_max_queue_len = max_queue_len;
293 q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN;
294 q->zq_flags = flags;
295 q->zq_refcnt = 1;
296 q->zq_dip = dip;
297 q->zq_minor_number = minor;
298 cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL);
299
300 /* insert into queue list */
301 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
302 /* if this were a frequent operation we'd have a name tree */
303 if (zev_queues[i - ZEV_MINOR_MIN] == NULL)
304 continue;
305 if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) {
306 name_exists = 1;
307 break;
308 }
309 }
310 if (name_exists) {
311 ddi_soft_state_free(statep, minor);
312 ZEV_MEM_SUB(sizeof(zev_queue_t));
313 mutex_exit(&zev_mutex);
314 return EEXIST;
315 }
316 zev_queues[minor - ZEV_MINOR_MIN] = q;
317 zev_queue_cnt++;
318
319 /* calculate current queue len and find head and tail */
320 if (!(q->zq_flags & ZEV_FL_INITIALLY_EMPTY)) {
321 q->zq_oldest = zev_queue_tail;
322 msg = zev_queue_tail;
323 while ((msg) && (q->zq_queue_len < q->zq_max_queue_len)) {
324 q->zq_queue_len += msg->size;
325 q->zq_queue_messages++;
326 q->zq_oldest = msg;
327 msg = msg->prev;
328 }
329 }
330
331 zev_update_blockflag();
332
333 mutex_exit(&zev_mutex);
334
335 if (ddi_create_minor_node(dip, name,
336 S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
337 mutex_enter(&zev_mutex);
338 zev_queues[minor - ZEV_MINOR_MIN] = NULL;
339 zev_queue_cnt--;
340 ddi_soft_state_free(statep, minor);
341 ZEV_MEM_SUB(sizeof(zev_queue_t));
342 zev_update_blockflag();
343 mutex_exit(&zev_mutex);
344 return EFAULT;
345 }
346
347 *queue = q;
348 return 0;
349 }
350
351 /*
352 * poll() wakeup thread. Used to check periodically whether we have
353 * bytes left in the queue that have not yet been made into a
354 * pollwakeup() call. This is meant to insure a maximum waiting
355 * time until an event is presented as a poll wakeup, while at
356 * the same time not making every single event into a poll wakeup
357 * of it's own.
358 */
359
/*
 * Issue pollwakeup() on every busy queue that has data pending.
 * If flush_all is B_FALSE, only queues whose backlog exceeds their
 * wakeup threshold are woken; B_TRUE wakes all non-empty busy queues.
 */
static void
zev_poll_wakeup(boolean_t flush_all)
{
	zev_queue_t *q;
	int i;

	/*
	 * This loop works with hold() and release() because
	 * pollwakeup() requires us to release our locks before calling it.
	 *
	 * from pollwakeup(9F):
	 *
	 *   "Driver defined locks should not be held across calls
	 *   to this function."
	 */

	/* wake up threads for each individual queue */
	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_busy)
			continue;
		if (!q->zq_queue_len)
			continue;
		if ((flush_all) ||
		    (q->zq_queue_len > q->zq_wakeup_threshold)) {
			/* hold keeps q alive while zev_mutex is dropped */
			zev_queue_hold(q);
			mutex_exit(&zev_mutex);
			pollwakeup(&q->zq_pollhead, POLLIN);
			mutex_enter(&zev_mutex);
			zev_queue_release(q);
		}
	}
	mutex_exit(&zev_mutex);
}
397
398 static void
zev_poll_wakeup_thread_main(void)399 zev_poll_wakeup_thread_main(void)
400 {
401 while (zev_wakeup_thread_run) {
402 delay(drv_usectohz(100 * 1000)); /* sleep 100ms */
403
404 zev_poll_wakeup(B_TRUE);
405 }
406 thread_exit();
407 }
408
409 static int
zev_ioc_mute_pool(char * poolname)410 zev_ioc_mute_pool(char *poolname)
411 {
412 zev_pool_list_entry_t *pe;
413 rw_enter(&zev_pool_list_rwlock, RW_WRITER);
414 /* pool already muted? */
415 for (pe=zev_muted_pools_head; pe; pe=pe->next) {
416 if (!strcmp(pe->name, poolname)) {
417 rw_exit(&zev_pool_list_rwlock);
418 return EEXIST;
419 }
420 }
421 pe = zev_zalloc(sizeof(*pe));
422 if (!pe) {
423 rw_exit(&zev_pool_list_rwlock);
424 return ENOMEM;
425 }
426 (void) strncpy(pe->name, poolname, sizeof(pe->name));
427 pe->next = zev_muted_pools_head;
428 zev_muted_pools_head = pe;
429 rw_exit(&zev_pool_list_rwlock);
430 return (0);
431 }
432
433 static int
zev_ioc_unmute_pool(char * poolname)434 zev_ioc_unmute_pool(char *poolname)
435 {
436 zev_pool_list_entry_t *pe, *peprev;
437
438 rw_enter(&zev_pool_list_rwlock, RW_WRITER);
439 /* pool muted? */
440 peprev = NULL;
441 for (pe=zev_muted_pools_head; pe; pe=pe->next) {
442 if (!strcmp(pe->name, poolname))
443 break;
444 peprev = pe;
445 }
446 if (pe) {
447 rw_exit(&zev_pool_list_rwlock);
448 return ENOENT;
449 }
450
451 if (peprev != NULL) {
452 peprev->next = pe->next;
453 } else {
454 zev_muted_pools_head = pe->next;
455 }
456 zev_free(pe, sizeof(*pe));
457 rw_exit(&zev_pool_list_rwlock);
458 return (0);
459 }
460
461 int
zev_skip_pool(objset_t * os)462 zev_skip_pool(objset_t *os)
463 {
464 zev_pool_list_entry_t *pe;
465 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
466 rw_enter(&zev_pool_list_rwlock, RW_READER);
467 for (pe=zev_muted_pools_head; pe; pe=pe->next) {
468 if (!strcmp(pe->name, dp->dp_spa->spa_name)) {
469 rw_exit(&zev_pool_list_rwlock);
470 return 1;
471 }
472 }
473 rw_exit(&zev_pool_list_rwlock);
474 return 0;
475 }
476
477 int
zev_skip_fs(zfsvfs_t * fs)478 zev_skip_fs(zfsvfs_t *fs)
479 {
480 dsl_dir_t *d = fs->z_os->os_dsl_dataset->ds_dir;
481 dsl_dir_t *prev = NULL;
482
483 while (d && d != prev) {
484 if (strstr(d->dd_myname, "_root"))
485 return 0;
486 prev = d;
487 d = d->dd_parent;
488 }
489 return 1;
490 }
491
/*
 * Bump the per-operation event counter in "stat" for operation "op".
 * Used for both the global statistics and each queue's private copy.
 * Unknown ops fall through without effect (caller validates op range).
 */
static void
zev_update_statistics(int op, zev_statistics_t *stat)
{
	switch (op) {
	case ZEV_OP_ERROR:
		stat->zev_cnt_errors++;
		break;
	case ZEV_OP_MARK:
		stat->zev_cnt_marks++;
		break;
	case ZEV_OP_ZFS_MOUNT:
		stat->zev_cnt_zfs_mount++;
		break;
	case ZEV_OP_ZFS_UMOUNT:
		stat->zev_cnt_zfs_umount++;
		break;
	case ZEV_OP_ZVOL_WRITE:
		stat->zev_cnt_zvol_write++;
		break;
	case ZEV_OP_ZVOL_TRUNCATE:
		stat->zev_cnt_zvol_truncate++;
		break;
	case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE:
		stat->zev_cnt_znode_close_after_update++;
		break;
	case ZEV_OP_ZNODE_CREATE:
		stat->zev_cnt_znode_create++;
		break;
	case ZEV_OP_ZNODE_REMOVE:
		stat->zev_cnt_znode_remove++;
		break;
	case ZEV_OP_ZNODE_LINK:
		stat->zev_cnt_znode_link++;
		break;
	case ZEV_OP_ZNODE_SYMLINK:
		stat->zev_cnt_znode_symlink++;
		break;
	case ZEV_OP_ZNODE_RENAME:
		stat->zev_cnt_znode_rename++;
		break;
	case ZEV_OP_ZNODE_WRITE:
		stat->zev_cnt_znode_write++;
		break;
	case ZEV_OP_ZNODE_TRUNCATE:
		stat->zev_cnt_znode_truncate++;
		break;
	case ZEV_OP_ZNODE_SETATTR:
		stat->zev_cnt_znode_setattr++;
		break;
	case ZEV_OP_ZNODE_ACL:
		stat->zev_cnt_znode_acl++;
		break;
	}
}
546
/*
 * Append "msg" (op type "op") to the global message list and register
 * it with every live queue.  Takes ownership of msg: it is either
 * linked into the list or freed (on invalid op).
 *
 * May block if a queue with ZEV_FL_BLOCK_WHILE_QUEUE_FULL is full;
 * otherwise the oldest messages of full queues are discarded until
 * the new message fits.  Triggers poll wakeups as needed.
 */
void
zev_queue_message(int op, zev_msg_t *msg)
{
	zev_queue_t *q;
	int wakeup = 0;
	zev_msg_t *m;
	int i;

	msg->next = NULL;
	msg->prev = NULL;
	msg->read = 0;

	if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
		/* report the bad op as an error event, then drop the msg */
		zev_queue_error(op, "unknown op id encountered: %d", op);
		zev_free(msg, sizeof(*msg) + msg->size);
		return;
	}

	/*
	 * This mutex protects us agains race conditions when several
	 * threads want to queue a message and one or more queues are
	 * full: we release zev_mutex to wait for the queues to become
	 * less-than-full, but we don't know in which order the waiting
	 * threads will be awoken. If it's not the same order in which
	 * they went to sleep we might mark different messages as "newest"
	 * in different queues, and so we might have dupes or even
	 * skip messages.
	 */
	mutex_enter(&zev_queue_msg_mutex);

	mutex_enter(&zev_mutex);

	/*
	 * When the module is loaded, the default behavior ist to
	 * put all events into a queue and block if the queue is full.
	 * This is done even before the pseudo device is attached.
	 * This way, no events are lost.
	 *
	 * To discard events entirely the "beaver" queue,
	 * which never discards anything, has to be removed.
	 */

	if (zev_queue_cnt == 0) {
		/* no consumers at all: drop silently (msg leaks intentionally
		 * avoided by the caller path? NOTE(review): msg is not freed
		 * here — confirm callers expect this. */
		mutex_exit(&zev_mutex);
		mutex_exit(&zev_queue_msg_mutex);
		return;
	}

	/* put message into global queue */
	msg->seq = zev_msg_sequence_number++;

	/* do we need to make room? */
again:
	while (zev_statistics.zev_max_queue_len &&
	    zev_statistics.zev_queue_len > zev_statistics.zev_max_queue_len) {

		if (zev_have_blocking_queues) {
			/* so we have blocking queues. are they full? */
			for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
				q = zev_queues[i - ZEV_MINOR_MIN];
				if (!q)
					continue;
				if ((q->zq_flags &
				     ZEV_FL_BLOCK_WHILE_QUEUE_FULL) == 0)
					continue;
				if (q->zq_queue_len &&
				    q->zq_queue_len > q->zq_max_queue_len) {
					/* block until queue's been shrunk. */
					cv_wait(&zev_condvar, &zev_mutex);
					/* queue set may have changed; rescan */
					goto again;
				}
			}
		}

		/* discard events until this message fits into all queues */

		for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
			q = zev_queues[i - ZEV_MINOR_MIN];
			if (!q)
				continue;
			/* discard msgs until queue is small enough */
			while (q->zq_queue_len &&
			       q->zq_queue_len > q->zq_max_queue_len) {
				m = q->zq_oldest;
				if (m == NULL)
					break;
				q->zq_events_discarded++;
				q->zq_bytes_discarded += m->size;
				q->zq_oldest = m->next;
				q->zq_queue_len -= m->size;
				q->zq_queue_messages--;
			}
		}

		/* free global messages nobody references anymore */
		zev_queue_trim();
		ASSERT(zev_statistics.zev_queue_len == 0 ||
		       zev_statistics.zev_queue_len <=
		       zev_statistics.zev_max_queue_len);
	}

	/* link msg at the tail of the global list */
	if (zev_queue_tail == NULL) {
		zev_queue_head = zev_queue_tail = msg;
	} else {
		zev_queue_tail->next = msg;
		msg->prev = zev_queue_tail;
		zev_queue_tail = msg;
	}
	zev_queue_len++;
	zev_statistics.zev_cnt_total_events++;
	zev_statistics.zev_queue_len += msg->size;

	/* update per-device queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;

		/* hold keeps q alive across the cv_wait below */
		zev_queue_hold(q);

		/* make sure queue has enough room */
		while (q->zq_max_queue_len &&
		       q->zq_queue_len > q->zq_max_queue_len) {

			if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
				/* block until queue has been shrunk. */
				cv_wait(&zev_condvar, &zev_mutex);
			} else {
				/* discard msgs until queue is small enough */
				while (q->zq_queue_len > q->zq_max_queue_len) {
					m = q->zq_oldest;
					if (m == NULL)
						break;
					q->zq_events_discarded++;
					q->zq_bytes_discarded += m->size;
					q->zq_oldest = m->next;
					q->zq_queue_len -= m->size;
					q->zq_queue_messages--;
				}
			}
		}

		/* register new message at the end of the queue */
		q->zq_queue_len += msg->size;
		q->zq_queue_messages++;
		q->zq_bytes_total += msg->size;
		q->zq_events_total++;
		if (q->zq_oldest == NULL)
			q->zq_oldest = msg;

		zev_update_statistics(op, &q->zq_statistics);

		if (q->zq_queue_len > q->zq_wakeup_threshold)
			wakeup = 1;
		if (q->zq_queue_len == msg->size) /* queue was empty */
			cv_broadcast(&q->zq_condvar);

		zev_queue_release(q);
	}

	/* drop messages that have expired from every queue */
	zev_queue_trim();

	zev_update_statistics(op, &zev_statistics);
	mutex_exit(&zev_mutex);
	mutex_exit(&zev_queue_msg_mutex);

	/* one or more queues need a pollwakeup() */
	if (op == ZEV_OP_MARK) {
		zev_poll_wakeup(B_TRUE);
	} else if (wakeup) {
		zev_poll_wakeup(B_FALSE);
	}

	return;
}
721
/*
 * Format an error description and queue it as a ZEV_OP_ERROR event.
 * "op" is the operation that failed; fmt/... printf-format the error
 * text.  Overlong messages are dropped with a console warning.
 */
void
zev_queue_error(int op, char *fmt, ...)
{
	char buf[ZEV_MAX_MESSAGE_LEN];
	va_list ap;
	int len;
	zev_msg_t *msg = NULL;
	zev_error_t *rec;
	int msg_size;

	va_start(ap, fmt);
	len = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	if (len >= sizeof(buf)) {
		/* formatted text would have been truncated; drop the event */
		cmn_err(CE_WARN, "zev: can't report error - "
		        "dropping event entirely.");
		return;
	}

	/* header + error record + NUL-terminated text in one allocation */
	msg_size = sizeof(*rec) + len + 1;
	msg = zev_alloc(sizeof(*msg) + msg_size);
	msg->size = msg_size;
	rec = (zev_error_t *)(msg + 1);
	rec->record_len = msg_size;
	rec->op = ZEV_OP_ERROR;
	rec->op_time = ddi_get_time();
	rec->guid = 0;
	rec->failed_op = op;
	rec->errstr_len = len;
	(void) memcpy(ZEV_ERRSTR(rec), buf, len + 1);

	/* zev_queue_message() takes ownership of msg */
	zev_queue_message(ZEV_OP_ERROR, msg);
	return;
}
756
757 static int
zev_find_queue(zev_queue_t ** out,zev_queue_t * req_q,zev_queue_name_t * name)758 zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name)
759 {
760 char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1];
761 zev_queue_t *q;
762 int i;
763
764 *out = NULL;
765
766 if (name->zev_namelen == 0) {
767 if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR)
768 return EINVAL;
769 mutex_enter(&zev_mutex);
770 zev_queue_hold(req_q);
771 mutex_exit(&zev_mutex);
772 *out = req_q;
773 return 0;
774 }
775
776 if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
777 return EINVAL;
778 strncpy(namebuf, name->zev_name, name->zev_namelen);
779 namebuf[name->zev_namelen] = '\0';
780
781 mutex_enter(&zev_mutex);
782 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
783 q = zev_queues[i - ZEV_MINOR_MIN];
784 if (!q)
785 continue;
786 if (!strcmp(q->zq_name, namebuf)) {
787 zev_queue_hold(q);
788 mutex_exit(&zev_mutex);
789 *out = q;
790 return 0;
791 }
792 }
793 mutex_exit(&zev_mutex);
794 return ENOENT;
795 }
796
797 static int
zev_ioc_get_queue_statistics(zev_queue_t * req_q,intptr_t arg,int mode)798 zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode)
799 {
800 zev_ioctl_get_queue_statistics_t gs;
801 zev_queue_t *q;
802 int ret;
803
804 if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
805 return EFAULT;
806
807 ret = zev_find_queue(&q, req_q, &gs.zev_queue_name);
808 if (ret)
809 return ret;
810
811 /* ddi_copyout() can take a long time. Better make
812 a copy to be able to release the mutex faster. */
813 mutex_enter(&zev_mutex);
814 memcpy(&gs.zev_statistics, &q->zq_statistics,sizeof(gs.zev_statistics));
815 gs.zev_statistics.zev_queue_len = q->zq_queue_len;
816 gs.zev_statistics.zev_bytes_read = q->zq_bytes_read;
817 gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded;
818 gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len;
819 gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded;
820 gs.zev_statistics.zev_cnt_total_events = q->zq_events_total;
821 zev_queue_release(q);
822 mutex_exit(&zev_mutex);
823
824 if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0)
825 return EFAULT;
826 return 0;
827 }
828
829 static int
zev_ioc_set_queue_properties(zev_queue_t * req_q,intptr_t arg,int mode)830 zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
831 {
832 zev_ioctl_set_queue_properties_t qp;
833 zev_queue_t *q;
834 uint64_t old_max;
835 uint64_t old_flags;
836 int ret;
837
838 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
839 return EFAULT;
840 if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN)
841 return EINVAL;
842 if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN)
843 return EINVAL;
844
845 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
846 if (ret)
847 return ret;
848
849 mutex_enter(&zev_mutex);
850
851 /*
852 * Note: if the PERSISTENT flag is cleared, and the queue is not busy,
853 * the queue should be removed by zev_queue_release() in zev_ioctl().
854 */
855 old_flags = qp.zev_flags;
856 q->zq_flags = qp.zev_flags;
857 if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) &&
858 (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) {
859 /* queue is no longer blocking - wake blocked threads */
860 cv_broadcast(&zev_condvar);
861 }
862
863 zev_update_blockflag();
864
865 old_max = q->zq_max_queue_len;
866 q->zq_max_queue_len = qp.zev_max_queue_len;
867 if (q->zq_max_queue_len < old_max)
868 zev_queue_trim();
869 if (q->zq_max_queue_len > old_max)
870 cv_broadcast(&zev_condvar); /* threads may be waiting */
871
872 if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) &&
873 (qp.zev_poll_wakeup_threshold <= q->zq_queue_len))
874 pollwakeup(&q->zq_pollhead, POLLIN);
875 q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold;
876
877 zev_queue_release(q);
878 mutex_exit(&zev_mutex);
879 return 0;
880 }
881
882 static int
zev_ioc_get_queue_properties(zev_queue_t * req_q,intptr_t arg,int mode)883 zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
884 {
885 zev_ioctl_get_queue_properties_t qp;
886 zev_queue_t *q;
887 int ret;
888
889 if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
890 return EFAULT;
891
892 ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
893 if (ret)
894 return ret;
895
896 mutex_enter(&zev_mutex);
897 qp.zev_max_queue_len = q->zq_max_queue_len;
898 qp.zev_flags = q->zq_flags;
899 qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold;
900 zev_queue_release(q);
901 mutex_exit(&zev_mutex);
902
903 if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0)
904 return EFAULT;
905 return 0;
906 }
907
908 static int
zev_ioc_add_queue(zev_queue_t * req_q,intptr_t arg,int mode)909 zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode)
910 {
911 zev_ioctl_add_queue_t aq;
912 zev_queue_t *new_q;
913 char name[ZEV_MAX_QUEUE_NAME_LEN+1];
914
915 if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0)
916 return EFAULT;
917
918 if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
919 return EINVAL;
920 strncpy(name, aq.zev_name, aq.zev_namelen);
921 name[aq.zev_namelen] = '\0';
922 if (!strncmp(name, ZEV_TMPQUEUE_DEVICE_NAME,
923 strlen(ZEV_TMPQUEUE_DEVICE_NAME)))
924 return EINVAL;
925
926 return zev_queue_new(&new_q, req_q->zq_dip, name,
927 aq.zev_max_queue_len, aq.zev_flags);
928 }
929
/*
 * ZEV_IOC_REMOVE_QUEUE: mark the named queue for removal.  The queue
 * must not be open (busy).  Clearing zq_flags strips PERSISTENT and
 * BLOCK_WHILE_QUEUE_FULL so the release path can actually free it.
 *
 * Returns 0, EFAULT, EINVAL (name too long), ENOENT, or EBUSY.
 */
static int
zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_remove_queue_t rq;
	zev_queue_t *q;
	char name[ZEV_MAX_QUEUE_NAME_LEN+1];
	int found = 0;
	int i;

	if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0)
		return EFAULT;

	if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	/* namelen is bounded; terminate the copy explicitly */
	strncpy(name, rq.zev_queue_name.zev_name,
	    rq.zev_queue_name.zev_namelen);
	name[rq.zev_queue_name.zev_namelen] = '\0';

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (!strcmp(q->zq_name, name)) {
			found = 1;
			break;
		}
	}
	if (!found) {
		mutex_exit(&zev_mutex);
		return ENOENT;
	}

	if (q->zq_busy) {
		/* somebody has the device open */
		mutex_exit(&zev_mutex);
		return EBUSY;
	}
	/*
	 * clear flags, so that persistent queues are removed aswell
	 * and the queue becomes non-blocking.
	 */
	q->zq_flags = 0;
	if (q->zq_to_be_removed == B_FALSE) {
		/* drop the creation reference exactly once */
		q->zq_to_be_removed = B_TRUE;
		zev_queue_release(q);
	}
	/* some threads might be waiting for this queue to become writable */
	cv_broadcast(&zev_condvar);

	mutex_exit(&zev_mutex);
	return 0;
}
982
983 static int
zev_ioc_get_debug_info(zev_queue_t * req_q,intptr_t arg,int mode)984 zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode)
985 {
986 zev_ioctl_debug_info_t di;
987 uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0);
988 uint64_t mem_freed = atomic_add_64_nv(&zev_memory_freed, 0);
989
990 zev_chksum_stats(&di.zev_chksum_cache_size,
991 &di.zev_chksum_cache_hits,
992 &di.zev_chksum_cache_misses);
993 di.zev_memory_allocated = mem_allocated - mem_freed;
994 if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0)
995 return EFAULT;
996 return 0;
997 }
998
999 static int
zev_ioc_get_queue_list(zev_queue_t * req_q,intptr_t arg,int mode)1000 zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode)
1001 {
1002 zev_ioctl_get_queue_list_t gql;
1003 zev_queue_t *q;
1004 int i = 0;
1005 int count = 0;
1006
1007 memset(&gql, 0, sizeof(gql));
1008
1009 mutex_enter(&zev_mutex);
1010 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
1011 q = zev_queues[i - ZEV_MINOR_MIN];
1012 if (!q)
1013 continue;
1014 strncpy(gql.zev_queue_name[count].zev_name,
1015 q->zq_name, ZEV_MAX_QUEUE_NAME_LEN);
1016 gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name);
1017 count++;
1018 }
1019 gql.zev_n_queues = count;
1020 mutex_exit(&zev_mutex);
1021
1022 if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0)
1023 return EFAULT;
1024 return 0;
1025 }
1026
1027 static int
zev_ioc_set_max_queue_len(zev_queue_t * req_q,intptr_t arg,int mode)1028 zev_ioc_set_max_queue_len(zev_queue_t *req_q, intptr_t arg, int mode)
1029 {
1030 uint64_t len;
1031 int i;
1032 zev_queue_t *q;
1033
1034 if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) {
1035 return EFAULT;
1036 }
1037 if (len > ZEV_MAX_QUEUE_LEN) {
1038 return EINVAL;
1039 }
1040 mutex_enter(&zev_mutex);
1041 zev_statistics.zev_max_queue_len = len;
1042 for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
1043 q = zev_queues[i - ZEV_MINOR_MIN];
1044 if (!q)
1045 continue;
1046 if (q->zq_max_queue_len <=
1047 zev_statistics.zev_max_queue_len)
1048 continue;
1049 q->zq_max_queue_len = zev_statistics.zev_max_queue_len;
1050 }
1051 cv_broadcast(&zev_condvar);
1052 mutex_exit(&zev_mutex);
1053 return 0;
1054 }
1055
1056 static int
zev_ioc_get_zev_version(intptr_t arg,int mode)1057 zev_ioc_get_zev_version(intptr_t arg, int mode)
1058 {
1059 zev_ioctl_get_zev_version vi;
1060 vi.zev_major_version = ZEV_MAJOR_VERSION;
1061 vi.zev_minor_version = ZEV_MINOR_VERSION;
1062 if (ddi_copyout(&vi, (void *)arg, sizeof(vi), mode) != 0)
1063 return EFAULT;
1064 return 0;
1065 }
1066
1067 /* ARGSUSED */
1068 static int
zev_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * credp,int * rvalp)1069 zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
1070 {
1071 zev_statistics_t zs;
1072 zev_ioctl_poolarg_t pa;
1073 zev_ioctl_mark_t mark;
1074 zev_mark_t *rec;
1075 int msg_size;
1076 zev_msg_t *msg;
1077 uint64_t mark_id;
1078 minor_t minor;
1079 zev_queue_t *req_q;
1080 int ret = 0;
1081
1082 minor = getminor(dev);
1083 mutex_enter(&zev_mutex);
1084 if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) {
1085 mutex_exit(&zev_mutex);
1086 return (ENXIO);
1087 }
1088 zev_queue_hold(req_q);
1089 mutex_exit(&zev_mutex);
1090 /*
1091 * all structures passed between kernel and userspace
1092 * are now compatible between 64 and 32 bit. Model
1093 * conversion can be ignored.
1094 */
1095 switch (cmd) {
1096 case ZEV_IOC_GET_GLOBAL_STATISTICS:
1097 /* ddi_copyout() can take a long time. Better make
1098 a copy to be able to release the mutex faster. */
1099 mutex_enter(&zev_mutex);
1100 (void) memcpy(&zs, &zev_statistics, sizeof(zs));
1101 mutex_exit(&zev_mutex);
1102 if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0)
1103 ret = EFAULT;
1104 break;
1105 case ZEV_IOC_GET_QUEUE_STATISTICS:
1106 ret = zev_ioc_get_queue_statistics(req_q, arg, mode);
1107 break;
1108 case ZEV_IOC_MUTE_POOL:
1109 case ZEV_IOC_UNMUTE_POOL:
1110 if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) {
1111 ret = EFAULT;
1112 break;
1113 }
1114 if (pa.zev_poolname_len >=MAXPATHLEN) {
1115 ret = EINVAL;
1116 break;
1117 }
1118 pa.zev_poolname[pa.zev_poolname_len] = '\0';
1119 if (cmd == ZEV_IOC_MUTE_POOL) {
1120 ret = zev_ioc_mute_pool(pa.zev_poolname);
1121 } else {
1122 ret = zev_ioc_unmute_pool(pa.zev_poolname);
1123 }
1124 break;
1125 case ZEV_IOC_SET_MAX_QUEUE_LEN:
1126 ret = zev_ioc_set_max_queue_len(req_q, arg, mode);
1127 break;
1128 case ZEV_IOC_GET_QUEUE_PROPERTIES:
1129 ret = zev_ioc_get_queue_properties(req_q, arg, mode);
1130 break;
1131 case ZEV_IOC_SET_QUEUE_PROPERTIES:
1132 ret = zev_ioc_set_queue_properties(req_q, arg, mode);
1133 break;
1134 case ZEV_IOC_MARK:
1135 if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) {
1136 ret = EFAULT;
1137 break;
1138 }
1139 /* prepare message */
1140 msg_size = sizeof(*rec) + mark.zev_payload_len + 1;
1141 msg = zev_alloc(sizeof(*msg) + msg_size);
1142 msg->size = msg_size;
1143 rec = (zev_mark_t *)(msg + 1);
1144 rec->record_len = msg_size;
1145 rec->op = ZEV_OP_MARK;
1146 rec->op_time = ddi_get_time();
1147 rec->guid = mark.zev_guid;
1148 rec->payload_len = mark.zev_payload_len;
1149 /* get payload */
1150 if (ddi_copyin(((char *)arg) + sizeof(mark),
1151 ZEV_PAYLOAD(rec),
1152 mark.zev_payload_len, mode) != 0) {
1153 zev_free(msg, msg_size);
1154 ret = EFAULT;
1155 break;
1156 }
1157 *(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0';
1158 /* get mark id and queue message */
1159 mutex_enter(&zev_mark_id_mutex);
1160 mark_id = zev_mark_id++;
1161 mutex_exit(&zev_mark_id_mutex);
1162 rec->mark_id = mark_id;
1163 zev_queue_message(ZEV_OP_MARK, msg);
1164 /* report mark id to userland, ignore errors */
1165 mark.zev_mark_id = mark_id;
1166 ddi_copyout(&mark, (void *)arg, sizeof(mark), mode);
1167 break;
1168 case ZEV_IOC_ADD_QUEUE:
1169 if (minor != ZEV_CONTROL_DEVICE_MINOR) {
1170 ret = EACCES;
1171 break;
1172 }
1173 ret = zev_ioc_add_queue(req_q, arg, mode);
1174 break;
1175 case ZEV_IOC_REMOVE_QUEUE:
1176 if (minor != ZEV_CONTROL_DEVICE_MINOR) {
1177 ret = EACCES;
1178 break;
1179 }
1180 ret = zev_ioc_remove_queue(req_q, arg, mode);
1181 break;
1182 case ZEV_IOC_GET_DEBUG_INFO:
1183 ret = zev_ioc_get_debug_info(req_q, arg, mode);
1184 break;
1185 case ZEV_IOC_GET_QUEUE_LIST:
1186 ret = zev_ioc_get_queue_list(req_q, arg, mode);
1187 break;
1188 case ZEV_IOC_GET_FILE_SIGNATURES:
1189 ret = zev_ioc_get_signatures(arg, mode);
1190 break;
1191 case ZEV_IOC_GET_ZEV_VERSION:
1192 ret = zev_ioc_get_zev_version(arg, mode);
1193 break;
1194 default:
1195 /* generic "ioctl unknown" error */
1196 ret = ENOTTY;
1197 }
1198
1199 mutex_enter(&zev_mutex);
1200 zev_queue_release(req_q);
1201 mutex_exit(&zev_mutex);
1202 if (ret)
1203 return(SET_ERROR(ret));
1204 return (ret);
1205 }
1206
1207 static int
zev_chpoll(dev_t dev,short events,int anyyet,short * reventsp,struct pollhead ** phpp)1208 zev_chpoll(dev_t dev, short events, int anyyet,
1209 short *reventsp, struct pollhead **phpp)
1210 {
1211 int minor;
1212 short revent = 0;
1213 zev_queue_t *q;
1214
1215 /* use minor-specific queue context and it's pollhead */
1216 minor = getminor(dev);
1217 if (minor == ZEV_CONTROL_DEVICE_MINOR)
1218 return (EINVAL);
1219 mutex_enter(&zev_mutex);
1220 if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
1221 mutex_exit(&zev_mutex);
1222 return (ENXIO);
1223 }
1224 revent = 0;
1225 if ((events & POLLIN)) {
1226 if (q->zq_oldest)
1227 revent |= POLLIN;
1228 }
1229 if (revent == 0) {
1230 if (!anyyet) {
1231 *phpp = &q->zq_pollhead;
1232 }
1233 }
1234 *reventsp = revent;
1235 mutex_exit(&zev_mutex);
1236 return (0);
1237 }
1238
1239 /* ARGSUSED */
1240 static int
zev_read(dev_t dev,struct uio * uio_p,cred_t * crep_p)1241 zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p)
1242 {
1243 minor_t minor;
1244 offset_t off;
1245 int ret = 0;
1246 zev_msg_t *msg;
1247 char *data;
1248 zev_queue_t *q;
1249
1250 minor = getminor(dev);
1251 if (minor == ZEV_CONTROL_DEVICE_MINOR)
1252 return (EINVAL);
1253
1254 mutex_enter(&zev_mutex);
1255 q = ddi_get_soft_state(statep, minor);
1256 if (q == NULL) {
1257 mutex_exit(&zev_mutex);
1258 return (ENXIO);
1259 }
1260 off = uio_p->uio_loffset;
1261 msg = q->zq_oldest;
1262 while (msg == NULL) {
1263 if (!ddi_can_receive_sig()) {
1264 /*
1265 * read() shouldn't block because this thread
1266 * can't receive signals. (e.g., it might be
1267 * torn down by exit() right now.)
1268 */
1269 mutex_exit(&zev_mutex);
1270 return 0;
1271 }
1272 if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) {
1273 /* signal received. */
1274 mutex_exit(&zev_mutex);
1275 return EINTR;
1276 }
1277 msg = q->zq_oldest;
1278 }
1279 if (msg->size > uio_p->uio_resid) {
1280 mutex_exit(&zev_mutex);
1281 return E2BIG;
1282 }
1283 while (msg && uio_p->uio_resid >= msg->size) {
1284 data = (char *)(msg + 1);
1285 ret = uiomove(data, msg->size, UIO_READ, uio_p);
1286 if (ret != 0) {
1287 mutex_exit(&zev_mutex);
1288 cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
1289 uio_p->uio_loffset = off;
1290 return (ret);
1291 }
1292 q->zq_oldest = msg->next;
1293 q->zq_bytes_read += msg->size;
1294 q->zq_queue_len -= msg->size;
1295 q->zq_queue_messages--;
1296 msg->read++;
1297 msg = q->zq_oldest;
1298 }
1299 zev_queue_trim();
1300 cv_broadcast(&zev_condvar);
1301 mutex_exit(&zev_mutex);
1302 uio_p->uio_loffset = off;
1303 return 0;
1304 }
1305
1306 /* ARGSUSED */
1307 static int
zev_close(dev_t dev,int flag,int otyp,cred_t * crepd)1308 zev_close(dev_t dev, int flag, int otyp, cred_t *crepd)
1309 {
1310 zev_queue_t *q;
1311 int minor;
1312
1313 minor = getminor(dev);
1314 if (otyp != OTYP_CHR)
1315 return (EINVAL);
1316 mutex_enter(&zev_mutex);
1317 if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
1318 mutex_exit(&zev_mutex);
1319 return (ENXIO);
1320 }
1321 if (q->zq_busy != B_TRUE) {
1322 mutex_exit(&zev_mutex);
1323 return (EINVAL);
1324 }
1325 q->zq_busy = B_FALSE;
1326 if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0)
1327 zev_queue_release(q);
1328 mutex_exit(&zev_mutex);
1329 return (0);
1330 }
1331
1332 /* ARGSUSED */
1333 static int
zev_open(dev_t * devp,int flag,int otyp,cred_t * credp)1334 zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1335 {
1336 zev_queue_t *q;
1337 minor_t minor;
1338 char zq_name[ZEV_MAX_QUEUE_NAME_LEN];
1339 int ret;
1340
1341 minor = getminor(*devp);
1342 if (otyp != OTYP_CHR)
1343 return (EINVAL);
1344 if (drv_priv(credp) != 0)
1345 return (EPERM);
1346 if (minor == ZEV_TMPQUEUE_DEVICE_MINOR) {
1347 /* get control queue soft state to have dip */
1348 if ((q = ddi_get_soft_state(statep,
1349 ZEV_CONTROL_DEVICE_MINOR)) == NULL){
1350 mutex_exit(&zev_mutex);
1351 return (ENXIO);
1352 }
1353
1354 /* create new temporary queue and return it. */
1355
1356 snprintf(zq_name, sizeof(zq_name),
1357 ZEV_TMPQUEUE_DEVICE_NAME ".%d", zev_tmpqueue_num++);
1358
1359 ret = zev_queue_new(&q, q->zq_dip, zq_name, 0,
1360 ZEV_FL_INITIALLY_EMPTY);
1361 if (ret) {
1362 return ret;
1363 }
1364
1365 q->zq_busy = B_TRUE;
1366 *devp = makedevice(getmajor(*devp), q->zq_minor_number);
1367 return 0;
1368 }
1369 mutex_enter(&zev_mutex);
1370 if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
1371 mutex_exit(&zev_mutex);
1372 return (ENXIO);
1373 }
1374 if (minor == ZEV_CONTROL_DEVICE_MINOR) {
1375 /* control device may be used in parallel */
1376 q->zq_busy = B_TRUE;
1377 mutex_exit(&zev_mutex);
1378 return 0;
1379 }
1380 if (q->zq_busy == B_TRUE) {
1381 mutex_exit(&zev_mutex);
1382 return (EBUSY);
1383 }
1384 q->zq_busy = B_TRUE; /* can only be opened exclusively */
1385 mutex_exit(&zev_mutex);
1386 return (0);
1387 }
1388
/*
 * Character device entry points (cb_ops(9S)) shared by the control
 * device, the tmpqueue device and all per-queue devices. All block
 * device and unsupported entries are nodev. D_MP declares the driver
 * MT-safe (all entry points take zev_mutex themselves).
 */
static struct cb_ops zev_cb_ops = {
	zev_open,		/* open */
	zev_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	zev_read,		/* read */
	nodev,			/* write */
	zev_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	zev_chpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_MP | D_64BIT,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* aread */
	nodev,			/* awrite */
};
1409
/*
 * Free all per-instance resources: minor nodes, the pollwakeup thread,
 * the "ctrl" dummy queue's soft state and every remaining queue.
 * Called from detach and from the attach failure path; only instance 0
 * exists (hardcoded in zev.conf), anything else is rejected.
 */
static void
zev_free_instance(dev_info_t *dip)
{
	int instance;
	zev_queue_t *q;
	int i;

	instance = ddi_get_instance(dip);
	if (instance != 0) {
		cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)",
		    instance);
		return;
	}

	/* remove all minor nodes and purge stale /devices entries */
	ddi_remove_minor_node(dip, NULL);
	devfs_clean(ddi_root_node() ? ddi_root_node() : dip,
	    NULL, DV_CLEAN_FORCE);

	/* stop pollwakeup thread */
	zev_wakeup_thread_run = 0;
	if (zev_poll_wakeup_thread != NULL) {
		thread_join(zev_poll_wakeup_thread->t_did);
		zev_poll_wakeup_thread = NULL;
	}

	mutex_enter(&zev_mutex);

	/* remove "ctrl" dummy queue */
	q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
	if (q) {
		ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
	}

	/* remove all other queues; each must hold only our own reference */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i- ZEV_MINOR_MIN];
		if (!q)
			continue;
		ASSERT(q->zq_refcnt == 1);
		zev_queue_release(q);
	}
	/* discard messages that no queue references anymore */
	zev_queue_trim();
	bzero(&zev_queues, sizeof(zev_queues));

	mutex_exit(&zev_mutex);

}
1458
/*
 * detach(9E) entry point. Refuses to detach while the control device is
 * open or any queue still exists; otherwise unhooks the zev callbacks
 * from ZFS (restoring the defaults) and frees all instance resources.
 * Suspend is always refused: ZFS must not lose events.
 */
static int
zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance;
	zev_queue_t *q;

	/* called once per instance with DDI_DETACH,
	   may be called to suspend */
	switch (cmd) {
	case DDI_DETACH:
		/* instance busy? */
		instance = ddi_get_instance(dip);
		if (instance != 0) { /* hardcoded in zev.conf */
			/* this module only supports one instance. */
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (!zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		/* check "ctrl" queue to see if it is busy */
		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		if (q == NULL) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (q->zq_busy) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		/* are there any queues? */
		if (zev_queue_cnt > 0) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		zev_attached = B_FALSE;
		mutex_exit(&zev_mutex);

		/* switch ZFS event callbacks back to default */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = rz_zev_default_callbacks;
		rz_zev_set_active(B_FALSE);
		rw_exit(&rz_zev_rwlock);

		/* no thread is inside of the callbacks anymore. */

		/* free resources allocated for this instance */
		zev_free_instance(dip);
		zev_chksum_fini();
#if 0
		cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64,
			zev_memory_allocated - zev_memory_freed);
#endif
		return (DDI_SUCCESS);
	case DDI_SUSPEND:
		/* kernel must not suspend zev devices while ZFS is running */
		return (DDI_FAILURE);
	default:
		return (DDI_FAILURE);
	}
}
1524
/*
 * attach(9E) entry point. Sets up the single supported instance:
 * a dummy queue backing the "ctrl" minor, the "ctrl" and "tmpqueue"
 * minor nodes, the default event queue, the pollwakeup thread and the
 * checksumming subsystem; finally hooks the zev callbacks into ZFS.
 * On any failure everything created so far is torn down again.
 */
static int
zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* called once per instance with DDI_ATTACH,
	   may be called to resume */
	int instance;
	int error;
	zev_queue_t *q;
	switch (cmd) {
	case DDI_ATTACH:
		/* create instance state */
		instance = ddi_get_instance(dip);
		if (instance != 0) { /* hardcoded in zev.conf */
			/* this module only supports one instance. */
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) !=
		    DDI_SUCCESS) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		ZEV_MEM_ADD(sizeof(zev_queue_t));
		zev_attached = B_TRUE;

		/* init queue list */
		bzero(&zev_queues, sizeof(zev_queues));
		mutex_exit(&zev_mutex);

		/* create a dummy queue for management of "ctrl" */

		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		q->zq_dip = dip;
		q->zq_refcnt = 1;
		q->zq_busy = B_FALSE;
		q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR;
		q->zq_flags = ZEV_FL_PERSISTENT;
		strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME);

		/* create device node for "ctrl" */
		if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME,
		    S_IFCHR, ZEV_CONTROL_DEVICE_MINOR,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto fail;
		}

		/* note: intentionally not adding ctrl queue to queue list. */

		/* create device node for "tmpqueue" */
		if (ddi_create_minor_node(dip, ZEV_TMPQUEUE_DEVICE_NAME,
		    S_IFCHR, ZEV_TMPQUEUE_DEVICE_MINOR,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto fail;
		}

		/* default queue: persistent, blocks writers when full */
		error = zev_queue_new(&q, dip,
		    ZEV_DEFAULT_QUEUE_NAME,
		    ZEV_MAX_QUEUE_LEN,
		    ZEV_FL_BLOCK_WHILE_QUEUE_FULL|
		    ZEV_FL_PERSISTENT);
		if (error)
			goto fail;

		/* start pollwakeup thread */
		zev_wakeup_thread_run = 1;
		zev_poll_wakeup_thread = thread_create(NULL, 0,
		    zev_poll_wakeup_thread_main, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

		ddi_report_dev(dip);

		zev_chksum_init();

		/* switch ZFS event callbacks to zev module callbacks */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = &zev_callbacks;
		rz_zev_set_active(B_TRUE);
		rw_exit(&rz_zev_rwlock);

		return (DDI_SUCCESS);
	case DDI_RESUME:
		/* suspending zev devices should never happen; resume is a
		   no-op */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
fail:
	cmn_err(CE_WARN, "zev: attach failed");
	zev_free_instance(dip);
	mutex_enter(&zev_mutex);
	zev_attached = B_FALSE;
	mutex_exit(&zev_mutex);
	return (DDI_FAILURE);
}
1625
1626 /* ARGSUSED */
1627 static int
zev_getinfo(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** resultp)1628 zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
1629 {
1630 minor_t minor;
1631 zev_queue_t *q;
1632
1633 /* arg is dev_t */
1634 minor = getminor((dev_t)arg);
1635 mutex_enter(&zev_mutex);
1636 q = ddi_get_soft_state(statep, minor);
1637 if (q == NULL) {
1638 *resultp = NULL;
1639 mutex_exit(&zev_mutex);
1640 return (DDI_FAILURE);
1641 }
1642
1643 switch (infocmd) {
1644 case DDI_INFO_DEVT2DEVINFO:
1645 *resultp = q->zq_dip;
1646 break;
1647 case DDI_INFO_DEVT2INSTANCE:
1648 *resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip);
1649 break;
1650 default:
1651 mutex_exit(&zev_mutex);
1652 return (DDI_FAILURE);
1653 }
1654 mutex_exit(&zev_mutex);
1655 return (DDI_SUCCESS);
1656 }
1657
/*
 * Device operations (dev_ops(9S)). No reset/power/bus handling is
 * needed for this pseudo device; quiesce is not required either.
 */
static struct dev_ops zev_dev_ops = {
	DEVO_REV,			/* driver build revision */
	0,				/* driver reference count */
	zev_getinfo,			/* getinfo */
	nulldev,			/* identify (obsolete) */
	nulldev,			/* probe (search for devices) */
	zev_attach,			/* attach */
	zev_detach,			/* detach */
	nodev,				/* reset (obsolete, use quiesce) */
	&zev_cb_ops,			/* character and block device ops */
	NULL,				/* bus driver ops */
	NULL,				/* power management, not needed */
	ddi_quiesce_not_needed,		/* quiesce */
};
1672
/*
 * Driver linkage (modldrv(9S)); the version string is assembled from
 * the ZEV_MAJOR/MINOR_VERSION macros at compile time.
 */
static struct modldrv zev_modldrv = {
	&mod_driverops,		/* all loadable modules use this */
	"ZFS event provider, v"
	    XSTRING(ZEV_MAJOR_VERSION) "."
	    XSTRING(ZEV_MINOR_VERSION),
				/* driver name and version info */
	&zev_dev_ops		/* ops method pointers */
};
1681
/* Module linkage (modlinkage(9S)): a single driver, NULL-terminated. */
static struct modlinkage zev_modlinkage = {
	MODREV_1,	/* fixed value */
	{
		&zev_modldrv,	/* driver linkage structure */
		NULL		/* list terminator */
	}
};
1689
1690 int
_init(void)1691 _init(void)
1692 {
1693 int error;
1694
1695 if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0)
1696 return (error);
1697 zev_attached = B_FALSE;
1698
1699 zev_queue_head = NULL;
1700 zev_queue_tail = NULL;
1701 zev_queue_len = 0;
1702 zev_muted_pools_head = NULL;
1703 zev_memory_allocated = 0;
1704 zev_memory_freed = 0;
1705 zev_queue_cnt = 0;
1706 zev_have_blocking_queues = 1;
1707
1708 mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL);
1709 cv_init(&zev_condvar, NULL, CV_DRIVER, NULL);
1710 rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL);
1711 mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL);
1712 zev_mark_id = gethrtime();
1713 mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL);
1714 zev_msg_sequence_number = gethrtime();
1715 bzero(&zev_statistics, sizeof(zev_statistics));
1716 bzero(&zev_pollhead, sizeof(zev_pollhead));
1717 bzero(&zev_queues, sizeof(zev_queues));
1718 zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN;
1719 if (zev_ioc_mute_pool("zg0")) {
1720 cmn_err(CE_WARN, "zev: could not init mute list");
1721 goto FAIL;
1722 }
1723
1724 if ((error = mod_install(&zev_modlinkage)) != 0) {
1725 cmn_err(CE_WARN, "zev: could not install module");
1726 goto FAIL;
1727 }
1728
1729 return (0);
1730 FAIL:
1731 /* free resources */
1732 cmn_err(CE_WARN, "zev: _init failed");
1733 mutex_destroy(&zev_mutex);
1734 ddi_soft_state_fini(&statep);
1735 return (error);
1736 }
1737
/* _info(9E): report module information via the common modlinkage. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&zev_modlinkage, modinfop));
}
1743
/*
 * _fini(9E): unload the module. Refused while the driver is attached
 * or queues still exist. The event list is drained twice: once before
 * mod_remove() to unblock any threads still waiting to append events,
 * and once afterwards to free anything queued in between. Finally the
 * mute list and all synchronization primitives are released.
 */
int
_fini(void)
{
	int error = 0;
	zev_msg_t *msg;
	zev_pool_list_entry_t *pe, *npe;

	mutex_enter(&zev_mutex);
	if (zev_attached == B_TRUE) {
		mutex_exit(&zev_mutex);
		return (SET_ERROR(EBUSY));
	}
	if (zev_queue_cnt != 0) {
		/* should never happen */
		mutex_exit(&zev_mutex);
		return (SET_ERROR(EBUSY));
	}

	/*
	 * avoid deadlock if event list is full: make sure threads currently
	 * blocking on the event list can append their event and then release
	 * rz_zev_rwlock. Since there should be no queues left when we
	 * reach this point we can simply empty the event list and then
	 * wake everybody.
	 */
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		zev_free(msg, sizeof(*msg) + msg->size);
	}
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);

	/* switch ZFS event callbacks back to default (again) */
	rw_enter(&rz_zev_rwlock, RW_WRITER);
	rz_zev_callbacks = rz_zev_default_callbacks;
	rz_zev_set_active(B_FALSE);
	rw_exit(&rz_zev_rwlock);

	/* no thread is inside of the callbacks anymore. Safe to remove. */

	/* unload module callbacks */
	if ((error = mod_remove(&zev_modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_remove failed: %d", error);
		return (error);
	}

	/* free resources */
	mutex_enter(&zev_mutex);
	/* drain events that were appended after the first drain above */
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		zev_free(msg, sizeof(*msg) + msg->size);
	}
	mutex_exit(&zev_mutex);
	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	/* free the muted-pools list */
	pe = zev_muted_pools_head;
	while (pe) {
		npe = pe;
		pe = pe->next;
		zev_free(npe, sizeof(*npe));
	}
	rw_exit(&zev_pool_list_rwlock);
	ddi_soft_state_fini(&statep);
	rw_destroy(&zev_pool_list_rwlock);
	cv_destroy(&zev_condvar);
	mutex_destroy(&zev_mutex);
	mutex_destroy(&zev_mark_id_mutex);
	mutex_destroy(&zev_queue_msg_mutex);

	return (0);
}
1816
1817