xref: /titanic_51/usr/src/uts/common/fs/zev/zev.c (revision 47d53391f758a1c4054673e23d09f599e3e7436e)
1 #include <sys/modctl.h>
2 #include <sys/ddi.h>
3 #include <sys/sunddi.h>
4 #include <sys/conf.h>
5 #include <sys/devops.h>
6 #include <sys/stat.h>
7 #include <sys/fs/zev.h>
8 #include <sys/zev_callbacks.h>
9 #include <sys/zfs_znode.h>
10 #include <sys/time.h>
11 #include <sys/sa.h>
12 #include <sys/zap.h>
13 #include <sys/time.h>
14 
/* byte offset of member m within struct type s */
#define	OFFSETOF(s, m)		((size_t)(&(((s *)0)->m)))

/* name of the default queue created at module load (see zev_queue_message()) */
#define ZEV_DEFAULT_QUEUE_NAME		"beaver"
/* minor 0 is the control device; event queues occupy the minors after it */
#define ZEV_CONTROL_DEVICE_MINOR	0
#define ZEV_MINOR_MIN			(ZEV_CONTROL_DEVICE_MINOR + 1)
#define ZEV_MINOR_MAX			(ZEV_MINOR_MIN + ZEV_MAX_QUEUES - 1)
21 
/*
 * Per-minor event queue.  Each queue presents a window into the global
 * message list (zev_queue_head/zev_queue_tail); zq_oldest points at the
 * oldest global message this queue has not yet delivered.
 */
typedef struct zev_queue {
	char			zq_name[ZEV_MAX_QUEUE_NAME_LEN+1]; /* device node name */
	minor_t			zq_minor_number;	/* this queue's dev minor */
	dev_info_t		*zq_dip;		/* devinfo for minor node */
	struct pollhead		zq_pollhead;		/* per-queue poll(2) head */
	uint64_t		zq_bytes_read;		/* bytes delivered via read() */
	uint64_t		zq_events_read;
	uint64_t		zq_bytes_discarded;	/* dropped when queue was full */
	uint64_t		zq_events_discarded;
	uint64_t		zq_bytes_total;		/* all bytes ever enqueued */
	uint64_t		zq_events_total;
	uint64_t		zq_wakeup_threshold;	/* pollwakeup() when len exceeds */
	uint16_t		zq_flags;		/* ZEV_FL_* behavior flags */
	uint16_t		zq_need_wakeup;
	/* protected by zev_mutex */
	int			zq_refcnt;		/* hold/release reference count */
	uint64_t		zq_queue_len;		/* bytes currently queued here */
	uint64_t		zq_queue_messages;	/* messages currently queued here */
	uint64_t		zq_max_queue_len;	/* byte limit for this queue */
	zev_msg_t		*zq_oldest;		/* oldest undelivered global msg */
	boolean_t		zq_busy;		/* device currently open */
	boolean_t		zq_to_be_removed;	/* removal in progress */
	zev_statistics_t	zq_statistics;		/* per-op event counters */
	kcondvar_t		zq_condvar;		/* signalled when msgs arrive */
} zev_queue_t;
47 
static void		*statep;	/* soft-state anchor, one zev_queue_t per minor */
struct pollhead		zev_pollhead;	/* global pollhead (not referenced in this part of the file) */

kmutex_t		zev_mutex;	/* protects queue list, refcounts, global msg list */
kcondvar_t		zev_condvar;	/* broadcast when queue space becomes available */
kmutex_t		zev_queue_msg_mutex;	/* serializes zev_queue_message() (see comment there) */
krwlock_t		zev_pool_list_rwlock;	/* protects zev_muted_pools_head list */
static zev_statistics_t	zev_statistics;		/* global (all-queue) statistics */
static boolean_t	zev_attached;		/* driver attach state; set elsewhere in this file */
static kmutex_t		zev_mark_id_mutex;	/* protects zev_mark_id */
static uint64_t		zev_mark_id = 0;	/* next ZEV_IOC_MARK id to hand out */

static uint64_t		zev_msg_sequence_number = 0;	/* global message sequence counter */
static zev_queue_t	*zev_queues[ZEV_MAX_QUEUES];	/* indexed by (minor - ZEV_MINOR_MIN) */
static int		zev_queue_cnt = 0;		/* number of registered queues */

/* memory accounting, updated atomically (see ZEV_MEM_ADD/ZEV_MEM_SUB usage) */
uint64_t	zev_memory_allocated = 0;
uint64_t	zev_memory_freed = 0;

/*
 * The longest potential message is from zev_zfs_mount() and
 * contains the mountpoint, which might be close to MAXPATHLEN bytes long.
 *
 * Another candidate is zev_znode_rename_cb() and contains three inode
 * numbers and two filenames of up to MAXNAMELEN bytes each.
 */
#define ZEV_MAX_MESSAGE_LEN	4096

/* global doubly-linked message list; head = oldest, tail = newest */
static zev_msg_t *zev_queue_head = NULL;
static zev_msg_t *zev_queue_tail = NULL;
static uint64_t zev_queue_len = 0;	/* number of messages in the global list */


/* singly-linked list entry for a pool whose events are suppressed */
typedef struct zev_pool_list_entry {
	struct zev_pool_list_entry	*next;
	char				name[MAXPATHLEN];
} zev_pool_list_entry_t;

static zev_pool_list_entry_t *zev_muted_pools_head = NULL;

/* poll-wakeup thread control; thread loops while this flag is nonzero */
static volatile int zev_wakeup_thread_run = 1;
static kthread_t *zev_poll_wakeup_thread = NULL;
90 
91 int
92 zev_queue_cmp(const void *a, const void *b)
93 {
94 	const zev_queue_t *qa = a;
95 	const zev_queue_t *qb = b;
96 	if (qa->zq_minor_number > qb->zq_minor_number)
97 		return 1;
98 	if (qa->zq_minor_number < qb->zq_minor_number)
99 		return -1;
100 	return 0;
101 }
102 
103 /* must be called with zev_mutex held */
/*
 * Free global messages that no per-device queue references anymore.
 * A message is unreferenced once its sequence number is below the
 * minimum of all queues' zq_oldest->seq.
 *
 * must be called with zev_mutex held
 */
void
zev_queue_trim(void)
{
	zev_msg_t *m;
	uint64_t oldest_message;
	zev_queue_t *q;
	int i;

	if (!zev_queue_tail)
		return;

	/* start one past the newest sequence number and minimize over queues */
	oldest_message = zev_queue_tail->seq + 1;  /* does not exist, yet. */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_oldest)
			continue;
		if (oldest_message > q->zq_oldest->seq)
			oldest_message = q->zq_oldest->seq;
	}

	/* remove msgs between oldest_message and zev_queue_head */
	while(zev_queue_head && (oldest_message > zev_queue_head->seq)) {
		m = zev_queue_head;
		zev_queue_head = m->next;
		if (zev_queue_head == NULL) {
			zev_queue_tail = NULL;
		} else {
			zev_queue_head->prev = NULL;
		}
		/* a message nobody ever read counts as discarded */
		if (m->read == 0) {
			zev_statistics.zev_bytes_discarded += m->size;
			zev_statistics.zev_cnt_discarded_events++;
		}
		zev_statistics.zev_queue_len -= m->size;
		zev_queue_len--;
		ZEV_FREE(m, sizeof(*m) + m->size);
	}
}
144 
145 /* must be called with zev_mutex held */
146 static void
147 zev_queue_hold(zev_queue_t *q)
148 {
149 	q->zq_refcnt++;
150 }
151 
152 /* must be called with zev_mutex held */
/*
 * Drop a reference on a queue.  When the last reference is released
 * and the queue is not flagged persistent, it is unregistered, its
 * pending messages are trimmed, and its minor node and soft state are
 * destroyed.
 *
 * must be called with zev_mutex held
 */
static void
zev_queue_release(zev_queue_t *q)
{
	q->zq_refcnt--;
	if (q->zq_refcnt > 0)
		return;

	/* last reference gone: queue must not be open anymore */
	ASSERT(q->zq_busy == B_FALSE);

	/* persistent queues will not be removed */
	if ((q->zq_flags & ZEV_FL_PERSISTENT) != 0)
		return;

	/* remove queue from queue list */
	zev_queues[q->zq_minor_number - ZEV_MINOR_MIN] = NULL;

	/* discard messages that no queue references anymore */
	zev_queue_trim();

	cv_destroy(&q->zq_condvar);
	ddi_remove_minor_node(q->zq_dip, q->zq_name);
	ddi_soft_state_free(statep, q->zq_minor_number);
	ZEV_MEM_SUB(sizeof(zev_queue_t));
	zev_queue_cnt--;
}
178 
179 int
180 zev_queue_new(zev_queue_t **queue,
181               dev_info_t *dip,
182               char *name,
183               uint64_t max_queue_len,
184               uint16_t flags)
185 {
186 	zev_queue_t *q;
187 	zev_queue_t *tmp;
188 	zev_msg_t *msg;
189 	int name_exists = 0;
190 	minor_t minor;
191 	char *p;
192 	int i;
193 
194 	if (max_queue_len > ZEV_MAX_QUEUE_LEN)
195 		return EINVAL;
196 	if (max_queue_len == 0)
197 		max_queue_len = ZEV_MAX_QUEUE_LEN;
198 	if (!strcmp(name, ZEV_CONTROL_DEVICE_NAME))
199 		return EINVAL;
200 	for (p = name; *p; p++) {
201 		if (*p >= 'a' && *p <= 'z')
202 			continue;
203 		if (*p >= '0' && *p <= '9')
204 			continue;
205 		if (*p == '.')
206 			continue;
207 		return EINVAL;
208 	}
209 
210 	mutex_enter(&zev_mutex);
211 
212 	/* find free minor number.*/
213 	/* if this were a frequent operation we'd have a free-minor list */
214 	for (minor = ZEV_MINOR_MIN; minor <= ZEV_MINOR_MAX; minor++) {
215 		tmp = zev_queues[minor - ZEV_MINOR_MIN];
216 		if (tmp == NULL)
217 			break;
218 	}
219 	if (tmp) {
220 		mutex_exit(&zev_mutex);
221 		return ENOSPC;
222 	}
223 
224 	if (ddi_soft_state_zalloc(statep, minor) != DDI_SUCCESS) {
225 		mutex_exit(&zev_mutex);
226 		return ENOSPC;
227 	}
228 	ZEV_MEM_ADD(sizeof(zev_queue_t));
229 
230 	q = ddi_get_soft_state(statep, minor);
231 	memset(q, 0, sizeof(*q));
232 	strncpy(q->zq_name, name, ZEV_MAX_QUEUE_NAME_LEN);
233 	q->zq_name[ZEV_MAX_QUEUE_NAME_LEN] = '\0';
234 	q->zq_max_queue_len = max_queue_len;
235 	q->zq_wakeup_threshold = ZEV_DEFAULT_POLL_WAKEUP_QUEUE_LEN;
236 	q->zq_flags = flags;
237 	q->zq_refcnt = 1;
238 	q->zq_dip = dip;
239 	q->zq_minor_number = minor;
240 	cv_init(&q->zq_condvar, NULL, CV_DRIVER, NULL);
241 
242 	/* insert into queue list */
243 	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
244 		/* if this were a frequent operation we'd have a name tree */
245 		if (zev_queues[i - ZEV_MINOR_MIN] == NULL)
246 			continue;
247 		if (!strcmp(q->zq_name, zev_queues[i-ZEV_MINOR_MIN]->zq_name)) {
248 			name_exists = 1;
249 			break;
250 		}
251 	}
252 	if (name_exists) {
253 		ddi_soft_state_free(statep, minor);
254 		ZEV_MEM_SUB(sizeof(zev_queue_t));
255 		mutex_exit(&zev_mutex);
256 		return EEXIST;
257 	}
258 	zev_queues[minor - ZEV_MINOR_MIN] = q;
259 	zev_queue_cnt++;
260 
261 	/* calculate current queue len and find head and tail */
262 	q->zq_oldest = zev_queue_tail;
263 	msg = zev_queue_tail;
264 	while ((msg != NULL) && (q->zq_queue_len < q->zq_max_queue_len)) {
265 		q->zq_queue_len += msg->size;
266 		q->zq_queue_messages++;
267 		q->zq_oldest = msg;
268 		msg = msg->prev;
269 	}
270 
271 	mutex_exit(&zev_mutex);
272 
273 	if (ddi_create_minor_node(dip, name,
274 	    S_IFCHR, minor, DDI_PSEUDO, 0) == DDI_FAILURE) {
275 		mutex_enter(&zev_mutex);
276 		zev_queues[minor - ZEV_MINOR_MIN] = NULL;
277 		zev_queue_cnt--;
278 		ddi_soft_state_free(statep, minor);
279 		ZEV_MEM_SUB(sizeof(zev_queue_t));
280 		mutex_exit(&zev_mutex);
281 		return EFAULT;
282 	}
283 
284 	*queue = q;
285 	return 0;
286 }
287 
/*
 * poll() wakeup thread.  Used to check periodically whether we have
 * bytes left in the queue that have not yet been turned into a
 * pollwakeup() call.  This is meant to ensure a maximum waiting
 * time until an event is presented as a poll wakeup, while at
 * the same time not making every single event into a poll wakeup
 * of its own.
 */
/*
 * Issue pollwakeup() for every open queue whose pending data warrants
 * it.  With flush_all set, any non-empty queue is woken; otherwise only
 * queues above their wakeup threshold.
 */
static void
zev_poll_wakeup(boolean_t flush_all)
{
	zev_queue_t *q;
	int i;

	/*
	 * This loop works with hold() and release() because
	 * pollwakeup() requires us to release our locks before calling it.
	 *
	 * from pollwakeup(9F):
	 *
	 *   "Driver defined locks should not be held across calls
	 *    to this function."
	 */

	/* wake up threads for each individual queue */
	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (q == NULL)
			continue;
		if (!q->zq_busy)
			continue;
		if (!q->zq_queue_len)
			continue;
		if ((flush_all) ||
		    (q->zq_queue_len > q->zq_wakeup_threshold)) {
			/* the hold keeps q alive while zev_mutex is dropped */
			zev_queue_hold(q);
			mutex_exit(&zev_mutex);
			pollwakeup(&q->zq_pollhead, POLLIN);
			mutex_enter(&zev_mutex);
			zev_queue_release(q);
		}
	}
	mutex_exit(&zev_mutex);
}
334 
335 static void
336 zev_poll_wakeup_thread_main(void)
337 {
338 	while (zev_wakeup_thread_run) {
339 		delay(drv_usectohz(100 * 1000)); /* sleep 100ms */
340 
341 		zev_poll_wakeup(B_TRUE);
342 	}
343 	thread_exit();
344 }
345 
346 static int
347 zev_ioc_mute_pool(char *poolname)
348 {
349 	zev_pool_list_entry_t *pe;
350 	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
351 	/* pool already muted? */
352 	for (pe=zev_muted_pools_head; pe; pe=pe->next) {
353 		if (!strcmp(pe->name, poolname)) {
354 			rw_exit(&zev_pool_list_rwlock);
355 			return EEXIST;
356 		}
357 	}
358 	pe = ZEV_ZALLOC(sizeof(*pe));
359 	if (!pe) {
360 		rw_exit(&zev_pool_list_rwlock);
361 		return ENOMEM;
362 	}
363 	(void) strncpy(pe->name, poolname, sizeof(pe->name));
364 	pe->next = zev_muted_pools_head;
365 	zev_muted_pools_head = pe;
366 	rw_exit(&zev_pool_list_rwlock);
367 	return (0);
368 }
369 
370 static int
371 zev_ioc_unmute_pool(char *poolname)
372 {
373 	zev_pool_list_entry_t *pe, *peprev;
374 
375 	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
376 	/* pool muted? */
377 	peprev = NULL;
378 	for (pe=zev_muted_pools_head; pe; pe=pe->next) {
379 		if (!strcmp(pe->name, poolname))
380 			break;
381 		peprev = pe;
382 	}
383 	if (pe) {
384 		rw_exit(&zev_pool_list_rwlock);
385 		return ENOENT;
386 	}
387 
388 	if (peprev != NULL) {
389 		peprev->next = pe->next;
390 	} else {
391 		zev_muted_pools_head = pe->next;
392 	}
393 	ZEV_FREE(pe, sizeof(*pe));
394 	rw_exit(&zev_pool_list_rwlock);
395 	return (0);
396 }
397 
398 int
399 zev_skip_pool(objset_t *os)
400 {
401 	zev_pool_list_entry_t *pe;
402 	dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
403 	rw_enter(&zev_pool_list_rwlock, RW_READER);
404 	for (pe=zev_muted_pools_head; pe; pe=pe->next) {
405 		if (!strcmp(pe->name, dp->dp_spa->spa_name)) {
406 			rw_exit(&zev_pool_list_rwlock);
407 			return 1;
408 		}
409 	}
410 	rw_exit(&zev_pool_list_rwlock);
411 	return 0;
412 }
413 
414 static void
415 zev_update_statistics(int op, zev_statistics_t *stat)
416 {
417 	switch (op) {
418 	case ZEV_OP_ERROR:
419 		stat->zev_cnt_errors++;
420 		break;
421 	case ZEV_OP_MARK:
422 		stat->zev_cnt_marks++;
423 		break;
424 	case ZEV_OP_ZFS_MOUNT:
425 		stat->zev_cnt_zfs_mount++;
426 		break;
427 	case ZEV_OP_ZFS_UMOUNT:
428 		stat->zev_cnt_zfs_umount++;
429 		break;
430 	case ZEV_OP_ZVOL_WRITE:
431 		stat->zev_cnt_zvol_write++;
432 		break;
433 	case ZEV_OP_ZVOL_TRUNCATE:
434 		stat->zev_cnt_zvol_truncate++;
435 		break;
436 	case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE:
437 		stat->zev_cnt_znode_close_after_update++;
438 		break;
439 	case ZEV_OP_ZNODE_CREATE:
440 		stat->zev_cnt_znode_create++;
441 		break;
442 	case ZEV_OP_ZNODE_REMOVE:
443 		stat->zev_cnt_znode_remove++;
444 		break;
445 	case ZEV_OP_ZNODE_LINK:
446 		stat->zev_cnt_znode_link++;
447 		break;
448 	case ZEV_OP_ZNODE_SYMLINK:
449 		stat->zev_cnt_znode_symlink++;
450 		break;
451 	case ZEV_OP_ZNODE_RENAME:
452 		stat->zev_cnt_znode_rename++;
453 		break;
454 	case ZEV_OP_ZNODE_WRITE:
455 		stat->zev_cnt_znode_write++;
456 		break;
457 	case ZEV_OP_ZNODE_TRUNCATE:
458 		stat->zev_cnt_znode_truncate++;
459 		break;
460 	case ZEV_OP_ZNODE_SETATTR:
461 		stat->zev_cnt_znode_setattr++;
462 		break;
463 	case ZEV_OP_ZNODE_ACL:
464 		stat->zev_cnt_znode_acl++;
465 		break;
466 	}
467 }
468 
469 void
470 zev_queue_message(int op, zev_msg_t *msg)
471 {
472 	zev_queue_t *q;
473 	int wakeup = 0;
474 	zev_msg_t *m;
475 	int i;
476 
477 	msg->next = NULL;
478 	msg->prev = NULL;
479 	msg->read = 0;
480 
481 	if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
482 		zev_queue_error(op, "unknown op id encountered: %d", op);
483 		ZEV_FREE(msg, sizeof(*msg) + msg->size);
484 		return;
485 	}
486 
487 	/*
488 	 * This mutex protects us agains race conditions when several
489 	 * threads want to queue a message and one or more queues are
490 	 * full:  we release zev_mutex to wait for the queues to become
491 	 * less-than-full, but we don't know in which order the waiting
492 	 * threads will be awoken.  If it's not the same order in which
493 	 * they went to sleep we might mark different messages as "newest"
494 	 * in different queues, and so we might have dupes or even
495 	 * skip messages.
496 	 */
497 	mutex_enter(&zev_queue_msg_mutex);
498 
499 	mutex_enter(&zev_mutex);
500 
501 	/*
502 	 * When the module is loaded, the default behavior ist to
503 	 * put all events into a queue and block if the queue is full.
504 	 * This is done even before the pseudo device is attached.
505 	 * This way, no events are lost.
506 	 *
507 	 * To discard events entirely the "beaver" queue,
508 	 * which never discards anything, has to be removed.
509 	 */
510 
511 	if (zev_queue_cnt == 0) {
512 		mutex_exit(&zev_mutex);
513 		mutex_exit(&zev_queue_msg_mutex);
514 		return;
515 	}
516 
517 	/* put message into global queue */
518 	msg->seq = zev_msg_sequence_number++;
519 	while (zev_statistics.zev_max_queue_len &&
520 	    zev_statistics.zev_queue_len >= zev_statistics.zev_max_queue_len) {
521 		/* queue full.  block until it's been shrunk. */
522 		cv_wait(&zev_condvar, &zev_mutex);
523 	}
524 
525 	if (zev_queue_tail == NULL) {
526 		zev_queue_head = zev_queue_tail = msg;
527 	} else {
528 		zev_queue_tail->next = msg;
529 		msg->prev = zev_queue_tail;
530 		zev_queue_tail = msg;
531 	}
532 	zev_queue_len++;
533 	zev_statistics.zev_cnt_total_events++;
534 	zev_statistics.zev_queue_len += msg->size;
535 
536 	/* update per-device queues */
537 	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
538 		q = zev_queues[i - ZEV_MINOR_MIN];
539 		if (!q)
540 			continue;
541 
542 		zev_queue_hold(q);
543 
544 		/* make sure queue has enough room */
545 		while (q->zq_max_queue_len &&
546 		       q->zq_queue_len > q->zq_max_queue_len) {
547 
548 			if (q->zq_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) {
549 				/* block until queue has been shrunk. */
550 				cv_wait(&zev_condvar, &zev_mutex);
551 			} else {
552 				/* discard msgs until queue is small enough */
553 				while (q->zq_queue_len > q->zq_max_queue_len) {
554 					m = q->zq_oldest;
555 					if (m == NULL)
556 						break;
557 					q->zq_events_discarded++;
558 					q->zq_bytes_discarded += m->size;
559 					q->zq_oldest = m->next;
560 					q->zq_queue_len -= m->size;
561 					q->zq_queue_messages--;
562 				}
563 			}
564 		}
565 
566 		/* register new message at the end of the queue */
567 		q->zq_queue_len += msg->size;
568 		q->zq_queue_messages++;
569 		q->zq_bytes_total += msg->size;
570 		q->zq_events_total++;
571 		if (q->zq_oldest == NULL)
572 			q->zq_oldest = msg;
573 
574 		zev_update_statistics(op, &q->zq_statistics);
575 
576 		if (q->zq_queue_len > q->zq_wakeup_threshold)
577 			wakeup = 1;
578 		if (q->zq_queue_len == msg->size)  /* queue was empty */
579 			cv_broadcast(&q->zq_condvar);
580 
581 		zev_queue_release(q);
582 	}
583 
584 	zev_queue_trim();
585 
586 	zev_update_statistics(op, &zev_statistics);
587 	mutex_exit(&zev_mutex);
588 	mutex_exit(&zev_queue_msg_mutex);
589 
590 	/* one or more queues need a pollwakeup() */
591 	if (op == ZEV_OP_MARK) {
592 		zev_poll_wakeup(B_TRUE);
593 	} else if (wakeup) {
594 		zev_poll_wakeup(B_FALSE);
595 	}
596 
597 	return;
598 }
599 
/*
 * Format an error message and queue it as a ZEV_OP_ERROR event.
 * "op" is the operation whose processing failed.  If the formatted
 * text does not fit into ZEV_MAX_MESSAGE_LEN, the event is dropped
 * with a console warning.
 */
void
zev_queue_error(int op, char *fmt, ...)
{
	char buf[ZEV_MAX_MESSAGE_LEN];
	va_list ap;
	int len;
	zev_msg_t *msg = NULL;
	zev_error_t *rec;
	int msg_size;

	va_start(ap, fmt);
	len = vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	if (len >= sizeof(buf)) {
		/* message would have been truncated; drop it instead */
		cmn_err(CE_WARN, "zev: can't report error - "
		        "dropping event entirely.");
		return;
	}

	/* event record: zev_error_t header followed by NUL-terminated text */
	msg_size = sizeof(*rec) + len + 1;
	msg = ZEV_ALLOC(sizeof(*msg) + msg_size);
	msg->size = msg_size;
	rec = (zev_error_t *)(msg + 1);
	rec->record_len = msg_size;
	rec->op = ZEV_OP_ERROR;
	rec->op_time = ddi_get_time();
	rec->guid = 0;
	rec->failed_op = op;
	rec->errstr_len = len;
	(void) memcpy(ZEV_ERRSTR(rec), buf, len + 1);

	/* zev_queue_message() takes ownership of msg */
	zev_queue_message(ZEV_OP_ERROR, msg);
	return;
}
634 
635 static int
636 zev_find_queue(zev_queue_t **out, zev_queue_t *req_q, zev_queue_name_t *name)
637 {
638 	char namebuf[ZEV_MAX_QUEUE_NAME_LEN+1];
639 	zev_queue_t *q;
640 	int i;
641 
642 	*out = NULL;
643 
644 	if (name->zev_namelen == 0) {
645 		if (req_q->zq_minor_number == ZEV_CONTROL_DEVICE_MINOR)
646 			return EINVAL;
647 		zev_queue_hold(req_q);
648 		*out = req_q;
649 		return 0;
650 	}
651 
652 	if (name->zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
653 		return EINVAL;
654 	strncpy(namebuf, name->zev_name, name->zev_namelen);
655 	namebuf[name->zev_namelen] = '\0';
656 
657 	mutex_enter(&zev_mutex);
658 	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
659 		q = zev_queues[i - ZEV_MINOR_MIN];
660 		if (!q)
661 			continue;
662 		if (!strcmp(q->zq_name, namebuf)) {
663 			zev_queue_hold(q);
664 			mutex_exit(&zev_mutex);
665 			*out = q;
666 			return 0;
667 		}
668 	}
669 	mutex_exit(&zev_mutex);
670 	return ENOENT;
671 }
672 
673 static int
674 zev_ioc_get_queue_statistics(zev_queue_t *req_q, intptr_t arg, int mode)
675 {
676 	zev_ioctl_get_queue_statistics_t gs;
677 	zev_queue_t *q;
678 	int ret;
679 
680 	if (ddi_copyin((void *)arg, &gs, sizeof(gs), mode) != 0)
681 		return EFAULT;
682 
683 	ret = zev_find_queue(&q, req_q, &gs.zev_queue_name);
684 	if (ret)
685 		return ret;
686 
687 	/* ddi_copyout() can take a long time.  Better make
688 	   a copy to be able to release the mutex faster. */
689 	mutex_enter(&zev_mutex);
690 	memcpy(&gs.zev_statistics, &q->zq_statistics,sizeof(gs.zev_statistics));
691 	gs.zev_statistics.zev_queue_len = q->zq_queue_len;
692 	gs.zev_statistics.zev_bytes_read = q->zq_bytes_read;
693 	gs.zev_statistics.zev_bytes_discarded = q->zq_bytes_discarded;
694 	gs.zev_statistics.zev_max_queue_len = q->zq_max_queue_len;
695 	gs.zev_statistics.zev_cnt_discarded_events = q->zq_events_discarded;
696 	gs.zev_statistics.zev_cnt_total_events = q->zq_events_total;
697 	zev_queue_release(q);
698 	mutex_exit(&zev_mutex);
699 
700 	if (ddi_copyout(&gs, (void *)arg, sizeof(gs), mode) != 0)
701 		return EFAULT;
702 	return 0;
703 }
704 
705 static int
706 zev_ioc_set_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
707 {
708 	zev_ioctl_set_queue_properties_t qp;
709 	zev_queue_t *q;
710 	uint64_t old_max;
711 	uint64_t old_flags;
712 	int ret;
713 
714 	if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
715 		return EFAULT;
716 	if (qp.zev_max_queue_len > ZEV_MAX_QUEUE_LEN)
717 		return EINVAL;
718 	if (qp.zev_poll_wakeup_threshold > ZEV_MAX_POLL_WAKEUP_QUEUE_LEN)
719 		return EINVAL;
720 
721 	ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
722 	if (ret)
723 		return ret;
724 
725 	mutex_enter(&zev_mutex);
726 
727 	/*
728 	 * Note: if the PERSISTENT flag is cleared, and the queue is not busy,
729 	 * the queue should be removed by zev_queue_release() in zev_ioctl().
730 	 */
731 	old_flags = qp.zev_flags;
732 	q->zq_flags = qp.zev_flags;
733 	if ((old_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL) &&
734 	   (!(qp.zev_flags & ZEV_FL_BLOCK_WHILE_QUEUE_FULL))) {
735 		/* queue is no longer blocking - wake blocked threads */
736 		cv_broadcast(&zev_condvar);
737 	}
738 
739 	old_max = q->zq_max_queue_len;
740 	q->zq_max_queue_len = qp.zev_max_queue_len;
741 	if (q->zq_max_queue_len < old_max)
742 		zev_queue_trim();
743 	if (q->zq_max_queue_len > old_max)
744 		cv_broadcast(&zev_condvar);	/* threads may be waiting */
745 
746 	if ((qp.zev_poll_wakeup_threshold < q->zq_wakeup_threshold) &&
747 	    (qp.zev_poll_wakeup_threshold <= q->zq_queue_len))
748 		pollwakeup(&q->zq_pollhead, POLLIN);
749 	q->zq_wakeup_threshold = qp.zev_poll_wakeup_threshold;
750 
751 	zev_queue_release(q);
752 	mutex_exit(&zev_mutex);
753 	return 0;
754 }
755 
756 static int
757 zev_ioc_get_queue_properties(zev_queue_t *req_q, intptr_t arg, int mode)
758 {
759 	zev_ioctl_get_queue_properties_t qp;
760 	zev_queue_t *q;
761 	int ret;
762 
763 	if (ddi_copyin((void *)arg, &qp, sizeof(qp), mode) != 0)
764 		return EFAULT;
765 
766 	ret = zev_find_queue(&q, req_q, &qp.zev_queue_name);
767 	if (ret)
768 		return ret;
769 
770 	mutex_enter(&zev_mutex);
771 	qp.zev_max_queue_len = q->zq_max_queue_len;
772 	qp.zev_flags = q->zq_flags;
773 	qp.zev_poll_wakeup_threshold = q->zq_wakeup_threshold;
774 	zev_queue_release(q);
775 	mutex_exit(&zev_mutex);
776 
777 	if (ddi_copyout(&qp, (void *)arg, sizeof(qp), mode) != 0)
778 		return EFAULT;
779 	return 0;
780 }
781 
782 static int
783 zev_ioc_add_queue(zev_queue_t *req_q, intptr_t arg, int mode)
784 {
785 	zev_ioctl_add_queue_t aq;
786 	zev_queue_t *new_q;
787 	char name[ZEV_MAX_QUEUE_NAME_LEN+1];
788 
789 	if (ddi_copyin((void *)arg, &aq, sizeof(aq), mode) != 0)
790 		return EFAULT;
791 
792 	if (aq.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
793 		return EINVAL;
794 	strncpy(name, aq.zev_name, aq.zev_namelen);
795 	name[aq.zev_namelen] = '\0';
796 
797 	return zev_queue_new(&new_q, req_q->zq_dip, name,
798 	                     aq.zev_max_queue_len, aq.zev_flags);
799 }
800 
/*
 * ZEV_IOC_REMOVE_QUEUE handler: mark the named queue for removal and
 * drop the list's reference to it.  Fails with EBUSY while the queue
 * device is open; the actual teardown happens in zev_queue_release()
 * once the last reference is gone.
 */
static int
zev_ioc_remove_queue(zev_queue_t *req_q, intptr_t arg, int mode)
{
	zev_ioctl_remove_queue_t rq;
	zev_queue_t *q;
	char name[ZEV_MAX_QUEUE_NAME_LEN+1];
	int found = 0;
	int i;

	if (ddi_copyin((void *)arg, &rq, sizeof(rq), mode) != 0)
		return EFAULT;

	if (rq.zev_queue_name.zev_namelen > ZEV_MAX_QUEUE_NAME_LEN)
		return EINVAL;
	/* the ioctl name is not necessarily NUL-terminated; terminate it */
	strncpy(name, rq.zev_queue_name.zev_name,
	        rq.zev_queue_name.zev_namelen);
	name[rq.zev_queue_name.zev_namelen] = '\0';

	mutex_enter(&zev_mutex);
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i - ZEV_MINOR_MIN];
		if (!q)
			continue;
		if (!strcmp(q->zq_name, name)) {
			found = 1;
			break;
		}
	}
	if (!found) {
		mutex_exit(&zev_mutex);
		return ENOENT;
	}

	if (q->zq_busy) {
		mutex_exit(&zev_mutex);
		return EBUSY;
	}
	/*
	 * clear flags, so that persistent queues are removed as well
	 * and the queue becomes non-blocking.
	 */
	q->zq_flags = 0;
	/* drop the list's reference only once, even if called repeatedly */
	if (q->zq_to_be_removed == B_FALSE) {
		q->zq_to_be_removed = B_TRUE;
		zev_queue_release(q);
	}
	/* some threads might be waiting for this queue to become writable */
	cv_broadcast(&zev_condvar);

	mutex_exit(&zev_mutex);
	return 0;
}
853 
854 static int
855 zev_ioc_get_debug_info(zev_queue_t *req_q, intptr_t arg, int mode)
856 {
857 	zev_ioctl_debug_info_t di;
858 	uint64_t mem_allocated = atomic_add_64_nv(&zev_memory_allocated, 0);
859 	uint64_t mem_freed     = atomic_add_64_nv(&zev_memory_freed, 0);
860 
861 	di.zev_memory_allocated = mem_allocated - mem_freed;
862 	if (ddi_copyout(&di, (void *)arg, sizeof(di), mode) != 0)
863 		return EFAULT;
864 	return 0;
865 }
866 
867 static int
868 zev_ioc_get_queue_list(zev_queue_t *req_q, intptr_t arg, int mode)
869 {
870 	zev_ioctl_get_queue_list_t gql;
871 	zev_queue_t *q;
872 	int i = 0;
873 	int count = 0;
874 
875 	memset(&gql, 0, sizeof(gql));
876 
877 	mutex_enter(&zev_mutex);
878 	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
879 		q = zev_queues[i - ZEV_MINOR_MIN];
880 		if (!q)
881 			continue;
882 		strncpy(gql.zev_queue_name[count].zev_name,
883 		    q->zq_name, ZEV_MAX_QUEUE_NAME_LEN);
884 		gql.zev_queue_name[count].zev_namelen = strlen(q->zq_name);
885 		count++;
886 	}
887 	gql.zev_n_queues = count;
888 	mutex_exit(&zev_mutex);
889 
890 	if (ddi_copyout(&gql, (void *)arg, sizeof(gql), mode) != 0)
891 		return EFAULT;
892 	return 0;
893 }
894 
895 /* ARGSUSED */
896 static int
897 zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
898 {
899 	zev_statistics_t zs;
900 	zev_ioctl_poolarg_t pa;
901 	zev_ioctl_mark_t mark;
902 	zev_mark_t *rec;
903 	int msg_size;
904 	zev_msg_t *msg;
905 	uint64_t len;
906 	uint64_t mark_id;
907 	minor_t minor;
908 	zev_queue_t *req_q;
909 	int ret = 0;
910 
911 	minor = getminor(dev);
912 	mutex_enter(&zev_mutex);
913 	if ((req_q = ddi_get_soft_state(statep, minor)) == NULL) {
914 		mutex_exit(&zev_mutex);
915 		return (ENXIO);
916 	}
917 	zev_queue_hold(req_q);
918 	mutex_exit(&zev_mutex);
919 	/*
920 	 * all structures passed between kernel and userspace
921 	 * are now compatible between 64 and 32 bit.  Model
922 	 * conversion can be ignored.
923 	 */
924 	switch (cmd) {
925 	case ZEV_IOC_GET_GLOBAL_STATISTICS:
926 		/* ddi_copyout() can take a long time.  Better make
927 		   a copy to be able to release the mutex faster. */
928 		mutex_enter(&zev_mutex);
929 		(void) memcpy(&zs, &zev_statistics, sizeof(zs));
930 		mutex_exit(&zev_mutex);
931 		if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0)
932 			ret = EFAULT;
933 		break;
934 	case ZEV_IOC_GET_QUEUE_STATISTICS:
935 		ret = zev_ioc_get_queue_statistics(req_q, arg, mode);
936 		break;
937 	case ZEV_IOC_MUTE_POOL:
938 	case ZEV_IOC_UNMUTE_POOL:
939 		if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0) {
940 			ret = EFAULT;
941 			break;
942 		}
943 		if (pa.zev_poolname_len >=MAXPATHLEN) {
944 			ret = EINVAL;
945 			break;
946 		}
947 		pa.zev_poolname[pa.zev_poolname_len] = '\0';
948 		if (cmd == ZEV_IOC_MUTE_POOL) {
949 			ret = zev_ioc_mute_pool(pa.zev_poolname);
950 		} else {
951 			ret = zev_ioc_unmute_pool(pa.zev_poolname);
952 		}
953 		break;
954 	case ZEV_IOC_SET_MAX_QUEUE_LEN:
955 		if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0) {
956 			ret = EFAULT;
957 			break;
958 		}
959 		if (len > ZEV_MAX_QUEUE_LEN) {
960 			ret = EINVAL;
961 			break;
962 		}
963 		mutex_enter(&zev_mutex);
964 		zev_statistics.zev_max_queue_len = len;
965 		cv_broadcast(&zev_condvar);
966 		mutex_exit(&zev_mutex);
967 		break;
968 	case ZEV_IOC_GET_QUEUE_PROPERTIES:
969 		ret = zev_ioc_get_queue_properties(req_q, arg, mode);
970 		break;
971 	case ZEV_IOC_SET_QUEUE_PROPERTIES:
972 		ret = zev_ioc_set_queue_properties(req_q, arg, mode);
973 		break;
974 	case ZEV_IOC_MARK:
975 		if (ddi_copyin((void *)arg, &mark, sizeof(mark), mode) != 0) {
976 			ret = EFAULT;
977 			break;
978 		}
979 		/* prepare message */
980 		msg_size = sizeof(*rec) + mark.zev_payload_len + 1;
981 		msg = ZEV_ALLOC(sizeof(*msg) + msg_size);
982 		msg->size = msg_size;
983 		rec = (zev_mark_t *)(msg + 1);
984 		rec->record_len = msg_size;
985 		rec->op = ZEV_OP_MARK;
986 		rec->op_time = ddi_get_time();
987 		rec->guid = mark.zev_guid;
988 		rec->payload_len = mark.zev_payload_len;
989 		/* get payload */
990 		if (ddi_copyin(((char *)arg) + sizeof(mark),
991 		               ZEV_PAYLOAD(rec),
992 		               mark.zev_payload_len, mode) != 0) {
993 			ZEV_FREE(msg, msg_size);
994 			ret = EFAULT;
995 			break;
996 		}
997 		*(ZEV_PAYLOAD(rec) + mark.zev_payload_len) = '\0';
998 		/* get mark id and queue message */
999 		mutex_enter(&zev_mark_id_mutex);
1000 		mark_id = zev_mark_id++;
1001 		mutex_exit(&zev_mark_id_mutex);
1002 		rec->mark_id = mark_id;
1003 		zev_queue_message(ZEV_OP_MARK, msg);
1004 		/* report mark id to userland, ignore errors */
1005 		mark.zev_mark_id = mark_id;
1006 		ddi_copyout(&mark, (void *)arg, sizeof(mark), mode);
1007 		break;
1008 	case ZEV_IOC_ADD_QUEUE:
1009 		if (minor != ZEV_CONTROL_DEVICE_MINOR) {
1010 			ret = EACCES;
1011 			break;
1012 		}
1013 		ret = zev_ioc_add_queue(req_q, arg, mode);
1014 		break;
1015 	case ZEV_IOC_REMOVE_QUEUE:
1016 		if (minor != ZEV_CONTROL_DEVICE_MINOR) {
1017 			ret = EACCES;
1018 			break;
1019 		}
1020 		ret = zev_ioc_remove_queue(req_q, arg, mode);
1021 		break;
1022 	case ZEV_IOC_GET_DEBUG_INFO:
1023 		ret = zev_ioc_get_debug_info(req_q, arg, mode);
1024 		break;
1025 	case ZEV_IOC_GET_QUEUE_LIST:
1026 		ret = zev_ioc_get_queue_list(req_q, arg, mode);
1027 		break;
1028 	default:
1029 		/* generic "ioctl unknown" error */
1030 		ret = ENOTTY;
1031 	}
1032 
1033 	mutex_enter(&zev_mutex);
1034 	zev_queue_release(req_q);
1035 	mutex_exit(&zev_mutex);
1036 	return (ret);
1037 }
1038 
1039 static int
1040 zev_chpoll(dev_t dev, short events, int anyyet,
1041     short *reventsp, struct pollhead **phpp)
1042 {
1043 	int minor;
1044 	short revent = 0;
1045 	zev_queue_t *q;
1046 
1047 	/* use minor-specific queue context and it's pollhead */
1048 	minor = getminor(dev);
1049 	if (minor == ZEV_CONTROL_DEVICE_MINOR)
1050 		return (EINVAL);
1051 	mutex_enter(&zev_mutex);
1052 	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
1053 		mutex_exit(&zev_mutex);
1054 		return (ENXIO);
1055 	}
1056 	revent = 0;
1057 	if ((events & POLLIN)) {
1058 		if (q->zq_oldest)
1059 			revent |= POLLIN;
1060 	}
1061 	if (revent == 0) {
1062 		if (!anyyet) {
1063 			*phpp = &q->zq_pollhead;
1064 		}
1065 	}
1066 	*reventsp = revent;
1067 	mutex_exit(&zev_mutex);
1068 	return (0);
1069 }
1070 
/*
 * read(2) entry point for event queue devices: copies out as many
 * whole messages as fit into the caller's buffer.  Blocks
 * (interruptibly) while the queue is empty; returns E2BIG if the
 * buffer cannot hold even the first pending message.
 */
/* ARGSUSED */
static int
zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p)
{
	minor_t minor;
	offset_t off;
	int ret = 0;
	zev_msg_t *msg;
	char *data;
	zev_queue_t *q;

	minor = getminor(dev);
	if (minor == ZEV_CONTROL_DEVICE_MINOR)
		return (EINVAL);

	mutex_enter(&zev_mutex);
	q = ddi_get_soft_state(statep, minor);
	if (q == NULL) {
		mutex_exit(&zev_mutex);
		return (ENXIO);
	}
	off = uio_p->uio_loffset;
	msg = q->zq_oldest;
	/* block until at least one message is available */
	while (msg == NULL) {
		if (!ddi_can_receive_sig()) {
			/*
			 * read() shouldn't block because this thread
			 * can't receive signals. (e.g., it might be
			 * torn down by exit() right now.)
			 */
			mutex_exit(&zev_mutex);
			return 0;
		}
		if (cv_wait_sig(&q->zq_condvar, &zev_mutex) == 0) {
			/* signal received. */
			mutex_exit(&zev_mutex);
			return EINTR;
		}
		msg = q->zq_oldest;
	}
	/* buffer must hold at least the first message in one piece */
	if (msg->size > uio_p->uio_resid) {
		mutex_exit(&zev_mutex);
		return E2BIG;
	}
	/* copy out whole messages while they fit */
	while (msg && uio_p->uio_resid >= msg->size) {
		data = (char *)(msg + 1);
		ret = uiomove(data, msg->size, UIO_READ, uio_p);
		if (ret != 0) {
			mutex_exit(&zev_mutex);
			cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
			uio_p->uio_loffset = off;
			return (ret);
		}
		/* advance this queue's window past the delivered message */
		q->zq_oldest = msg->next;
		q->zq_bytes_read += msg->size;
		q->zq_queue_len -= msg->size;
		q->zq_queue_messages--;
		msg->read++;
		msg = q->zq_oldest;
	}
	/* the queue shrank; wake writers blocked on a full queue */
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);
	/* this device has no meaningful file offset; keep it stable */
	uio_p->uio_loffset = off;
	return 0;
}
1136 
1137 /* ARGSUSED */
1138 static int
1139 zev_close(dev_t dev, int flag, int otyp, cred_t *crepd)
1140 {
1141 	zev_queue_t *q;
1142 	int minor;
1143 
1144 	minor = getminor(dev);
1145 	if (otyp != OTYP_CHR)
1146 		return (EINVAL);
1147 	mutex_enter(&zev_mutex);
1148 	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
1149 		mutex_exit(&zev_mutex);
1150 		return (ENXIO);
1151 	}
1152 	if (q->zq_busy != B_TRUE) {
1153 		mutex_exit(&zev_mutex);
1154 		return (EINVAL);
1155 	}
1156 	q->zq_busy = B_FALSE;
1157 	if ((q->zq_flags & ZEV_FL_PERSISTENT) == 0)
1158 		zev_queue_release(q);
1159 	mutex_exit(&zev_mutex);
1160 	return (0);
1161 }
1162 
1163 /* ARGSUSED */
1164 static int
1165 zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1166 {
1167 	zev_queue_t *q;
1168 	minor_t minor;
1169 
1170 	minor = getminor(*devp);
1171 	if (otyp != OTYP_CHR)
1172 		return (EINVAL);
1173 	if (drv_priv(credp) != 0)
1174 		return (EPERM);
1175 	mutex_enter(&zev_mutex);
1176 	if ((q = ddi_get_soft_state(statep, minor)) == NULL) {
1177 		mutex_exit(&zev_mutex);
1178 		return (ENXIO);
1179 	}
1180 	if (minor == ZEV_CONTROL_DEVICE_MINOR) {
1181 		/* control device may be used in parallel */
1182 		q->zq_busy = B_TRUE;
1183 		mutex_exit(&zev_mutex);
1184 		return 0;
1185 	}
1186 	if (q->zq_busy == B_TRUE) {
1187 		mutex_exit(&zev_mutex);
1188 		return (EBUSY);
1189 	}
1190 	q->zq_busy = B_TRUE;	/* can only be opened exclusively */
1191 	mutex_exit(&zev_mutex);
1192 	return (0);
1193 }
1194 
/*
 * Character device entry points for zev devices.  Only open, close,
 * read, ioctl and chpoll are implemented; all block-oriented and
 * write entry points are nodev.
 */
static struct cb_ops zev_cb_ops = {
	zev_open,		/* open */
	zev_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	zev_read,		/* read */
	nodev,			/* write */
	zev_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	zev_chpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_MP | D_64BIT,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* aread */
	nodev,			/* awrite */
};
1215 
/*
 * Tear down everything zev_attach() set up for instance 0: the minor
 * nodes, the poll wakeup thread, the "ctrl" dummy queue and every
 * remaining event queue.  Called from zev_detach() and from the
 * zev_attach() failure path.
 */
static void
zev_free_instance(dev_info_t *dip)
{
	int instance;
	zev_queue_t *q;
	int i;

	instance = ddi_get_instance(dip);
	if (instance != 0) {
		/* this driver only ever attaches instance 0 */
		cmn_err(CE_WARN, "zev: tried to free instance != 0 (%d)",
		        instance);
		return;
	}

	ddi_remove_minor_node(dip, NULL);

	/* stop pollwakeup thread */
	zev_wakeup_thread_run = 0;
	if (zev_poll_wakeup_thread != NULL) {
		thread_join(zev_poll_wakeup_thread->t_did);
		zev_poll_wakeup_thread = NULL;
	}

	mutex_enter(&zev_mutex);

	/* remove "ctrl" dummy queue */
	q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
	if (q) {
		ddi_soft_state_free(statep, ZEV_CONTROL_DEVICE_MINOR);
		ZEV_MEM_SUB(sizeof(zev_queue_t));
	}

	/* remove all other queues */
	for (i = ZEV_MINOR_MIN; i <= ZEV_MINOR_MAX; i++) {
		q = zev_queues[i- ZEV_MINOR_MIN];
		if (!q)
			continue;
		/* only the queue list's own reference should remain */
		ASSERT(q->zq_refcnt == 1);
		zev_queue_release(q);
	}
	zev_queue_trim();
	bzero(&zev_queues, sizeof(zev_queues));

	mutex_exit(&zev_mutex);

}
1262 
/*
 * DDI detach entry point.  Refuses DDI_DETACH while the control device
 * is open or while any event queues still exist; otherwise restores
 * the default ZFS event callbacks and frees all instance resources.
 * DDI_SUSPEND is always rejected.
 */
static int
zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance;
	zev_queue_t *q;

	/* called once per instance with DDI_DETACH,
	   may be called to suspend */
	switch (cmd) {
	case DDI_DETACH:
		/* instance busy? */
		instance = ddi_get_instance(dip);
		if (instance != 0) {	/* hardcoded in zev.conf */
			/* this module only supports one instance. */
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (!zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		/* check "ctrl" queue to see if it is busy */
		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		if (q == NULL) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (q->zq_busy) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		/* are there any queues? */
		if (zev_queue_cnt > 0) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}

		zev_attached = B_FALSE;
		mutex_exit(&zev_mutex);

		/* switch ZFS event callbacks back to default */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = rz_zev_default_callbacks;
		rw_exit(&rz_zev_rwlock);

		/* no thread is inside of the callbacks anymore. */

		/* free resources allocated for this instance */
		zev_free_instance(dip);
#if 0
		cmn_err(CE_WARN, "zev: allocated memory at detach: %" PRIu64,
			zev_memory_allocated - zev_memory_freed);
#endif
		return (DDI_SUCCESS);
	case DDI_SUSPEND:
		/* kernel must not suspend zev devices while ZFS is running */
		return (DDI_FAILURE);
	default:
		return (DDI_FAILURE);
	}
}
1326 
/*
 * DDI attach entry point.  Supports a single instance (0, hardcoded in
 * zev.conf): allocates the "ctrl" dummy queue and its minor node,
 * creates the default event queue, starts the poll wakeup thread and
 * finally switches the ZFS event callbacks over to this module.  On
 * any failure all partially-created state is torn down again.
 */
static int
zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* called once per instance with DDI_ATTACH,
	   may be called to resume */
	int instance;
	int error;
	zev_queue_t *q;
	switch (cmd) {
	case DDI_ATTACH:
		/* create instance state */
		instance = ddi_get_instance(dip);
		if (instance != 0) {	/* hardcoded in zev.conf */
			/* this module only supports one instance. */
			return (DDI_FAILURE);
		}

		mutex_enter(&zev_mutex);
		if (zev_attached) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		if (ddi_soft_state_zalloc(statep, ZEV_CONTROL_DEVICE_MINOR) !=
		    DDI_SUCCESS) {
			mutex_exit(&zev_mutex);
			return (DDI_FAILURE);
		}
		ZEV_MEM_ADD(sizeof(zev_queue_t));
		zev_attached = B_TRUE;

		/* init queue list */
		bzero(&zev_queues, sizeof(zev_queues));
		mutex_exit(&zev_mutex);

		/* create a dummy queue for management of "ctrl" */

		q = ddi_get_soft_state(statep, ZEV_CONTROL_DEVICE_MINOR);
		q->zq_dip = dip;
		q->zq_refcnt = 1;
		q->zq_busy = B_FALSE;
		q->zq_minor_number = ZEV_CONTROL_DEVICE_MINOR;
		q->zq_flags = ZEV_FL_PERSISTENT;
		strcpy(q->zq_name, ZEV_CONTROL_DEVICE_NAME);

		/* create device node for "ctrl" */
		if (ddi_create_minor_node(dip, ZEV_CONTROL_DEVICE_NAME,
		    S_IFCHR, ZEV_CONTROL_DEVICE_MINOR,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			goto fail;
		}

		/* note: intentionally not adding ctrl queue to queue list. */

		/* default queue */
		error = zev_queue_new(&q, dip,
				      ZEV_DEFAULT_QUEUE_NAME,
				      ZEV_MAX_QUEUE_LEN,
				      ZEV_FL_BLOCK_WHILE_QUEUE_FULL|
		                      ZEV_FL_PERSISTENT);
		if (error)
			goto fail;

		/* start pollwakeup thread */
		zev_wakeup_thread_run = 1;
		zev_poll_wakeup_thread = thread_create(NULL, 0,
		    zev_poll_wakeup_thread_main, NULL, 0, &p0,
		    TS_RUN, minclsyspri);

		ddi_report_dev(dip);

		/* switch ZFS event callbacks to zev module callbacks */
		rw_enter(&rz_zev_rwlock, RW_WRITER);
		rz_zev_callbacks = &zev_callbacks;
		rw_exit(&rz_zev_rwlock);

		return (DDI_SUCCESS);
	case DDI_RESUME:
		/* suspending zev devices should never happen */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
fail:
	cmn_err(CE_WARN, "zev: attach failed");
	zev_free_instance(dip);
	mutex_enter(&zev_mutex);
	zev_attached = B_FALSE;
	mutex_exit(&zev_mutex);
	return (DDI_FAILURE);
}
1417 
1418 /* ARGSUSED */
1419 static int
1420 zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
1421 {
1422 	minor_t minor;
1423 	zev_queue_t *q;
1424 
1425 	/* arg is dev_t */
1426 	minor = getminor((dev_t)arg);
1427 	mutex_enter(&zev_mutex);
1428 	q = ddi_get_soft_state(statep, minor);
1429 	if (q == NULL) {
1430 		*resultp = NULL;
1431 		mutex_exit(&zev_mutex);
1432 		return (DDI_FAILURE);
1433 	}
1434 
1435 	switch (infocmd) {
1436 	case DDI_INFO_DEVT2DEVINFO:
1437 		*resultp = q->zq_dip;
1438 		break;
1439 	case DDI_INFO_DEVT2INSTANCE:
1440 		*resultp = (void *)(uintptr_t)ddi_get_instance(q->zq_dip);
1441 		break;
1442 	default:
1443 		mutex_exit(&zev_mutex);
1444 		return (DDI_FAILURE);
1445 	}
1446 	mutex_exit(&zev_mutex);
1447 	return (DDI_SUCCESS);
1448 }
1449 
/*
 * Driver operations table.  No quiesce needed: the device is a
 * pseudo device with no hardware state.
 */
static struct dev_ops zev_dev_ops = {
	DEVO_REV,			/* driver build revision */
	0,				/* driver reference count */
	zev_getinfo,			/* getinfo */
	nulldev,			/* identify (obsolete) */
	nulldev,			/* probe (search for devices) */
	zev_attach,			/* attach */
	zev_detach,			/* detach */
	nodev,				/* reset (obsolete, use quiesce) */
	&zev_cb_ops,			/* character and block device ops */
	NULL,				/* bus driver ops */
	NULL,				/* power management, not needed */
	ddi_quiesce_not_needed,		/* quiesce */
};
1464 
/* Loadable driver linkage: ties the dev_ops above to the module. */
static struct modldrv zev_modldrv = {
	&mod_driverops,			/* all loadable modules use this */
	"zev ZFS event provider, v1.0",	/* driver name and version info */
	&zev_dev_ops			/* ops method pointers */
};
1470 
/* Module linkage passed to mod_install()/mod_remove()/mod_info(). */
static struct modlinkage zev_modlinkage = {
	MODREV_1,	/* fixed value */
	{
		&zev_modldrv,	/* driver linkage structure */
		NULL		/* list terminator */
	}
};
1478 
1479 int
1480 _init(void)
1481 {
1482 	int error;
1483 
1484 	if ((error = ddi_soft_state_init(&statep, sizeof(zev_queue_t), 1)) != 0)
1485 		return (error);
1486 	zev_attached = B_FALSE;
1487 
1488 	zev_queue_head = NULL;
1489 	zev_queue_tail = NULL;
1490 	zev_queue_len = 0;
1491 	zev_muted_pools_head = NULL;
1492 	zev_memory_allocated = 0;
1493 	zev_memory_freed = 0;
1494 	zev_queue_cnt = 0;
1495 
1496 	mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL);
1497 	cv_init(&zev_condvar, NULL, CV_DRIVER, NULL);
1498 	rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL);
1499 	mutex_init(&zev_mark_id_mutex, NULL, MUTEX_DRIVER, NULL);
1500 	zev_mark_id = gethrtime();
1501 	mutex_init(&zev_queue_msg_mutex, NULL, MUTEX_DRIVER, NULL);
1502 	zev_msg_sequence_number = gethrtime();
1503 	bzero(&zev_statistics, sizeof(zev_statistics));
1504 	bzero(&zev_pollhead, sizeof(zev_pollhead));
1505 	bzero(&zev_queues, sizeof(zev_queues));
1506 	zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN;
1507 	if (zev_ioc_mute_pool("zg0")) {
1508 		cmn_err(CE_WARN, "zev: could not init mute list");
1509 		goto FAIL;
1510 	}
1511 
1512 	if ((error = mod_install(&zev_modlinkage)) != 0) {
1513 		cmn_err(CE_WARN, "zev: could not install module");
1514 		goto FAIL;
1515 	}
1516 
1517 	return (0);
1518 FAIL:
1519 	/* free resources */
1520 	cmn_err(CE_WARN, "zev: _init failed");
1521 	mutex_destroy(&zev_mutex);
1522 	ddi_soft_state_fini(&statep);
1523 	return (error);
1524 }
1525 
/* Module info entry point: report via the modlinkage above. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&zev_modlinkage, modinfop));
}
1531 
/*
 * Module unload entry point.  Refuses to unload (EBUSY) while the
 * driver is attached or queues still exist.  Note the deliberate
 * two-phase drain of the global event list: once before mod_remove()
 * to unblock producers, and once after to free anything appended in
 * between.
 */
int
_fini(void)
{
	int error = 0;
	zev_msg_t *msg;
	zev_pool_list_entry_t *pe, *npe;

	mutex_enter(&zev_mutex);
	if (zev_attached == B_TRUE) {
		mutex_exit(&zev_mutex);
		return (SET_ERROR(EBUSY));
	}
	if (zev_queue_cnt != 0) {
		/* should never happen */
		mutex_exit(&zev_mutex);
		return (SET_ERROR(EBUSY));
	}

	/*
	 * avoid deadlock if event list is full: make sure threads currently
	 * blocking on the event list can append their event and then release
	 * rz_zev_rwlock.  Since there should be no queues left when we
	 * reach this point we can simply empty the event list and then
	 * wake everybody.
	 */
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		ZEV_FREE(msg, sizeof(*msg) + msg->size);
	}
	cv_broadcast(&zev_condvar);
	mutex_exit(&zev_mutex);

	/* switch ZFS event callbacks back to default (again) */
	rw_enter(&rz_zev_rwlock, RW_WRITER);
	rz_zev_callbacks = rz_zev_default_callbacks;
	rw_exit(&rz_zev_rwlock);

	/* no thread is inside of the callbacks anymore.  Safe to remove. */

	/* unload module callbacks */
	if ((error = mod_remove(&zev_modlinkage)) != 0) {
		cmn_err(CE_WARN, "mod_remove failed: %d", error);
		return (error);
	}

	/* free resources */
	mutex_enter(&zev_mutex);
	/* drain events appended while mod_remove() was in progress */
	while (zev_queue_head) {
		msg = zev_queue_head;
		zev_queue_head = msg->next;
		ZEV_FREE(msg, sizeof(*msg) + msg->size);
	}
	mutex_exit(&zev_mutex);
	/* free the pool mute list */
	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
	pe = zev_muted_pools_head;
	while (pe) {
		npe = pe;
		pe = pe->next;
		ZEV_FREE(npe, sizeof(*npe));
	}
	rw_exit(&zev_pool_list_rwlock);
	ddi_soft_state_fini(&statep);
	rw_destroy(&zev_pool_list_rwlock);
	cv_destroy(&zev_condvar);
	mutex_destroy(&zev_mutex);
	mutex_destroy(&zev_mark_id_mutex);
	mutex_destroy(&zev_queue_msg_mutex);

	return (0);
}
1603 
1604