xref: /titanic_44/usr/src/uts/common/fs/zev/zev.c (revision 149d0affa57d708a9278120c8e260622dbb3667f)
1 #include <sys/modctl.h>
2 #include <sys/ddi.h>
3 #include <sys/sunddi.h>
4 #include <sys/conf.h>
5 #include <sys/devops.h>
6 #include <sys/stat.h>
7 #include <sys/fs/zev.h>
8 #include <sys/zev_callbacks.h>
9 #include <time.h>
10 
/*
 * Per-instance soft state for the zev control device.
 */
typedef struct zev_state {
	kmutex_t	mutex;		/* protects the busy flag below */
	dev_info_t	*dip;		/* devinfo node of this instance */
	boolean_t	busy;		/* B_TRUE while the device is open */
} zev_state_t;
16 
static void		*statep;	/* soft state anchor (ddi_soft_state) */
struct pollhead		zev_pollhead;	/* pollhead for chpoll()/pollwakeup() */

kmutex_t		zev_mutex;	/* protects queue and statistics */
kcondvar_t		zev_condvar;	/* signalled when the queue shrinks */
krwlock_t		zev_pool_list_rwlock;	/* protects muted pool list */
static zev_statistics_t	zev_statistics;	/* counters exported via ioctl */
static boolean_t	zev_busy;	/* module-level busy flag for _fini */
25 
26 /*
27  * The longest potential message is from zev_zfs_mount() and
28  * contains the mountpoint, which might be close to MAXPATHLEN bytes long.
29  *
30  * Another candidate is zev_znode_rename_cb() and contains three inode
31  * numbers and two filenames of up to MAXNAMELEN bytes each.
32  */
33 #define ZEV_MAX_MESSAGE_LEN	4096
34 
35 /* If the queue size reaches 1GB, stop ZFS ops and block the threads.  */
36 #define ZEV_MAX_QUEUE_LEN		(1 * 1024 * 1024 * 1024)
37 
38 /* Don't wake up poll()ing processes for every single message. */
39 #define ZEV_MIN_POLL_WAKEUP_QUEUE_LEN	8192
40 
/* singly-linked FIFO of pending event messages, protected by zev_mutex */
static zev_msg_t *zev_queue_head = NULL;
static zev_msg_t *zev_queue_tail = NULL;
static uint64_t zev_queue_len = 0;	/* number of queued messages */


/* one entry per muted pool; list protected by zev_pool_list_rwlock */
typedef struct zev_pool_list_entry {
	struct zev_pool_list_entry	*next;
	char				name[MAXPATHLEN];
} zev_pool_list_entry_t;

static zev_pool_list_entry_t *zev_muted_pools_head = NULL;
52 
53 /*
54  * poll() wakeup thread.  Used to check periodically whether we have
55  * bytes left in the queue that have not yet been made into a
56  * pollwakeup() call.  This is meant to insure a maximum waiting
57  * time until an event is presented as a poll wakeup, while at
58  * the same time not making every single event into a poll wakeup
59  * of it's own.
60  */
61 
static volatile int zev_wakeup_thread_run = 1;	/* cleared by _fini() */
static kthread_t *zev_poll_wakeup_thread = NULL;
64 
65 static void
66 zev_poll_wakeup_thread_main(void)
67 {
68 	int wakeup;
69 	while (zev_wakeup_thread_run) {
70 		delay(drv_usectohz(100 * 1000)); /* sleep 100ms */
71 		/* check message queue */
72 		mutex_enter(&zev_mutex);
73 		wakeup = 0;
74 		if (zev_queue_head)
75 			wakeup = 1;
76 		mutex_exit(&zev_mutex);
77 		if (wakeup)
78 			pollwakeup(&zev_pollhead, POLLIN);
79 	}
80 	thread_exit();
81 }
82 
83 static int
84 zev_ioc_mute_pool(char *poolname)
85 {
86 	zev_pool_list_entry_t *pe;
87 	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
88 	/* pool already muted? */
89 	for (pe=zev_muted_pools_head; pe; pe=pe->next) {
90 		if (!strcmp(pe->name, poolname)) {
91 			rw_exit(&zev_pool_list_rwlock);
92 			return EEXIST;
93 		}
94 	}
95 	pe = kmem_zalloc(sizeof(*pe), KM_SLEEP);
96 	if (!pe) {
97 		rw_exit(&zev_pool_list_rwlock);
98 		return ENOMEM;
99 	}
100 	strncpy(pe->name, poolname, sizeof(pe->name));
101 	pe->next = zev_muted_pools_head;
102 	zev_muted_pools_head = pe;
103 	rw_exit(&zev_pool_list_rwlock);
104 	return (0);
105 }
106 
107 static int
108 zev_ioc_unmute_pool(char *poolname)
109 {
110 	zev_pool_list_entry_t *pe, *peprev;
111 	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
112 	/* pool muted? */
113 	peprev = NULL;
114 	for (pe=zev_muted_pools_head; pe; pe=pe->next) {
115 		if (!strcmp(pe->name, poolname)) {
116 			goto found;
117 		}
118 		peprev = pe;
119 	}
120 	rw_exit(&zev_pool_list_rwlock);
121 	return ENOENT;
122 found:
123 	if (peprev != NULL) {
124 		peprev->next = pe->next;
125 	} else {
126 		zev_muted_pools_head = pe->next;
127 	}
128 	kmem_free(pe, sizeof(*pe));
129 	rw_exit(&zev_pool_list_rwlock);
130 	return (0);
131 }
132 
133 int
134 zev_skip_pool(objset_t *os)
135 {
136 	zev_pool_list_entry_t *pe;
137 	dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
138 	rw_enter(&zev_pool_list_rwlock, RW_READER);
139 	for (pe=zev_muted_pools_head; pe; pe=pe->next) {
140 		if (!strcmp(pe->name, dp->dp_spa->spa_name)) {
141 			rw_exit(&zev_pool_list_rwlock);
142 			return 1;
143 		}
144 	}
145 	rw_exit(&zev_pool_list_rwlock);
146 	return 0;
147 }
148 
149 void
150 zev_queue_message(int op, zev_msg_t *msg)
151 {
152 	time_t now = 0;
153 	int wakeup = 0;
154 
155 	msg->next = NULL;
156 
157 	if (op < ZEV_OP_MIN || op > ZEV_OP_MAX) {
158 		zev_queue_error(op, "unknown op id encountered: %d", op);
159 		kmem_free(msg, sizeof(*msg) + msg->size);
160 		return;
161 	}
162 
163 	mutex_enter(&zev_mutex);
164 	while (zev_statistics.zev_max_queue_len &&
165 	    zev_statistics.zev_queue_len >= zev_statistics.zev_max_queue_len) {
166 		/* queue full.  block until it's been shrunk. */
167 		cv_wait(&zev_condvar, &zev_mutex);
168 	}
169 
170 	if (zev_queue_tail == NULL) {
171 		zev_queue_head = zev_queue_tail = msg;
172 	} else {
173 		zev_queue_tail->next = msg;
174 		zev_queue_tail = msg;
175 	}
176 	zev_queue_len++;
177 
178 	/* update statistics */
179 	zev_statistics.zev_cnt_total_events++;
180 	zev_statistics.zev_queue_len += msg->size;
181 	if (zev_statistics.zev_queue_len >
182 	    zev_statistics.zev_poll_wakeup_queue_len)
183 		wakeup = 1;
184 	switch (op) {
185 	case ZEV_OP_ERROR:
186 		zev_statistics.zev_cnt_errors++;
187 		break;
188 	case ZEV_OP_ZFS_MOUNT:
189 		zev_statistics.zev_cnt_zfs_mount++;
190 		break;
191 	case ZEV_OP_ZFS_UMOUNT:
192 		zev_statistics.zev_cnt_zfs_umount++;
193 		break;
194 	case ZEV_OP_ZVOL_WRITE:
195 		zev_statistics.zev_cnt_zvol_write++;
196 		break;
197 	case ZEV_OP_ZVOL_TRUNCATE:
198 		zev_statistics.zev_cnt_zvol_truncate++;
199 		break;
200 	case ZEV_OP_ZNODE_CLOSE_AFTER_UPDATE:
201 		zev_statistics.zev_cnt_znode_close_after_update++;
202 		break;
203 	case ZEV_OP_ZNODE_CREATE:
204 		zev_statistics.zev_cnt_znode_create++;
205 		break;
206 	case ZEV_OP_ZNODE_REMOVE:
207 		zev_statistics.zev_cnt_znode_remove++;
208 		break;
209 	case ZEV_OP_ZNODE_LINK:
210 		zev_statistics.zev_cnt_znode_link++;
211 		break;
212 	case ZEV_OP_ZNODE_SYMLINK:
213 		zev_statistics.zev_cnt_znode_symlink++;
214 		break;
215 	case ZEV_OP_ZNODE_RENAME:
216 		zev_statistics.zev_cnt_znode_rename++;
217 		break;
218 	case ZEV_OP_ZNODE_WRITE:
219 		zev_statistics.zev_cnt_znode_write++;
220 		break;
221 	case ZEV_OP_ZNODE_TRUNCATE:
222 		zev_statistics.zev_cnt_znode_truncate++;
223 		break;
224 	case ZEV_OP_ZNODE_SETATTR:
225 		zev_statistics.zev_cnt_znode_setattr++;
226 		break;
227 	case ZEV_OP_ZNODE_ACL:
228 		zev_statistics.zev_cnt_znode_acl++;
229 		break;
230 	}
231 	mutex_exit(&zev_mutex);
232 
233 	/* chpoll event, if necessary.  */
234 	if (wakeup)
235 		pollwakeup(&zev_pollhead, POLLIN);
236 
237 	return;
238 }
239 
240 void
241 zev_queue_error(int op, char *fmt, ...)
242 {
243 	char buf[ZEV_MAX_MESSAGE_LEN];
244 	va_list ap;
245 	int len;
246 	zev_msg_t *msg = NULL;
247 	zev_error_t *rec;
248 	int msg_size;
249 
250 	va_start(ap, fmt);
251 	len = vsnprintf(buf, sizeof(buf), fmt, ap);
252 	va_end(ap);
253 	if (len >= sizeof(buf)) {
254 		cmn_err(CE_WARN, "zev: can't report error - "
255 		        "dropping event entirely.");
256 		return;
257 	}
258 
259 	msg_size = sizeof(*rec) + len + 1;
260 	msg = kmem_alloc(sizeof(*msg) + msg_size, KM_SLEEP);
261 	msg->size = msg_size;
262 	rec = (zev_error_t *)(msg + 1);
263 	rec->op = ZEV_OP_ERROR;
264 	rec->op_time = ddi_get_time();
265 	rec->guid = 0;
266 	rec->failed_op = op;
267 	rec->errstr_len = len;
268 	memcpy(ZEV_ERRSTR(rec), buf, len + 1);
269 
270 	zev_queue_message(ZEV_OP_ERROR, msg);
271 	return;
272 }
273 
274 static int
275 zev_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
276 {
277 	int instance;
278 	zev_state_t *sp;
279 	zev_statistics_t zs;
280 	zev_ioctl_poolarg_t pa;
281 	uint64_t len;
282 
283 	instance = getminor(dev);
284 	if ((sp = ddi_get_soft_state(statep, instance)) == NULL)
285 		return (ENXIO);
286 	if (ddi_model_convert_from(mode) != DDI_MODEL_NONE) {
287 		/* userland has another data model.  (most
288 		   likely 32-bit) -> not supported. */
289 		return (EINVAL);
290 	}
291 	/* Remember to do 32/64 bit mode adjustments if
292 	   necessary.  See "Writing Device Drivers", 280pp */
293 	switch (cmd) {
294 	case ZEV_IOC_GET_STATISTICS:
295 		/* ddi_copyout() can take a long time.  Better make
296 		   a copy to be able to release the mutex faster. */
297 		mutex_enter(&zev_mutex);
298 		memcpy(&zs, &zev_statistics, sizeof(zs));
299 		mutex_exit(&zev_mutex);
300 		if (ddi_copyout(&zs, (void *)arg, sizeof(zs), mode) != 0)
301 			return EFAULT;
302 		break;
303 	case ZEV_IOC_MUTE_POOL:
304 	case ZEV_IOC_UNMUTE_POOL:
305 		if (ddi_copyin((void *)arg, &pa, sizeof(pa), mode) != 0)
306 			return EFAULT;
307 		if (pa.zev_poolname_len >=MAXPATHLEN)
308 			return EINVAL;
309 		pa.zev_poolname[pa.zev_poolname_len] = '\0';
310 		if (cmd == ZEV_IOC_MUTE_POOL) {
311 			return zev_ioc_mute_pool(pa.zev_poolname);
312 		} else {
313 			return zev_ioc_unmute_pool(pa.zev_poolname);
314 		}
315 		break;
316 	case ZEV_IOC_SET_MAX_QUEUE_LEN:
317 		if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0)
318 			return EFAULT;
319 		if (len > ZEV_MAX_QUEUE_LEN)
320 			return EINVAL;
321 		mutex_enter(&zev_mutex);
322 		zev_statistics.zev_max_queue_len = len;
323 		cv_broadcast(&zev_condvar);
324 		mutex_exit(&zev_mutex);
325 		break;
326 	case ZEV_IOC_SET_POLL_WAKEUP_QUEUE_LEN:
327 		if (ddi_copyin((void *)arg, &len, sizeof(len), mode) != 0)
328 			return EFAULT;
329 		mutex_enter(&zev_mutex);
330 		zev_statistics.zev_poll_wakeup_queue_len = len;
331 		mutex_exit(&zev_mutex);
332 		break;
333 	default:
334 		/* generic "ioctl unknown" error */
335 		return (ENOTTY);
336 	}
337 	return (0);
338 }
339 
340 static int
341 zev_chpoll(dev_t dev, short events, int anyyet,
342     short *reventsp, struct pollhead **phpp)
343 {
344 	int instance;
345 	zev_state_t *sp;
346 	short revent = 0;
347 
348 	instance = getminor(dev);
349 	if ((sp = ddi_get_soft_state(statep, instance)) == NULL)
350 		return (ENXIO);
351 	revent = 0;
352 	if ((events & POLLIN)) {
353 		mutex_enter(&zev_mutex);
354 		if (zev_queue_head)
355 			revent |= POLLIN;
356 		mutex_exit(&zev_mutex);
357 	}
358 	if (revent == 0) {
359 		if (!anyyet) {
360 			*phpp = &zev_pollhead;
361 		}
362 	}
363 	*reventsp = revent;
364 	return (0);
365 }
366 
367 static int
368 zev_read(dev_t dev, struct uio *uio_p, cred_t *crep_p)
369 {
370 	zev_state_t *sp;
371 	int instance;
372 	offset_t off;
373 	int ret = 0;
374 	zev_msg_t *msg;
375 	char *data;
376 
377 	instance = getminor(dev);
378 	if ((sp = ddi_get_soft_state(statep, instance)) == NULL)
379 		return (ENXIO);
380 	off = uio_p->uio_loffset;
381 	mutex_enter(&zev_mutex);
382 	msg = zev_queue_head;
383 	if (msg == NULL) {
384 		mutex_exit(&zev_mutex);
385 		return 0;
386 	}
387 	if (msg->size > uio_p->uio_resid) {
388 		mutex_exit(&zev_mutex);
389 		return E2BIG;
390 	}
391 	while (msg && uio_p->uio_resid >= msg->size) {
392 		data = (char *)(msg + 1);
393 		ret = uiomove(data, msg->size, UIO_READ, uio_p);
394 		if (ret != 0) {
395 			mutex_exit(&zev_mutex);
396 			cmn_err(CE_WARN, "zev: uiomove failed; messages lost");
397 			uio_p->uio_loffset = off;
398 			return (ret);
399 		}
400 		zev_queue_head = msg->next;
401 		if (zev_queue_head == NULL)
402 			zev_queue_tail = NULL;
403 		zev_statistics.zev_bytes_read += msg->size;
404 		zev_statistics.zev_queue_len -= msg->size;
405 		zev_queue_len--;
406 		kmem_free(msg, sizeof(*msg) + msg->size);
407 		msg = zev_queue_head;
408 	}
409 	cv_broadcast(&zev_condvar);
410 	mutex_exit(&zev_mutex);
411 	uio_p->uio_loffset = off;
412 	return 0;
413 }
414 
415 static int
416 zev_close(dev_t dev, int flag, int otyp, cred_t *crepd)
417 {
418 	zev_state_t *sp;
419 	int instance;
420 
421 	instance = getminor(dev);
422 	if ((sp = ddi_get_soft_state(statep, instance)) == NULL)
423 		return (ENXIO);
424 	if (otyp != OTYP_CHR)
425 		return (EINVAL);
426 	mutex_enter(&sp->mutex);
427 	if (sp->busy != B_TRUE) {
428 		mutex_exit(&sp->mutex);
429 		return (EINVAL);
430 	}
431 	sp->busy = B_FALSE;
432 	mutex_exit(&sp->mutex);
433 	return (0);
434 }
435 
436 static int
437 zev_open(dev_t *devp, int flag, int otyp, cred_t *credp)
438 {
439 	zev_state_t *sp;
440 	int instance;
441 
442 	instance = getminor(*devp);
443 	if ((sp = ddi_get_soft_state(statep, instance)) == NULL)
444 		return (ENXIO);
445 	if (otyp != OTYP_CHR)
446 		return (EINVAL);
447 	if (drv_priv(credp) != 0)
448 		return (EPERM);
449 	mutex_enter(&sp->mutex);
450 	if (sp->busy == B_TRUE) {
451 		/* XXX: wait for the instance to become available? */
452 		/* XXX: if we wait, the wait should be signal-interruptable. */
453 		mutex_exit(&sp->mutex);
454 		return (EBUSY);
455 	}
456 	sp->busy = B_TRUE;	/* can only be opened exclusively */
457 	mutex_exit(&sp->mutex);
458 	return (0);
459 }
460 
/*
 * Character device entry point vector for the zev control device.
 * Only open/close/read/ioctl/chpoll are implemented; everything else
 * is a stub.
 */
static struct cb_ops zev_cb_ops = {
	zev_open,		/* open */
	zev_close,		/* close */
	nodev,			/* strategy */
	nodev,			/* print */
	nodev,			/* dump */
	zev_read,		/* read */
	nodev,			/* write */
	zev_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	zev_chpoll,		/* chpoll */
	ddi_prop_op,		/* prop_op */
	NULL,			/* streamtab */
	D_MP | D_64BIT,		/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* aread */
	nodev,			/* awrite */
};
481 
482 static void
483 zev_free_instance(dev_info_t *dip)
484 {
485 	int instance;
486 	zev_state_t *sp;
487 	instance = ddi_get_instance(dip);
488 	//ddi_remove_minor_node(dip, ddi_get_name(dip));
489 	ddi_remove_minor_node(dip, NULL);
490 	sp = ddi_get_soft_state(statep, instance);
491 	if (sp) {
492 		mutex_destroy(&sp->mutex);
493 		ddi_soft_state_free(statep, instance);
494 	}
495 }
496 
497 static int
498 zev_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
499 {
500 	int instance;
501 	zev_state_t *sp;
502 	/* called once per instance with DDI_DETACH,
503 	   may be called to suspend */
504 	switch (cmd) {
505 	case DDI_DETACH:
506 		/* instance busy? */
507 		instance = ddi_get_instance(dip);
508 		if ((sp = ddi_get_soft_state(statep, instance)) == NULL)
509 			return (ENXIO);
510 		mutex_enter(&sp->mutex);
511 		if (sp->busy == B_TRUE) {
512 			mutex_exit(&sp->mutex);
513 			return (EBUSY);
514 		}
515 		mutex_exit(&sp->mutex);
516 		/* free resources allocated for this instance */
517 		zev_free_instance(dip);
518 		return (DDI_SUCCESS);
519 	case DDI_SUSPEND:
520 		/* kernel must not suspend zev devices while ZFS is running */
521 		return (DDI_FAILURE);
522 	default:
523 		return (DDI_FAILURE);
524 	}
525 }
526 
/*
 * attach(9E) entry point.  Called once per instance with DDI_ATTACH;
 * allocates soft state, creates the minor node and announces the device.
 * DDI_RESUME is accepted as a no-op since suspend is always refused.
 */
static int
zev_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance;
	zev_state_t *sp;
	switch (cmd) {
	case DDI_ATTACH:
		instance = ddi_get_instance(dip);
		if (ddi_soft_state_zalloc(statep, instance) != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
		sp = ddi_get_soft_state(statep, instance);
		ddi_set_driver_private(dip, sp);
		sp->dip = dip;
		sp->busy = B_FALSE;
		mutex_init(&sp->mutex, NULL, MUTEX_DRIVER, NULL);
		if (ddi_create_minor_node(dip, ddi_get_name(dip),
		    S_IFCHR, instance, DDI_PSEUDO, 0) == DDI_FAILURE) {
			/* minor node creation failed; undo the attach */
			zev_free_instance(dip);
			return (DDI_FAILURE);
		}
		ddi_report_dev(dip);
		return (DDI_SUCCESS);
	case DDI_RESUME:
		/* suspending zev devices should never happen, so
		   resuming is trivially successful */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}
559 
560 static int
561 zev_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
562 {
563 	int instance;
564 	zev_state_t *sp;
565 	switch (infocmd) {
566 	case DDI_INFO_DEVT2DEVINFO:
567 		/* arg is dev_t */
568 		instance = getminor((dev_t)arg);
569 		if ((sp = ddi_get_soft_state(statep, instance)) != NULL) {
570 			*resultp = sp->dip;
571 			return (DDI_SUCCESS);
572 		}
573 		*resultp = NULL;
574 		return (DDI_FAILURE);
575 	case DDI_INFO_DEVT2INSTANCE:
576 		/* arg is dev_t */
577 		instance = getminor((dev_t)arg);
578 		*resultp = (void *)(uintptr_t)instance;
579 		return (DDI_FAILURE);
580 	}
581 	return (DDI_FAILURE);
582 }
583 
/*
 * Driver entry point vector: a simple pseudo device with no bus or
 * power-management operations.
 */
static struct dev_ops zev_dev_ops = {
	DEVO_REV,			/* driver build revision */
	0,				/* driver reference count */
	zev_getinfo,			/* getinfo */
	nulldev,			/* identify (obsolete) */
	nulldev,			/* probe (search for devices) */
	zev_attach,			/* attach */
	zev_detach,			/* detach */
	nodev,				/* reset (obsolete, use quiesce) */
	&zev_cb_ops,			/* character and block device ops */
	NULL,				/* bus driver ops */
	NULL,				/* power management, not needed */
	ddi_quiesce_not_needed,		/* quiesce */
};

/* loadable-module linkage for mod_install()/mod_remove() */
static struct modldrv zev_modldrv = {
	&mod_driverops,			/* all loadable modules use this */
	"zev ZFS event provider, v1.0",	/* driver name and version info */
	&zev_dev_ops			/* ops method pointers */
};

static struct modlinkage zev_modlinkage = {
	MODREV_1,	/* fixed value */
	{
		&zev_modldrv,	/* driver linkage structure */
		NULL		/* list terminator */
	}
};
612 
613 int
614 _init(void)
615 {
616 	int error;
617 	boolean_t module_installed = B_FALSE;
618 
619 	if ((error = ddi_soft_state_init(&statep, sizeof(zev_state_t), 1)) != 0)
620 		return (error);
621 	zev_busy = B_FALSE;
622 
623 	mutex_init(&zev_mutex, NULL, MUTEX_DRIVER, NULL);
624 	cv_init(&zev_condvar, NULL, CV_DRIVER, NULL);
625 	rw_init(&zev_pool_list_rwlock, NULL, RW_DRIVER, NULL);
626 	bzero(&zev_statistics, sizeof(zev_statistics));
627 	zev_statistics.zev_max_queue_len = ZEV_MAX_QUEUE_LEN;
628 	zev_statistics.zev_poll_wakeup_queue_len =
629 	    ZEV_MIN_POLL_WAKEUP_QUEUE_LEN;
630 	if (zev_ioc_mute_pool("zg0")) {
631 		cmn_err(CE_WARN, "zev: could not init mute list");
632 		goto FAIL;
633 	}
634 
635 	if ((error = mod_install(&zev_modlinkage)) != 0) {
636 		cmn_err(CE_WARN, "zev: could not install module");
637 		goto FAIL;
638 	}
639 	module_installed = B_TRUE;
640 
641 	/*
642 	 * Note: _init() seems to be a bad place to access other modules'
643 	 * device files, as it can cause a kernel panic.
644 	 *
645 	 * For example, our _init() is called if our module isn't loaded
646 	 * when someone causes a readdir() in "/devices/pseudo".  For that,
647 	 * devfs_readdir() is used, which obtains an rwlock for the
648 	 * directory.
649 	 *
650 	 * Then, if we open a device file here, we will indirectly call
651 	 * devfs_lookup(), which tries to obtain the same rwlock
652 	 * again, which this thread already has.  That will result in
653 	 * a kernel panic. ("recursive entry")
654 	 *
655 	 * Therefor, we have switched from a zfs ioctl() to directly
656 	 * accessing symbols in the zfs module.
657 	 */
658 
659 	/* switch ZFS event callbacks to zev module callback functions */
660 	rw_enter(&rz_zev_rwlock, RW_WRITER);
661 	rz_zev_callbacks = &zev_callbacks;
662 	rw_exit(&rz_zev_rwlock);
663 
664 	zev_poll_wakeup_thread = thread_create(NULL, 0,
665 	    zev_poll_wakeup_thread_main, NULL, 0, &p0, TS_RUN, minclsyspri);
666 	return (0);
667 FAIL:
668 	/* free resources */
669 	if (module_installed == B_TRUE)
670 		(void) mod_remove(&zev_modlinkage);
671 	mutex_destroy(&zev_mutex);
672 	ddi_soft_state_fini(&statep);
673 	return (error);
674 }
675 
/* _info(9E): report module information via the standard linkage. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&zev_modlinkage, modinfop));
}
681 
682 int
683 _fini(void)
684 {
685 	int error = 0;
686 	zev_msg_t *msg;
687 	zev_pool_list_entry_t *pe, *npe;
688 
689 	mutex_enter(&zev_mutex);
690 	if (zev_busy == B_TRUE) {
691 		mutex_exit(&zev_mutex);
692 		return (SET_ERROR(EBUSY));
693 	}
694 	mutex_exit(&zev_mutex);
695 
696 	/* switch ZFS event callbacks back to default */
697 	rw_enter(&rz_zev_rwlock, RW_WRITER);
698 	rz_zev_callbacks = rz_zev_default_callbacks;
699 	rw_exit(&rz_zev_rwlock);
700 
701 	/* no thread is inside of the callbacks anymore.  Safe to remove. */
702 	zev_wakeup_thread_run = 0;
703 	if (zev_poll_wakeup_thread != 0) {
704 		thread_join(zev_poll_wakeup_thread->t_did);
705 		zev_poll_wakeup_thread = 0;
706 	}
707 	if ((error = mod_remove(&zev_modlinkage)) != 0) {
708 		cmn_err(CE_WARN, "mod_remove failed: %d", error);
709 		return (error);
710 	}
711 
712 	/* free resources */
713 	mutex_enter(&zev_mutex);
714 	while (zev_queue_head) {
715 		msg = zev_queue_head;
716 		zev_queue_head = msg->next;
717 		if (msg)
718 			kmem_free(msg, sizeof(*msg) + msg->size);
719 	}
720 	mutex_exit(&zev_mutex);
721 	rw_enter(&zev_pool_list_rwlock, RW_WRITER);
722 	pe = zev_muted_pools_head;
723 	while (pe) {
724 		npe = pe;
725 		pe = pe->next;
726 		kmem_free(npe, sizeof(*npe));
727 	}
728 	rw_exit(&zev_pool_list_rwlock);
729 	ddi_soft_state_fini(&statep);
730 	rw_destroy(&zev_pool_list_rwlock);
731 	cv_destroy(&zev_condvar);
732 	mutex_destroy(&zev_mutex);
733 
734 	return (0);
735 }
736 
737