xref: /titanic_52/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision ae8180db892a16c89bf25609727063514ca6b719)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/errno.h>
31 #include <sys/uio.h>
32 #include <sys/buf.h>
33 #include <sys/modctl.h>
34 #include <sys/open.h>
35 #include <sys/file.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/cmn_err.h>
39 #include <sys/stat.h>
40 #include <sys/zfs_ioctl.h>
41 #include <sys/zap.h>
42 #include <sys/spa.h>
43 #include <sys/spa_impl.h>
44 #include <sys/vdev.h>
45 #include <sys/vdev_impl.h>
46 #include <sys/dmu.h>
47 #include <sys/dsl_dir.h>
48 #include <sys/dsl_dataset.h>
49 #include <sys/dsl_prop.h>
50 #include <sys/dsl_deleg.h>
51 #include <sys/dmu_objset.h>
52 #include <sys/ddi.h>
53 #include <sys/sunddi.h>
54 #include <sys/sunldi.h>
55 #include <sys/policy.h>
56 #include <sys/zone.h>
57 #include <sys/nvpair.h>
58 #include <sys/pathname.h>
59 #include <sys/mount.h>
60 #include <sys/sdt.h>
61 #include <sys/fs/zfs.h>
62 #include <sys/zfs_ctldir.h>
63 #include <sys/zvol.h>
64 #include <sharefs/share.h>
65 #include <sys/zfs_znode.h>
66 
67 #include "zfs_namecheck.h"
68 #include "zfs_prop.h"
69 #include "zfs_deleg.h"
70 
/* Filesystem module linkage; defined outside this file. */
extern struct modlfs zfs_modlfs;

/* Filesystem-level setup and teardown entry points; defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * LDI identity handle and devinfo pointer kept at file scope.
 * NOTE(review): presumably filled in when the driver attaches -- the code
 * that assigns them is not visible in this part of the file.
 */
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;

/* Signature of an ioctl handler: takes the command block, returns an errno. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/* Signature of a permission check: command block plus caller credential. */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);

/*
 * One entry of the ioctl dispatch description: the handler, the policy
 * check that guards it, and two per-command attributes.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;		/* handler */
	zfs_secpolicy_func_t	*zvec_secpolicy;	/* permission check */
	/*
	 * NOTE(review): presumably selects how zc_name is validated before
	 * dispatch (not at all, as a pool name, or as a dataset name) --
	 * confirm against the dispatch code.
	 */
	enum {
		NO_NAME,
		POOL_NAME,
		DATASET_NAME
	} zvec_namecheck;
	/*
	 * NOTE(review): presumably marks commands whose invocation is
	 * recorded in the pool history -- confirm against the dispatch code.
	 */
	boolean_t		zvec_his_log;
} zfs_ioc_vec_t;
92 
93 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
94 void
95 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
96 {
97 	const char *newfile;
98 	char buf[256];
99 	va_list adx;
100 
101 	/*
102 	 * Get rid of annoying "../common/" prefix to filename.
103 	 */
104 	newfile = strrchr(file, '/');
105 	if (newfile != NULL) {
106 		newfile = newfile + 1; /* Get rid of leading / */
107 	} else {
108 		newfile = file;
109 	}
110 
111 	va_start(adx, fmt);
112 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
113 	va_end(adx);
114 
115 	/*
116 	 * To get this data, use the zfs-dprintf probe as so:
117 	 * dtrace -q -n 'zfs-dprintf \
118 	 *	/stringof(arg0) == "dbuf.c"/ \
119 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
120 	 * arg0 = file name
121 	 * arg1 = function name
122 	 * arg2 = line number
123 	 * arg3 = message
124 	 */
125 	DTRACE_PROBE4(zfs__dprintf,
126 	    char *, newfile, char *, func, int, line, char *, buf);
127 }
128 
129 static void
130 history_str_free(char *buf)
131 {
132 	kmem_free(buf, HIS_MAX_RECORD_LEN);
133 }
134 
135 static char *
136 history_str_get(zfs_cmd_t *zc)
137 {
138 	char *buf;
139 
140 	if (zc->zc_history == NULL)
141 		return (NULL);
142 
143 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
144 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
145 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
146 		history_str_free(buf);
147 		return (NULL);
148 	}
149 
150 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
151 
152 	return (buf);
153 }
154 
155 static void
156 zfs_log_history(zfs_cmd_t *zc)
157 {
158 	spa_t *spa;
159 	char *buf;
160 
161 	if ((buf = history_str_get(zc)) == NULL)
162 		return;
163 
164 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
165 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
166 			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
167 		spa_close(spa, FTAG);
168 	}
169 	history_str_free(buf);
170 }
171 
172 /*
173  * Policy for top-level read operations (list pools).  Requires no privileges,
174  * and can be used in the local zone, as there is no associated dataset.
175  */
176 /* ARGSUSED */
177 static int
178 zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
179 {
180 	return (0);
181 }
182 
183 /*
184  * Policy for dataset read operations (list children, get statistics).  Requires
185  * no privileges, but must be visible in the local zone.
186  */
187 /* ARGSUSED */
188 static int
189 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
190 {
191 	if (INGLOBALZONE(curproc) ||
192 	    zone_dataset_visible(zc->zc_name, NULL))
193 		return (0);
194 
195 	return (ENOENT);
196 }
197 
198 static int
199 zfs_dozonecheck(const char *dataset, cred_t *cr)
200 {
201 	uint64_t zoned;
202 	int writable = 1;
203 
204 	/*
205 	 * The dataset must be visible by this zone -- check this first
206 	 * so they don't see EPERM on something they shouldn't know about.
207 	 */
208 	if (!INGLOBALZONE(curproc) &&
209 	    !zone_dataset_visible(dataset, &writable))
210 		return (ENOENT);
211 
212 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
213 		return (ENOENT);
214 
215 	if (INGLOBALZONE(curproc)) {
216 		/*
217 		 * If the fs is zoned, only root can access it from the
218 		 * global zone.
219 		 */
220 		if (secpolicy_zfs(cr) && zoned)
221 			return (EPERM);
222 	} else {
223 		/*
224 		 * If we are in a local zone, the 'zoned' property must be set.
225 		 */
226 		if (!zoned)
227 			return (EPERM);
228 
229 		/* must be writable by this zone */
230 		if (!writable)
231 			return (EPERM);
232 	}
233 	return (0);
234 }
235 
236 int
237 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
238 {
239 	int error;
240 
241 	error = zfs_dozonecheck(name, cr);
242 	if (error == 0) {
243 		error = secpolicy_zfs(cr);
244 		if (error)
245 			error = dsl_deleg_access(name, perm, cr);
246 	}
247 	return (error);
248 }
249 
250 static int
251 zfs_secpolicy_setprop(const char *name, zfs_prop_t prop, cred_t *cr)
252 {
253 	/*
254 	 * Check permissions for special properties.
255 	 */
256 	switch (prop) {
257 	case ZFS_PROP_ZONED:
258 		/*
259 		 * Disallow setting of 'zoned' from within a local zone.
260 		 */
261 		if (!INGLOBALZONE(curproc))
262 			return (EPERM);
263 		break;
264 
265 	case ZFS_PROP_QUOTA:
266 		if (!INGLOBALZONE(curproc)) {
267 			uint64_t zoned;
268 			char setpoint[MAXNAMELEN];
269 			/*
270 			 * Unprivileged users are allowed to modify the
271 			 * quota on things *under* (ie. contained by)
272 			 * the thing they own.
273 			 */
274 			if (dsl_prop_get_integer(name, "zoned", &zoned,
275 			    setpoint))
276 				return (EPERM);
277 			if (!zoned || strlen(name) <= strlen(setpoint))
278 				return (EPERM);
279 		}
280 		break;
281 	}
282 
283 	return (zfs_secpolicy_write_perms(name, zfs_prop_perm(prop), cr));
284 }
285 
286 int
287 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
288 {
289 	int error;
290 
291 	error = zfs_dozonecheck(zc->zc_name, cr);
292 	if (error)
293 		return (error);
294 
295 	/*
296 	 * permission to set permissions will be evaluated later in
297 	 * dsl_deleg_can_allow()
298 	 */
299 	return (0);
300 }
301 
302 int
303 zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
304 {
305 	int error;
306 	error = zfs_secpolicy_write_perms(zc->zc_name,
307 	    ZFS_DELEG_PERM_ROLLBACK, cr);
308 	if (error == 0)
309 		error = zfs_secpolicy_write_perms(zc->zc_name,
310 		    ZFS_DELEG_PERM_MOUNT, cr);
311 	return (error);
312 }
313 
314 int
315 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
316 {
317 	return (zfs_secpolicy_write_perms(zc->zc_name,
318 	    ZFS_DELEG_PERM_SEND, cr));
319 }
320 
321 int
322 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
323 {
324 	if (!INGLOBALZONE(curproc))
325 		return (EPERM);
326 
327 	if (secpolicy_nfs(CRED()) == 0) {
328 		return (0);
329 	} else {
330 		vnode_t *vp;
331 		int error;
332 
333 		if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
334 		    NO_FOLLOW, NULL, &vp)) != 0)
335 			return (error);
336 
337 		/* Now make sure mntpnt and dataset are ZFS */
338 
339 		if (vp->v_vfsp->vfs_fstype != zfsfstype ||
340 		    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
341 		    zc->zc_name) != 0)) {
342 			VN_RELE(vp);
343 			return (EPERM);
344 		}
345 
346 		VN_RELE(vp);
347 		return (dsl_deleg_access(zc->zc_name,
348 		    ZFS_DELEG_PERM_SHARE, cr));
349 	}
350 }
351 
/*
 * Compute the parent of a dataset or snapshot name by removing the trailing
 * "@snap" component or, when there is no '@', the trailing "/child"
 * component.
 *
 *	datasetname	NUL-terminated name to examine
 *	parent		output buffer that receives the parent name
 *	parentsize	size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT when the name has neither '@' nor '/' (it
 * has no parent), and ENAMETOOLONG when the name, including its
 * terminator, does not fit in 'parent'.  On ENAMETOOLONG 'parent' is left
 * untouched; on ENOENT it holds an unmodified copy of the name.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	size_t len = strlen(datasetname);
	char *cp;

	/*
	 * Refuse names that do not fit.  A bounded copy that silently drops
	 * the terminator would let the searches below run off the end of
	 * the caller's buffer.
	 */
	if (parentsize <= 0 || len >= (size_t)parentsize)
		return (ENAMETOOLONG);

	(void) strcpy(parent, datasetname);

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);

	cp[0] = '\0';
	return (0);
}
373 
374 int
375 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
376 {
377 	int error;
378 
379 	if ((error = zfs_secpolicy_write_perms(name,
380 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
381 		return (error);
382 
383 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
384 }
385 
386 static int
387 zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
388 {
389 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
390 }
391 
392 /*
393  * Must have sys_config privilege to check the iscsi permission
394  */
395 /* ARGSUSED */
396 static int
397 zfs_secpolicy_iscsi(zfs_cmd_t *zc, cred_t *cr)
398 {
399 	return (secpolicy_zfs(cr));
400 }
401 
402 int
403 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
404 {
405 	char 	parentname[MAXNAMELEN];
406 	int	error;
407 
408 	if ((error = zfs_secpolicy_write_perms(from,
409 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
410 		return (error);
411 
412 	if ((error = zfs_secpolicy_write_perms(from,
413 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
414 		return (error);
415 
416 	if ((error = zfs_get_parent(to, parentname,
417 	    sizeof (parentname))) != 0)
418 		return (error);
419 
420 	if ((error = zfs_secpolicy_write_perms(parentname,
421 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
422 		return (error);
423 
424 	if ((error = zfs_secpolicy_write_perms(parentname,
425 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
426 		return (error);
427 
428 	return (error);
429 }
430 
431 static int
432 zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
433 {
434 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
435 }
436 
437 static int
438 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
439 {
440 	char 	parentname[MAXNAMELEN];
441 	objset_t *clone;
442 	int error;
443 
444 	error = zfs_secpolicy_write_perms(zc->zc_name,
445 	    ZFS_DELEG_PERM_PROMOTE, cr);
446 	if (error)
447 		return (error);
448 
449 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
450 	    DS_MODE_STANDARD | DS_MODE_READONLY, &clone);
451 
452 	if (error == 0) {
453 		dsl_dataset_t *pclone = NULL;
454 		dsl_dir_t *dd;
455 		dd = clone->os->os_dsl_dataset->ds_dir;
456 
457 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
458 		error = dsl_dataset_open_obj(dd->dd_pool,
459 		    dd->dd_phys->dd_clone_parent_obj, NULL,
460 		    DS_MODE_NONE, FTAG, &pclone);
461 		rw_exit(&dd->dd_pool->dp_config_rwlock);
462 		if (error) {
463 			dmu_objset_close(clone);
464 			return (error);
465 		}
466 
467 		error = zfs_secpolicy_write_perms(zc->zc_name,
468 		    ZFS_DELEG_PERM_MOUNT, cr);
469 
470 		dsl_dataset_name(pclone, parentname);
471 		dmu_objset_close(clone);
472 		dsl_dataset_close(pclone, DS_MODE_NONE, FTAG);
473 		if (error == 0)
474 			error = zfs_secpolicy_write_perms(parentname,
475 			    ZFS_DELEG_PERM_PROMOTE, cr);
476 	}
477 	return (error);
478 }
479 
480 static int
481 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
482 {
483 	int error;
484 
485 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
486 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
487 		return (error);
488 
489 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
490 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
491 		return (error);
492 
493 	return (zfs_secpolicy_write_perms(zc->zc_name,
494 	    ZFS_DELEG_PERM_CREATE, cr));
495 }
496 
497 int
498 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
499 {
500 	int error;
501 
502 	if ((error = zfs_secpolicy_write_perms(name,
503 	    ZFS_DELEG_PERM_SNAPSHOT, cr)) != 0)
504 		return (error);
505 
506 	error = zfs_secpolicy_write_perms(name,
507 	    ZFS_DELEG_PERM_MOUNT, cr);
508 
509 	return (error);
510 }
511 
512 static int
513 zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
514 {
515 
516 	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
517 }
518 
519 static int
520 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
521 {
522 	char 	parentname[MAXNAMELEN];
523 	int 	error;
524 
525 	if ((error = zfs_get_parent(zc->zc_name, parentname,
526 	    sizeof (parentname))) != 0)
527 		return (error);
528 
529 	if (zc->zc_value[0] != '\0') {
530 		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
531 		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
532 			return (error);
533 	}
534 
535 	if ((error = zfs_secpolicy_write_perms(parentname,
536 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
537 		return (error);
538 
539 	error = zfs_secpolicy_write_perms(parentname,
540 	    ZFS_DELEG_PERM_MOUNT, cr);
541 
542 	return (error);
543 }
544 
545 static int
546 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
547 {
548 	int error;
549 
550 	error = secpolicy_fs_unmount(cr, NULL);
551 	if (error) {
552 		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
553 	}
554 	return (error);
555 }
556 
557 /*
558  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
559  * SYS_CONFIG privilege, which is not available in a local zone.
560  */
561 /* ARGSUSED */
562 static int
563 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
564 {
565 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
566 		return (EPERM);
567 
568 	return (0);
569 }
570 
571 /*
572  * Just like zfs_secpolicy_config, except that we will check for
573  * mount permission on the dataset for permission to create/remove
574  * the minor nodes.
575  */
576 static int
577 zfs_secpolicy_minor(zfs_cmd_t *zc, cred_t *cr)
578 {
579 	if (secpolicy_sys_config(cr, B_FALSE) != 0) {
580 		return (dsl_deleg_access(zc->zc_name,
581 		    ZFS_DELEG_PERM_MOUNT, cr));
582 	}
583 
584 	return (0);
585 }
586 
587 /*
588  * Policy for fault injection.  Requires all privileges.
589  */
590 /* ARGSUSED */
591 static int
592 zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
593 {
594 	return (secpolicy_zinject(cr));
595 }
596 
597 /*
598  * Returns the nvlist as specified by the user in the zfs_cmd_t.
599  */
600 static int
601 get_nvlist(zfs_cmd_t *zc, nvlist_t **nvp)
602 {
603 	char *packed;
604 	size_t size;
605 	int error;
606 	nvlist_t *config = NULL;
607 
608 	/*
609 	 * Read in and unpack the user-supplied nvlist.
610 	 */
611 	if ((size = zc->zc_nvlist_src_size) == 0)
612 		return (EINVAL);
613 
614 	packed = kmem_alloc(size, KM_SLEEP);
615 
616 	if ((error = xcopyin((void *)(uintptr_t)zc->zc_nvlist_src, packed,
617 	    size)) != 0) {
618 		kmem_free(packed, size);
619 		return (error);
620 	}
621 
622 	if ((error = nvlist_unpack(packed, size, &config, 0)) != 0) {
623 		kmem_free(packed, size);
624 		return (error);
625 	}
626 
627 	kmem_free(packed, size);
628 
629 	*nvp = config;
630 	return (0);
631 }
632 
633 static int
634 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
635 {
636 	char *packed = NULL;
637 	size_t size;
638 	int error;
639 
640 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
641 
642 	if (size > zc->zc_nvlist_dst_size) {
643 		error = ENOMEM;
644 	} else {
645 		packed = kmem_alloc(size, KM_SLEEP);
646 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
647 		    KM_SLEEP) == 0);
648 		error = xcopyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
649 		    size);
650 		kmem_free(packed, size);
651 	}
652 
653 	zc->zc_nvlist_dst_size = size;
654 	return (error);
655 }
656 
657 static int
658 zfs_ioc_pool_create(zfs_cmd_t *zc)
659 {
660 	int error;
661 	nvlist_t *config;
662 	char *buf;
663 
664 	if ((buf = history_str_get(zc)) == NULL)
665 		return (EINVAL);
666 
667 	if ((error = get_nvlist(zc, &config)) != 0) {
668 		history_str_free(buf);
669 		return (error);
670 	}
671 
672 	error = spa_create(zc->zc_name, config, zc->zc_value[0] == '\0' ?
673 	    NULL : zc->zc_value, buf);
674 
675 	nvlist_free(config);
676 	history_str_free(buf);
677 
678 	return (error);
679 }
680 
681 static int
682 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
683 {
684 	int error;
685 	zfs_log_history(zc);
686 	error = spa_destroy(zc->zc_name);
687 	return (error);
688 }
689 
690 static int
691 zfs_ioc_pool_import(zfs_cmd_t *zc)
692 {
693 	int error;
694 	nvlist_t *config;
695 	uint64_t guid;
696 
697 	if ((error = get_nvlist(zc, &config)) != 0)
698 		return (error);
699 
700 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
701 	    guid != zc->zc_guid)
702 		error = EINVAL;
703 	else
704 		error = spa_import(zc->zc_name, config,
705 		    zc->zc_value[0] == '\0' ? NULL : zc->zc_value);
706 
707 	nvlist_free(config);
708 
709 	return (error);
710 }
711 
712 static int
713 zfs_ioc_pool_export(zfs_cmd_t *zc)
714 {
715 	int error;
716 	zfs_log_history(zc);
717 	error = spa_export(zc->zc_name, NULL);
718 	return (error);
719 }
720 
721 static int
722 zfs_ioc_pool_configs(zfs_cmd_t *zc)
723 {
724 	nvlist_t *configs;
725 	int error;
726 
727 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
728 		return (EEXIST);
729 
730 	error = put_nvlist(zc, configs);
731 
732 	nvlist_free(configs);
733 
734 	return (error);
735 }
736 
737 static int
738 zfs_ioc_pool_stats(zfs_cmd_t *zc)
739 {
740 	nvlist_t *config;
741 	int error;
742 	int ret = 0;
743 
744 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
745 	    sizeof (zc->zc_value));
746 
747 	if (config != NULL) {
748 		ret = put_nvlist(zc, config);
749 		nvlist_free(config);
750 
751 		/*
752 		 * The config may be present even if 'error' is non-zero.
753 		 * In this case we return success, and preserve the real errno
754 		 * in 'zc_cookie'.
755 		 */
756 		zc->zc_cookie = error;
757 	} else {
758 		ret = error;
759 	}
760 
761 	return (ret);
762 }
763 
764 /*
765  * Try to import the given pool, returning pool stats as appropriate so that
766  * user land knows which devices are available and overall pool health.
767  */
768 static int
769 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
770 {
771 	nvlist_t *tryconfig, *config;
772 	int error;
773 
774 	if ((error = get_nvlist(zc, &tryconfig)) != 0)
775 		return (error);
776 
777 	config = spa_tryimport(tryconfig);
778 
779 	nvlist_free(tryconfig);
780 
781 	if (config == NULL)
782 		return (EINVAL);
783 
784 	error = put_nvlist(zc, config);
785 	nvlist_free(config);
786 
787 	return (error);
788 }
789 
790 static int
791 zfs_ioc_pool_scrub(zfs_cmd_t *zc)
792 {
793 	spa_t *spa;
794 	int error;
795 
796 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
797 		return (error);
798 
799 	spa_config_enter(spa, RW_READER, FTAG);
800 	error = spa_scrub(spa, zc->zc_cookie, B_FALSE);
801 	spa_config_exit(spa, FTAG);
802 
803 	spa_close(spa, FTAG);
804 
805 	return (error);
806 }
807 
808 static int
809 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
810 {
811 	spa_t *spa;
812 	int error;
813 
814 	error = spa_open(zc->zc_name, &spa, FTAG);
815 	if (error == 0) {
816 		spa_freeze(spa);
817 		spa_close(spa, FTAG);
818 	}
819 	return (error);
820 }
821 
822 static int
823 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
824 {
825 	spa_t *spa;
826 	int error;
827 
828 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
829 		return (error);
830 
831 	spa_upgrade(spa);
832 	spa_close(spa, FTAG);
833 
834 	return (error);
835 }
836 
837 static int
838 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
839 {
840 	spa_t *spa;
841 	char *hist_buf;
842 	uint64_t size;
843 	int error;
844 
845 	if ((size = zc->zc_history_len) == 0)
846 		return (EINVAL);
847 
848 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
849 		return (error);
850 
851 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
852 		spa_close(spa, FTAG);
853 		return (ENOTSUP);
854 	}
855 
856 	hist_buf = kmem_alloc(size, KM_SLEEP);
857 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
858 	    &zc->zc_history_len, hist_buf)) == 0) {
859 		error = xcopyout(hist_buf,
860 		    (char *)(uintptr_t)zc->zc_history,
861 		    zc->zc_history_len);
862 	}
863 
864 	spa_close(spa, FTAG);
865 	kmem_free(hist_buf, size);
866 	return (error);
867 }
868 
869 static int
870 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
871 {
872 	int error;
873 
874 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
875 		return (error);
876 
877 	return (0);
878 }
879 
880 static int
881 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
882 {
883 	objset_t *osp;
884 	int error;
885 
886 	if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS,
887 	    DS_MODE_NONE | DS_MODE_READONLY, &osp)) != 0)
888 		return (error);
889 
890 	error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value,
891 	    sizeof (zc->zc_value));
892 	dmu_objset_close(osp);
893 
894 	return (error);
895 }
896 
897 static int
898 zfs_ioc_vdev_add(zfs_cmd_t *zc)
899 {
900 	spa_t *spa;
901 	int error;
902 	nvlist_t *config;
903 
904 	error = spa_open(zc->zc_name, &spa, FTAG);
905 	if (error != 0)
906 		return (error);
907 
908 	/*
909 	 * A root pool with concatenated devices is not supported.
910 	 * Thus, can not add a device to a root pool with one device.
911 	 */
912 	if (spa->spa_root_vdev->vdev_children == 1 && spa->spa_bootfs != 0) {
913 		spa_close(spa, FTAG);
914 		return (EDOM);
915 	}
916 
917 	if ((error = get_nvlist(zc, &config)) == 0) {
918 		error = spa_vdev_add(spa, config);
919 		nvlist_free(config);
920 	}
921 	spa_close(spa, FTAG);
922 	return (error);
923 }
924 
925 static int
926 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
927 {
928 	spa_t *spa;
929 	int error;
930 
931 	error = spa_open(zc->zc_name, &spa, FTAG);
932 	if (error != 0)
933 		return (error);
934 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
935 	spa_close(spa, FTAG);
936 	return (error);
937 }
938 
939 static int
940 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
941 {
942 	spa_t *spa;
943 	int error;
944 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
945 
946 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
947 		return (error);
948 	switch (zc->zc_cookie) {
949 	case VDEV_STATE_ONLINE:
950 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
951 		break;
952 
953 	case VDEV_STATE_OFFLINE:
954 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
955 		break;
956 
957 	case VDEV_STATE_FAULTED:
958 		error = vdev_fault(spa, zc->zc_guid);
959 		break;
960 
961 	case VDEV_STATE_DEGRADED:
962 		error = vdev_degrade(spa, zc->zc_guid);
963 		break;
964 
965 	default:
966 		error = EINVAL;
967 	}
968 	zc->zc_cookie = newstate;
969 	spa_close(spa, FTAG);
970 	return (error);
971 }
972 
973 static int
974 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
975 {
976 	spa_t *spa;
977 	int replacing = zc->zc_cookie;
978 	nvlist_t *config;
979 	int error;
980 
981 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
982 		return (error);
983 
984 	if ((error = get_nvlist(zc, &config)) == 0) {
985 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
986 		nvlist_free(config);
987 	}
988 
989 	spa_close(spa, FTAG);
990 	return (error);
991 }
992 
993 static int
994 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
995 {
996 	spa_t *spa;
997 	int error;
998 
999 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1000 		return (error);
1001 
1002 	error = spa_vdev_detach(spa, zc->zc_guid, B_FALSE);
1003 
1004 	spa_close(spa, FTAG);
1005 	return (error);
1006 }
1007 
1008 static int
1009 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1010 {
1011 	spa_t *spa;
1012 	char *path = zc->zc_value;
1013 	uint64_t guid = zc->zc_guid;
1014 	int error;
1015 
1016 	error = spa_open(zc->zc_name, &spa, FTAG);
1017 	if (error != 0)
1018 		return (error);
1019 
1020 	error = spa_vdev_setpath(spa, guid, path);
1021 	spa_close(spa, FTAG);
1022 	return (error);
1023 }
1024 
1025 static int
1026 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1027 {
1028 	objset_t *os = NULL;
1029 	int error;
1030 	nvlist_t *nv;
1031 
1032 retry:
1033 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1034 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
1035 	if (error != 0) {
1036 		/*
1037 		 * This is ugly: dmu_objset_open() can return EBUSY if
1038 		 * the objset is held exclusively. Fortunately this hold is
1039 		 * only for a short while, so we retry here.
1040 		 * This avoids user code having to handle EBUSY,
1041 		 * for example for a "zfs list".
1042 		 */
1043 		if (error == EBUSY) {
1044 			delay(1);
1045 			goto retry;
1046 		}
1047 		return (error);
1048 	}
1049 
1050 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1051 
1052 	if (zc->zc_nvlist_dst != 0 &&
1053 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1054 		dmu_objset_stats(os, nv);
1055 		/*
1056 		 * NB: {zpl,zvol}_get_stats() will read the objset contents,
1057 		 * which we aren't supposed to do with a
1058 		 * DS_MODE_STANDARD open, because it could be
1059 		 * inconsistent.  So this is a bit of a workaround...
1060 		 */
1061 		if (!zc->zc_objset_stats.dds_inconsistent) {
1062 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1063 				VERIFY(zvol_get_stats(os, nv) == 0);
1064 			else if (dmu_objset_type(os) == DMU_OST_ZFS)
1065 				(void) zfs_get_stats(os, nv);
1066 		}
1067 		error = put_nvlist(zc, nv);
1068 		nvlist_free(nv);
1069 	}
1070 
1071 	spa_altroot(dmu_objset_spa(os), zc->zc_value, sizeof (zc->zc_value));
1072 
1073 	dmu_objset_close(os);
1074 	return (error);
1075 }
1076 
1077 static int
1078 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1079 {
1080 	objset_t *os;
1081 	int error;
1082 	char *p;
1083 
1084 retry:
1085 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1086 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
1087 	if (error != 0) {
1088 		/*
1089 		 * This is ugly: dmu_objset_open() can return EBUSY if
1090 		 * the objset is held exclusively. Fortunately this hold is
1091 		 * only for a short while, so we retry here.
1092 		 * This avoids user code having to handle EBUSY,
1093 		 * for example for a "zfs list".
1094 		 */
1095 		if (error == EBUSY) {
1096 			delay(1);
1097 			goto retry;
1098 		}
1099 		if (error == ENOENT)
1100 			error = ESRCH;
1101 		return (error);
1102 	}
1103 
1104 	p = strrchr(zc->zc_name, '/');
1105 	if (p == NULL || p[1] != '\0')
1106 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1107 	p = zc->zc_name + strlen(zc->zc_name);
1108 
1109 	do {
1110 		error = dmu_dir_list_next(os,
1111 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1112 		    NULL, &zc->zc_cookie);
1113 		if (error == ENOENT)
1114 			error = ESRCH;
1115 	} while (error == 0 && !INGLOBALZONE(curproc) &&
1116 	    !zone_dataset_visible(zc->zc_name, NULL));
1117 
1118 	/*
1119 	 * If it's a hidden dataset (ie. with a '$' in its name), don't
1120 	 * try to get stats for it.  Userland will skip over it.
1121 	 */
1122 	if (error == 0 && strchr(zc->zc_name, '$') == NULL)
1123 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1124 
1125 	dmu_objset_close(os);
1126 	return (error);
1127 }
1128 
1129 static int
1130 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1131 {
1132 	objset_t *os;
1133 	int error;
1134 
1135 retry:
1136 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1137 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
1138 	if (error != 0) {
1139 		/*
1140 		 * This is ugly: dmu_objset_open() can return EBUSY if
1141 		 * the objset is held exclusively. Fortunately this hold is
1142 		 * only for a short while, so we retry here.
1143 		 * This avoids user code having to handle EBUSY,
1144 		 * for example for a "zfs list".
1145 		 */
1146 		if (error == EBUSY) {
1147 			delay(1);
1148 			goto retry;
1149 		}
1150 		if (error == ENOENT)
1151 			error = ESRCH;
1152 		return (error);
1153 	}
1154 
1155 	/*
1156 	 * A dataset name of maximum length cannot have any snapshots,
1157 	 * so exit immediately.
1158 	 */
1159 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1160 		dmu_objset_close(os);
1161 		return (ESRCH);
1162 	}
1163 
1164 	error = dmu_snapshot_list_next(os,
1165 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1166 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie);
1167 	if (error == ENOENT)
1168 		error = ESRCH;
1169 
1170 	if (error == 0)
1171 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1172 
1173 	dmu_objset_close(os);
1174 	return (error);
1175 }
1176 
1177 static int
1178 zfs_set_prop_nvlist(const char *name, dev_t dev, cred_t *cr, nvlist_t *nvl)
1179 {
1180 	nvpair_t *elem;
1181 	int error;
1182 	uint64_t intval;
1183 	char *strval;
1184 
1185 	/*
1186 	 * First validate permission to set all of the properties
1187 	 */
1188 	elem = NULL;
1189 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1190 		const char *propname = nvpair_name(elem);
1191 		zfs_prop_t prop = zfs_name_to_prop(propname);
1192 
1193 		if (prop == ZFS_PROP_INVAL) {
1194 			/*
1195 			 * If this is a user-defined property, it must be a
1196 			 * string, and there is no further validation to do.
1197 			 */
1198 			if (!zfs_prop_user(propname) ||
1199 			    nvpair_type(elem) != DATA_TYPE_STRING)
1200 				return (EINVAL);
1201 
1202 			error = zfs_secpolicy_write_perms(name,
1203 			    ZFS_DELEG_PERM_USERPROP, cr);
1204 			if (error)
1205 				return (error);
1206 			continue;
1207 		}
1208 
1209 		if ((error = zfs_secpolicy_setprop(name, prop, cr)) != 0)
1210 			return (error);
1211 
1212 		/*
1213 		 * Check that this value is valid for this pool version
1214 		 */
1215 		switch (prop) {
1216 		case ZFS_PROP_COMPRESSION:
1217 			/*
1218 			 * If the user specified gzip compression, make sure
1219 			 * the SPA supports it. We ignore any errors here since
1220 			 * we'll catch them later.
1221 			 */
1222 			if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
1223 			    nvpair_value_uint64(elem, &intval) == 0 &&
1224 			    intval >= ZIO_COMPRESS_GZIP_1 &&
1225 			    intval <= ZIO_COMPRESS_GZIP_9) {
1226 				spa_t *spa;
1227 
1228 				if (spa_open(name, &spa, FTAG) == 0) {
1229 					if (spa_version(spa) <
1230 					    SPA_VERSION_GZIP_COMPRESSION) {
1231 						spa_close(spa, FTAG);
1232 						return (ENOTSUP);
1233 					}
1234 
1235 					spa_close(spa, FTAG);
1236 				}
1237 			}
1238 			break;
1239 
1240 		case ZFS_PROP_COPIES:
1241 		{
1242 			spa_t *spa;
1243 
1244 			if (spa_open(name, &spa, FTAG) == 0) {
1245 				if (spa_version(spa) <
1246 				    SPA_VERSION_DITTO_BLOCKS) {
1247 					spa_close(spa, FTAG);
1248 					return (ENOTSUP);
1249 				}
1250 				spa_close(spa, FTAG);
1251 			}
1252 			break;
1253 		}
1254 		}
1255 	}
1256 
1257 	elem = NULL;
1258 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1259 		const char *propname = nvpair_name(elem);
1260 		zfs_prop_t prop = zfs_name_to_prop(propname);
1261 
1262 		if (prop == ZFS_PROP_INVAL) {
1263 			VERIFY(nvpair_value_string(elem, &strval) == 0);
1264 			error = dsl_prop_set(name, propname, 1,
1265 			    strlen(strval) + 1, strval);
1266 			if (error == 0)
1267 				continue;
1268 			else
1269 				return (error);
1270 		}
1271 
1272 		switch (prop) {
1273 		case ZFS_PROP_QUOTA:
1274 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1275 			    (error = dsl_dir_set_quota(name, intval)) != 0)
1276 				return (error);
1277 			break;
1278 
1279 		case ZFS_PROP_RESERVATION:
1280 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1281 			    (error = dsl_dir_set_reservation(name,
1282 			    intval)) != 0)
1283 				return (error);
1284 			break;
1285 
1286 		case ZFS_PROP_VOLSIZE:
1287 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1288 			    (error = zvol_set_volsize(name, dev, intval)) != 0)
1289 				return (error);
1290 			break;
1291 
1292 		case ZFS_PROP_VOLBLOCKSIZE:
1293 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1294 			    (error = zvol_set_volblocksize(name, intval)) != 0)
1295 				return (error);
1296 			break;
1297 
1298 		case ZFS_PROP_VERSION:
1299 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1300 			    (error = zfs_set_version(name, intval)) != 0)
1301 				return (error);
1302 			break;
1303 
1304 		default:
1305 			if (nvpair_type(elem) == DATA_TYPE_STRING) {
1306 				if (zfs_prop_get_type(prop) !=
1307 				    prop_type_string)
1308 					return (EINVAL);
1309 				VERIFY(nvpair_value_string(elem, &strval) == 0);
1310 				if ((error = dsl_prop_set(name,
1311 				    nvpair_name(elem), 1, strlen(strval) + 1,
1312 				    strval)) != 0)
1313 					return (error);
1314 			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
1315 				const char *unused;
1316 
1317 				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
1318 
1319 				switch (zfs_prop_get_type(prop)) {
1320 				case prop_type_number:
1321 					break;
1322 				case prop_type_boolean:
1323 					if (intval > 1)
1324 						return (EINVAL);
1325 					break;
1326 				case prop_type_string:
1327 					return (EINVAL);
1328 				case prop_type_index:
1329 					if (zfs_prop_index_to_string(prop,
1330 					    intval, &unused) != 0)
1331 						return (EINVAL);
1332 					break;
1333 				default:
1334 					cmn_err(CE_PANIC,
1335 					    "unknown property type");
1336 					break;
1337 				}
1338 
1339 				if ((error = dsl_prop_set(name, propname,
1340 				    8, 1, &intval)) != 0)
1341 					return (error);
1342 			} else {
1343 				return (EINVAL);
1344 			}
1345 			break;
1346 		}
1347 	}
1348 
1349 	return (0);
1350 }
1351 
1352 static int
1353 zfs_ioc_set_prop(zfs_cmd_t *zc)
1354 {
1355 	nvlist_t *nvl;
1356 	int error;
1357 
1358 	/*
1359 	 * If zc_value is set, then this is an attempt to inherit a value.
1360 	 * Otherwise, zc_nvlist refers to a list of properties to set.
1361 	 */
1362 	if (zc->zc_value[0] != '\0') {
1363 		zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1364 
1365 		if (prop == ZFS_PROP_INVAL) {
1366 			if (!zfs_prop_user(zc->zc_value))
1367 				return (EINVAL);
1368 			error = zfs_secpolicy_write_perms(zc->zc_name,
1369 			    ZFS_DELEG_PERM_USERPROP,
1370 			    (cred_t *)(uintptr_t)zc->zc_cred);
1371 		} else {
1372 			if (!zfs_prop_inheritable(prop))
1373 				return (EINVAL);
1374 			error = zfs_secpolicy_setprop(zc->zc_name,
1375 			    prop, (cred_t *)(uintptr_t)zc->zc_cred);
1376 		}
1377 		if (error)
1378 			return (error);
1379 
1380 		return (dsl_prop_set(zc->zc_name, zc->zc_value, 0, 0, NULL));
1381 	}
1382 
1383 	if ((error = get_nvlist(zc, &nvl)) != 0)
1384 		return (error);
1385 
1386 	error = zfs_set_prop_nvlist(zc->zc_name, zc->zc_dev,
1387 	    (cred_t *)(uintptr_t)zc->zc_cred, nvl);
1388 
1389 	nvlist_free(nvl);
1390 	return (error);
1391 }
1392 
1393 static int
1394 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
1395 {
1396 	nvlist_t *nvl;
1397 	int error, reset_bootfs = 0;
1398 	uint64_t objnum;
1399 	uint64_t intval;
1400 	zpool_prop_t prop;
1401 	nvpair_t *elem;
1402 	char *propname, *strval;
1403 	spa_t *spa;
1404 	vdev_t *rvdev;
1405 	char *vdev_type;
1406 	objset_t *os;
1407 
1408 	if ((error = get_nvlist(zc, &nvl)) != 0)
1409 		return (error);
1410 
1411 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
1412 		nvlist_free(nvl);
1413 		return (error);
1414 	}
1415 
1416 	if (spa_version(spa) < SPA_VERSION_BOOTFS) {
1417 		nvlist_free(nvl);
1418 		spa_close(spa, FTAG);
1419 		return (ENOTSUP);
1420 	}
1421 
1422 	elem = NULL;
1423 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1424 
1425 		propname = nvpair_name(elem);
1426 
1427 		if ((prop = zpool_name_to_prop(propname)) ==
1428 		    ZFS_PROP_INVAL) {
1429 			nvlist_free(nvl);
1430 			spa_close(spa, FTAG);
1431 			return (EINVAL);
1432 		}
1433 
1434 		switch (prop) {
1435 		case ZPOOL_PROP_DELEGATION:
1436 			VERIFY(nvpair_value_uint64(elem, &intval) == 0);
1437 			if (intval > 1)
1438 				error = EINVAL;
1439 			break;
1440 		case ZPOOL_PROP_BOOTFS:
1441 			/*
1442 			 * A bootable filesystem can not be on a RAIDZ pool
1443 			 * nor a striped pool with more than 1 device.
1444 			 */
1445 			rvdev = spa->spa_root_vdev;
1446 			vdev_type =
1447 			    rvdev->vdev_child[0]->vdev_ops->vdev_op_type;
1448 			if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 ||
1449 			    (strcmp(vdev_type, VDEV_TYPE_MIRROR) != 0 &&
1450 			    rvdev->vdev_children > 1)) {
1451 				error = ENOTSUP;
1452 				break;
1453 			}
1454 
1455 			reset_bootfs = 1;
1456 
1457 			VERIFY(nvpair_value_string(elem, &strval) == 0);
1458 			if (strval == NULL || strval[0] == '\0') {
1459 				objnum = zpool_prop_default_numeric(
1460 				    ZPOOL_PROP_BOOTFS);
1461 				break;
1462 			}
1463 
1464 			if (error = dmu_objset_open(strval, DMU_OST_ZFS,
1465 			    DS_MODE_STANDARD | DS_MODE_READONLY, &os))
1466 				break;
1467 			objnum = dmu_objset_id(os);
1468 			dmu_objset_close(os);
1469 			break;
1470 		}
1471 
1472 		if (error)
1473 			break;
1474 	}
1475 	if (error == 0) {
1476 		if (reset_bootfs) {
1477 			VERIFY(nvlist_remove(nvl,
1478 			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
1479 			    DATA_TYPE_STRING) == 0);
1480 			VERIFY(nvlist_add_uint64(nvl,
1481 			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
1482 			    objnum) == 0);
1483 		}
1484 		error = spa_set_props(spa, nvl);
1485 	}
1486 
1487 	nvlist_free(nvl);
1488 	spa_close(spa, FTAG);
1489 
1490 	return (error);
1491 }
1492 
1493 static int
1494 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
1495 {
1496 	spa_t *spa;
1497 	int error;
1498 	nvlist_t *nvp = NULL;
1499 
1500 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1501 		return (error);
1502 
1503 	error = spa_get_props(spa, &nvp);
1504 
1505 	if (error == 0 && zc->zc_nvlist_dst != NULL)
1506 		error = put_nvlist(zc, nvp);
1507 	else
1508 		error = EFAULT;
1509 
1510 	spa_close(spa, FTAG);
1511 
1512 	if (nvp)
1513 		nvlist_free(nvp);
1514 	return (error);
1515 }
1516 
1517 static int
1518 zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc)
1519 {
1520 	nvlist_t *nvp;
1521 	int error;
1522 	uint32_t uid;
1523 	uint32_t gid;
1524 	uint32_t *groups;
1525 	uint_t group_cnt;
1526 	cred_t	*usercred;
1527 
1528 	if ((error = get_nvlist(zc, &nvp)) != 0) {
1529 		return (error);
1530 	}
1531 
1532 	if ((error = nvlist_lookup_uint32(nvp,
1533 	    ZFS_DELEG_PERM_UID, &uid)) != 0) {
1534 		nvlist_free(nvp);
1535 		return (EPERM);
1536 	}
1537 
1538 	if ((error = nvlist_lookup_uint32(nvp,
1539 	    ZFS_DELEG_PERM_GID, &gid)) != 0) {
1540 		nvlist_free(nvp);
1541 		return (EPERM);
1542 	}
1543 
1544 	if ((error = nvlist_lookup_uint32_array(nvp, ZFS_DELEG_PERM_GROUPS,
1545 	    &groups, &group_cnt)) != 0) {
1546 		nvlist_free(nvp);
1547 		return (EPERM);
1548 	}
1549 	usercred = cralloc();
1550 	if ((crsetugid(usercred, uid, gid) != 0) ||
1551 	    (crsetgroups(usercred, group_cnt, (gid_t *)groups) != 0)) {
1552 		nvlist_free(nvp);
1553 		crfree(usercred);
1554 		return (EPERM);
1555 	}
1556 	nvlist_free(nvp);
1557 	error = dsl_deleg_access(zc->zc_name,
1558 	    ZFS_DELEG_PERM_SHAREISCSI, usercred);
1559 	crfree(usercred);
1560 	return (error);
1561 }
1562 
1563 static int
1564 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
1565 {
1566 	int error;
1567 	nvlist_t *fsaclnv = NULL;
1568 	cred_t *cr;
1569 
1570 	if ((error = get_nvlist(zc, &fsaclnv)) != 0)
1571 		return (error);
1572 
1573 	/*
1574 	 * Verify nvlist is constructed correctly
1575 	 */
1576 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
1577 		nvlist_free(fsaclnv);
1578 		return (EINVAL);
1579 	}
1580 
1581 	/*
1582 	 * If we don't have PRIV_SYS_MOUNT, then validate
1583 	 * that user is allowed to hand out each permission in
1584 	 * the nvlist(s)
1585 	 */
1586 
1587 	cr = (cred_t *)(uintptr_t)zc->zc_cred;
1588 	error = secpolicy_zfs(cr);
1589 	if (error) {
1590 		if (zc->zc_perm_action == B_FALSE)
1591 			error = dsl_deleg_can_allow(zc->zc_name, fsaclnv, cr);
1592 		else
1593 			error = dsl_deleg_can_unallow(zc->zc_name, fsaclnv, cr);
1594 	}
1595 
1596 	if (error == 0)
1597 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
1598 
1599 	nvlist_free(fsaclnv);
1600 	return (error);
1601 }
1602 
1603 static int
1604 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
1605 {
1606 	nvlist_t *nvp;
1607 	int error;
1608 
1609 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
1610 		error = put_nvlist(zc, nvp);
1611 		nvlist_free(nvp);
1612 	}
1613 
1614 	return (error);
1615 }
1616 
1617 static int
1618 zfs_ioc_create_minor(zfs_cmd_t *zc)
1619 {
1620 	return (zvol_create_minor(zc->zc_name, zc->zc_dev));
1621 }
1622 
1623 static int
1624 zfs_ioc_remove_minor(zfs_cmd_t *zc)
1625 {
1626 	return (zvol_remove_minor(zc->zc_name));
1627 }
1628 
1629 /*
1630  * Search the vfs list for a specified resource.  Returns a pointer to it
1631  * or NULL if no suitable entry is found. The caller of this routine
1632  * is responsible for releasing the returned vfs pointer.
1633  */
1634 static vfs_t *
1635 zfs_get_vfs(const char *resource)
1636 {
1637 	struct vfs *vfsp;
1638 	struct vfs *vfs_found = NULL;
1639 
1640 	vfs_list_read_lock();
1641 	vfsp = rootvfs;
1642 	do {
1643 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
1644 			VFS_HOLD(vfsp);
1645 			vfs_found = vfsp;
1646 			break;
1647 		}
1648 		vfsp = vfsp->vfs_next;
1649 	} while (vfsp != rootvfs);
1650 	vfs_list_unlock();
1651 	return (vfs_found);
1652 }
1653 
1654 /* ARGSUSED */
1655 static void
1656 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
1657 {
1658 	nvlist_t *nvprops = arg;
1659 	uint64_t version = ZPL_VERSION;
1660 
1661 	(void) nvlist_lookup_uint64(nvprops,
1662 	    zfs_prop_to_name(ZFS_PROP_VERSION), &version);
1663 
1664 	zfs_create_fs(os, cr, version, tx);
1665 }
1666 
1667 static int
1668 zfs_ioc_create(zfs_cmd_t *zc)
1669 {
1670 	objset_t *clone;
1671 	int error = 0;
1672 	nvlist_t *nvprops = NULL;
1673 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
1674 	dmu_objset_type_t type = zc->zc_objset_type;
1675 
1676 	switch (type) {
1677 
1678 	case DMU_OST_ZFS:
1679 		cbfunc = zfs_create_cb;
1680 		break;
1681 
1682 	case DMU_OST_ZVOL:
1683 		cbfunc = zvol_create_cb;
1684 		break;
1685 
1686 	default:
1687 		cbfunc = NULL;
1688 	}
1689 	if (strchr(zc->zc_name, '@'))
1690 		return (EINVAL);
1691 
1692 	if (zc->zc_nvlist_src != NULL &&
1693 	    (error = get_nvlist(zc, &nvprops)) != 0)
1694 		return (error);
1695 
1696 	if (zc->zc_value[0] != '\0') {
1697 		/*
1698 		 * We're creating a clone of an existing snapshot.
1699 		 */
1700 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
1701 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
1702 			nvlist_free(nvprops);
1703 			return (EINVAL);
1704 		}
1705 
1706 		error = dmu_objset_open(zc->zc_value, type,
1707 		    DS_MODE_STANDARD | DS_MODE_READONLY, &clone);
1708 		if (error) {
1709 			nvlist_free(nvprops);
1710 			return (error);
1711 		}
1712 		error = dmu_objset_create(zc->zc_name, type, clone, NULL, NULL);
1713 		dmu_objset_close(clone);
1714 	} else {
1715 		if (cbfunc == NULL) {
1716 			nvlist_free(nvprops);
1717 			return (EINVAL);
1718 		}
1719 
1720 		if (type == DMU_OST_ZVOL) {
1721 			uint64_t volsize, volblocksize;
1722 
1723 			if (nvprops == NULL ||
1724 			    nvlist_lookup_uint64(nvprops,
1725 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
1726 			    &volsize) != 0) {
1727 				nvlist_free(nvprops);
1728 				return (EINVAL);
1729 			}
1730 
1731 			if ((error = nvlist_lookup_uint64(nvprops,
1732 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
1733 			    &volblocksize)) != 0 && error != ENOENT) {
1734 				nvlist_free(nvprops);
1735 				return (EINVAL);
1736 			}
1737 
1738 			if (error != 0)
1739 				volblocksize = zfs_prop_default_numeric(
1740 				    ZFS_PROP_VOLBLOCKSIZE);
1741 
1742 			if ((error = zvol_check_volblocksize(
1743 			    volblocksize)) != 0 ||
1744 			    (error = zvol_check_volsize(volsize,
1745 			    volblocksize)) != 0) {
1746 				nvlist_free(nvprops);
1747 				return (error);
1748 			}
1749 		} else if (type == DMU_OST_ZFS) {
1750 			uint64_t version;
1751 
1752 			if (0 == nvlist_lookup_uint64(nvprops,
1753 			    zfs_prop_to_name(ZFS_PROP_VERSION), &version) &&
1754 			    (version < ZPL_VERSION_INITIAL ||
1755 			    version > ZPL_VERSION)) {
1756 				nvlist_free(nvprops);
1757 				return (EINVAL);
1758 			}
1759 		}
1760 
1761 		error = dmu_objset_create(zc->zc_name, type, NULL, cbfunc,
1762 		    nvprops);
1763 	}
1764 
1765 	/*
1766 	 * It would be nice to do this atomically.
1767 	 */
1768 	if (error == 0) {
1769 		if ((error = zfs_set_prop_nvlist(zc->zc_name,
1770 		    zc->zc_dev, (cred_t *)(uintptr_t)zc->zc_cred,
1771 		    nvprops)) != 0)
1772 			(void) dmu_objset_destroy(zc->zc_name);
1773 	}
1774 
1775 	nvlist_free(nvprops);
1776 	return (error);
1777 }
1778 
1779 static int
1780 zfs_ioc_snapshot(zfs_cmd_t *zc)
1781 {
1782 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
1783 		return (EINVAL);
1784 	return (dmu_objset_snapshot(zc->zc_name,
1785 	    zc->zc_value, zc->zc_cookie));
1786 }
1787 
1788 int
1789 zfs_unmount_snap(char *name, void *arg)
1790 {
1791 	char *snapname = arg;
1792 	char *cp;
1793 	vfs_t *vfsp = NULL;
1794 
1795 	/*
1796 	 * Snapshots (which are under .zfs control) must be unmounted
1797 	 * before they can be destroyed.
1798 	 */
1799 
1800 	if (snapname) {
1801 		(void) strcat(name, "@");
1802 		(void) strcat(name, snapname);
1803 		vfsp = zfs_get_vfs(name);
1804 		cp = strchr(name, '@');
1805 		*cp = '\0';
1806 	} else if (strchr(name, '@')) {
1807 		vfsp = zfs_get_vfs(name);
1808 	}
1809 
1810 	if (vfsp) {
1811 		/*
1812 		 * Always force the unmount for snapshots.
1813 		 */
1814 		int flag = MS_FORCE;
1815 		int err;
1816 
1817 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
1818 			VFS_RELE(vfsp);
1819 			return (err);
1820 		}
1821 		VFS_RELE(vfsp);
1822 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
1823 			return (err);
1824 	}
1825 	return (0);
1826 }
1827 
1828 static int
1829 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
1830 {
1831 	int err;
1832 
1833 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
1834 		return (EINVAL);
1835 	err = dmu_objset_find(zc->zc_name,
1836 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
1837 	if (err)
1838 		return (err);
1839 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
1840 }
1841 
1842 static int
1843 zfs_ioc_destroy(zfs_cmd_t *zc)
1844 {
1845 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
1846 		int err = zfs_unmount_snap(zc->zc_name, NULL);
1847 		if (err)
1848 			return (err);
1849 	}
1850 
1851 	return (dmu_objset_destroy(zc->zc_name));
1852 }
1853 
1854 static int
1855 zfs_ioc_rollback(zfs_cmd_t *zc)
1856 {
1857 	return (dmu_objset_rollback(zc->zc_name));
1858 }
1859 
1860 static int
1861 zfs_ioc_rename(zfs_cmd_t *zc)
1862 {
1863 	boolean_t recursive = zc->zc_cookie & 1;
1864 
1865 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
1866 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0)
1867 		return (EINVAL);
1868 
1869 	/*
1870 	 * Unmount snapshot unless we're doing a recursive rename,
1871 	 * in which case the dataset code figures out which snapshots
1872 	 * to unmount.
1873 	 */
1874 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
1875 	    zc->zc_objset_type == DMU_OST_ZFS) {
1876 		int err = zfs_unmount_snap(zc->zc_name, NULL);
1877 		if (err)
1878 			return (err);
1879 	}
1880 
1881 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
1882 }
1883 
1884 static int
1885 zfs_ioc_recvbackup(zfs_cmd_t *zc)
1886 {
1887 	file_t *fp;
1888 	int error, fd;
1889 	offset_t new_off;
1890 
1891 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
1892 	    strchr(zc->zc_value, '@') == NULL)
1893 		return (EINVAL);
1894 
1895 	fd = zc->zc_cookie;
1896 	fp = getf(fd);
1897 	if (fp == NULL)
1898 		return (EBADF);
1899 	error = dmu_recvbackup(zc->zc_value, &zc->zc_begin_record,
1900 	    &zc->zc_cookie, (boolean_t)zc->zc_guid, fp->f_vnode,
1901 	    fp->f_offset);
1902 
1903 	new_off = fp->f_offset + zc->zc_cookie;
1904 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &new_off) == 0)
1905 		fp->f_offset = new_off;
1906 
1907 	releasef(fd);
1908 	return (error);
1909 }
1910 
1911 static int
1912 zfs_ioc_sendbackup(zfs_cmd_t *zc)
1913 {
1914 	objset_t *fromsnap = NULL;
1915 	objset_t *tosnap;
1916 	file_t *fp;
1917 	int error;
1918 
1919 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1920 	    DS_MODE_STANDARD | DS_MODE_READONLY, &tosnap);
1921 	if (error)
1922 		return (error);
1923 
1924 	if (zc->zc_value[0] != '\0') {
1925 		char buf[MAXPATHLEN];
1926 		char *cp;
1927 
1928 		(void) strncpy(buf, zc->zc_name, sizeof (buf));
1929 		cp = strchr(buf, '@');
1930 		if (cp)
1931 			*(cp+1) = 0;
1932 		(void) strncat(buf, zc->zc_value, sizeof (buf));
1933 		error = dmu_objset_open(buf, DMU_OST_ANY,
1934 		    DS_MODE_STANDARD | DS_MODE_READONLY, &fromsnap);
1935 		if (error) {
1936 			dmu_objset_close(tosnap);
1937 			return (error);
1938 		}
1939 	}
1940 
1941 	fp = getf(zc->zc_cookie);
1942 	if (fp == NULL) {
1943 		dmu_objset_close(tosnap);
1944 		if (fromsnap)
1945 			dmu_objset_close(fromsnap);
1946 		return (EBADF);
1947 	}
1948 
1949 	error = dmu_sendbackup(tosnap, fromsnap, fp->f_vnode);
1950 
1951 	releasef(zc->zc_cookie);
1952 	if (fromsnap)
1953 		dmu_objset_close(fromsnap);
1954 	dmu_objset_close(tosnap);
1955 	return (error);
1956 }
1957 
1958 static int
1959 zfs_ioc_inject_fault(zfs_cmd_t *zc)
1960 {
1961 	int id, error;
1962 
1963 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
1964 	    &zc->zc_inject_record);
1965 
1966 	if (error == 0)
1967 		zc->zc_guid = (uint64_t)id;
1968 
1969 	return (error);
1970 }
1971 
1972 static int
1973 zfs_ioc_clear_fault(zfs_cmd_t *zc)
1974 {
1975 	return (zio_clear_fault((int)zc->zc_guid));
1976 }
1977 
1978 static int
1979 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
1980 {
1981 	int id = (int)zc->zc_guid;
1982 	int error;
1983 
1984 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
1985 	    &zc->zc_inject_record);
1986 
1987 	zc->zc_guid = id;
1988 
1989 	return (error);
1990 }
1991 
1992 static int
1993 zfs_ioc_error_log(zfs_cmd_t *zc)
1994 {
1995 	spa_t *spa;
1996 	int error;
1997 	size_t count = (size_t)zc->zc_nvlist_dst_size;
1998 
1999 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2000 		return (error);
2001 
2002 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
2003 	    &count);
2004 	if (error == 0)
2005 		zc->zc_nvlist_dst_size = count;
2006 	else
2007 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
2008 
2009 	spa_close(spa, FTAG);
2010 
2011 	return (error);
2012 }
2013 
2014 static int
2015 zfs_ioc_clear(zfs_cmd_t *zc)
2016 {
2017 	spa_t *spa;
2018 	vdev_t *vd;
2019 	int error;
2020 	uint64_t txg;
2021 
2022 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2023 		return (error);
2024 
2025 	txg = spa_vdev_enter(spa);
2026 
2027 	if (zc->zc_guid == 0) {
2028 		vd = NULL;
2029 	} else if ((vd = spa_lookup_by_guid(spa, zc->zc_guid)) == NULL) {
2030 		(void) spa_vdev_exit(spa, NULL, txg, ENODEV);
2031 		spa_close(spa, FTAG);
2032 		return (ENODEV);
2033 	}
2034 
2035 	vdev_clear(spa, vd);
2036 
2037 	(void) spa_vdev_exit(spa, NULL, txg, 0);
2038 
2039 	spa_close(spa, FTAG);
2040 
2041 	return (0);
2042 }
2043 
2044 static int
2045 zfs_ioc_promote(zfs_cmd_t *zc)
2046 {
2047 	char *cp;
2048 
2049 	/*
2050 	 * We don't need to unmount *all* the origin fs's snapshots, but
2051 	 * it's easier.
2052 	 */
2053 	cp = strchr(zc->zc_value, '@');
2054 	if (cp)
2055 		*cp = '\0';
2056 	(void) dmu_objset_find(zc->zc_value,
2057 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
2058 	return (dsl_dataset_promote(zc->zc_name));
2059 }
2060 
2061 /*
2062  * We don't want to have a hard dependency
2063  * against some special symbols in sharefs
2064  * and nfs.  Determine them if needed when
2065  * the first file system is shared.
2066  * Neither sharefs nor nfs is an unloadable module.
2067  */
2068 int (*zexport_fs)(void *arg);	/* "nfs_export", looked up in fs/nfs */
2069 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);	/* "sharefs_impl", looked up in fs/sharefs */
2070 
2071 int zfs_share_inited;	/* set to 1 by zfs_ioc_share() once both symbols are resolved */
2072 ddi_modhandle_t nfs_mod;	/* handle from ddi_modopen("fs/nfs"); closed in _fini() */
2073 ddi_modhandle_t sharefs_mod;	/* handle from ddi_modopen("fs/sharefs"); closed in _fini() */
2074 kmutex_t zfs_share_lock;	/* held by zfs_ioc_share() while the lookups are done */
2075 
2076 static int
2077 zfs_ioc_share(zfs_cmd_t *zc)
2078 {
2079 	int error;
2080 	int opcode;
2081 
2082 	if (zfs_share_inited == 0) {
2083 		mutex_enter(&zfs_share_lock);
2084 		nfs_mod = ddi_modopen("fs/nfs", KRTLD_MODE_FIRST, &error);
2085 		sharefs_mod = ddi_modopen("fs/sharefs",
2086 		    KRTLD_MODE_FIRST, &error);
2087 		if (nfs_mod == NULL || sharefs_mod == NULL) {
2088 			mutex_exit(&zfs_share_lock);
2089 			return (ENOSYS);
2090 		}
2091 		if (zexport_fs == NULL && ((zexport_fs = (int (*)(void *))
2092 		    ddi_modsym(nfs_mod, "nfs_export", &error)) == NULL)) {
2093 			mutex_exit(&zfs_share_lock);
2094 			return (ENOSYS);
2095 		}
2096 
2097 		if (zshare_fs == NULL && ((zshare_fs =
2098 		    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
2099 		    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
2100 			mutex_exit(&zfs_share_lock);
2101 			return (ENOSYS);
2102 		}
2103 		zfs_share_inited = 1;
2104 		mutex_exit(&zfs_share_lock);
2105 	}
2106 
2107 	if (error = zexport_fs((void *)(uintptr_t)zc->zc_share.z_exportdata))
2108 		return (error);
2109 
2110 	opcode = (zc->zc_share.z_sharetype == B_TRUE) ?
2111 	    SHAREFS_ADD : SHAREFS_REMOVE;
2112 
2113 	error = zshare_fs(opcode,
2114 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
2115 	    zc->zc_share.z_sharemax);
2116 
2117 	return (error);
2118 
2119 }
2120 
/*
 * ioctl dispatch table, indexed by (cmd - ZFS_IOC) in zfsdev_ioctl(), so the
 * order of the entries must not change.  Each entry gives, in order: the
 * handler, the security-policy check that runs before it, the kind of
 * validation applied to zc_name (POOL_NAME, DATASET_NAME or NO_NAME), and
 * whether zfsdev_ioctl() calls zfs_log_history() after a successful call.
 */
2121 /*
2122  * pool create, destroy and export have their history flag set to B_FALSE:
2123  * they don't log the history as part of zfsdev_ioctl; per the original note,
2124  * zfs_ioc_pool_create and zfs_ioc_pool_export do the logging themselves.
2125  */
2126 static zfs_ioc_vec_t zfs_ioc_vec[] = {
2127 	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE },
2128 	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE },
2129 	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2130 	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE },
2131 	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE },
2132 	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE },
2133 	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE },
2134 	{ zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2135 	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE },
2136 	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE },
2137 	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE },
2138 	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2139 	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2140 	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE },
2141 	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2142 	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2143 	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE },
2144 	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE },
2145 	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read,
2146 	    DATASET_NAME, B_FALSE },
2147 	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read,
2148 	    DATASET_NAME, B_FALSE },
2149 	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE },
2150 	{ zfs_ioc_create_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE },
2151 	{ zfs_ioc_remove_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE },
2152 	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE },
2153 	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE },
2154 	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE },
2155 	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE },
2156 	{ zfs_ioc_recvbackup, zfs_secpolicy_receive, DATASET_NAME, B_TRUE },
2157 	{ zfs_ioc_sendbackup, zfs_secpolicy_send, DATASET_NAME, B_TRUE },
2158 	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE },
2159 	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE },
2160 	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE },
2161 	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE },
2162 	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE },
2163 	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE },
2164 	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy,	DATASET_NAME, B_TRUE },
2165 	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE },
2166 	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE },
2167 	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, NO_NAME, B_FALSE },
2168 	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE },
2169 	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE },
2170 	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE },
2171 	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE },
2172 	{ zfs_ioc_iscsi_perm_check, zfs_secpolicy_iscsi,
2173 	    DATASET_NAME, B_FALSE },
2174 	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE }
2175 };
2176 
2177 static int
2178 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
2179 {
2180 	zfs_cmd_t *zc;
2181 	uint_t vec;
2182 	int error, rc;
2183 
2184 	if (getminor(dev) != 0)
2185 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
2186 
2187 	vec = cmd - ZFS_IOC;
2188 
2189 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
2190 		return (EINVAL);
2191 
2192 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2193 
2194 	error = xcopyin((void *)arg, zc, sizeof (zfs_cmd_t));
2195 
2196 	if (error == 0) {
2197 		zc->zc_cred = (uintptr_t)cr;
2198 		zc->zc_dev = dev;
2199 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
2200 	}
2201 
2202 	/*
2203 	 * Ensure that all pool/dataset names are valid before we pass down to
2204 	 * the lower layers.
2205 	 */
2206 	if (error == 0) {
2207 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
2208 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
2209 		case POOL_NAME:
2210 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
2211 				error = EINVAL;
2212 			break;
2213 
2214 		case DATASET_NAME:
2215 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
2216 				error = EINVAL;
2217 			break;
2218 
2219 		case NO_NAME:
2220 			break;
2221 		}
2222 	}
2223 
2224 	if (error == 0)
2225 		error = zfs_ioc_vec[vec].zvec_func(zc);
2226 
2227 	rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t));
2228 	if (error == 0) {
2229 		error = rc;
2230 		if (zfs_ioc_vec[vec].zvec_his_log == B_TRUE)
2231 			zfs_log_history(zc);
2232 	}
2233 
2234 	kmem_free(zc, sizeof (zfs_cmd_t));
2235 	return (error);
2236 }
2237 
2238 static int
2239 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2240 {
2241 	if (cmd != DDI_ATTACH)
2242 		return (DDI_FAILURE);
2243 
2244 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
2245 	    DDI_PSEUDO, 0) == DDI_FAILURE)
2246 		return (DDI_FAILURE);
2247 
2248 	zfs_dip = dip;
2249 
2250 	ddi_report_dev(dip);
2251 
2252 	return (DDI_SUCCESS);
2253 }
2254 
2255 static int
2256 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2257 {
2258 	if (spa_busy() || zfs_busy() || zvol_busy())
2259 		return (DDI_FAILURE);
2260 
2261 	if (cmd != DDI_DETACH)
2262 		return (DDI_FAILURE);
2263 
2264 	zfs_dip = NULL;
2265 
2266 	ddi_prop_remove_all(dip);
2267 	ddi_remove_minor_node(dip, NULL);
2268 
2269 	return (DDI_SUCCESS);
2270 }
2271 
2272 /*ARGSUSED*/
2273 static int
2274 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2275 {
2276 	switch (infocmd) {
2277 	case DDI_INFO_DEVT2DEVINFO:
2278 		*result = zfs_dip;
2279 		return (DDI_SUCCESS);
2280 
2281 	case DDI_INFO_DEVT2INSTANCE:
2282 		*result = (void *)0;
2283 		return (DDI_SUCCESS);
2284 	}
2285 
2286 	return (DDI_FAILURE);
2287 }
2288 
2289 /*
2290  * OK, so this is a little weird.
2291  *
2292  * /dev/zfs is the control node, i.e. minor 0.
2293  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
2294  *
2295  * /dev/zfs has basically nothing to do except serve up ioctls,
2296  * so most of the standard driver entry points are in zvol.c.
 *
 * The one exception in this table is the ioctl entry, zfsdev_ioctl(): it
 * handles minor 0 itself and forwards every other minor to zvol_ioctl().
 * The initializer is positional, so the order of the entries matters.
2297  */
2298 static struct cb_ops zfs_cb_ops = {
2299 	zvol_open,	/* open */
2300 	zvol_close,	/* close */
2301 	zvol_strategy,	/* strategy */
2302 	nodev,		/* print */
2303 	nodev,		/* dump */
2304 	zvol_read,	/* read */
2305 	zvol_write,	/* write */
2306 	zfsdev_ioctl,	/* ioctl */
2307 	nodev,		/* devmap */
2308 	nodev,		/* mmap */
2309 	nodev,		/* segmap */
2310 	nochpoll,	/* poll */
2311 	ddi_prop_op,	/* prop_op */
2312 	NULL,		/* streamtab */
2313 	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
2314 	CB_REV,		/* version */
2315 	nodev,		/* async read */
2316 	nodev,		/* async write */
2317 };
2318 
/*
 * Device operations for the zfs driver.  info, attach and detach are
 * implemented in this file (zfs_info, zfs_attach, zfs_detach); the
 * character/block entry points are supplied through zfs_cb_ops.  The
 * initializer is positional, so the order of the entries matters.
 */
2319 static struct dev_ops zfs_dev_ops = {
2320 	DEVO_REV,	/* version */
2321 	0,		/* refcnt */
2322 	zfs_info,	/* info */
2323 	nulldev,	/* identify */
2324 	nulldev,	/* probe */
2325 	zfs_attach,	/* attach */
2326 	zfs_detach,	/* detach */
2327 	nodev,		/* reset */
2328 	&zfs_cb_ops,	/* driver operations */
2329 	NULL		/* no bus operations */
2330 };
2331 
/*
 * Driver linkage: ties zfs_dev_ops to mod_driverops and carries the module
 * description string, which embeds SPA_VERSION_STRING.
 */
2332 static struct modldrv zfs_modldrv = {
2333 	&mod_driverops, "ZFS storage pool version " SPA_VERSION_STRING,
2334 	    &zfs_dev_ops
2335 };
2336 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info().  It lists two linkage structures:
 * zfs_modlfs (defined outside this part of the file -- presumably the
 * filesystem linkage; confirm) and the driver linkage zfs_modldrv.
 */
2337 static struct modlinkage modlinkage = {
2338 	MODREV_1,
2339 	(void *)&zfs_modlfs,
2340 	(void *)&zfs_modldrv,
2341 	NULL
2342 };
2343 
2344 
/* Key registered with tsd_create() in _init() and released in _fini(). */
2345 uint_t zfs_fsyncer_key;
2346 
2347 int
2348 _init(void)
2349 {
2350 	int error;
2351 
2352 	spa_init(FREAD | FWRITE);
2353 	zfs_init();
2354 	zvol_init();
2355 
2356 	if ((error = mod_install(&modlinkage)) != 0) {
2357 		zvol_fini();
2358 		zfs_fini();
2359 		spa_fini();
2360 		return (error);
2361 	}
2362 
2363 	tsd_create(&zfs_fsyncer_key, NULL);
2364 
2365 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
2366 	ASSERT(error == 0);
2367 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
2368 
2369 	return (0);
2370 }
2371 
2372 int
2373 _fini(void)
2374 {
2375 	int error;
2376 
2377 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
2378 		return (EBUSY);
2379 
2380 	if ((error = mod_remove(&modlinkage)) != 0)
2381 		return (error);
2382 
2383 	zvol_fini();
2384 	zfs_fini();
2385 	spa_fini();
2386 	if (zfs_share_inited) {
2387 		(void) ddi_modclose(nfs_mod);
2388 		(void) ddi_modclose(sharefs_mod);
2389 	}
2390 
2391 	tsd_destroy(&zfs_fsyncer_key);
2392 	ldi_ident_release(zfs_li);
2393 	zfs_li = NULL;
2394 	mutex_destroy(&zfs_share_lock);
2395 
2396 	return (error);
2397 }
2398 
2399 int
2400 _info(struct modinfo *modinfop)
2401 {
2402 	return (mod_info(&modlinkage, modinfop));
2403 }
2404