xref: /titanic_52/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision fa94a07fd0519b8abfd871ad8fe60e6bebe1e2bb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/errno.h>
31 #include <sys/uio.h>
32 #include <sys/buf.h>
33 #include <sys/modctl.h>
34 #include <sys/open.h>
35 #include <sys/file.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/cmn_err.h>
39 #include <sys/stat.h>
40 #include <sys/zfs_ioctl.h>
41 #include <sys/zfs_i18n.h>
42 #include <sys/zfs_znode.h>
43 #include <sys/zap.h>
44 #include <sys/spa.h>
45 #include <sys/spa_impl.h>
46 #include <sys/vdev.h>
47 #include <sys/vdev_impl.h>
48 #include <sys/dmu.h>
49 #include <sys/dsl_dir.h>
50 #include <sys/dsl_dataset.h>
51 #include <sys/dsl_prop.h>
52 #include <sys/dsl_deleg.h>
53 #include <sys/dmu_objset.h>
54 #include <sys/ddi.h>
55 #include <sys/sunddi.h>
56 #include <sys/sunldi.h>
57 #include <sys/policy.h>
58 #include <sys/zone.h>
59 #include <sys/nvpair.h>
60 #include <sys/pathname.h>
61 #include <sys/mount.h>
62 #include <sys/sdt.h>
63 #include <sys/fs/zfs.h>
64 #include <sys/zfs_ctldir.h>
65 #include <sys/zfs_dir.h>
66 #include <sys/zvol.h>
67 #include <sharefs/share.h>
68 #include <sys/zfs_znode.h>
69 #include <sys/zfs_vfsops.h>
70 #include <sys/dmu_objset.h>
71 
72 #include "zfs_namecheck.h"
73 #include "zfs_prop.h"
74 #include "zfs_deleg.h"
75 
/* Filesystem module linkage; defined outside this file. */
extern struct modlfs zfs_modlfs;

/* Module-wide init/teardown entry points; defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/* LDI identifier for this module; NULL until assigned elsewhere. */
ldi_ident_t zfs_li = NULL;
/* dev_info node for the driver -- presumably set at attach; TODO confirm. */
dev_info_t *zfs_dip;
83 
/* An ioctl handler: receives the command structure, returns an errno. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/* A security-policy check: receives the command and the caller's cred. */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);

/*
 * Per-ioctl descriptor.
 * NOTE(review): the table built from this type is outside this view, so
 * the field descriptions below are inferred from the field names -- confirm.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;	/* handler for the ioctl */
	zfs_secpolicy_func_t	*zvec_secpolicy; /* permission check */
	enum {
		NO_NAME,
		POOL_NAME,
		DATASET_NAME
	} zvec_namecheck;	/* presumably: how zc_name is validated */
	boolean_t		zvec_his_log;	/* presumably: log to history */
} zfs_ioc_vec_t;
97 
98 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
99 void
100 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
101 {
102 	const char *newfile;
103 	char buf[256];
104 	va_list adx;
105 
106 	/*
107 	 * Get rid of annoying "../common/" prefix to filename.
108 	 */
109 	newfile = strrchr(file, '/');
110 	if (newfile != NULL) {
111 		newfile = newfile + 1; /* Get rid of leading / */
112 	} else {
113 		newfile = file;
114 	}
115 
116 	va_start(adx, fmt);
117 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
118 	va_end(adx);
119 
120 	/*
121 	 * To get this data, use the zfs-dprintf probe as so:
122 	 * dtrace -q -n 'zfs-dprintf \
123 	 *	/stringof(arg0) == "dbuf.c"/ \
124 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
125 	 * arg0 = file name
126 	 * arg1 = function name
127 	 * arg2 = line number
128 	 * arg3 = message
129 	 */
130 	DTRACE_PROBE4(zfs__dprintf,
131 	    char *, newfile, char *, func, int, line, char *, buf);
132 }
133 
134 static void
135 history_str_free(char *buf)
136 {
137 	kmem_free(buf, HIS_MAX_RECORD_LEN);
138 }
139 
140 static char *
141 history_str_get(zfs_cmd_t *zc)
142 {
143 	char *buf;
144 
145 	if (zc->zc_history == NULL)
146 		return (NULL);
147 
148 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
149 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
150 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
151 		history_str_free(buf);
152 		return (NULL);
153 	}
154 
155 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
156 
157 	return (buf);
158 }
159 
160 /*
161  * zfs_check_version
162  *
163  *	Return non-zero if the spa version is less than requested version.
164  */
165 static int
166 zfs_check_version(const char *name, int version)
167 {
168 
169 	spa_t *spa;
170 
171 	if (spa_open(name, &spa, FTAG) == 0) {
172 		if (spa_version(spa) < version) {
173 			spa_close(spa, FTAG);
174 			return (1);
175 		}
176 		spa_close(spa, FTAG);
177 	}
178 	return (0);
179 }
180 
181 static void
182 zfs_log_history(zfs_cmd_t *zc)
183 {
184 	spa_t *spa;
185 	char *buf;
186 
187 	if ((buf = history_str_get(zc)) == NULL)
188 		return;
189 
190 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
191 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
192 			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
193 		spa_close(spa, FTAG);
194 	}
195 	history_str_free(buf);
196 }
197 
198 /*
199  * Policy for top-level read operations (list pools).  Requires no privileges,
200  * and can be used in the local zone, as there is no associated dataset.
201  */
202 /* ARGSUSED */
203 static int
204 zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
205 {
206 	return (0);
207 }
208 
209 /*
210  * Policy for dataset read operations (list children, get statistics).  Requires
211  * no privileges, but must be visible in the local zone.
212  */
213 /* ARGSUSED */
214 static int
215 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
216 {
217 	if (INGLOBALZONE(curproc) ||
218 	    zone_dataset_visible(zc->zc_name, NULL))
219 		return (0);
220 
221 	return (ENOENT);
222 }
223 
224 static int
225 zfs_dozonecheck(const char *dataset, cred_t *cr)
226 {
227 	uint64_t zoned;
228 	int writable = 1;
229 
230 	/*
231 	 * The dataset must be visible by this zone -- check this first
232 	 * so they don't see EPERM on something they shouldn't know about.
233 	 */
234 	if (!INGLOBALZONE(curproc) &&
235 	    !zone_dataset_visible(dataset, &writable))
236 		return (ENOENT);
237 
238 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
239 		return (ENOENT);
240 
241 	if (INGLOBALZONE(curproc)) {
242 		/*
243 		 * If the fs is zoned, only root can access it from the
244 		 * global zone.
245 		 */
246 		if (secpolicy_zfs(cr) && zoned)
247 			return (EPERM);
248 	} else {
249 		/*
250 		 * If we are in a local zone, the 'zoned' property must be set.
251 		 */
252 		if (!zoned)
253 			return (EPERM);
254 
255 		/* must be writable by this zone */
256 		if (!writable)
257 			return (EPERM);
258 	}
259 	return (0);
260 }
261 
262 int
263 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
264 {
265 	int error;
266 
267 	error = zfs_dozonecheck(name, cr);
268 	if (error == 0) {
269 		error = secpolicy_zfs(cr);
270 		if (error)
271 			error = dsl_deleg_access(name, perm, cr);
272 	}
273 	return (error);
274 }
275 
276 static int
277 zfs_secpolicy_setprop(const char *name, zfs_prop_t prop, cred_t *cr)
278 {
279 	/*
280 	 * Check permissions for special properties.
281 	 */
282 	switch (prop) {
283 	case ZFS_PROP_ZONED:
284 		/*
285 		 * Disallow setting of 'zoned' from within a local zone.
286 		 */
287 		if (!INGLOBALZONE(curproc))
288 			return (EPERM);
289 		break;
290 
291 	case ZFS_PROP_QUOTA:
292 		if (!INGLOBALZONE(curproc)) {
293 			uint64_t zoned;
294 			char setpoint[MAXNAMELEN];
295 			/*
296 			 * Unprivileged users are allowed to modify the
297 			 * quota on things *under* (ie. contained by)
298 			 * the thing they own.
299 			 */
300 			if (dsl_prop_get_integer(name, "zoned", &zoned,
301 			    setpoint))
302 				return (EPERM);
303 			if (!zoned || strlen(name) <= strlen(setpoint))
304 				return (EPERM);
305 		}
306 		break;
307 	}
308 
309 	return (zfs_secpolicy_write_perms(name, zfs_prop_to_name(prop), cr));
310 }
311 
312 int
313 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
314 {
315 	int error;
316 
317 	error = zfs_dozonecheck(zc->zc_name, cr);
318 	if (error)
319 		return (error);
320 
321 	/*
322 	 * permission to set permissions will be evaluated later in
323 	 * dsl_deleg_can_allow()
324 	 */
325 	return (0);
326 }
327 
328 int
329 zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
330 {
331 	int error;
332 	error = zfs_secpolicy_write_perms(zc->zc_name,
333 	    ZFS_DELEG_PERM_ROLLBACK, cr);
334 	if (error == 0)
335 		error = zfs_secpolicy_write_perms(zc->zc_name,
336 		    ZFS_DELEG_PERM_MOUNT, cr);
337 	return (error);
338 }
339 
340 int
341 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
342 {
343 	return (zfs_secpolicy_write_perms(zc->zc_name,
344 	    ZFS_DELEG_PERM_SEND, cr));
345 }
346 
347 int
348 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
349 {
350 	if (!INGLOBALZONE(curproc))
351 		return (EPERM);
352 
353 	if (secpolicy_nfs(cr) == 0) {
354 		return (0);
355 	} else {
356 		vnode_t *vp;
357 		int error;
358 
359 		if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
360 		    NO_FOLLOW, NULL, &vp)) != 0)
361 			return (error);
362 
363 		/* Now make sure mntpnt and dataset are ZFS */
364 
365 		if (vp->v_vfsp->vfs_fstype != zfsfstype ||
366 		    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
367 		    zc->zc_name) != 0)) {
368 			VN_RELE(vp);
369 			return (EPERM);
370 		}
371 
372 		VN_RELE(vp);
373 		return (dsl_deleg_access(zc->zc_name,
374 		    ZFS_DELEG_PERM_SHARE, cr));
375 	}
376 }
377 
/*
 * Compute the parent of 'datasetname' into 'parent' (a buffer of
 * 'parentsize' bytes) by removing the trailing "@bla" or "/bla".
 *
 * Returns 0 on success, ENOENT if the name has neither '@' nor '/'
 * (it has no parent), and ENAMETOOLONG if the name does not fit in the
 * buffer.  In the last case 'parent' is still NUL-terminated when
 * parentsize > 0.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (ENAMETOOLONG);

	/*
	 * strncpy() zero-fills the remainder of the buffer when the source
	 * is shorter than it, so a non-NUL final byte means the name was
	 * truncated and left unterminated.  Terminate it and refuse to
	 * derive a parent from a partial name.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	if (parent[parentsize - 1] != '\0') {
		parent[parentsize - 1] = '\0';
		return (ENAMETOOLONG);
	}

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);
	cp[0] = '\0';

	return (0);
}
399 
400 int
401 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
402 {
403 	int error;
404 
405 	if ((error = zfs_secpolicy_write_perms(name,
406 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
407 		return (error);
408 
409 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
410 }
411 
412 static int
413 zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
414 {
415 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
416 }
417 
418 /*
419  * Must have sys_config privilege to check the iscsi permission
420  */
421 /* ARGSUSED */
422 static int
423 zfs_secpolicy_iscsi(zfs_cmd_t *zc, cred_t *cr)
424 {
425 	return (secpolicy_zfs(cr));
426 }
427 
428 int
429 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
430 {
431 	char 	parentname[MAXNAMELEN];
432 	int	error;
433 
434 	if ((error = zfs_secpolicy_write_perms(from,
435 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
436 		return (error);
437 
438 	if ((error = zfs_secpolicy_write_perms(from,
439 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
440 		return (error);
441 
442 	if ((error = zfs_get_parent(to, parentname,
443 	    sizeof (parentname))) != 0)
444 		return (error);
445 
446 	if ((error = zfs_secpolicy_write_perms(parentname,
447 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
448 		return (error);
449 
450 	if ((error = zfs_secpolicy_write_perms(parentname,
451 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
452 		return (error);
453 
454 	return (error);
455 }
456 
457 static int
458 zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
459 {
460 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
461 }
462 
463 static int
464 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
465 {
466 	char 	parentname[MAXNAMELEN];
467 	objset_t *clone;
468 	int error;
469 
470 	error = zfs_secpolicy_write_perms(zc->zc_name,
471 	    ZFS_DELEG_PERM_PROMOTE, cr);
472 	if (error)
473 		return (error);
474 
475 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
476 	    DS_MODE_STANDARD | DS_MODE_READONLY, &clone);
477 
478 	if (error == 0) {
479 		dsl_dataset_t *pclone = NULL;
480 		dsl_dir_t *dd;
481 		dd = clone->os->os_dsl_dataset->ds_dir;
482 
483 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
484 		error = dsl_dataset_open_obj(dd->dd_pool,
485 		    dd->dd_phys->dd_origin_obj, NULL,
486 		    DS_MODE_NONE, FTAG, &pclone);
487 		rw_exit(&dd->dd_pool->dp_config_rwlock);
488 		if (error) {
489 			dmu_objset_close(clone);
490 			return (error);
491 		}
492 
493 		error = zfs_secpolicy_write_perms(zc->zc_name,
494 		    ZFS_DELEG_PERM_MOUNT, cr);
495 
496 		dsl_dataset_name(pclone, parentname);
497 		dmu_objset_close(clone);
498 		dsl_dataset_close(pclone, DS_MODE_NONE, FTAG);
499 		if (error == 0)
500 			error = zfs_secpolicy_write_perms(parentname,
501 			    ZFS_DELEG_PERM_PROMOTE, cr);
502 	}
503 	return (error);
504 }
505 
506 static int
507 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
508 {
509 	int error;
510 
511 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
512 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
513 		return (error);
514 
515 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
516 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
517 		return (error);
518 
519 	return (zfs_secpolicy_write_perms(zc->zc_name,
520 	    ZFS_DELEG_PERM_CREATE, cr));
521 }
522 
523 int
524 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
525 {
526 	int error;
527 
528 	if ((error = zfs_secpolicy_write_perms(name,
529 	    ZFS_DELEG_PERM_SNAPSHOT, cr)) != 0)
530 		return (error);
531 
532 	error = zfs_secpolicy_write_perms(name,
533 	    ZFS_DELEG_PERM_MOUNT, cr);
534 
535 	return (error);
536 }
537 
538 static int
539 zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
540 {
541 
542 	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
543 }
544 
545 static int
546 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
547 {
548 	char 	parentname[MAXNAMELEN];
549 	int 	error;
550 
551 	if ((error = zfs_get_parent(zc->zc_name, parentname,
552 	    sizeof (parentname))) != 0)
553 		return (error);
554 
555 	if (zc->zc_value[0] != '\0') {
556 		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
557 		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
558 			return (error);
559 	}
560 
561 	if ((error = zfs_secpolicy_write_perms(parentname,
562 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
563 		return (error);
564 
565 	error = zfs_secpolicy_write_perms(parentname,
566 	    ZFS_DELEG_PERM_MOUNT, cr);
567 
568 	return (error);
569 }
570 
571 static int
572 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
573 {
574 	int error;
575 
576 	error = secpolicy_fs_unmount(cr, NULL);
577 	if (error) {
578 		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
579 	}
580 	return (error);
581 }
582 
583 /*
584  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
585  * SYS_CONFIG privilege, which is not available in a local zone.
586  */
587 /* ARGSUSED */
588 static int
589 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
590 {
591 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
592 		return (EPERM);
593 
594 	return (0);
595 }
596 
597 /*
598  * Just like zfs_secpolicy_config, except that we will check for
599  * mount permission on the dataset for permission to create/remove
600  * the minor nodes.
601  */
602 static int
603 zfs_secpolicy_minor(zfs_cmd_t *zc, cred_t *cr)
604 {
605 	if (secpolicy_sys_config(cr, B_FALSE) != 0) {
606 		return (dsl_deleg_access(zc->zc_name,
607 		    ZFS_DELEG_PERM_MOUNT, cr));
608 	}
609 
610 	return (0);
611 }
612 
613 /*
614  * Policy for fault injection.  Requires all privileges.
615  */
616 /* ARGSUSED */
617 static int
618 zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
619 {
620 	return (secpolicy_zinject(cr));
621 }
622 
623 static int
624 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
625 {
626 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
627 
628 	if (prop == ZPROP_INVAL) {
629 		if (!zfs_prop_user(zc->zc_value))
630 			return (EINVAL);
631 		return (zfs_secpolicy_write_perms(zc->zc_name,
632 		    ZFS_DELEG_PERM_USERPROP, cr));
633 	} else {
634 		if (!zfs_prop_inheritable(prop))
635 			return (EINVAL);
636 		return (zfs_secpolicy_setprop(zc->zc_name, prop, cr));
637 	}
638 }
639 
640 /*
641  * Returns the nvlist as specified by the user in the zfs_cmd_t.
642  */
643 static int
644 get_nvlist(uint64_t nvl, uint64_t size, nvlist_t **nvp)
645 {
646 	char *packed;
647 	int error;
648 	nvlist_t *list = NULL;
649 
650 	/*
651 	 * Read in and unpack the user-supplied nvlist.
652 	 */
653 	if (size == 0)
654 		return (EINVAL);
655 
656 	packed = kmem_alloc(size, KM_SLEEP);
657 
658 	if ((error = xcopyin((void *)(uintptr_t)nvl, packed, size)) != 0) {
659 		kmem_free(packed, size);
660 		return (error);
661 	}
662 
663 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
664 		kmem_free(packed, size);
665 		return (error);
666 	}
667 
668 	kmem_free(packed, size);
669 
670 	*nvp = list;
671 	return (0);
672 }
673 
674 static int
675 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
676 {
677 	char *packed = NULL;
678 	size_t size;
679 	int error;
680 
681 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
682 
683 	if (size > zc->zc_nvlist_dst_size) {
684 		error = ENOMEM;
685 	} else {
686 		packed = kmem_alloc(size, KM_SLEEP);
687 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
688 		    KM_SLEEP) == 0);
689 		error = xcopyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
690 		    size);
691 		kmem_free(packed, size);
692 	}
693 
694 	zc->zc_nvlist_dst_size = size;
695 	return (error);
696 }
697 
698 static int
699 zfs_ioc_pool_create(zfs_cmd_t *zc)
700 {
701 	int error;
702 	nvlist_t *config, *props = NULL;
703 	char *buf;
704 
705 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
706 	    &config))
707 		return (error);
708 
709 	if (zc->zc_nvlist_src_size != 0 && (error =
710 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, &props))) {
711 		nvlist_free(config);
712 		return (error);
713 	}
714 
715 	buf = history_str_get(zc);
716 
717 	error = spa_create(zc->zc_name, config, props, buf);
718 
719 	if (buf != NULL)
720 		history_str_free(buf);
721 
722 	nvlist_free(config);
723 
724 	if (props)
725 		nvlist_free(props);
726 
727 	return (error);
728 }
729 
730 static int
731 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
732 {
733 	int error;
734 	zfs_log_history(zc);
735 	error = spa_destroy(zc->zc_name);
736 	return (error);
737 }
738 
739 static int
740 zfs_ioc_pool_import(zfs_cmd_t *zc)
741 {
742 	int error;
743 	nvlist_t *config, *props = NULL;
744 	uint64_t guid;
745 
746 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
747 	    &config)) != 0)
748 		return (error);
749 
750 	if (zc->zc_nvlist_src_size != 0 && (error =
751 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size, &props))) {
752 		nvlist_free(config);
753 		return (error);
754 	}
755 
756 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
757 	    guid != zc->zc_guid)
758 		error = EINVAL;
759 	else
760 		error = spa_import(zc->zc_name, config, props);
761 
762 	nvlist_free(config);
763 
764 	if (props)
765 		nvlist_free(props);
766 
767 	return (error);
768 }
769 
770 static int
771 zfs_ioc_pool_export(zfs_cmd_t *zc)
772 {
773 	int error;
774 	zfs_log_history(zc);
775 	error = spa_export(zc->zc_name, NULL);
776 	return (error);
777 }
778 
779 static int
780 zfs_ioc_pool_configs(zfs_cmd_t *zc)
781 {
782 	nvlist_t *configs;
783 	int error;
784 
785 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
786 		return (EEXIST);
787 
788 	error = put_nvlist(zc, configs);
789 
790 	nvlist_free(configs);
791 
792 	return (error);
793 }
794 
795 static int
796 zfs_ioc_pool_stats(zfs_cmd_t *zc)
797 {
798 	nvlist_t *config;
799 	int error;
800 	int ret = 0;
801 
802 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
803 	    sizeof (zc->zc_value));
804 
805 	if (config != NULL) {
806 		ret = put_nvlist(zc, config);
807 		nvlist_free(config);
808 
809 		/*
810 		 * The config may be present even if 'error' is non-zero.
811 		 * In this case we return success, and preserve the real errno
812 		 * in 'zc_cookie'.
813 		 */
814 		zc->zc_cookie = error;
815 	} else {
816 		ret = error;
817 	}
818 
819 	return (ret);
820 }
821 
822 /*
823  * Try to import the given pool, returning pool stats as appropriate so that
824  * user land knows which devices are available and overall pool health.
825  */
826 static int
827 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
828 {
829 	nvlist_t *tryconfig, *config;
830 	int error;
831 
832 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
833 	    &tryconfig)) != 0)
834 		return (error);
835 
836 	config = spa_tryimport(tryconfig);
837 
838 	nvlist_free(tryconfig);
839 
840 	if (config == NULL)
841 		return (EINVAL);
842 
843 	error = put_nvlist(zc, config);
844 	nvlist_free(config);
845 
846 	return (error);
847 }
848 
849 static int
850 zfs_ioc_pool_scrub(zfs_cmd_t *zc)
851 {
852 	spa_t *spa;
853 	int error;
854 
855 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
856 		return (error);
857 
858 	mutex_enter(&spa_namespace_lock);
859 	error = spa_scrub(spa, zc->zc_cookie, B_FALSE);
860 	mutex_exit(&spa_namespace_lock);
861 
862 	spa_close(spa, FTAG);
863 
864 	return (error);
865 }
866 
867 static int
868 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
869 {
870 	spa_t *spa;
871 	int error;
872 
873 	error = spa_open(zc->zc_name, &spa, FTAG);
874 	if (error == 0) {
875 		spa_freeze(spa);
876 		spa_close(spa, FTAG);
877 	}
878 	return (error);
879 }
880 
881 static int
882 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
883 {
884 	spa_t *spa;
885 	int error;
886 
887 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
888 		return (error);
889 
890 	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
891 		spa_close(spa, FTAG);
892 		return (EINVAL);
893 	}
894 
895 	spa_upgrade(spa, zc->zc_cookie);
896 	spa_close(spa, FTAG);
897 
898 	return (error);
899 }
900 
901 static int
902 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
903 {
904 	spa_t *spa;
905 	char *hist_buf;
906 	uint64_t size;
907 	int error;
908 
909 	if ((size = zc->zc_history_len) == 0)
910 		return (EINVAL);
911 
912 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
913 		return (error);
914 
915 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
916 		spa_close(spa, FTAG);
917 		return (ENOTSUP);
918 	}
919 
920 	hist_buf = kmem_alloc(size, KM_SLEEP);
921 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
922 	    &zc->zc_history_len, hist_buf)) == 0) {
923 		error = xcopyout(hist_buf,
924 		    (char *)(uintptr_t)zc->zc_history,
925 		    zc->zc_history_len);
926 	}
927 
928 	spa_close(spa, FTAG);
929 	kmem_free(hist_buf, size);
930 	return (error);
931 }
932 
933 static int
934 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
935 {
936 	int error;
937 
938 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
939 		return (error);
940 
941 	return (0);
942 }
943 
944 static int
945 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
946 {
947 	objset_t *osp;
948 	int error;
949 
950 	if ((error = dmu_objset_open(zc->zc_name, DMU_OST_ZFS,
951 	    DS_MODE_NONE | DS_MODE_READONLY, &osp)) != 0)
952 		return (error);
953 
954 	error = zfs_obj_to_path(osp, zc->zc_obj, zc->zc_value,
955 	    sizeof (zc->zc_value));
956 	dmu_objset_close(osp);
957 
958 	return (error);
959 }
960 
961 static int
962 zfs_ioc_vdev_add(zfs_cmd_t *zc)
963 {
964 	spa_t *spa;
965 	int error;
966 	nvlist_t *config, **l2cache;
967 	uint_t nl2cache;
968 
969 	error = spa_open(zc->zc_name, &spa, FTAG);
970 	if (error != 0)
971 		return (error);
972 
973 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
974 	    &config);
975 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
976 	    &l2cache, &nl2cache);
977 
978 	/*
979 	 * A root pool with concatenated devices is not supported.
980 	 * Thus, can not add a device to a root pool with one device.
981 	 * Allow for l2cache devices to be added.
982 	 */
983 	if (spa->spa_root_vdev->vdev_children == 1 && spa->spa_bootfs != 0 &&
984 	    nl2cache == 0) {
985 		spa_close(spa, FTAG);
986 		return (EDOM);
987 	}
988 
989 	if (error == 0) {
990 		error = spa_vdev_add(spa, config);
991 		nvlist_free(config);
992 	}
993 	spa_close(spa, FTAG);
994 	return (error);
995 }
996 
997 static int
998 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
999 {
1000 	spa_t *spa;
1001 	int error;
1002 
1003 	error = spa_open(zc->zc_name, &spa, FTAG);
1004 	if (error != 0)
1005 		return (error);
1006 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1007 	spa_close(spa, FTAG);
1008 	return (error);
1009 }
1010 
1011 static int
1012 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1013 {
1014 	spa_t *spa;
1015 	int error;
1016 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1017 
1018 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1019 		return (error);
1020 	switch (zc->zc_cookie) {
1021 	case VDEV_STATE_ONLINE:
1022 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1023 		break;
1024 
1025 	case VDEV_STATE_OFFLINE:
1026 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1027 		break;
1028 
1029 	case VDEV_STATE_FAULTED:
1030 		error = vdev_fault(spa, zc->zc_guid);
1031 		break;
1032 
1033 	case VDEV_STATE_DEGRADED:
1034 		error = vdev_degrade(spa, zc->zc_guid);
1035 		break;
1036 
1037 	default:
1038 		error = EINVAL;
1039 	}
1040 	zc->zc_cookie = newstate;
1041 	spa_close(spa, FTAG);
1042 	return (error);
1043 }
1044 
1045 static int
1046 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1047 {
1048 	spa_t *spa;
1049 	int replacing = zc->zc_cookie;
1050 	nvlist_t *config;
1051 	int error;
1052 
1053 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1054 		return (error);
1055 
1056 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1057 	    &config)) == 0) {
1058 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1059 		nvlist_free(config);
1060 	}
1061 
1062 	spa_close(spa, FTAG);
1063 	return (error);
1064 }
1065 
1066 static int
1067 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1068 {
1069 	spa_t *spa;
1070 	int error;
1071 
1072 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1073 		return (error);
1074 
1075 	error = spa_vdev_detach(spa, zc->zc_guid, B_FALSE);
1076 
1077 	spa_close(spa, FTAG);
1078 	return (error);
1079 }
1080 
1081 static int
1082 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1083 {
1084 	spa_t *spa;
1085 	char *path = zc->zc_value;
1086 	uint64_t guid = zc->zc_guid;
1087 	int error;
1088 
1089 	error = spa_open(zc->zc_name, &spa, FTAG);
1090 	if (error != 0)
1091 		return (error);
1092 
1093 	error = spa_vdev_setpath(spa, guid, path);
1094 	spa_close(spa, FTAG);
1095 	return (error);
1096 }
1097 
1098 /*
1099  * inputs:
1100  * zc_name		name of filesystem
1101  * zc_nvlist_dst_size	size of buffer for property nvlist
1102  *
1103  * outputs:
1104  * zc_objset_stats	stats
1105  * zc_nvlist_dst	property nvlist
1106  * zc_nvlist_dst_size	size of property nvlist
1107  * zc_value		alternate root
1108  */
1109 static int
1110 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1111 {
1112 	objset_t *os = NULL;
1113 	int error;
1114 	nvlist_t *nv;
1115 
1116 retry:
1117 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1118 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
1119 	if (error != 0) {
1120 		/*
1121 		 * This is ugly: dmu_objset_open() can return EBUSY if
1122 		 * the objset is held exclusively. Fortunately this hold is
1123 		 * only for a short while, so we retry here.
1124 		 * This avoids user code having to handle EBUSY,
1125 		 * for example for a "zfs list".
1126 		 */
1127 		if (error == EBUSY) {
1128 			delay(1);
1129 			goto retry;
1130 		}
1131 		return (error);
1132 	}
1133 
1134 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1135 
1136 	if (zc->zc_nvlist_dst != 0 &&
1137 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1138 		dmu_objset_stats(os, nv);
1139 		/*
1140 		 * NB: zvol_get_stats() will read the objset contents,
1141 		 * which we aren't supposed to do with a
1142 		 * DS_MODE_STANDARD open, because it could be
1143 		 * inconsistent.  So this is a bit of a workaround...
1144 		 */
1145 		if (!zc->zc_objset_stats.dds_inconsistent) {
1146 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1147 				VERIFY(zvol_get_stats(os, nv) == 0);
1148 		}
1149 		error = put_nvlist(zc, nv);
1150 		nvlist_free(nv);
1151 	}
1152 
1153 	spa_altroot(dmu_objset_spa(os), zc->zc_value, sizeof (zc->zc_value));
1154 
1155 	dmu_objset_close(os);
1156 	return (error);
1157 }
1158 
/*
 * inputs:
 * zc_name		name of filesystem
 *
 * outputs:
 * zc_objset_stats	stats
 * zc_cookie		version from zfs_get_version() for a consistent
 *			ZFS objset; 0 otherwise
 */
1172 static int
1173 zfs_ioc_objset_version(zfs_cmd_t *zc)
1174 {
1175 	objset_t *os = NULL;
1176 	int error;
1177 
1178 retry:
1179 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1180 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
1181 	if (error != 0) {
1182 		/*
1183 		 * This is ugly: dmu_objset_open() can return EBUSY if
1184 		 * the objset is held exclusively. Fortunately this hold is
1185 		 * only for a short while, so we retry here.
1186 		 * This avoids user code having to handle EBUSY,
1187 		 * for example for a "zfs list".
1188 		 */
1189 		if (error == EBUSY) {
1190 			delay(1);
1191 			goto retry;
1192 		}
1193 		return (error);
1194 	}
1195 
1196 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1197 
1198 	/*
1199 	 * NB: zfs_get_version() will read the objset contents,
1200 	 * which we aren't supposed to do with a
1201 	 * DS_MODE_STANDARD open, because it could be
1202 	 * inconsistent.  So this is a bit of a workaround...
1203 	 */
1204 	zc->zc_cookie = 0;
1205 	if (!zc->zc_objset_stats.dds_inconsistent)
1206 		if (dmu_objset_type(os) == DMU_OST_ZFS)
1207 			(void) zfs_get_version(os, &zc->zc_cookie);
1208 
1209 	dmu_objset_close(os);
1210 	return (0);
1211 }
1212 
1213 static int
1214 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1215 {
1216 	objset_t *os;
1217 	int error;
1218 	char *p;
1219 
1220 retry:
1221 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1222 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
1223 	if (error != 0) {
1224 		/*
1225 		 * This is ugly: dmu_objset_open() can return EBUSY if
1226 		 * the objset is held exclusively. Fortunately this hold is
1227 		 * only for a short while, so we retry here.
1228 		 * This avoids user code having to handle EBUSY,
1229 		 * for example for a "zfs list".
1230 		 */
1231 		if (error == EBUSY) {
1232 			delay(1);
1233 			goto retry;
1234 		}
1235 		if (error == ENOENT)
1236 			error = ESRCH;
1237 		return (error);
1238 	}
1239 
1240 	p = strrchr(zc->zc_name, '/');
1241 	if (p == NULL || p[1] != '\0')
1242 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1243 	p = zc->zc_name + strlen(zc->zc_name);
1244 
1245 	do {
1246 		error = dmu_dir_list_next(os,
1247 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1248 		    NULL, &zc->zc_cookie);
1249 		if (error == ENOENT)
1250 			error = ESRCH;
1251 	} while (error == 0 && !INGLOBALZONE(curproc) &&
1252 	    !zone_dataset_visible(zc->zc_name, NULL));
1253 
1254 	/*
1255 	 * If it's a hidden dataset (ie. with a '$' in its name), don't
1256 	 * try to get stats for it.  Userland will skip over it.
1257 	 */
1258 	if (error == 0 && strchr(zc->zc_name, '$') == NULL)
1259 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1260 
1261 	dmu_objset_close(os);
1262 	return (error);
1263 }
1264 
1265 /*
1266  * inputs:
1267  * zc_name		name of filesystem
1268  * zc_cookie		zap cursor
1269  * zc_nvlist_dst_size	size of buffer for property nvlist
1270  *
1271  * outputs:
1272  * zc_name		name of next snapshot
1273  * zc_objset_stats	stats
1274  * zc_nvlist_dst	property nvlist
1275  * zc_nvlist_dst_size	size of property nvlist
1276  * zc_value		alternate root
1277  */
1278 static int
1279 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1280 {
1281 	objset_t *os;
1282 	int error;
1283 
1284 retry:
1285 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
1286 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
1287 	if (error != 0) {
1288 		/*
1289 		 * This is ugly: dmu_objset_open() can return EBUSY if
1290 		 * the objset is held exclusively. Fortunately this hold is
1291 		 * only for a short while, so we retry here.
1292 		 * This avoids user code having to handle EBUSY,
1293 		 * for example for a "zfs list".
1294 		 */
1295 		if (error == EBUSY) {
1296 			delay(1);
1297 			goto retry;
1298 		}
1299 		if (error == ENOENT)
1300 			error = ESRCH;
1301 		return (error);
1302 	}
1303 
1304 	/*
1305 	 * A dataset name of maximum length cannot have any snapshots,
1306 	 * so exit immediately.
1307 	 */
1308 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1309 		dmu_objset_close(os);
1310 		return (ESRCH);
1311 	}
1312 
1313 	error = dmu_snapshot_list_next(os,
1314 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1315 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie);
1316 	if (error == ENOENT)
1317 		error = ESRCH;
1318 
1319 	if (error == 0)
1320 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1321 
1322 	/* if we failed, undo the @ that we tacked on to zc_name */
1323 	if (error != 0)
1324 		*strchr(zc->zc_name, '@') = '\0';
1325 
1326 	dmu_objset_close(os);
1327 	return (error);
1328 }
1329 
1330 static int
1331 zfs_set_prop_nvlist(const char *name, nvlist_t *nvl)
1332 {
1333 	nvpair_t *elem;
1334 	int error;
1335 	uint64_t intval;
1336 	char *strval;
1337 
1338 	/*
1339 	 * First validate permission to set all of the properties
1340 	 */
1341 	elem = NULL;
1342 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1343 		const char *propname = nvpair_name(elem);
1344 		zfs_prop_t prop = zfs_name_to_prop(propname);
1345 
1346 		if (prop == ZPROP_INVAL) {
1347 			/*
1348 			 * If this is a user-defined property, it must be a
1349 			 * string, and there is no further validation to do.
1350 			 */
1351 			if (!zfs_prop_user(propname) ||
1352 			    nvpair_type(elem) != DATA_TYPE_STRING)
1353 				return (EINVAL);
1354 
1355 			if (error = zfs_secpolicy_write_perms(name,
1356 			    ZFS_DELEG_PERM_USERPROP, CRED()))
1357 				return (error);
1358 			continue;
1359 		}
1360 
1361 		if ((error = zfs_secpolicy_setprop(name, prop, CRED())) != 0)
1362 			return (error);
1363 
1364 		/*
1365 		 * Check that this value is valid for this pool version
1366 		 */
1367 		switch (prop) {
1368 		case ZFS_PROP_COMPRESSION:
1369 			/*
1370 			 * If the user specified gzip compression, make sure
1371 			 * the SPA supports it. We ignore any errors here since
1372 			 * we'll catch them later.
1373 			 */
1374 			if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
1375 			    nvpair_value_uint64(elem, &intval) == 0 &&
1376 			    intval >= ZIO_COMPRESS_GZIP_1 &&
1377 			    intval <= ZIO_COMPRESS_GZIP_9) {
1378 				if (zfs_check_version(name,
1379 				    SPA_VERSION_GZIP_COMPRESSION))
1380 					return (ENOTSUP);
1381 			}
1382 			break;
1383 
1384 		case ZFS_PROP_COPIES:
1385 			if (zfs_check_version(name, SPA_VERSION_DITTO_BLOCKS))
1386 				return (ENOTSUP);
1387 			break;
1388 		case ZFS_PROP_NORMALIZE:
1389 		case ZFS_PROP_UTF8ONLY:
1390 		case ZFS_PROP_CASE:
1391 			if (zfs_check_version(name, SPA_VERSION_NORMALIZATION))
1392 				return (ENOTSUP);
1393 
1394 		}
1395 		if ((error = zfs_secpolicy_setprop(name, prop, CRED())) != 0)
1396 			return (error);
1397 	}
1398 
1399 	elem = NULL;
1400 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1401 		const char *propname = nvpair_name(elem);
1402 		zfs_prop_t prop = zfs_name_to_prop(propname);
1403 
1404 		if (prop == ZPROP_INVAL) {
1405 			VERIFY(nvpair_value_string(elem, &strval) == 0);
1406 			error = dsl_prop_set(name, propname, 1,
1407 			    strlen(strval) + 1, strval);
1408 			if (error == 0)
1409 				continue;
1410 			else
1411 				return (error);
1412 		}
1413 
1414 		switch (prop) {
1415 		case ZFS_PROP_QUOTA:
1416 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1417 			    (error = dsl_dir_set_quota(name, intval)) != 0)
1418 				return (error);
1419 			break;
1420 
1421 		case ZFS_PROP_REFQUOTA:
1422 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1423 			    (error = dsl_dataset_set_quota(name, intval)) != 0)
1424 				return (error);
1425 			break;
1426 
1427 		case ZFS_PROP_RESERVATION:
1428 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1429 			    (error = dsl_dir_set_reservation(name,
1430 			    intval)) != 0)
1431 				return (error);
1432 			break;
1433 
1434 		case ZFS_PROP_REFRESERVATION:
1435 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1436 			    (error = dsl_dataset_set_reservation(name,
1437 			    intval)) != 0)
1438 				return (error);
1439 			break;
1440 
1441 		case ZFS_PROP_VOLSIZE:
1442 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1443 			    (error = zvol_set_volsize(name,
1444 			    ddi_driver_major(zfs_dip), intval)) != 0)
1445 				return (error);
1446 			break;
1447 
1448 		case ZFS_PROP_VOLBLOCKSIZE:
1449 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1450 			    (error = zvol_set_volblocksize(name, intval)) != 0)
1451 				return (error);
1452 			break;
1453 
1454 		case ZFS_PROP_VERSION:
1455 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1456 			    (error = zfs_set_version(name, intval)) != 0)
1457 				return (error);
1458 			break;
1459 
1460 		default:
1461 			if (nvpair_type(elem) == DATA_TYPE_STRING) {
1462 				if (zfs_prop_get_type(prop) !=
1463 				    PROP_TYPE_STRING)
1464 					return (EINVAL);
1465 				VERIFY(nvpair_value_string(elem, &strval) == 0);
1466 				if ((error = dsl_prop_set(name,
1467 				    nvpair_name(elem), 1, strlen(strval) + 1,
1468 				    strval)) != 0)
1469 					return (error);
1470 			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
1471 				const char *unused;
1472 
1473 				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
1474 
1475 				switch (zfs_prop_get_type(prop)) {
1476 				case PROP_TYPE_NUMBER:
1477 					break;
1478 				case PROP_TYPE_STRING:
1479 					return (EINVAL);
1480 				case PROP_TYPE_INDEX:
1481 					if (zfs_prop_index_to_string(prop,
1482 					    intval, &unused) != 0)
1483 						return (EINVAL);
1484 					break;
1485 				default:
1486 					cmn_err(CE_PANIC,
1487 					    "unknown property type");
1488 					break;
1489 				}
1490 
1491 				if ((error = dsl_prop_set(name, propname,
1492 				    8, 1, &intval)) != 0)
1493 					return (error);
1494 			} else {
1495 				return (EINVAL);
1496 			}
1497 			break;
1498 		}
1499 	}
1500 
1501 	return (0);
1502 }
1503 
1504 /*
1505  * inputs:
1506  * zc_name		name of filesystem
1507  * zc_value		name of property to inherit
1508  * zc_nvlist_src{_size}	nvlist of properties to apply
1509  *
1510  * outputs:		none
1511  */
1512 static int
1513 zfs_ioc_set_prop(zfs_cmd_t *zc)
1514 {
1515 	nvlist_t *nvl;
1516 	int error;
1517 
1518 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1519 	    &nvl)) != 0)
1520 		return (error);
1521 
1522 	error = zfs_set_prop_nvlist(zc->zc_name, nvl);
1523 
1524 	nvlist_free(nvl);
1525 	return (error);
1526 }
1527 
1528 /*
1529  * inputs:
1530  * zc_name		name of filesystem
1531  * zc_value		name of property to inherit
1532  *
1533  * outputs:		none
1534  */
1535 static int
1536 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
1537 {
1538 	/* the property name has been validated by zfs_secpolicy_inherit() */
1539 	return (dsl_prop_set(zc->zc_name, zc->zc_value, 0, 0, NULL));
1540 }
1541 
1542 static int
1543 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
1544 {
1545 	nvlist_t *props;
1546 	spa_t *spa;
1547 	int error;
1548 
1549 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1550 	    &props)))
1551 		return (error);
1552 
1553 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
1554 		nvlist_free(props);
1555 		return (error);
1556 	}
1557 
1558 	error = spa_prop_set(spa, props);
1559 
1560 	nvlist_free(props);
1561 	spa_close(spa, FTAG);
1562 
1563 	return (error);
1564 }
1565 
1566 static int
1567 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
1568 {
1569 	spa_t *spa;
1570 	int error;
1571 	nvlist_t *nvp = NULL;
1572 
1573 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1574 		return (error);
1575 
1576 	error = spa_prop_get(spa, &nvp);
1577 
1578 	if (error == 0 && zc->zc_nvlist_dst != NULL)
1579 		error = put_nvlist(zc, nvp);
1580 	else
1581 		error = EFAULT;
1582 
1583 	spa_close(spa, FTAG);
1584 
1585 	if (nvp)
1586 		nvlist_free(nvp);
1587 	return (error);
1588 }
1589 
1590 static int
1591 zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc)
1592 {
1593 	nvlist_t *nvp;
1594 	int error;
1595 	uint32_t uid;
1596 	uint32_t gid;
1597 	uint32_t *groups;
1598 	uint_t group_cnt;
1599 	cred_t	*usercred;
1600 
1601 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1602 	    &nvp)) != 0) {
1603 		return (error);
1604 	}
1605 
1606 	if ((error = nvlist_lookup_uint32(nvp,
1607 	    ZFS_DELEG_PERM_UID, &uid)) != 0) {
1608 		nvlist_free(nvp);
1609 		return (EPERM);
1610 	}
1611 
1612 	if ((error = nvlist_lookup_uint32(nvp,
1613 	    ZFS_DELEG_PERM_GID, &gid)) != 0) {
1614 		nvlist_free(nvp);
1615 		return (EPERM);
1616 	}
1617 
1618 	if ((error = nvlist_lookup_uint32_array(nvp, ZFS_DELEG_PERM_GROUPS,
1619 	    &groups, &group_cnt)) != 0) {
1620 		nvlist_free(nvp);
1621 		return (EPERM);
1622 	}
1623 	usercred = cralloc();
1624 	if ((crsetugid(usercred, uid, gid) != 0) ||
1625 	    (crsetgroups(usercred, group_cnt, (gid_t *)groups) != 0)) {
1626 		nvlist_free(nvp);
1627 		crfree(usercred);
1628 		return (EPERM);
1629 	}
1630 	nvlist_free(nvp);
1631 	error = dsl_deleg_access(zc->zc_name,
1632 	    zfs_prop_to_name(ZFS_PROP_SHAREISCSI), usercred);
1633 	crfree(usercred);
1634 	return (error);
1635 }
1636 
1637 /*
1638  * inputs:
1639  * zc_name		name of filesystem
1640  * zc_nvlist_src{_size}	nvlist of delegated permissions
1641  * zc_perm_action	allow/unallow flag
1642  *
1643  * outputs:		none
1644  */
1645 static int
1646 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
1647 {
1648 	int error;
1649 	nvlist_t *fsaclnv = NULL;
1650 
1651 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1652 	    &fsaclnv)) != 0)
1653 		return (error);
1654 
1655 	/*
1656 	 * Verify nvlist is constructed correctly
1657 	 */
1658 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
1659 		nvlist_free(fsaclnv);
1660 		return (EINVAL);
1661 	}
1662 
1663 	/*
1664 	 * If we don't have PRIV_SYS_MOUNT, then validate
1665 	 * that user is allowed to hand out each permission in
1666 	 * the nvlist(s)
1667 	 */
1668 
1669 	error = secpolicy_zfs(CRED());
1670 	if (error) {
1671 		if (zc->zc_perm_action == B_FALSE) {
1672 			error = dsl_deleg_can_allow(zc->zc_name,
1673 			    fsaclnv, CRED());
1674 		} else {
1675 			error = dsl_deleg_can_unallow(zc->zc_name,
1676 			    fsaclnv, CRED());
1677 		}
1678 	}
1679 
1680 	if (error == 0)
1681 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
1682 
1683 	nvlist_free(fsaclnv);
1684 	return (error);
1685 }
1686 
1687 /*
1688  * inputs:
1689  * zc_name		name of filesystem
1690  *
1691  * outputs:
1692  * zc_nvlist_src{_size}	nvlist of delegated permissions
1693  */
1694 static int
1695 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
1696 {
1697 	nvlist_t *nvp;
1698 	int error;
1699 
1700 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
1701 		error = put_nvlist(zc, nvp);
1702 		nvlist_free(nvp);
1703 	}
1704 
1705 	return (error);
1706 }
1707 
1708 /*
1709  * inputs:
1710  * zc_name		name of volume
1711  *
1712  * outputs:		none
1713  */
1714 static int
1715 zfs_ioc_create_minor(zfs_cmd_t *zc)
1716 {
1717 	return (zvol_create_minor(zc->zc_name, ddi_driver_major(zfs_dip)));
1718 }
1719 
1720 /*
1721  * inputs:
1722  * zc_name		name of volume
1723  *
1724  * outputs:		none
1725  */
1726 static int
1727 zfs_ioc_remove_minor(zfs_cmd_t *zc)
1728 {
1729 	return (zvol_remove_minor(zc->zc_name));
1730 }
1731 
1732 /*
1733  * Search the vfs list for a specified resource.  Returns a pointer to it
1734  * or NULL if no suitable entry is found. The caller of this routine
1735  * is responsible for releasing the returned vfs pointer.
1736  */
1737 static vfs_t *
1738 zfs_get_vfs(const char *resource)
1739 {
1740 	struct vfs *vfsp;
1741 	struct vfs *vfs_found = NULL;
1742 
1743 	vfs_list_read_lock();
1744 	vfsp = rootvfs;
1745 	do {
1746 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
1747 			VFS_HOLD(vfsp);
1748 			vfs_found = vfsp;
1749 			break;
1750 		}
1751 		vfsp = vfsp->vfs_next;
1752 	} while (vfsp != rootvfs);
1753 	vfs_list_unlock();
1754 	return (vfs_found);
1755 }
1756 
1757 /* ARGSUSED */
1758 static void
1759 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
1760 {
1761 	zfs_creat_t *zct = arg;
1762 	uint64_t version;
1763 
1764 	if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID)
1765 		version = ZPL_VERSION;
1766 	else
1767 		version = ZPL_VERSION_FUID - 1;
1768 
1769 	(void) nvlist_lookup_uint64(zct->zct_props,
1770 	    zfs_prop_to_name(ZFS_PROP_VERSION), &version);
1771 
1772 	zfs_create_fs(os, cr, version, zct->zct_norm, tx);
1773 }
1774 
1775 /*
1776  * zfs_prop_lookup()
1777  *
1778  * Look for the property first in the existing property nvlist.  If
1779  * it's already present, you're done.  If it's not there, attempt to
1780  * find the property value from a parent dataset.  If that fails, fall
1781  * back to the property's default value.  In either of these two
1782  * cases, if update is TRUE, add a value for the property to the
1783  * property nvlist.
1784  *
1785  * If the rval pointer is non-NULL, copy the discovered value to rval.
1786  *
1787  * If we get any unexpected errors, bail and return the error number
1788  * to the caller.
1789  *
1790  * If we succeed, return 0.
1791  */
1792 static int
1793 zfs_prop_lookup(const char *parentname, zfs_prop_t propnum,
1794     nvlist_t *proplist, uint64_t *rval, boolean_t update)
1795 {
1796 	const char *propname;
1797 	uint64_t value;
1798 	int error = ENOENT;
1799 
1800 	propname = zfs_prop_to_name(propnum);
1801 	if (proplist != NULL)
1802 		error = nvlist_lookup_uint64(proplist, propname, &value);
1803 	if (error == ENOENT) {
1804 		error = dsl_prop_get_integer(parentname, propname,
1805 		    &value, NULL);
1806 		if (error == ENOENT)
1807 			value = zfs_prop_default_numeric(propnum);
1808 		else if (error != 0)
1809 			return (error);
1810 		if (update) {
1811 			ASSERT(proplist != NULL);
1812 			error = nvlist_add_uint64(proplist, propname, value);
1813 		}
1814 	}
1815 	if (error == 0 && rval)
1816 		*rval = value;
1817 	return (error);
1818 }
1819 
1820 /*
1821  * zfs_normalization_get
1822  *
1823  * Get the normalization flag value.  If the properties have
1824  * non-default values, make sure the pool version is recent enough to
1825  * support these choices.
1826  */
1827 static int
1828 zfs_normalization_get(const char *dataset, nvlist_t *proplist, int *norm,
1829     boolean_t update)
1830 {
1831 	char parentname[MAXNAMELEN];
1832 	char poolname[MAXNAMELEN];
1833 	char *cp;
1834 	uint64_t value;
1835 	int check = 0;
1836 	int error;
1837 
1838 	ASSERT(norm != NULL);
1839 	*norm = 0;
1840 
1841 	(void) strncpy(parentname, dataset, sizeof (parentname));
1842 	cp = strrchr(parentname, '@');
1843 	if (cp != NULL) {
1844 		cp[0] = '\0';
1845 	} else {
1846 		cp = strrchr(parentname, '/');
1847 		if (cp == NULL)
1848 			return (ENOENT);
1849 		cp[0] = '\0';
1850 	}
1851 
1852 	(void) strncpy(poolname, dataset, sizeof (poolname));
1853 	cp = strchr(poolname, '/');
1854 	if (cp != NULL)
1855 		cp[0] = '\0';
1856 
1857 	/*
1858 	 * Make sure pool is of new enough vintage to support normalization.
1859 	 */
1860 	if (zfs_check_version(poolname, SPA_VERSION_NORMALIZATION))
1861 		return (0);
1862 
1863 	error = zfs_prop_lookup(parentname, ZFS_PROP_UTF8ONLY,
1864 	    proplist, &value, update);
1865 	if (error != 0)
1866 		return (error);
1867 	if (value != zfs_prop_default_numeric(ZFS_PROP_UTF8ONLY))
1868 		check = 1;
1869 
1870 	error = zfs_prop_lookup(parentname, ZFS_PROP_NORMALIZE,
1871 	    proplist, &value, update);
1872 	if (error != 0)
1873 		return (error);
1874 	if (value != zfs_prop_default_numeric(ZFS_PROP_NORMALIZE)) {
1875 		check = 1;
1876 		switch ((int)value) {
1877 		case ZFS_NORMALIZE_NONE:
1878 			break;
1879 		case ZFS_NORMALIZE_C:
1880 			*norm |= U8_TEXTPREP_NFC;
1881 			break;
1882 		case ZFS_NORMALIZE_D:
1883 			*norm |= U8_TEXTPREP_NFD;
1884 			break;
1885 		case ZFS_NORMALIZE_KC:
1886 			*norm |= U8_TEXTPREP_NFKC;
1887 			break;
1888 		case ZFS_NORMALIZE_KD:
1889 			*norm |= U8_TEXTPREP_NFKD;
1890 			break;
1891 		default:
1892 			ASSERT((int)value >= ZFS_NORMALIZE_NONE);
1893 			ASSERT((int)value <= ZFS_NORMALIZE_KD);
1894 			break;
1895 		}
1896 	}
1897 
1898 	error = zfs_prop_lookup(parentname, ZFS_PROP_CASE,
1899 	    proplist, &value, update);
1900 	if (error != 0)
1901 		return (error);
1902 	if (value != zfs_prop_default_numeric(ZFS_PROP_CASE)) {
1903 		check = 1;
1904 		switch ((int)value) {
1905 		case ZFS_CASE_SENSITIVE:
1906 			break;
1907 		case ZFS_CASE_INSENSITIVE:
1908 			*norm |= U8_TEXTPREP_TOUPPER;
1909 			break;
1910 		case ZFS_CASE_MIXED:
1911 			*norm |= U8_TEXTPREP_TOUPPER;
1912 			break;
1913 		default:
1914 			ASSERT((int)value >= ZFS_CASE_SENSITIVE);
1915 			ASSERT((int)value <= ZFS_CASE_MIXED);
1916 			break;
1917 		}
1918 	}
1919 
1920 	/*
1921 	 * At the moment we are disabling non-default values for these
1922 	 * properties because they cannot be preserved properly with a
1923 	 * zfs send.
1924 	 */
1925 	if (check == 1)
1926 		return (ENOTSUP);
1927 
1928 	return (0);
1929 }
1930 
1931 /*
1932  * inputs:
1933  * zc_objset_type	type of objset to create (fs vs zvol)
1934  * zc_name		name of new objset
1935  * zc_value		name of snapshot to clone from (may be empty)
1936  * zc_nvlist_src{_size}	nvlist of properties to apply
1937  *
1938  * outputs:		none
1939  */
1940 static int
1941 zfs_ioc_create(zfs_cmd_t *zc)
1942 {
1943 	objset_t *clone;
1944 	int error = 0;
1945 	zfs_creat_t zct;
1946 	nvlist_t *nvprops = NULL;
1947 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
1948 	dmu_objset_type_t type = zc->zc_objset_type;
1949 
1950 	switch (type) {
1951 
1952 	case DMU_OST_ZFS:
1953 		cbfunc = zfs_create_cb;
1954 		break;
1955 
1956 	case DMU_OST_ZVOL:
1957 		cbfunc = zvol_create_cb;
1958 		break;
1959 
1960 	default:
1961 		cbfunc = NULL;
1962 	}
1963 	if (strchr(zc->zc_name, '@') ||
1964 	    strchr(zc->zc_name, '%'))
1965 		return (EINVAL);
1966 
1967 	if (zc->zc_nvlist_src != NULL &&
1968 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1969 	    &nvprops)) != 0)
1970 		return (error);
1971 
1972 	zct.zct_norm = 0;
1973 	zct.zct_props = nvprops;
1974 
1975 	if (zc->zc_value[0] != '\0') {
1976 		/*
1977 		 * We're creating a clone of an existing snapshot.
1978 		 */
1979 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
1980 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
1981 			nvlist_free(nvprops);
1982 			return (EINVAL);
1983 		}
1984 
1985 		error = dmu_objset_open(zc->zc_value, type,
1986 		    DS_MODE_STANDARD | DS_MODE_READONLY, &clone);
1987 		if (error) {
1988 			nvlist_free(nvprops);
1989 			return (error);
1990 		}
1991 		error = dmu_objset_create(zc->zc_name, type, clone, NULL, NULL);
1992 		if (error) {
1993 			dmu_objset_close(clone);
1994 			nvlist_free(nvprops);
1995 			return (error);
1996 		}
1997 		/*
1998 		 * If caller did not provide any properties, allocate
1999 		 * an nvlist for properties, as we will be adding our set-once
2000 		 * properties to it.  This carries the choices made on the
2001 		 * original file system into the clone.
2002 		 */
2003 		if (nvprops == NULL)
2004 			VERIFY(nvlist_alloc(&nvprops,
2005 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2006 
2007 		/*
2008 		 * We have to have normalization and case-folding
2009 		 * flags correct when we do the file system creation,
2010 		 * so go figure them out now.  All we really care about
2011 		 * here is getting these values into the property list.
2012 		 */
2013 		error = zfs_normalization_get(zc->zc_value, nvprops,
2014 		    &zct.zct_norm, B_TRUE);
2015 		if (error != 0) {
2016 			dmu_objset_close(clone);
2017 			nvlist_free(nvprops);
2018 			return (error);
2019 		}
2020 		dmu_objset_close(clone);
2021 	} else {
2022 		if (cbfunc == NULL) {
2023 			nvlist_free(nvprops);
2024 			return (EINVAL);
2025 		}
2026 
2027 		if (type == DMU_OST_ZVOL) {
2028 			uint64_t volsize, volblocksize;
2029 
2030 			if (nvprops == NULL ||
2031 			    nvlist_lookup_uint64(nvprops,
2032 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2033 			    &volsize) != 0) {
2034 				nvlist_free(nvprops);
2035 				return (EINVAL);
2036 			}
2037 
2038 			if ((error = nvlist_lookup_uint64(nvprops,
2039 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2040 			    &volblocksize)) != 0 && error != ENOENT) {
2041 				nvlist_free(nvprops);
2042 				return (EINVAL);
2043 			}
2044 
2045 			if (error != 0)
2046 				volblocksize = zfs_prop_default_numeric(
2047 				    ZFS_PROP_VOLBLOCKSIZE);
2048 
2049 			if ((error = zvol_check_volblocksize(
2050 			    volblocksize)) != 0 ||
2051 			    (error = zvol_check_volsize(volsize,
2052 			    volblocksize)) != 0) {
2053 				nvlist_free(nvprops);
2054 				return (error);
2055 			}
2056 		} else if (type == DMU_OST_ZFS) {
2057 			uint64_t version;
2058 			int error;
2059 
2060 			error = nvlist_lookup_uint64(nvprops,
2061 			    zfs_prop_to_name(ZFS_PROP_VERSION), &version);
2062 
2063 			if (error == 0 && (version < ZPL_VERSION_INITIAL ||
2064 			    version > ZPL_VERSION)) {
2065 				nvlist_free(nvprops);
2066 				return (ENOTSUP);
2067 			} else if (error == 0 && version >= ZPL_VERSION_FUID &&
2068 			    zfs_check_version(zc->zc_name, SPA_VERSION_FUID)) {
2069 				nvlist_free(nvprops);
2070 				return (ENOTSUP);
2071 			}
2072 
2073 			/*
2074 			 * We have to have normalization and
2075 			 * case-folding flags correct when we do the
2076 			 * file system creation, so go figure them out
2077 			 * now.  The final argument to zfs_normalization_get()
2078 			 * tells that routine not to update the nvprops
2079 			 * list.
2080 			 */
2081 			error = zfs_normalization_get(zc->zc_name, nvprops,
2082 			    &zct.zct_norm, B_FALSE);
2083 			if (error != 0) {
2084 				nvlist_free(nvprops);
2085 				return (error);
2086 			}
2087 		}
2088 		error = dmu_objset_create(zc->zc_name, type, NULL, cbfunc,
2089 		    &zct);
2090 	}
2091 
2092 	/*
2093 	 * It would be nice to do this atomically.
2094 	 */
2095 	if (error == 0) {
2096 		if ((error = zfs_set_prop_nvlist(zc->zc_name, nvprops)) != 0)
2097 			(void) dmu_objset_destroy(zc->zc_name);
2098 	}
2099 
2100 	nvlist_free(nvprops);
2101 	return (error);
2102 }
2103 
2104 /*
2105  * inputs:
2106  * zc_name	name of filesystem
2107  * zc_value	short name of snapshot
2108  * zc_cookie	recursive flag
2109  *
2110  * outputs:	none
2111  */
2112 static int
2113 zfs_ioc_snapshot(zfs_cmd_t *zc)
2114 {
2115 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2116 		return (EINVAL);
2117 	return (dmu_objset_snapshot(zc->zc_name,
2118 	    zc->zc_value, zc->zc_cookie));
2119 }
2120 
2121 int
2122 zfs_unmount_snap(char *name, void *arg)
2123 {
2124 	char *snapname = arg;
2125 	char *cp;
2126 	vfs_t *vfsp = NULL;
2127 
2128 	/*
2129 	 * Snapshots (which are under .zfs control) must be unmounted
2130 	 * before they can be destroyed.
2131 	 */
2132 
2133 	if (snapname) {
2134 		(void) strcat(name, "@");
2135 		(void) strcat(name, snapname);
2136 		vfsp = zfs_get_vfs(name);
2137 		cp = strchr(name, '@');
2138 		*cp = '\0';
2139 	} else if (strchr(name, '@')) {
2140 		vfsp = zfs_get_vfs(name);
2141 	}
2142 
2143 	if (vfsp) {
2144 		/*
2145 		 * Always force the unmount for snapshots.
2146 		 */
2147 		int flag = MS_FORCE;
2148 		int err;
2149 
2150 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
2151 			VFS_RELE(vfsp);
2152 			return (err);
2153 		}
2154 		VFS_RELE(vfsp);
2155 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
2156 			return (err);
2157 	}
2158 	return (0);
2159 }
2160 
2161 /*
2162  * inputs:
2163  * zc_name	name of filesystem
2164  * zc_value	short name of snapshot
2165  *
2166  * outputs:	none
2167  */
2168 static int
2169 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
2170 {
2171 	int err;
2172 
2173 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2174 		return (EINVAL);
2175 	err = dmu_objset_find(zc->zc_name,
2176 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
2177 	if (err)
2178 		return (err);
2179 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value));
2180 }
2181 
2182 /*
2183  * inputs:
2184  * zc_name		name of dataset to destroy
2185  * zc_objset_type	type of objset
2186  *
2187  * outputs:		none
2188  */
2189 static int
2190 zfs_ioc_destroy(zfs_cmd_t *zc)
2191 {
2192 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
2193 		int err = zfs_unmount_snap(zc->zc_name, NULL);
2194 		if (err)
2195 			return (err);
2196 	}
2197 
2198 	return (dmu_objset_destroy(zc->zc_name));
2199 }
2200 
2201 /*
2202  * inputs:
2203  * zc_name	name of dataset to rollback (to most recent snapshot)
2204  *
2205  * outputs:	none
2206  */
2207 static int
2208 zfs_ioc_rollback(zfs_cmd_t *zc)
2209 {
2210 	objset_t *os;
2211 	int error;
2212 	zfsvfs_t *zfsvfs = NULL;
2213 
2214 	/*
2215 	 * Get the zfsvfs for the receiving objset. There
2216 	 * won't be one if we're operating on a zvol, if the
2217 	 * objset doesn't exist yet, or is not mounted.
2218 	 */
2219 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
2220 	    DS_MODE_STANDARD, &os);
2221 	if (error)
2222 		return (error);
2223 
2224 	if (dmu_objset_type(os) == DMU_OST_ZFS) {
2225 		mutex_enter(&os->os->os_user_ptr_lock);
2226 		zfsvfs = dmu_objset_get_user(os);
2227 		if (zfsvfs != NULL)
2228 			VFS_HOLD(zfsvfs->z_vfs);
2229 		mutex_exit(&os->os->os_user_ptr_lock);
2230 	}
2231 
2232 	if (zfsvfs != NULL) {
2233 		char osname[MAXNAMELEN];
2234 		int mode;
2235 
2236 		VERIFY3U(0, ==, zfs_suspend_fs(zfsvfs, osname, &mode));
2237 		ASSERT(strcmp(osname, zc->zc_name) == 0);
2238 		error = dmu_objset_rollback(os);
2239 		VERIFY3U(0, ==, zfs_resume_fs(zfsvfs, osname, mode));
2240 
2241 		VFS_RELE(zfsvfs->z_vfs);
2242 	} else {
2243 		error = dmu_objset_rollback(os);
2244 	}
2245 	/* Note, the dmu_objset_rollback() closes the objset for us. */
2246 
2247 	return (error);
2248 }
2249 
2250 /*
2251  * inputs:
2252  * zc_name	old name of dataset
2253  * zc_value	new name of dataset
2254  * zc_cookie	recursive flag (only valid for snapshots)
2255  *
2256  * outputs:	none
2257  */
2258 static int
2259 zfs_ioc_rename(zfs_cmd_t *zc)
2260 {
2261 	boolean_t recursive = zc->zc_cookie & 1;
2262 
2263 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2264 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
2265 	    strchr(zc->zc_value, '%'))
2266 		return (EINVAL);
2267 
2268 	/*
2269 	 * Unmount snapshot unless we're doing a recursive rename,
2270 	 * in which case the dataset code figures out which snapshots
2271 	 * to unmount.
2272 	 */
2273 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
2274 	    zc->zc_objset_type == DMU_OST_ZFS) {
2275 		int err = zfs_unmount_snap(zc->zc_name, NULL);
2276 		if (err)
2277 			return (err);
2278 	}
2279 
2280 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
2281 }
2282 
2283 /*
2284  * inputs:
2285  * zc_name		name of containing filesystem
2286  * zc_nvlist_src{_size}	nvlist of properties to apply
2287  * zc_value		name of snapshot to create
2288  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
2289  * zc_cookie		file descriptor to recv from
2290  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
2291  * zc_guid		force flag
2292  *
2293  * outputs:
2294  * zc_cookie		number of bytes read
2295  */
2296 static int
2297 zfs_ioc_recv(zfs_cmd_t *zc)
2298 {
2299 	file_t *fp;
2300 	objset_t *os;
2301 	dmu_recv_cookie_t drc;
2302 	zfsvfs_t *zfsvfs = NULL;
2303 	boolean_t force = (boolean_t)zc->zc_guid;
2304 	int error, fd;
2305 	offset_t off;
2306 	nvlist_t *props = NULL;
2307 	objset_t *origin = NULL;
2308 	char *tosnap;
2309 	char tofs[ZFS_MAXNAMELEN];
2310 
2311 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
2312 	    strchr(zc->zc_value, '@') == NULL ||
2313 	    strchr(zc->zc_value, '%'))
2314 		return (EINVAL);
2315 
2316 	(void) strcpy(tofs, zc->zc_value);
2317 	tosnap = strchr(tofs, '@');
2318 	*tosnap = '\0';
2319 	tosnap++;
2320 
2321 	if (zc->zc_nvlist_src != NULL &&
2322 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2323 	    &props)) != 0)
2324 		return (error);
2325 
2326 	fd = zc->zc_cookie;
2327 	fp = getf(fd);
2328 	if (fp == NULL) {
2329 		nvlist_free(props);
2330 		return (EBADF);
2331 	}
2332 
2333 	/*
2334 	 * Get the zfsvfs for the receiving objset. There
2335 	 * won't be one if we're operating on a zvol, if the
2336 	 * objset doesn't exist yet, or is not mounted.
2337 	 */
2338 
2339 	error = dmu_objset_open(tofs, DMU_OST_ZFS,
2340 	    DS_MODE_STANDARD | DS_MODE_READONLY, &os);
2341 	if (!error) {
2342 		mutex_enter(&os->os->os_user_ptr_lock);
2343 		zfsvfs = dmu_objset_get_user(os);
2344 		if (zfsvfs != NULL)
2345 			VFS_HOLD(zfsvfs->z_vfs);
2346 		mutex_exit(&os->os->os_user_ptr_lock);
2347 		dmu_objset_close(os);
2348 	}
2349 
2350 	if (zc->zc_string[0]) {
2351 		error = dmu_objset_open(zc->zc_string, DMU_OST_ANY,
2352 		    DS_MODE_STANDARD | DS_MODE_READONLY, &origin);
2353 		if (error) {
2354 			if (zfsvfs != NULL)
2355 				VFS_RELE(zfsvfs->z_vfs);
2356 			nvlist_free(props);
2357 			releasef(fd);
2358 			return (error);
2359 		}
2360 	}
2361 
2362 	error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record,
2363 	    force, origin, zfsvfs != NULL, &drc);
2364 	if (origin)
2365 		dmu_objset_close(origin);
2366 	if (error) {
2367 		if (zfsvfs != NULL)
2368 			VFS_RELE(zfsvfs->z_vfs);
2369 		nvlist_free(props);
2370 		releasef(fd);
2371 		return (error);
2372 	}
2373 
2374 	/*
2375 	 * If properties are supplied, they are to completely replace
2376 	 * the existing ones; "inherit" any existing properties.
2377 	 */
2378 	if (props) {
2379 		objset_t *os;
2380 		nvlist_t *nv = NULL;
2381 
2382 		error = dmu_objset_open(tofs, DMU_OST_ANY,
2383 		    DS_MODE_STANDARD | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
2384 		    &os);
2385 		if (error == 0) {
2386 			error = dsl_prop_get_all(os, &nv);
2387 			dmu_objset_close(os);
2388 		}
2389 		if (error == 0) {
2390 			nvpair_t *elem;
2391 			zfs_cmd_t *zc2;
2392 			zc2 = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
2393 
2394 			(void) strcpy(zc2->zc_name, tofs);
2395 			for (elem = nvlist_next_nvpair(nv, NULL); elem;
2396 			    elem = nvlist_next_nvpair(nv, elem)) {
2397 				(void) strcpy(zc2->zc_value, nvpair_name(elem));
2398 				if (zfs_secpolicy_inherit(zc2, CRED()) == 0)
2399 					(void) zfs_ioc_inherit_prop(zc2);
2400 			}
2401 			kmem_free(zc2, sizeof (zfs_cmd_t));
2402 		}
2403 		if (nv)
2404 			nvlist_free(nv);
2405 	}
2406 
2407 	/*
2408 	 * Set properties.  Note, we ignore errors.  Would be better to
2409 	 * do best-effort in zfs_set_prop_nvlist, too.
2410 	 */
2411 	(void) zfs_set_prop_nvlist(tofs, props);
2412 	nvlist_free(props);
2413 
2414 	off = fp->f_offset;
2415 	error = dmu_recv_stream(&drc, fp->f_vnode, &off);
2416 
2417 	if (error == 0) {
2418 		if (zfsvfs != NULL) {
2419 			char osname[MAXNAMELEN];
2420 			int mode;
2421 
2422 			(void) zfs_suspend_fs(zfsvfs, osname, &mode);
2423 			error = dmu_recv_end(&drc);
2424 			error |= zfs_resume_fs(zfsvfs, osname, mode);
2425 		} else {
2426 			error = dmu_recv_end(&drc);
2427 		}
2428 	}
2429 	if (zfsvfs != NULL)
2430 		VFS_RELE(zfsvfs->z_vfs);
2431 
2432 	zc->zc_cookie = off - fp->f_offset;
2433 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
2434 		fp->f_offset = off;
2435 
2436 	releasef(fd);
2437 	return (error);
2438 }
2439 
2440 /*
2441  * inputs:
2442  * zc_name	name of snapshot to send
2443  * zc_value	short name of incremental fromsnap (may be empty)
2444  * zc_cookie	file descriptor to send stream to
2445  * zc_obj	fromorigin flag (mutually exclusive with zc_value)
2446  *
2447  * outputs: none
2448  */
2449 static int
2450 zfs_ioc_send(zfs_cmd_t *zc)
2451 {
2452 	objset_t *fromsnap = NULL;
2453 	objset_t *tosnap;
2454 	file_t *fp;
2455 	int error;
2456 	offset_t off;
2457 
2458 	error = dmu_objset_open(zc->zc_name, DMU_OST_ANY,
2459 	    DS_MODE_STANDARD | DS_MODE_READONLY, &tosnap);
2460 	if (error)
2461 		return (error);
2462 
2463 	if (zc->zc_value[0] != '\0') {
2464 		char buf[MAXPATHLEN];
2465 		char *cp;
2466 
2467 		(void) strncpy(buf, zc->zc_name, sizeof (buf));
2468 		cp = strchr(buf, '@');
2469 		if (cp)
2470 			*(cp+1) = 0;
2471 		(void) strncat(buf, zc->zc_value, sizeof (buf));
2472 		error = dmu_objset_open(buf, DMU_OST_ANY,
2473 		    DS_MODE_STANDARD | DS_MODE_READONLY, &fromsnap);
2474 		if (error) {
2475 			dmu_objset_close(tosnap);
2476 			return (error);
2477 		}
2478 	}
2479 
2480 	fp = getf(zc->zc_cookie);
2481 	if (fp == NULL) {
2482 		dmu_objset_close(tosnap);
2483 		if (fromsnap)
2484 			dmu_objset_close(fromsnap);
2485 		return (EBADF);
2486 	}
2487 
2488 	off = fp->f_offset;
2489 	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
2490 
2491 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
2492 		fp->f_offset = off;
2493 	releasef(zc->zc_cookie);
2494 	if (fromsnap)
2495 		dmu_objset_close(fromsnap);
2496 	dmu_objset_close(tosnap);
2497 	return (error);
2498 }
2499 
2500 static int
2501 zfs_ioc_inject_fault(zfs_cmd_t *zc)
2502 {
2503 	int id, error;
2504 
2505 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
2506 	    &zc->zc_inject_record);
2507 
2508 	if (error == 0)
2509 		zc->zc_guid = (uint64_t)id;
2510 
2511 	return (error);
2512 }
2513 
2514 static int
2515 zfs_ioc_clear_fault(zfs_cmd_t *zc)
2516 {
2517 	return (zio_clear_fault((int)zc->zc_guid));
2518 }
2519 
2520 static int
2521 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
2522 {
2523 	int id = (int)zc->zc_guid;
2524 	int error;
2525 
2526 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
2527 	    &zc->zc_inject_record);
2528 
2529 	zc->zc_guid = id;
2530 
2531 	return (error);
2532 }
2533 
2534 static int
2535 zfs_ioc_error_log(zfs_cmd_t *zc)
2536 {
2537 	spa_t *spa;
2538 	int error;
2539 	size_t count = (size_t)zc->zc_nvlist_dst_size;
2540 
2541 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2542 		return (error);
2543 
2544 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
2545 	    &count);
2546 	if (error == 0)
2547 		zc->zc_nvlist_dst_size = count;
2548 	else
2549 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
2550 
2551 	spa_close(spa, FTAG);
2552 
2553 	return (error);
2554 }
2555 
2556 static int
2557 zfs_ioc_clear(zfs_cmd_t *zc)
2558 {
2559 	spa_t *spa;
2560 	vdev_t *vd;
2561 	uint64_t txg;
2562 	int error;
2563 
2564 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2565 		return (error);
2566 
2567 	/*
2568 	 * Try to resume any I/Os which may have been suspended
2569 	 * as a result of a complete pool failure.
2570 	 */
2571 	if (!list_is_empty(&spa->spa_zio_list)) {
2572 		if (zio_vdev_resume_io(spa) != 0) {
2573 			spa_close(spa, FTAG);
2574 			return (EIO);
2575 		}
2576 	}
2577 
2578 	txg = spa_vdev_enter(spa);
2579 
2580 	if (zc->zc_guid == 0) {
2581 		vd = NULL;
2582 	} else if ((vd = spa_lookup_by_guid(spa, zc->zc_guid)) == NULL) {
2583 		spa_aux_vdev_t *sav;
2584 		int i;
2585 
2586 		/*
2587 		 * Check if this is an l2cache device.
2588 		 */
2589 		ASSERT(spa != NULL);
2590 		sav = &spa->spa_l2cache;
2591 		for (i = 0; i < sav->sav_count; i++) {
2592 			if (sav->sav_vdevs[i]->vdev_guid == zc->zc_guid) {
2593 				vd = sav->sav_vdevs[i];
2594 				break;
2595 			}
2596 		}
2597 
2598 		if (vd == NULL) {
2599 			(void) spa_vdev_exit(spa, NULL, txg, ENODEV);
2600 			spa_close(spa, FTAG);
2601 			return (ENODEV);
2602 		}
2603 	}
2604 
2605 	vdev_clear(spa, vd, B_TRUE);
2606 
2607 	(void) spa_vdev_exit(spa, NULL, txg, 0);
2608 
2609 	spa_close(spa, FTAG);
2610 
2611 	return (0);
2612 }
2613 
2614 /*
2615  * inputs:
2616  * zc_name	name of filesystem
2617  * zc_value	name of origin snapshot
2618  *
2619  * outputs:	none
2620  */
2621 static int
2622 zfs_ioc_promote(zfs_cmd_t *zc)
2623 {
2624 	char *cp;
2625 
2626 	/*
2627 	 * We don't need to unmount *all* the origin fs's snapshots, but
2628 	 * it's easier.
2629 	 */
2630 	cp = strchr(zc->zc_value, '@');
2631 	if (cp)
2632 		*cp = '\0';
2633 	(void) dmu_objset_find(zc->zc_value,
2634 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
2635 	return (dsl_dataset_promote(zc->zc_name));
2636 }
2637 
/*
 * We don't want to have a hard dependency on the special symbols
 * exported by sharefs, nfs, and smbsrv.  They are looked up on demand,
 * the first time a file system is shared.
 * None of sharefs, nfs, or smbsrv is an unloadable module.
 */
/* Entry points resolved with ddi_modsym(); NULL until first looked up. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once every lookup needed for that share type has succeeded. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Handles returned by ddi_modopen(); closed again in _fini(). */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and function pointers above are being set up. */
kmutex_t zfs_share_lock;
2656 
2657 static int
2658 zfs_init_sharefs()
2659 {
2660 	int error;
2661 
2662 	ASSERT(MUTEX_HELD(&zfs_share_lock));
2663 	/* Both NFS and SMB shares also require sharetab support. */
2664 	if (sharefs_mod == NULL && ((sharefs_mod =
2665 	    ddi_modopen("fs/sharefs",
2666 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
2667 		return (ENOSYS);
2668 	}
2669 	if (zshare_fs == NULL && ((zshare_fs =
2670 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
2671 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
2672 		return (ENOSYS);
2673 	}
2674 	return (0);
2675 }
2676 
2677 static int
2678 zfs_ioc_share(zfs_cmd_t *zc)
2679 {
2680 	int error;
2681 	int opcode;
2682 
2683 	switch (zc->zc_share.z_sharetype) {
2684 	case ZFS_SHARE_NFS:
2685 	case ZFS_UNSHARE_NFS:
2686 		if (zfs_nfsshare_inited == 0) {
2687 			mutex_enter(&zfs_share_lock);
2688 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
2689 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
2690 				mutex_exit(&zfs_share_lock);
2691 				return (ENOSYS);
2692 			}
2693 			if (znfsexport_fs == NULL &&
2694 			    ((znfsexport_fs = (int (*)(void *))
2695 			    ddi_modsym(nfs_mod,
2696 			    "nfs_export", &error)) == NULL)) {
2697 				mutex_exit(&zfs_share_lock);
2698 				return (ENOSYS);
2699 			}
2700 			error = zfs_init_sharefs();
2701 			if (error) {
2702 				mutex_exit(&zfs_share_lock);
2703 				return (ENOSYS);
2704 			}
2705 			zfs_nfsshare_inited = 1;
2706 			mutex_exit(&zfs_share_lock);
2707 		}
2708 		break;
2709 	case ZFS_SHARE_SMB:
2710 	case ZFS_UNSHARE_SMB:
2711 		if (zfs_smbshare_inited == 0) {
2712 			mutex_enter(&zfs_share_lock);
2713 			if (smbsrv_mod == NULL && ((smbsrv_mod =
2714 			    ddi_modopen("drv/smbsrv",
2715 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
2716 				mutex_exit(&zfs_share_lock);
2717 				return (ENOSYS);
2718 			}
2719 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
2720 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
2721 			    "lmshrd_share_upcall", &error)) == NULL)) {
2722 				mutex_exit(&zfs_share_lock);
2723 				return (ENOSYS);
2724 			}
2725 			error = zfs_init_sharefs();
2726 			if (error) {
2727 				mutex_exit(&zfs_share_lock);
2728 				return (ENOSYS);
2729 			}
2730 			zfs_smbshare_inited = 1;
2731 			mutex_exit(&zfs_share_lock);
2732 		}
2733 		break;
2734 	default:
2735 		return (EINVAL);
2736 	}
2737 
2738 	switch (zc->zc_share.z_sharetype) {
2739 	case ZFS_SHARE_NFS:
2740 	case ZFS_UNSHARE_NFS:
2741 		if (error =
2742 		    znfsexport_fs((void *)
2743 		    (uintptr_t)zc->zc_share.z_exportdata))
2744 			return (error);
2745 		break;
2746 	case ZFS_SHARE_SMB:
2747 	case ZFS_UNSHARE_SMB:
2748 		if (error = zsmbexport_fs((void *)
2749 		    (uintptr_t)zc->zc_share.z_exportdata,
2750 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
2751 		    B_TRUE : B_FALSE)) {
2752 			return (error);
2753 		}
2754 		break;
2755 	}
2756 
2757 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
2758 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
2759 	    SHAREFS_ADD : SHAREFS_REMOVE;
2760 
2761 	/*
2762 	 * Add or remove share from sharetab
2763 	 */
2764 	error = zshare_fs(opcode,
2765 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
2766 	    zc->zc_share.z_sharemax);
2767 
2768 	return (error);
2769 
2770 }
2771 
/*
 * Dispatch table for the /dev/zfs ioctls.  zfsdev_ioctl() indexes it with
 * (cmd - ZFS_IOC), so the order of the entries is significant: an entry's
 * position is its ioctl number relative to ZFS_IOC.
 *
 * Each entry supplies, as consumed by zfsdev_ioctl(): the handler
 * (zvec_func), the security policy check (zvec_secpolicy), which name
 * check to apply to zc_name (zvec_namecheck: POOL_NAME, DATASET_NAME or
 * NO_NAME), and whether a successful call is recorded through
 * zfs_log_history() (zvec_his_log).
 * NOTE(review): the member order is inferred from the initializers; confirm
 * against the zfs_ioc_vec_t definition.
 *
 * Pool create, destroy, and export are not logged by zfsdev_ioctl();
 * zfs_ioc_pool_create and zfs_ioc_pool_export do the logging of those
 * commands themselves.
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE },
	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE },
	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE },
	{ zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE },
	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE },
	{ zfs_ioc_objset_version, zfs_secpolicy_read, DATASET_NAME, B_FALSE },
	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read,
	    DATASET_NAME, B_FALSE },
	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read,
	    DATASET_NAME, B_FALSE },
	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE },
	{ zfs_ioc_create_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE },
	{ zfs_ioc_remove_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE },
	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE },
	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE },
	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE },
	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE },
	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE },
	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE },
	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE },
	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE },
	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE },
	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE },
	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE },
	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE },
	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy,	DATASET_NAME, B_TRUE },
	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE },
	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE },
	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, NO_NAME, B_FALSE },
	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE },
	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE },
	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE },
	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE },
	{ zfs_ioc_iscsi_perm_check, zfs_secpolicy_iscsi,
	    DATASET_NAME, B_FALSE },
	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE },
	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE },
};
2829 
2830 static int
2831 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
2832 {
2833 	zfs_cmd_t *zc;
2834 	uint_t vec;
2835 	int error, rc;
2836 
2837 	if (getminor(dev) != 0)
2838 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
2839 
2840 	vec = cmd - ZFS_IOC;
2841 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
2842 
2843 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
2844 		return (EINVAL);
2845 
2846 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2847 
2848 	error = xcopyin((void *)arg, zc, sizeof (zfs_cmd_t));
2849 
2850 	if (error == 0)
2851 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
2852 
2853 	/*
2854 	 * Ensure that all pool/dataset names are valid before we pass down to
2855 	 * the lower layers.
2856 	 */
2857 	if (error == 0) {
2858 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
2859 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
2860 		case POOL_NAME:
2861 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
2862 				error = EINVAL;
2863 			break;
2864 
2865 		case DATASET_NAME:
2866 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
2867 				error = EINVAL;
2868 			break;
2869 
2870 		case NO_NAME:
2871 			break;
2872 		}
2873 	}
2874 
2875 	if (error == 0)
2876 		error = zfs_ioc_vec[vec].zvec_func(zc);
2877 
2878 	rc = xcopyout(zc, (void *)arg, sizeof (zfs_cmd_t));
2879 	if (error == 0) {
2880 		error = rc;
2881 		if (zfs_ioc_vec[vec].zvec_his_log == B_TRUE)
2882 			zfs_log_history(zc);
2883 	}
2884 
2885 	kmem_free(zc, sizeof (zfs_cmd_t));
2886 	return (error);
2887 }
2888 
2889 static int
2890 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2891 {
2892 	if (cmd != DDI_ATTACH)
2893 		return (DDI_FAILURE);
2894 
2895 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
2896 	    DDI_PSEUDO, 0) == DDI_FAILURE)
2897 		return (DDI_FAILURE);
2898 
2899 	zfs_dip = dip;
2900 
2901 	ddi_report_dev(dip);
2902 
2903 	return (DDI_SUCCESS);
2904 }
2905 
2906 static int
2907 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2908 {
2909 	if (spa_busy() || zfs_busy() || zvol_busy())
2910 		return (DDI_FAILURE);
2911 
2912 	if (cmd != DDI_DETACH)
2913 		return (DDI_FAILURE);
2914 
2915 	zfs_dip = NULL;
2916 
2917 	ddi_prop_remove_all(dip);
2918 	ddi_remove_minor_node(dip, NULL);
2919 
2920 	return (DDI_SUCCESS);
2921 }
2922 
2923 /*ARGSUSED*/
2924 static int
2925 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2926 {
2927 	switch (infocmd) {
2928 	case DDI_INFO_DEVT2DEVINFO:
2929 		*result = zfs_dip;
2930 		return (DDI_SUCCESS);
2931 
2932 	case DDI_INFO_DEVT2INSTANCE:
2933 		*result = (void *)0;
2934 		return (DDI_SUCCESS);
2935 	}
2936 
2937 	return (DDI_FAILURE);
2938 }
2939 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * The only entry point in this table that lives in this file is
 * zfsdev_ioctl(), and it in turn forwards requests for non-zero minors
 * to zvol_ioctl().
 */
static struct cb_ops zfs_cb_ops = {
	zvol_open,	/* open */
	zvol_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
2969 
/*
 * Device operations for the zfs driver: zfs_info, zfs_attach and zfs_detach
 * from this file, plus the character/block entry points in zfs_cb_ops.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL		/* no bus operations */
};
2982 
/*
 * Driver linkage structure.  The description string has
 * SPA_VERSION_STRING appended to it at compile time.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops, "ZFS storage pool version " SPA_VERSION_STRING,
	    &zfs_dev_ops
};
2987 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info().
 * It carries two linkage entries: zfs_modlfs and the driver linkage
 * zfs_modldrv, followed by the NULL terminator.
 * NOTE(review): zfs_modlfs is declared outside this part of the file;
 * presumably it is the file system linkage -- confirm.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
2994 
2995 
/*
 * Thread-specific-data keys created by _init() with tsd_create().
 * zfs_fsyncer_key is defined here; rrw_tsd_key is defined elsewhere.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
2998 
2999 int
3000 _init(void)
3001 {
3002 	int error;
3003 
3004 	spa_init(FREAD | FWRITE);
3005 	zfs_init();
3006 	zvol_init();
3007 
3008 	if ((error = mod_install(&modlinkage)) != 0) {
3009 		zvol_fini();
3010 		zfs_fini();
3011 		spa_fini();
3012 		return (error);
3013 	}
3014 
3015 	tsd_create(&zfs_fsyncer_key, NULL);
3016 	tsd_create(&rrw_tsd_key, NULL);
3017 
3018 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
3019 	ASSERT(error == 0);
3020 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
3021 
3022 	return (0);
3023 }
3024 
3025 int
3026 _fini(void)
3027 {
3028 	int error;
3029 
3030 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
3031 		return (EBUSY);
3032 
3033 	if ((error = mod_remove(&modlinkage)) != 0)
3034 		return (error);
3035 
3036 	zvol_fini();
3037 	zfs_fini();
3038 	spa_fini();
3039 	if (zfs_nfsshare_inited)
3040 		(void) ddi_modclose(nfs_mod);
3041 	if (zfs_smbshare_inited)
3042 		(void) ddi_modclose(smbsrv_mod);
3043 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
3044 		(void) ddi_modclose(sharefs_mod);
3045 
3046 	tsd_destroy(&zfs_fsyncer_key);
3047 	ldi_ident_release(zfs_li);
3048 	zfs_li = NULL;
3049 	mutex_destroy(&zfs_share_lock);
3050 
3051 	return (error);
3052 }
3053 
3054 int
3055 _info(struct modinfo *modinfop)
3056 {
3057 	return (mod_info(&modlinkage, modinfop));
3058 }
3059