xref: /titanic_51/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision 21ad40f5447a73ac8a7ed2b9b66dd73ff1b088c1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Portions Copyright 2011 Martin Matuska
25  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
26  * Copyright (c) 2012 by Delphix. All rights reserved.
27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/errno.h>
33 #include <sys/uio.h>
34 #include <sys/buf.h>
35 #include <sys/modctl.h>
36 #include <sys/open.h>
37 #include <sys/file.h>
38 #include <sys/kmem.h>
39 #include <sys/conf.h>
40 #include <sys/cmn_err.h>
41 #include <sys/stat.h>
42 #include <sys/zfs_ioctl.h>
43 #include <sys/zfs_vfsops.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/zap.h>
46 #include <sys/spa.h>
47 #include <sys/spa_impl.h>
48 #include <sys/vdev.h>
49 #include <sys/priv_impl.h>
50 #include <sys/dmu.h>
51 #include <sys/dsl_dir.h>
52 #include <sys/dsl_dataset.h>
53 #include <sys/dsl_prop.h>
54 #include <sys/dsl_deleg.h>
55 #include <sys/dmu_objset.h>
56 #include <sys/dmu_impl.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/sunldi.h>
60 #include <sys/policy.h>
61 #include <sys/zone.h>
62 #include <sys/nvpair.h>
63 #include <sys/pathname.h>
64 #include <sys/mount.h>
65 #include <sys/sdt.h>
66 #include <sys/fs/zfs.h>
67 #include <sys/zfs_ctldir.h>
68 #include <sys/zfs_dir.h>
69 #include <sys/zfs_onexit.h>
70 #include <sys/zvol.h>
71 #include <sys/dsl_scan.h>
72 #include <sharefs/share.h>
73 #include <sys/dmu_objset.h>
74 
75 #include "zfs_namecheck.h"
76 #include "zfs_prop.h"
77 #include "zfs_deleg.h"
78 #include "zfs_comutil.h"
79 
/* Module linkage structure for the filesystem; defined outside this file. */
extern struct modlfs zfs_modlfs;

/* Defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * Driver-wide LDI identifier and devinfo pointer.
 * NOTE(review): presumably filled in when the driver attaches -- the
 * assignments are not visible in this part of the file; confirm.
 */
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;

/* Signature of an ioctl handler: takes the command block, returns an errno. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/* Signature of a security-policy check: command block plus caller's cred. */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);

/* Kind of name an ioctl carries, as recorded in zvec_namecheck. */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;

/* Pool-state checks for an ioctl; values are distinct bits. */
typedef enum {
	POOL_CHECK_NONE		= 1 << 0,
	POOL_CHECK_SUSPENDED	= 1 << 1,
	POOL_CHECK_READONLY	= 1 << 2
} zfs_ioc_poolcheck_t;

/* Per-ioctl descriptor bundling the handler with its policy and checks. */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;		/* handler */
	zfs_secpolicy_func_t	*zvec_secpolicy;	/* permission check */
	zfs_ioc_namecheck_t	zvec_namecheck;		/* kind of name */
	boolean_t		zvec_his_log;	/* NOTE(review): presumably */
						/* "log to history"; confirm */
	zfs_ioc_poolcheck_t	zvec_pool_check;	/* pool-state checks */
} zfs_ioc_vec_t;

/*
 * This array is indexed by zfs_userquota_prop_t.  The userspace policy
 * functions in this file index it with zc_objset_type after checking that
 * value against ZFS_NUM_USERQUOTA_PROPS.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};

/* Forward declarations for functions used before their definitions. */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
127 
128 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
129 void
130 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
131 {
132 	const char *newfile;
133 	char buf[512];
134 	va_list adx;
135 
136 	/*
137 	 * Get rid of annoying "../common/" prefix to filename.
138 	 */
139 	newfile = strrchr(file, '/');
140 	if (newfile != NULL) {
141 		newfile = newfile + 1; /* Get rid of leading / */
142 	} else {
143 		newfile = file;
144 	}
145 
146 	va_start(adx, fmt);
147 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
148 	va_end(adx);
149 
150 	/*
151 	 * To get this data, use the zfs-dprintf probe as so:
152 	 * dtrace -q -n 'zfs-dprintf \
153 	 *	/stringof(arg0) == "dbuf.c"/ \
154 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
155 	 * arg0 = file name
156 	 * arg1 = function name
157 	 * arg2 = line number
158 	 * arg3 = message
159 	 */
160 	DTRACE_PROBE4(zfs__dprintf,
161 	    char *, newfile, char *, func, int, line, char *, buf);
162 }
163 
164 static void
165 history_str_free(char *buf)
166 {
167 	kmem_free(buf, HIS_MAX_RECORD_LEN);
168 }
169 
170 static char *
171 history_str_get(zfs_cmd_t *zc)
172 {
173 	char *buf;
174 
175 	if (zc->zc_history == NULL)
176 		return (NULL);
177 
178 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
179 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
180 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
181 		history_str_free(buf);
182 		return (NULL);
183 	}
184 
185 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
186 
187 	return (buf);
188 }
189 
190 /*
191  * Check to see if the named dataset is currently defined as bootable
192  */
193 static boolean_t
194 zfs_is_bootfs(const char *name)
195 {
196 	objset_t *os;
197 
198 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
199 		boolean_t ret;
200 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
201 		dmu_objset_rele(os, FTAG);
202 		return (ret);
203 	}
204 	return (B_FALSE);
205 }
206 
207 /*
208  * zfs_earlier_version
209  *
210  *	Return non-zero if the spa version is less than requested version.
211  */
212 static int
213 zfs_earlier_version(const char *name, int version)
214 {
215 	spa_t *spa;
216 
217 	if (spa_open(name, &spa, FTAG) == 0) {
218 		if (spa_version(spa) < version) {
219 			spa_close(spa, FTAG);
220 			return (1);
221 		}
222 		spa_close(spa, FTAG);
223 	}
224 	return (0);
225 }
226 
227 /*
228  * zpl_earlier_version
229  *
230  * Return TRUE if the ZPL version is less than requested version.
231  */
232 static boolean_t
233 zpl_earlier_version(const char *name, int version)
234 {
235 	objset_t *os;
236 	boolean_t rc = B_TRUE;
237 
238 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
239 		uint64_t zplversion;
240 
241 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
242 			dmu_objset_rele(os, FTAG);
243 			return (B_TRUE);
244 		}
245 		/* XXX reading from non-owned objset */
246 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
247 			rc = zplversion < version;
248 		dmu_objset_rele(os, FTAG);
249 	}
250 	return (rc);
251 }
252 
253 static void
254 zfs_log_history(zfs_cmd_t *zc)
255 {
256 	spa_t *spa;
257 	char *buf;
258 
259 	if ((buf = history_str_get(zc)) == NULL)
260 		return;
261 
262 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
263 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
264 			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
265 		spa_close(spa, FTAG);
266 	}
267 	history_str_free(buf);
268 }
269 
270 /*
271  * Policy for top-level read operations (list pools).  Requires no privileges,
272  * and can be used in the local zone, as there is no associated dataset.
273  */
274 /* ARGSUSED */
275 static int
276 zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
277 {
278 	return (0);
279 }
280 
281 /*
282  * Policy for dataset read operations (list children, get statistics).  Requires
283  * no privileges, but must be visible in the local zone.
284  */
285 /* ARGSUSED */
286 static int
287 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
288 {
289 	if (INGLOBALZONE(curproc) ||
290 	    zone_dataset_visible(zc->zc_name, NULL))
291 		return (0);
292 
293 	return (ENOENT);
294 }
295 
296 static int
297 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
298 {
299 	int writable = 1;
300 
301 	/*
302 	 * The dataset must be visible by this zone -- check this first
303 	 * so they don't see EPERM on something they shouldn't know about.
304 	 */
305 	if (!INGLOBALZONE(curproc) &&
306 	    !zone_dataset_visible(dataset, &writable))
307 		return (ENOENT);
308 
309 	if (INGLOBALZONE(curproc)) {
310 		/*
311 		 * If the fs is zoned, only root can access it from the
312 		 * global zone.
313 		 */
314 		if (secpolicy_zfs(cr) && zoned)
315 			return (EPERM);
316 	} else {
317 		/*
318 		 * If we are in a local zone, the 'zoned' property must be set.
319 		 */
320 		if (!zoned)
321 			return (EPERM);
322 
323 		/* must be writable by this zone */
324 		if (!writable)
325 			return (EPERM);
326 	}
327 	return (0);
328 }
329 
330 static int
331 zfs_dozonecheck(const char *dataset, cred_t *cr)
332 {
333 	uint64_t zoned;
334 
335 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
336 		return (ENOENT);
337 
338 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
339 }
340 
341 static int
342 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
343 {
344 	uint64_t zoned;
345 
346 	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
347 	if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
348 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
349 		return (ENOENT);
350 	}
351 	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
352 
353 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
354 }
355 
356 /*
357  * If name ends in a '@', then require recursive permissions.
358  */
359 int
360 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
361 {
362 	int error;
363 	boolean_t descendent = B_FALSE;
364 	dsl_dataset_t *ds;
365 	char *at;
366 
367 	at = strchr(name, '@');
368 	if (at != NULL && at[1] == '\0') {
369 		*at = '\0';
370 		descendent = B_TRUE;
371 	}
372 
373 	error = dsl_dataset_hold(name, FTAG, &ds);
374 	if (at != NULL)
375 		*at = '@';
376 	if (error != 0)
377 		return (error);
378 
379 	error = zfs_dozonecheck_ds(name, ds, cr);
380 	if (error == 0) {
381 		error = secpolicy_zfs(cr);
382 		if (error)
383 			error = dsl_deleg_access_impl(ds, descendent, perm, cr);
384 	}
385 
386 	dsl_dataset_rele(ds, FTAG);
387 	return (error);
388 }
389 
390 int
391 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
392     const char *perm, cred_t *cr)
393 {
394 	int error;
395 
396 	error = zfs_dozonecheck_ds(name, ds, cr);
397 	if (error == 0) {
398 		error = secpolicy_zfs(cr);
399 		if (error)
400 			error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
401 	}
402 	return (error);
403 }
404 
405 /*
406  * Policy for setting the security label property.
407  *
408  * Returns 0 for success, non-zero for access and other errors.
409  */
410 static int
411 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
412 {
413 	char		ds_hexsl[MAXNAMELEN];
414 	bslabel_t	ds_sl, new_sl;
415 	boolean_t	new_default = FALSE;
416 	uint64_t	zoned;
417 	int		needed_priv = -1;
418 	int		error;
419 
420 	/* First get the existing dataset label. */
421 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
422 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
423 	if (error)
424 		return (EPERM);
425 
426 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
427 		new_default = TRUE;
428 
429 	/* The label must be translatable */
430 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
431 		return (EINVAL);
432 
433 	/*
434 	 * In a non-global zone, disallow attempts to set a label that
435 	 * doesn't match that of the zone; otherwise no other checks
436 	 * are needed.
437 	 */
438 	if (!INGLOBALZONE(curproc)) {
439 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
440 			return (EPERM);
441 		return (0);
442 	}
443 
444 	/*
445 	 * For global-zone datasets (i.e., those whose zoned property is
446 	 * "off", verify that the specified new label is valid for the
447 	 * global zone.
448 	 */
449 	if (dsl_prop_get_integer(name,
450 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
451 		return (EPERM);
452 	if (!zoned) {
453 		if (zfs_check_global_label(name, strval) != 0)
454 			return (EPERM);
455 	}
456 
457 	/*
458 	 * If the existing dataset label is nondefault, check if the
459 	 * dataset is mounted (label cannot be changed while mounted).
460 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
461 	 * mounted (or isn't a dataset, doesn't exist, ...).
462 	 */
463 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
464 		objset_t *os;
465 		static char *setsl_tag = "setsl_tag";
466 
467 		/*
468 		 * Try to own the dataset; abort if there is any error,
469 		 * (e.g., already mounted, in use, or other error).
470 		 */
471 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
472 		    setsl_tag, &os);
473 		if (error)
474 			return (EPERM);
475 
476 		dmu_objset_disown(os, setsl_tag);
477 
478 		if (new_default) {
479 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
480 			goto out_check;
481 		}
482 
483 		if (hexstr_to_label(strval, &new_sl) != 0)
484 			return (EPERM);
485 
486 		if (blstrictdom(&ds_sl, &new_sl))
487 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
488 		else if (blstrictdom(&new_sl, &ds_sl))
489 			needed_priv = PRIV_FILE_UPGRADE_SL;
490 	} else {
491 		/* dataset currently has a default label */
492 		if (!new_default)
493 			needed_priv = PRIV_FILE_UPGRADE_SL;
494 	}
495 
496 out_check:
497 	if (needed_priv != -1)
498 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
499 	return (0);
500 }
501 
502 static int
503 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
504     cred_t *cr)
505 {
506 	char *strval;
507 
508 	/*
509 	 * Check permissions for special properties.
510 	 */
511 	switch (prop) {
512 	case ZFS_PROP_ZONED:
513 		/*
514 		 * Disallow setting of 'zoned' from within a local zone.
515 		 */
516 		if (!INGLOBALZONE(curproc))
517 			return (EPERM);
518 		break;
519 
520 	case ZFS_PROP_QUOTA:
521 		if (!INGLOBALZONE(curproc)) {
522 			uint64_t zoned;
523 			char setpoint[MAXNAMELEN];
524 			/*
525 			 * Unprivileged users are allowed to modify the
526 			 * quota on things *under* (ie. contained by)
527 			 * the thing they own.
528 			 */
529 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
530 			    setpoint))
531 				return (EPERM);
532 			if (!zoned || strlen(dsname) <= strlen(setpoint))
533 				return (EPERM);
534 		}
535 		break;
536 
537 	case ZFS_PROP_MLSLABEL:
538 		if (!is_system_labeled())
539 			return (EPERM);
540 
541 		if (nvpair_value_string(propval, &strval) == 0) {
542 			int err;
543 
544 			err = zfs_set_slabel_policy(dsname, strval, CRED());
545 			if (err != 0)
546 				return (err);
547 		}
548 		break;
549 	}
550 
551 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
552 }
553 
554 int
555 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
556 {
557 	int error;
558 
559 	error = zfs_dozonecheck(zc->zc_name, cr);
560 	if (error)
561 		return (error);
562 
563 	/*
564 	 * permission to set permissions will be evaluated later in
565 	 * dsl_deleg_can_allow()
566 	 */
567 	return (0);
568 }
569 
570 int
571 zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
572 {
573 	return (zfs_secpolicy_write_perms(zc->zc_name,
574 	    ZFS_DELEG_PERM_ROLLBACK, cr));
575 }
576 
577 int
578 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
579 {
580 	spa_t *spa;
581 	dsl_pool_t *dp;
582 	dsl_dataset_t *ds;
583 	char *cp;
584 	int error;
585 
586 	/*
587 	 * Generate the current snapshot name from the given objsetid, then
588 	 * use that name for the secpolicy/zone checks.
589 	 */
590 	cp = strchr(zc->zc_name, '@');
591 	if (cp == NULL)
592 		return (EINVAL);
593 	error = spa_open(zc->zc_name, &spa, FTAG);
594 	if (error)
595 		return (error);
596 
597 	dp = spa_get_dsl(spa);
598 	rw_enter(&dp->dp_config_rwlock, RW_READER);
599 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
600 	rw_exit(&dp->dp_config_rwlock);
601 	spa_close(spa, FTAG);
602 	if (error)
603 		return (error);
604 
605 	dsl_dataset_name(ds, zc->zc_name);
606 
607 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
608 	    ZFS_DELEG_PERM_SEND, cr);
609 	dsl_dataset_rele(ds, FTAG);
610 
611 	return (error);
612 }
613 
614 static int
615 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
616 {
617 	vnode_t *vp;
618 	int error;
619 
620 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
621 	    NO_FOLLOW, NULL, &vp)) != 0)
622 		return (error);
623 
624 	/* Now make sure mntpnt and dataset are ZFS */
625 
626 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
627 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
628 	    zc->zc_name) != 0)) {
629 		VN_RELE(vp);
630 		return (EPERM);
631 	}
632 
633 	VN_RELE(vp);
634 	return (dsl_deleg_access(zc->zc_name,
635 	    ZFS_DELEG_PERM_SHARE, cr));
636 }
637 
638 int
639 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
640 {
641 	if (!INGLOBALZONE(curproc))
642 		return (EPERM);
643 
644 	if (secpolicy_nfs(cr) == 0) {
645 		return (0);
646 	} else {
647 		return (zfs_secpolicy_deleg_share(zc, cr));
648 	}
649 }
650 
651 int
652 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
653 {
654 	if (!INGLOBALZONE(curproc))
655 		return (EPERM);
656 
657 	if (secpolicy_smb(cr) == 0) {
658 		return (0);
659 	} else {
660 		return (zfs_secpolicy_deleg_share(zc, cr));
661 	}
662 }
663 
/*
 * Compute the parent of 'datasetname' into 'parent', a buffer of
 * 'parentsize' bytes, by removing the @bla or /bla from the end of the name:
 * the copy is cut at its last '@', or, when there is no '@', at its last '/'.
 *
 * Returns 0 on success, ENOENT when the name contains neither separator
 * (in which case 'parent' holds an unmodified copy of the name), and
 * ENAMETOOLONG when the name and its terminating NUL do not fit in
 * 'parent'.  The length check guarantees that 'parent' is always
 * NUL-terminated before it is searched; strncpy() alone does not.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	size_t len = strlen(datasetname);
	char *cp;

	if (parentsize <= 0 || len >= (size_t)parentsize)
		return (ENAMETOOLONG);

	/* len + 1 <= parentsize, so this copies the terminator as well. */
	(void) strncpy(parent, datasetname, len + 1);

	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);

	cp[0] = '\0';
	return (0);
}
685 
686 int
687 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
688 {
689 	int error;
690 
691 	if ((error = zfs_secpolicy_write_perms(name,
692 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
693 		return (error);
694 
695 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
696 }
697 
698 static int
699 zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
700 {
701 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
702 }
703 
704 /*
705  * Destroying snapshots with delegated permissions requires
706  * descendent mount and destroy permissions.
707  */
708 static int
709 zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr)
710 {
711 	int error;
712 	char *dsname;
713 
714 	dsname = kmem_asprintf("%s@", zc->zc_name);
715 
716 	error = zfs_secpolicy_destroy_perms(dsname, cr);
717 
718 	strfree(dsname);
719 	return (error);
720 }
721 
722 int
723 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
724 {
725 	char	parentname[MAXNAMELEN];
726 	int	error;
727 
728 	if ((error = zfs_secpolicy_write_perms(from,
729 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
730 		return (error);
731 
732 	if ((error = zfs_secpolicy_write_perms(from,
733 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
734 		return (error);
735 
736 	if ((error = zfs_get_parent(to, parentname,
737 	    sizeof (parentname))) != 0)
738 		return (error);
739 
740 	if ((error = zfs_secpolicy_write_perms(parentname,
741 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
742 		return (error);
743 
744 	if ((error = zfs_secpolicy_write_perms(parentname,
745 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
746 		return (error);
747 
748 	return (error);
749 }
750 
751 static int
752 zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
753 {
754 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
755 }
756 
757 static int
758 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
759 {
760 	char	parentname[MAXNAMELEN];
761 	objset_t *clone;
762 	int error;
763 
764 	error = zfs_secpolicy_write_perms(zc->zc_name,
765 	    ZFS_DELEG_PERM_PROMOTE, cr);
766 	if (error)
767 		return (error);
768 
769 	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
770 
771 	if (error == 0) {
772 		dsl_dataset_t *pclone = NULL;
773 		dsl_dir_t *dd;
774 		dd = clone->os_dsl_dataset->ds_dir;
775 
776 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
777 		error = dsl_dataset_hold_obj(dd->dd_pool,
778 		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
779 		rw_exit(&dd->dd_pool->dp_config_rwlock);
780 		if (error) {
781 			dmu_objset_rele(clone, FTAG);
782 			return (error);
783 		}
784 
785 		error = zfs_secpolicy_write_perms(zc->zc_name,
786 		    ZFS_DELEG_PERM_MOUNT, cr);
787 
788 		dsl_dataset_name(pclone, parentname);
789 		dmu_objset_rele(clone, FTAG);
790 		dsl_dataset_rele(pclone, FTAG);
791 		if (error == 0)
792 			error = zfs_secpolicy_write_perms(parentname,
793 			    ZFS_DELEG_PERM_PROMOTE, cr);
794 	}
795 	return (error);
796 }
797 
798 static int
799 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
800 {
801 	int error;
802 
803 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
804 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
805 		return (error);
806 
807 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
808 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
809 		return (error);
810 
811 	return (zfs_secpolicy_write_perms(zc->zc_name,
812 	    ZFS_DELEG_PERM_CREATE, cr));
813 }
814 
815 int
816 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
817 {
818 	return (zfs_secpolicy_write_perms(name,
819 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
820 }
821 
822 static int
823 zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
824 {
825 
826 	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
827 }
828 
829 static int
830 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
831 {
832 	char	parentname[MAXNAMELEN];
833 	int	error;
834 
835 	if ((error = zfs_get_parent(zc->zc_name, parentname,
836 	    sizeof (parentname))) != 0)
837 		return (error);
838 
839 	if (zc->zc_value[0] != '\0') {
840 		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
841 		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
842 			return (error);
843 	}
844 
845 	if ((error = zfs_secpolicy_write_perms(parentname,
846 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
847 		return (error);
848 
849 	error = zfs_secpolicy_write_perms(parentname,
850 	    ZFS_DELEG_PERM_MOUNT, cr);
851 
852 	return (error);
853 }
854 
855 static int
856 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
857 {
858 	int error;
859 
860 	error = secpolicy_fs_unmount(cr, NULL);
861 	if (error) {
862 		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
863 	}
864 	return (error);
865 }
866 
867 /*
868  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
869  * SYS_CONFIG privilege, which is not available in a local zone.
870  */
871 /* ARGSUSED */
872 static int
873 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
874 {
875 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
876 		return (EPERM);
877 
878 	return (0);
879 }
880 
881 /*
882  * Policy for object to name lookups.
883  */
884 /* ARGSUSED */
885 static int
886 zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)
887 {
888 	int error;
889 
890 	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
891 		return (0);
892 
893 	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
894 	return (error);
895 }
896 
897 /*
898  * Policy for fault injection.  Requires all privileges.
899  */
900 /* ARGSUSED */
901 static int
902 zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
903 {
904 	return (secpolicy_zinject(cr));
905 }
906 
907 static int
908 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
909 {
910 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
911 
912 	if (prop == ZPROP_INVAL) {
913 		if (!zfs_prop_user(zc->zc_value))
914 			return (EINVAL);
915 		return (zfs_secpolicy_write_perms(zc->zc_name,
916 		    ZFS_DELEG_PERM_USERPROP, cr));
917 	} else {
918 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
919 		    NULL, cr));
920 	}
921 }
922 
923 static int
924 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
925 {
926 	int err = zfs_secpolicy_read(zc, cr);
927 	if (err)
928 		return (err);
929 
930 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
931 		return (EINVAL);
932 
933 	if (zc->zc_value[0] == 0) {
934 		/*
935 		 * They are asking about a posix uid/gid.  If it's
936 		 * themself, allow it.
937 		 */
938 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
939 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
940 			if (zc->zc_guid == crgetuid(cr))
941 				return (0);
942 		} else {
943 			if (groupmember(zc->zc_guid, cr))
944 				return (0);
945 		}
946 	}
947 
948 	return (zfs_secpolicy_write_perms(zc->zc_name,
949 	    userquota_perms[zc->zc_objset_type], cr));
950 }
951 
952 static int
953 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
954 {
955 	int err = zfs_secpolicy_read(zc, cr);
956 	if (err)
957 		return (err);
958 
959 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
960 		return (EINVAL);
961 
962 	return (zfs_secpolicy_write_perms(zc->zc_name,
963 	    userquota_perms[zc->zc_objset_type], cr));
964 }
965 
966 static int
967 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
968 {
969 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
970 	    NULL, cr));
971 }
972 
973 static int
974 zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
975 {
976 	return (zfs_secpolicy_write_perms(zc->zc_name,
977 	    ZFS_DELEG_PERM_HOLD, cr));
978 }
979 
980 static int
981 zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
982 {
983 	return (zfs_secpolicy_write_perms(zc->zc_name,
984 	    ZFS_DELEG_PERM_RELEASE, cr));
985 }
986 
987 /*
988  * Policy for allowing temporary snapshots to be taken or released
989  */
990 static int
991 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)
992 {
993 	/*
994 	 * A temporary snapshot is the same as a snapshot,
995 	 * hold, destroy and release all rolled into one.
996 	 * Delegated diff alone is sufficient that we allow this.
997 	 */
998 	int error;
999 
1000 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1001 	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1002 		return (0);
1003 
1004 	error = zfs_secpolicy_snapshot(zc, cr);
1005 	if (!error)
1006 		error = zfs_secpolicy_hold(zc, cr);
1007 	if (!error)
1008 		error = zfs_secpolicy_release(zc, cr);
1009 	if (!error)
1010 		error = zfs_secpolicy_destroy(zc, cr);
1011 	return (error);
1012 }
1013 
1014 /*
1015  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1016  */
1017 static int
1018 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1019 {
1020 	char *packed;
1021 	int error;
1022 	nvlist_t *list = NULL;
1023 
1024 	/*
1025 	 * Read in and unpack the user-supplied nvlist.
1026 	 */
1027 	if (size == 0)
1028 		return (EINVAL);
1029 
1030 	packed = kmem_alloc(size, KM_SLEEP);
1031 
1032 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1033 	    iflag)) != 0) {
1034 		kmem_free(packed, size);
1035 		return (error);
1036 	}
1037 
1038 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1039 		kmem_free(packed, size);
1040 		return (error);
1041 	}
1042 
1043 	kmem_free(packed, size);
1044 
1045 	*nvp = list;
1046 	return (0);
1047 }
1048 
1049 static int
1050 fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
1051 {
1052 	size_t size;
1053 
1054 	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1055 
1056 	if (size > zc->zc_nvlist_dst_size) {
1057 		nvpair_t *more_errors;
1058 		int n = 0;
1059 
1060 		if (zc->zc_nvlist_dst_size < 1024)
1061 			return (ENOMEM);
1062 
1063 		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
1064 		more_errors = nvlist_prev_nvpair(*errors, NULL);
1065 
1066 		do {
1067 			nvpair_t *pair = nvlist_prev_nvpair(*errors,
1068 			    more_errors);
1069 			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
1070 			n++;
1071 			VERIFY(nvlist_size(*errors, &size,
1072 			    NV_ENCODE_NATIVE) == 0);
1073 		} while (size > zc->zc_nvlist_dst_size);
1074 
1075 		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
1076 		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
1077 		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1078 		ASSERT(size <= zc->zc_nvlist_dst_size);
1079 	}
1080 
1081 	return (0);
1082 }
1083 
1084 static int
1085 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1086 {
1087 	char *packed = NULL;
1088 	int error = 0;
1089 	size_t size;
1090 
1091 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
1092 
1093 	if (size > zc->zc_nvlist_dst_size) {
1094 		error = ENOMEM;
1095 	} else {
1096 		packed = kmem_alloc(size, KM_SLEEP);
1097 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
1098 		    KM_SLEEP) == 0);
1099 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1100 		    size, zc->zc_iflags) != 0)
1101 			error = EFAULT;
1102 		kmem_free(packed, size);
1103 	}
1104 
1105 	zc->zc_nvlist_dst_size = size;
1106 	return (error);
1107 }
1108 
1109 static int
1110 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1111 {
1112 	objset_t *os;
1113 	int error;
1114 
1115 	error = dmu_objset_hold(dsname, FTAG, &os);
1116 	if (error)
1117 		return (error);
1118 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1119 		dmu_objset_rele(os, FTAG);
1120 		return (EINVAL);
1121 	}
1122 
1123 	mutex_enter(&os->os_user_ptr_lock);
1124 	*zfvp = dmu_objset_get_user(os);
1125 	if (*zfvp) {
1126 		VFS_HOLD((*zfvp)->z_vfs);
1127 	} else {
1128 		error = ESRCH;
1129 	}
1130 	mutex_exit(&os->os_user_ptr_lock);
1131 	dmu_objset_rele(os, FTAG);
1132 	return (error);
1133 }
1134 
1135 /*
1136  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1137  * case its z_vfs will be NULL, and it will be opened as the owner.
1138  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1139  * which prevents all vnode ops from running.
1140  */
1141 static int
1142 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1143 {
1144 	int error = 0;
1145 
1146 	if (getzfsvfs(name, zfvp) != 0)
1147 		error = zfsvfs_create(name, zfvp);
1148 	if (error == 0) {
1149 		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1150 		    RW_READER, tag);
1151 		if ((*zfvp)->z_unmounted) {
1152 			/*
1153 			 * XXX we could probably try again, since the unmounting
1154 			 * thread should be just about to disassociate the
1155 			 * objset from the zfsvfs.
1156 			 */
1157 			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1158 			return (EBUSY);
1159 		}
1160 	}
1161 	return (error);
1162 }
1163 
1164 static void
1165 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1166 {
1167 	rrw_exit(&zfsvfs->z_teardown_lock, tag);
1168 
1169 	if (zfsvfs->z_vfs) {
1170 		VFS_RELE(zfsvfs->z_vfs);
1171 	} else {
1172 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1173 		zfsvfs_free(zfsvfs);
1174 	}
1175 }
1176 
1177 static int
1178 zfs_ioc_pool_create(zfs_cmd_t *zc)
1179 {
1180 	int error;
1181 	nvlist_t *config, *props = NULL;
1182 	nvlist_t *rootprops = NULL;
1183 	nvlist_t *zplprops = NULL;
1184 	char *buf;
1185 
1186 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1187 	    zc->zc_iflags, &config))
1188 		return (error);
1189 
1190 	if (zc->zc_nvlist_src_size != 0 && (error =
1191 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1192 	    zc->zc_iflags, &props))) {
1193 		nvlist_free(config);
1194 		return (error);
1195 	}
1196 
1197 	if (props) {
1198 		nvlist_t *nvl = NULL;
1199 		uint64_t version = SPA_VERSION;
1200 
1201 		(void) nvlist_lookup_uint64(props,
1202 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1203 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1204 			error = EINVAL;
1205 			goto pool_props_bad;
1206 		}
1207 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1208 		if (nvl) {
1209 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1210 			if (error != 0) {
1211 				nvlist_free(config);
1212 				nvlist_free(props);
1213 				return (error);
1214 			}
1215 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1216 		}
1217 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1218 		error = zfs_fill_zplprops_root(version, rootprops,
1219 		    zplprops, NULL);
1220 		if (error)
1221 			goto pool_props_bad;
1222 	}
1223 
1224 	buf = history_str_get(zc);
1225 
1226 	error = spa_create(zc->zc_name, config, props, buf, zplprops);
1227 
1228 	/*
1229 	 * Set the remaining root properties
1230 	 */
1231 	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1232 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1233 		(void) spa_destroy(zc->zc_name);
1234 
1235 	if (buf != NULL)
1236 		history_str_free(buf);
1237 
1238 pool_props_bad:
1239 	nvlist_free(rootprops);
1240 	nvlist_free(zplprops);
1241 	nvlist_free(config);
1242 	nvlist_free(props);
1243 
1244 	return (error);
1245 }
1246 
1247 static int
1248 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1249 {
1250 	int error;
1251 	zfs_log_history(zc);
1252 	error = spa_destroy(zc->zc_name);
1253 	if (error == 0)
1254 		zvol_remove_minors(zc->zc_name);
1255 	return (error);
1256 }
1257 
1258 static int
1259 zfs_ioc_pool_import(zfs_cmd_t *zc)
1260 {
1261 	nvlist_t *config, *props = NULL;
1262 	uint64_t guid;
1263 	int error;
1264 
1265 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1266 	    zc->zc_iflags, &config)) != 0)
1267 		return (error);
1268 
1269 	if (zc->zc_nvlist_src_size != 0 && (error =
1270 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1271 	    zc->zc_iflags, &props))) {
1272 		nvlist_free(config);
1273 		return (error);
1274 	}
1275 
1276 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1277 	    guid != zc->zc_guid)
1278 		error = EINVAL;
1279 	else
1280 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1281 
1282 	if (zc->zc_nvlist_dst != 0) {
1283 		int err;
1284 
1285 		if ((err = put_nvlist(zc, config)) != 0)
1286 			error = err;
1287 	}
1288 
1289 	nvlist_free(config);
1290 
1291 	if (props)
1292 		nvlist_free(props);
1293 
1294 	return (error);
1295 }
1296 
1297 static int
1298 zfs_ioc_pool_export(zfs_cmd_t *zc)
1299 {
1300 	int error;
1301 	boolean_t force = (boolean_t)zc->zc_cookie;
1302 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1303 
1304 	zfs_log_history(zc);
1305 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1306 	if (error == 0)
1307 		zvol_remove_minors(zc->zc_name);
1308 	return (error);
1309 }
1310 
1311 static int
1312 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1313 {
1314 	nvlist_t *configs;
1315 	int error;
1316 
1317 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1318 		return (EEXIST);
1319 
1320 	error = put_nvlist(zc, configs);
1321 
1322 	nvlist_free(configs);
1323 
1324 	return (error);
1325 }
1326 
1327 /*
1328  * inputs:
1329  * zc_name		name of the pool
1330  *
1331  * outputs:
1332  * zc_cookie		real errno
1333  * zc_nvlist_dst	config nvlist
1334  * zc_nvlist_dst_size	size of config nvlist
1335  */
1336 static int
1337 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1338 {
1339 	nvlist_t *config;
1340 	int error;
1341 	int ret = 0;
1342 
1343 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1344 	    sizeof (zc->zc_value));
1345 
1346 	if (config != NULL) {
1347 		ret = put_nvlist(zc, config);
1348 		nvlist_free(config);
1349 
1350 		/*
1351 		 * The config may be present even if 'error' is non-zero.
1352 		 * In this case we return success, and preserve the real errno
1353 		 * in 'zc_cookie'.
1354 		 */
1355 		zc->zc_cookie = error;
1356 	} else {
1357 		ret = error;
1358 	}
1359 
1360 	return (ret);
1361 }
1362 
1363 /*
1364  * Try to import the given pool, returning pool stats as appropriate so that
1365  * user land knows which devices are available and overall pool health.
1366  */
1367 static int
1368 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1369 {
1370 	nvlist_t *tryconfig, *config;
1371 	int error;
1372 
1373 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1374 	    zc->zc_iflags, &tryconfig)) != 0)
1375 		return (error);
1376 
1377 	config = spa_tryimport(tryconfig);
1378 
1379 	nvlist_free(tryconfig);
1380 
1381 	if (config == NULL)
1382 		return (EINVAL);
1383 
1384 	error = put_nvlist(zc, config);
1385 	nvlist_free(config);
1386 
1387 	return (error);
1388 }
1389 
1390 /*
1391  * inputs:
1392  * zc_name              name of the pool
1393  * zc_cookie            scan func (pool_scan_func_t)
1394  */
1395 static int
1396 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1397 {
1398 	spa_t *spa;
1399 	int error;
1400 
1401 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1402 		return (error);
1403 
1404 	if (zc->zc_cookie == POOL_SCAN_NONE)
1405 		error = spa_scan_stop(spa);
1406 	else
1407 		error = spa_scan(spa, zc->zc_cookie);
1408 
1409 	spa_close(spa, FTAG);
1410 
1411 	return (error);
1412 }
1413 
1414 static int
1415 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1416 {
1417 	spa_t *spa;
1418 	int error;
1419 
1420 	error = spa_open(zc->zc_name, &spa, FTAG);
1421 	if (error == 0) {
1422 		spa_freeze(spa);
1423 		spa_close(spa, FTAG);
1424 	}
1425 	return (error);
1426 }
1427 
1428 static int
1429 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1430 {
1431 	spa_t *spa;
1432 	int error;
1433 
1434 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1435 		return (error);
1436 
1437 	if (zc->zc_cookie < spa_version(spa) ||
1438 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1439 		spa_close(spa, FTAG);
1440 		return (EINVAL);
1441 	}
1442 
1443 	spa_upgrade(spa, zc->zc_cookie);
1444 	spa_close(spa, FTAG);
1445 
1446 	return (error);
1447 }
1448 
1449 static int
1450 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1451 {
1452 	spa_t *spa;
1453 	char *hist_buf;
1454 	uint64_t size;
1455 	int error;
1456 
1457 	if ((size = zc->zc_history_len) == 0)
1458 		return (EINVAL);
1459 
1460 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1461 		return (error);
1462 
1463 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1464 		spa_close(spa, FTAG);
1465 		return (ENOTSUP);
1466 	}
1467 
1468 	hist_buf = kmem_alloc(size, KM_SLEEP);
1469 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1470 	    &zc->zc_history_len, hist_buf)) == 0) {
1471 		error = ddi_copyout(hist_buf,
1472 		    (void *)(uintptr_t)zc->zc_history,
1473 		    zc->zc_history_len, zc->zc_iflags);
1474 	}
1475 
1476 	spa_close(spa, FTAG);
1477 	kmem_free(hist_buf, size);
1478 	return (error);
1479 }
1480 
1481 static int
1482 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1483 {
1484 	spa_t *spa;
1485 	int error;
1486 
1487 	error = spa_open(zc->zc_name, &spa, FTAG);
1488 	if (error == 0) {
1489 		error = spa_change_guid(spa);
1490 		spa_close(spa, FTAG);
1491 	}
1492 	return (error);
1493 }
1494 
1495 static int
1496 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1497 {
1498 	int error;
1499 
1500 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1501 		return (error);
1502 
1503 	return (0);
1504 }
1505 
1506 /*
1507  * inputs:
1508  * zc_name		name of filesystem
1509  * zc_obj		object to find
1510  *
1511  * outputs:
1512  * zc_value		name of object
1513  */
1514 static int
1515 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1516 {
1517 	objset_t *os;
1518 	int error;
1519 
1520 	/* XXX reading from objset not owned */
1521 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1522 		return (error);
1523 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1524 		dmu_objset_rele(os, FTAG);
1525 		return (EINVAL);
1526 	}
1527 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1528 	    sizeof (zc->zc_value));
1529 	dmu_objset_rele(os, FTAG);
1530 
1531 	return (error);
1532 }
1533 
1534 /*
1535  * inputs:
1536  * zc_name		name of filesystem
1537  * zc_obj		object to find
1538  *
1539  * outputs:
1540  * zc_stat		stats on object
1541  * zc_value		path to object
1542  */
1543 static int
1544 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1545 {
1546 	objset_t *os;
1547 	int error;
1548 
1549 	/* XXX reading from objset not owned */
1550 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1551 		return (error);
1552 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1553 		dmu_objset_rele(os, FTAG);
1554 		return (EINVAL);
1555 	}
1556 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1557 	    sizeof (zc->zc_value));
1558 	dmu_objset_rele(os, FTAG);
1559 
1560 	return (error);
1561 }
1562 
1563 static int
1564 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1565 {
1566 	spa_t *spa;
1567 	int error;
1568 	nvlist_t *config, **l2cache, **spares;
1569 	uint_t nl2cache = 0, nspares = 0;
1570 
1571 	error = spa_open(zc->zc_name, &spa, FTAG);
1572 	if (error != 0)
1573 		return (error);
1574 
1575 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1576 	    zc->zc_iflags, &config);
1577 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1578 	    &l2cache, &nl2cache);
1579 
1580 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1581 	    &spares, &nspares);
1582 
1583 	/*
1584 	 * A root pool with concatenated devices is not supported.
1585 	 * Thus, can not add a device to a root pool.
1586 	 *
1587 	 * Intent log device can not be added to a rootpool because
1588 	 * during mountroot, zil is replayed, a seperated log device
1589 	 * can not be accessed during the mountroot time.
1590 	 *
1591 	 * l2cache and spare devices are ok to be added to a rootpool.
1592 	 */
1593 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1594 		nvlist_free(config);
1595 		spa_close(spa, FTAG);
1596 		return (EDOM);
1597 	}
1598 
1599 	if (error == 0) {
1600 		error = spa_vdev_add(spa, config);
1601 		nvlist_free(config);
1602 	}
1603 	spa_close(spa, FTAG);
1604 	return (error);
1605 }
1606 
1607 /*
1608  * inputs:
1609  * zc_name		name of the pool
1610  * zc_nvlist_conf	nvlist of devices to remove
1611  * zc_cookie		to stop the remove?
1612  */
1613 static int
1614 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1615 {
1616 	spa_t *spa;
1617 	int error;
1618 
1619 	error = spa_open(zc->zc_name, &spa, FTAG);
1620 	if (error != 0)
1621 		return (error);
1622 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1623 	spa_close(spa, FTAG);
1624 	return (error);
1625 }
1626 
1627 static int
1628 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1629 {
1630 	spa_t *spa;
1631 	int error;
1632 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1633 
1634 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1635 		return (error);
1636 	switch (zc->zc_cookie) {
1637 	case VDEV_STATE_ONLINE:
1638 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1639 		break;
1640 
1641 	case VDEV_STATE_OFFLINE:
1642 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1643 		break;
1644 
1645 	case VDEV_STATE_FAULTED:
1646 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1647 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1648 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1649 
1650 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1651 		break;
1652 
1653 	case VDEV_STATE_DEGRADED:
1654 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1655 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1656 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1657 
1658 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1659 		break;
1660 
1661 	default:
1662 		error = EINVAL;
1663 	}
1664 	zc->zc_cookie = newstate;
1665 	spa_close(spa, FTAG);
1666 	return (error);
1667 }
1668 
1669 static int
1670 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1671 {
1672 	spa_t *spa;
1673 	int replacing = zc->zc_cookie;
1674 	nvlist_t *config;
1675 	int error;
1676 
1677 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1678 		return (error);
1679 
1680 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1681 	    zc->zc_iflags, &config)) == 0) {
1682 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1683 		nvlist_free(config);
1684 	}
1685 
1686 	spa_close(spa, FTAG);
1687 	return (error);
1688 }
1689 
1690 static int
1691 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1692 {
1693 	spa_t *spa;
1694 	int error;
1695 
1696 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1697 		return (error);
1698 
1699 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1700 
1701 	spa_close(spa, FTAG);
1702 	return (error);
1703 }
1704 
1705 static int
1706 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1707 {
1708 	spa_t *spa;
1709 	nvlist_t *config, *props = NULL;
1710 	int error;
1711 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1712 
1713 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1714 		return (error);
1715 
1716 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1717 	    zc->zc_iflags, &config)) {
1718 		spa_close(spa, FTAG);
1719 		return (error);
1720 	}
1721 
1722 	if (zc->zc_nvlist_src_size != 0 && (error =
1723 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1724 	    zc->zc_iflags, &props))) {
1725 		spa_close(spa, FTAG);
1726 		nvlist_free(config);
1727 		return (error);
1728 	}
1729 
1730 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1731 
1732 	spa_close(spa, FTAG);
1733 
1734 	nvlist_free(config);
1735 	nvlist_free(props);
1736 
1737 	return (error);
1738 }
1739 
1740 static int
1741 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1742 {
1743 	spa_t *spa;
1744 	char *path = zc->zc_value;
1745 	uint64_t guid = zc->zc_guid;
1746 	int error;
1747 
1748 	error = spa_open(zc->zc_name, &spa, FTAG);
1749 	if (error != 0)
1750 		return (error);
1751 
1752 	error = spa_vdev_setpath(spa, guid, path);
1753 	spa_close(spa, FTAG);
1754 	return (error);
1755 }
1756 
1757 static int
1758 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1759 {
1760 	spa_t *spa;
1761 	char *fru = zc->zc_value;
1762 	uint64_t guid = zc->zc_guid;
1763 	int error;
1764 
1765 	error = spa_open(zc->zc_name, &spa, FTAG);
1766 	if (error != 0)
1767 		return (error);
1768 
1769 	error = spa_vdev_setfru(spa, guid, fru);
1770 	spa_close(spa, FTAG);
1771 	return (error);
1772 }
1773 
1774 static int
1775 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1776 {
1777 	int error = 0;
1778 	nvlist_t *nv;
1779 
1780 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1781 
1782 	if (zc->zc_nvlist_dst != 0 &&
1783 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1784 		dmu_objset_stats(os, nv);
1785 		/*
1786 		 * NB: zvol_get_stats() will read the objset contents,
1787 		 * which we aren't supposed to do with a
1788 		 * DS_MODE_USER hold, because it could be
1789 		 * inconsistent.  So this is a bit of a workaround...
1790 		 * XXX reading with out owning
1791 		 */
1792 		if (!zc->zc_objset_stats.dds_inconsistent &&
1793 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
1794 			error = zvol_get_stats(os, nv);
1795 			if (error == EIO)
1796 				return (error);
1797 			VERIFY3S(error, ==, 0);
1798 		}
1799 		error = put_nvlist(zc, nv);
1800 		nvlist_free(nv);
1801 	}
1802 
1803 	return (error);
1804 }
1805 
1806 /*
1807  * inputs:
1808  * zc_name		name of filesystem
1809  * zc_nvlist_dst_size	size of buffer for property nvlist
1810  *
1811  * outputs:
1812  * zc_objset_stats	stats
1813  * zc_nvlist_dst	property nvlist
1814  * zc_nvlist_dst_size	size of property nvlist
1815  */
1816 static int
1817 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1818 {
1819 	objset_t *os = NULL;
1820 	int error;
1821 
1822 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1823 		return (error);
1824 
1825 	error = zfs_ioc_objset_stats_impl(zc, os);
1826 
1827 	dmu_objset_rele(os, FTAG);
1828 
1829 	return (error);
1830 }
1831 
1832 /*
1833  * inputs:
1834  * zc_name		name of filesystem
1835  * zc_nvlist_dst_size	size of buffer for property nvlist
1836  *
1837  * outputs:
1838  * zc_nvlist_dst	received property nvlist
1839  * zc_nvlist_dst_size	size of received property nvlist
1840  *
1841  * Gets received properties (distinct from local properties on or after
1842  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1843  * local property values.
1844  */
1845 static int
1846 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1847 {
1848 	objset_t *os = NULL;
1849 	int error;
1850 	nvlist_t *nv;
1851 
1852 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1853 		return (error);
1854 
1855 	/*
1856 	 * Without this check, we would return local property values if the
1857 	 * caller has not already received properties on or after
1858 	 * SPA_VERSION_RECVD_PROPS.
1859 	 */
1860 	if (!dsl_prop_get_hasrecvd(os)) {
1861 		dmu_objset_rele(os, FTAG);
1862 		return (ENOTSUP);
1863 	}
1864 
1865 	if (zc->zc_nvlist_dst != 0 &&
1866 	    (error = dsl_prop_get_received(os, &nv)) == 0) {
1867 		error = put_nvlist(zc, nv);
1868 		nvlist_free(nv);
1869 	}
1870 
1871 	dmu_objset_rele(os, FTAG);
1872 	return (error);
1873 }
1874 
1875 static int
1876 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1877 {
1878 	uint64_t value;
1879 	int error;
1880 
1881 	/*
1882 	 * zfs_get_zplprop() will either find a value or give us
1883 	 * the default value (if there is one).
1884 	 */
1885 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1886 		return (error);
1887 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1888 	return (0);
1889 }
1890 
1891 /*
1892  * inputs:
1893  * zc_name		name of filesystem
1894  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1895  *
1896  * outputs:
1897  * zc_nvlist_dst	zpl property nvlist
1898  * zc_nvlist_dst_size	size of zpl property nvlist
1899  */
1900 static int
1901 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1902 {
1903 	objset_t *os;
1904 	int err;
1905 
1906 	/* XXX reading without owning */
1907 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1908 		return (err);
1909 
1910 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1911 
1912 	/*
1913 	 * NB: nvl_add_zplprop() will read the objset contents,
1914 	 * which we aren't supposed to do with a DS_MODE_USER
1915 	 * hold, because it could be inconsistent.
1916 	 */
1917 	if (zc->zc_nvlist_dst != NULL &&
1918 	    !zc->zc_objset_stats.dds_inconsistent &&
1919 	    dmu_objset_type(os) == DMU_OST_ZFS) {
1920 		nvlist_t *nv;
1921 
1922 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1923 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1924 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1925 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1926 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1927 			err = put_nvlist(zc, nv);
1928 		nvlist_free(nv);
1929 	} else {
1930 		err = ENOENT;
1931 	}
1932 	dmu_objset_rele(os, FTAG);
1933 	return (err);
1934 }
1935 
1936 static boolean_t
1937 dataset_name_hidden(const char *name)
1938 {
1939 	/*
1940 	 * Skip over datasets that are not visible in this zone,
1941 	 * internal datasets (which have a $ in their name), and
1942 	 * temporary datasets (which have a % in their name).
1943 	 */
1944 	if (strchr(name, '$') != NULL)
1945 		return (B_TRUE);
1946 	if (strchr(name, '%') != NULL)
1947 		return (B_TRUE);
1948 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1949 		return (B_TRUE);
1950 	return (B_FALSE);
1951 }
1952 
1953 /*
1954  * inputs:
1955  * zc_name		name of filesystem
1956  * zc_cookie		zap cursor
1957  * zc_nvlist_dst_size	size of buffer for property nvlist
1958  *
1959  * outputs:
1960  * zc_name		name of next filesystem
1961  * zc_cookie		zap cursor
1962  * zc_objset_stats	stats
1963  * zc_nvlist_dst	property nvlist
1964  * zc_nvlist_dst_size	size of property nvlist
1965  */
1966 static int
1967 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1968 {
1969 	objset_t *os;
1970 	int error;
1971 	char *p;
1972 	size_t orig_len = strlen(zc->zc_name);
1973 
1974 top:
1975 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1976 		if (error == ENOENT)
1977 			error = ESRCH;
1978 		return (error);
1979 	}
1980 
1981 	p = strrchr(zc->zc_name, '/');
1982 	if (p == NULL || p[1] != '\0')
1983 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1984 	p = zc->zc_name + strlen(zc->zc_name);
1985 
1986 	/*
1987 	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1988 	 * but is not declared void because its called by dmu_objset_find().
1989 	 */
1990 	if (zc->zc_cookie == 0) {
1991 		uint64_t cookie = 0;
1992 		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1993 
1994 		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
1995 			if (!dataset_name_hidden(zc->zc_name))
1996 				(void) dmu_objset_prefetch(zc->zc_name, NULL);
1997 		}
1998 	}
1999 
2000 	do {
2001 		error = dmu_dir_list_next(os,
2002 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2003 		    NULL, &zc->zc_cookie);
2004 		if (error == ENOENT)
2005 			error = ESRCH;
2006 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2007 	dmu_objset_rele(os, FTAG);
2008 
2009 	/*
2010 	 * If it's an internal dataset (ie. with a '$' in its name),
2011 	 * don't try to get stats for it, otherwise we'll return ENOENT.
2012 	 */
2013 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2014 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2015 		if (error == ENOENT) {
2016 			/* We lost a race with destroy, get the next one. */
2017 			zc->zc_name[orig_len] = '\0';
2018 			goto top;
2019 		}
2020 	}
2021 	return (error);
2022 }
2023 
2024 /*
2025  * inputs:
2026  * zc_name		name of filesystem
2027  * zc_cookie		zap cursor
2028  * zc_nvlist_dst_size	size of buffer for property nvlist
2029  *
2030  * outputs:
2031  * zc_name		name of next snapshot
2032  * zc_objset_stats	stats
2033  * zc_nvlist_dst	property nvlist
2034  * zc_nvlist_dst_size	size of property nvlist
2035  */
2036 static int
2037 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2038 {
2039 	objset_t *os;
2040 	int error;
2041 
2042 top:
2043 	if (zc->zc_cookie == 0)
2044 		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
2045 		    NULL, DS_FIND_SNAPSHOTS);
2046 
2047 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2048 	if (error)
2049 		return (error == ENOENT ? ESRCH : error);
2050 
2051 	/*
2052 	 * A dataset name of maximum length cannot have any snapshots,
2053 	 * so exit immediately.
2054 	 */
2055 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2056 		dmu_objset_rele(os, FTAG);
2057 		return (ESRCH);
2058 	}
2059 
2060 	error = dmu_snapshot_list_next(os,
2061 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2062 	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2063 	    NULL);
2064 
2065 	if (error == 0) {
2066 		dsl_dataset_t *ds;
2067 		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2068 
2069 		/*
2070 		 * Since we probably don't have a hold on this snapshot,
2071 		 * it's possible that the objsetid could have been destroyed
2072 		 * and reused for a new objset. It's OK if this happens during
2073 		 * a zfs send operation, since the new createtxg will be
2074 		 * beyond the range we're interested in.
2075 		 */
2076 		rw_enter(&dp->dp_config_rwlock, RW_READER);
2077 		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2078 		rw_exit(&dp->dp_config_rwlock);
2079 		if (error) {
2080 			if (error == ENOENT) {
2081 				/* Racing with destroy, get the next one. */
2082 				*strchr(zc->zc_name, '@') = '\0';
2083 				dmu_objset_rele(os, FTAG);
2084 				goto top;
2085 			}
2086 		} else {
2087 			objset_t *ossnap;
2088 
2089 			error = dmu_objset_from_ds(ds, &ossnap);
2090 			if (error == 0)
2091 				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2092 			dsl_dataset_rele(ds, FTAG);
2093 		}
2094 	} else if (error == ENOENT) {
2095 		error = ESRCH;
2096 	}
2097 
2098 	dmu_objset_rele(os, FTAG);
2099 	/* if we failed, undo the @ that we tacked on to zc_name */
2100 	if (error)
2101 		*strchr(zc->zc_name, '@') = '\0';
2102 	return (error);
2103 }
2104 
2105 static int
2106 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2107 {
2108 	const char *propname = nvpair_name(pair);
2109 	uint64_t *valary;
2110 	unsigned int vallen;
2111 	const char *domain;
2112 	char *dash;
2113 	zfs_userquota_prop_t type;
2114 	uint64_t rid;
2115 	uint64_t quota;
2116 	zfsvfs_t *zfsvfs;
2117 	int err;
2118 
2119 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2120 		nvlist_t *attrs;
2121 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2122 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2123 		    &pair) != 0)
2124 			return (EINVAL);
2125 	}
2126 
2127 	/*
2128 	 * A correctly constructed propname is encoded as
2129 	 * userquota@<rid>-<domain>.
2130 	 */
2131 	if ((dash = strchr(propname, '-')) == NULL ||
2132 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2133 	    vallen != 3)
2134 		return (EINVAL);
2135 
2136 	domain = dash + 1;
2137 	type = valary[0];
2138 	rid = valary[1];
2139 	quota = valary[2];
2140 
2141 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2142 	if (err == 0) {
2143 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2144 		zfsvfs_rele(zfsvfs, FTAG);
2145 	}
2146 
2147 	return (err);
2148 }
2149 
2150 /*
2151  * If the named property is one that has a special function to set its value,
2152  * return 0 on success and a positive error code on failure; otherwise if it is
2153  * not one of the special properties handled by this function, return -1.
2154  *
2155  * XXX: It would be better for callers of the property interface if we handled
2156  * these special cases in dsl_prop.c (in the dsl layer).
2157  */
2158 static int
2159 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2160     nvpair_t *pair)
2161 {
2162 	const char *propname = nvpair_name(pair);
2163 	zfs_prop_t prop = zfs_name_to_prop(propname);
2164 	uint64_t intval;
2165 	int err;
2166 
2167 	if (prop == ZPROP_INVAL) {
2168 		if (zfs_prop_userquota(propname))
2169 			return (zfs_prop_set_userquota(dsname, pair));
2170 		return (-1);
2171 	}
2172 
2173 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2174 		nvlist_t *attrs;
2175 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2176 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2177 		    &pair) == 0);
2178 	}
2179 
2180 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2181 		return (-1);
2182 
2183 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2184 
2185 	switch (prop) {
2186 	case ZFS_PROP_QUOTA:
2187 		err = dsl_dir_set_quota(dsname, source, intval);
2188 		break;
2189 	case ZFS_PROP_REFQUOTA:
2190 		err = dsl_dataset_set_quota(dsname, source, intval);
2191 		break;
2192 	case ZFS_PROP_RESERVATION:
2193 		err = dsl_dir_set_reservation(dsname, source, intval);
2194 		break;
2195 	case ZFS_PROP_REFRESERVATION:
2196 		err = dsl_dataset_set_reservation(dsname, source, intval);
2197 		break;
2198 	case ZFS_PROP_VOLSIZE:
2199 		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
2200 		    intval);
2201 		break;
2202 	case ZFS_PROP_VERSION:
2203 	{
2204 		zfsvfs_t *zfsvfs;
2205 
2206 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2207 			break;
2208 
2209 		err = zfs_set_version(zfsvfs, intval);
2210 		zfsvfs_rele(zfsvfs, FTAG);
2211 
2212 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2213 			zfs_cmd_t *zc;
2214 
2215 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2216 			(void) strcpy(zc->zc_name, dsname);
2217 			(void) zfs_ioc_userspace_upgrade(zc);
2218 			kmem_free(zc, sizeof (zfs_cmd_t));
2219 		}
2220 		break;
2221 	}
2222 
2223 	default:
2224 		err = -1;
2225 	}
2226 
2227 	return (err);
2228 }
2229 
2230 /*
2231  * This function is best effort. If it fails to set any of the given properties,
2232  * it continues to set as many as it can and returns the first error
2233  * encountered. If the caller provides a non-NULL errlist, it also gives the
2234  * complete list of names of all the properties it failed to set along with the
2235  * corresponding error numbers. The caller is responsible for freeing the
2236  * returned errlist.
2237  *
2238  * If every property is set successfully, zero is returned and the list pointed
2239  * at by errlist is NULL.
2240  */
2241 int
2242 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2243     nvlist_t **errlist)
2244 {
2245 	nvpair_t *pair;
2246 	nvpair_t *propval;
2247 	int rv = 0;
2248 	uint64_t intval;
2249 	char *strval;
2250 	nvlist_t *genericnvl;
2251 	nvlist_t *errors;
2252 	nvlist_t *retrynvl;
2253 
2254 	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2255 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2256 	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2257 
2258 retry:
2259 	pair = NULL;
2260 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2261 		const char *propname = nvpair_name(pair);
2262 		zfs_prop_t prop = zfs_name_to_prop(propname);
2263 		int err = 0;
2264 
2265 		/* decode the property value */
2266 		propval = pair;
2267 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2268 			nvlist_t *attrs;
2269 			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2270 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2271 			    &propval) != 0)
2272 				err = EINVAL;
2273 		}
2274 
2275 		/* Validate value type */
2276 		if (err == 0 && prop == ZPROP_INVAL) {
2277 			if (zfs_prop_user(propname)) {
2278 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2279 					err = EINVAL;
2280 			} else if (zfs_prop_userquota(propname)) {
2281 				if (nvpair_type(propval) !=
2282 				    DATA_TYPE_UINT64_ARRAY)
2283 					err = EINVAL;
2284 			} else {
2285 				err = EINVAL;
2286 			}
2287 		} else if (err == 0) {
2288 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2289 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2290 					err = EINVAL;
2291 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2292 				const char *unused;
2293 
2294 				VERIFY(nvpair_value_uint64(propval,
2295 				    &intval) == 0);
2296 
2297 				switch (zfs_prop_get_type(prop)) {
2298 				case PROP_TYPE_NUMBER:
2299 					break;
2300 				case PROP_TYPE_STRING:
2301 					err = EINVAL;
2302 					break;
2303 				case PROP_TYPE_INDEX:
2304 					if (zfs_prop_index_to_string(prop,
2305 					    intval, &unused) != 0)
2306 						err = EINVAL;
2307 					break;
2308 				default:
2309 					cmn_err(CE_PANIC,
2310 					    "unknown property type");
2311 				}
2312 			} else {
2313 				err = EINVAL;
2314 			}
2315 		}
2316 
2317 		/* Validate permissions */
2318 		if (err == 0)
2319 			err = zfs_check_settable(dsname, pair, CRED());
2320 
2321 		if (err == 0) {
2322 			err = zfs_prop_set_special(dsname, source, pair);
2323 			if (err == -1) {
2324 				/*
2325 				 * For better performance we build up a list of
2326 				 * properties to set in a single transaction.
2327 				 */
2328 				err = nvlist_add_nvpair(genericnvl, pair);
2329 			} else if (err != 0 && nvl != retrynvl) {
2330 				/*
2331 				 * This may be a spurious error caused by
2332 				 * receiving quota and reservation out of order.
2333 				 * Try again in a second pass.
2334 				 */
2335 				err = nvlist_add_nvpair(retrynvl, pair);
2336 			}
2337 		}
2338 
2339 		if (err != 0)
2340 			VERIFY(nvlist_add_int32(errors, propname, err) == 0);
2341 	}
2342 
2343 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2344 		nvl = retrynvl;
2345 		goto retry;
2346 	}
2347 
2348 	if (!nvlist_empty(genericnvl) &&
2349 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2350 		/*
2351 		 * If this fails, we still want to set as many properties as we
2352 		 * can, so try setting them individually.
2353 		 */
2354 		pair = NULL;
2355 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2356 			const char *propname = nvpair_name(pair);
2357 			int err = 0;
2358 
2359 			propval = pair;
2360 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2361 				nvlist_t *attrs;
2362 				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2363 				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2364 				    &propval) == 0);
2365 			}
2366 
2367 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2368 				VERIFY(nvpair_value_string(propval,
2369 				    &strval) == 0);
2370 				err = dsl_prop_set(dsname, propname, source, 1,
2371 				    strlen(strval) + 1, strval);
2372 			} else {
2373 				VERIFY(nvpair_value_uint64(propval,
2374 				    &intval) == 0);
2375 				err = dsl_prop_set(dsname, propname, source, 8,
2376 				    1, &intval);
2377 			}
2378 
2379 			if (err != 0) {
2380 				VERIFY(nvlist_add_int32(errors, propname,
2381 				    err) == 0);
2382 			}
2383 		}
2384 	}
2385 	nvlist_free(genericnvl);
2386 	nvlist_free(retrynvl);
2387 
2388 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
2389 		nvlist_free(errors);
2390 		errors = NULL;
2391 	} else {
2392 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
2393 	}
2394 
2395 	if (errlist == NULL)
2396 		nvlist_free(errors);
2397 	else
2398 		*errlist = errors;
2399 
2400 	return (rv);
2401 }
2402 
2403 /*
2404  * Check that all the properties are valid user properties.
2405  */
2406 static int
2407 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2408 {
2409 	nvpair_t *pair = NULL;
2410 	int error = 0;
2411 
2412 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2413 		const char *propname = nvpair_name(pair);
2414 		char *valstr;
2415 
2416 		if (!zfs_prop_user(propname) ||
2417 		    nvpair_type(pair) != DATA_TYPE_STRING)
2418 			return (EINVAL);
2419 
2420 		if (error = zfs_secpolicy_write_perms(fsname,
2421 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2422 			return (error);
2423 
2424 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2425 			return (ENAMETOOLONG);
2426 
2427 		VERIFY(nvpair_value_string(pair, &valstr) == 0);
2428 		if (strlen(valstr) >= ZAP_MAXVALUELEN)
2429 			return (E2BIG);
2430 	}
2431 	return (0);
2432 }
2433 
2434 static void
2435 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2436 {
2437 	nvpair_t *pair;
2438 
2439 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2440 
2441 	pair = NULL;
2442 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2443 		if (nvlist_exists(skipped, nvpair_name(pair)))
2444 			continue;
2445 
2446 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2447 	}
2448 }
2449 
2450 static int
2451 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2452     nvlist_t *skipped)
2453 {
2454 	int err = 0;
2455 	nvlist_t *cleared_props = NULL;
2456 	props_skip(props, skipped, &cleared_props);
2457 	if (!nvlist_empty(cleared_props)) {
2458 		/*
2459 		 * Acts on local properties until the dataset has received
2460 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2461 		 */
2462 		zprop_source_t flags = (ZPROP_SRC_NONE |
2463 		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2464 		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2465 	}
2466 	nvlist_free(cleared_props);
2467 	return (err);
2468 }
2469 
2470 /*
2471  * inputs:
2472  * zc_name		name of filesystem
2473  * zc_value		name of property to set
2474  * zc_nvlist_src{_size}	nvlist of properties to apply
2475  * zc_cookie		received properties flag
2476  *
2477  * outputs:
2478  * zc_nvlist_dst{_size} error for each unapplied received property
2479  */
2480 static int
2481 zfs_ioc_set_prop(zfs_cmd_t *zc)
2482 {
2483 	nvlist_t *nvl;
2484 	boolean_t received = zc->zc_cookie;
2485 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2486 	    ZPROP_SRC_LOCAL);
2487 	nvlist_t *errors = NULL;
2488 	int error;
2489 
2490 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2491 	    zc->zc_iflags, &nvl)) != 0)
2492 		return (error);
2493 
2494 	if (received) {
2495 		nvlist_t *origprops;
2496 		objset_t *os;
2497 
2498 		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2499 			if (dsl_prop_get_received(os, &origprops) == 0) {
2500 				(void) clear_received_props(os,
2501 				    zc->zc_name, origprops, nvl);
2502 				nvlist_free(origprops);
2503 			}
2504 
2505 			dsl_prop_set_hasrecvd(os);
2506 			dmu_objset_rele(os, FTAG);
2507 		}
2508 	}
2509 
2510 	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2511 
2512 	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2513 		(void) put_nvlist(zc, errors);
2514 	}
2515 
2516 	nvlist_free(errors);
2517 	nvlist_free(nvl);
2518 	return (error);
2519 }
2520 
2521 /*
2522  * inputs:
2523  * zc_name		name of filesystem
2524  * zc_value		name of property to inherit
2525  * zc_cookie		revert to received value if TRUE
2526  *
2527  * outputs:		none
2528  */
2529 static int
2530 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2531 {
2532 	const char *propname = zc->zc_value;
2533 	zfs_prop_t prop = zfs_name_to_prop(propname);
2534 	boolean_t received = zc->zc_cookie;
2535 	zprop_source_t source = (received
2536 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2537 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2538 
2539 	if (received) {
2540 		nvlist_t *dummy;
2541 		nvpair_t *pair;
2542 		zprop_type_t type;
2543 		int err;
2544 
2545 		/*
2546 		 * zfs_prop_set_special() expects properties in the form of an
2547 		 * nvpair with type info.
2548 		 */
2549 		if (prop == ZPROP_INVAL) {
2550 			if (!zfs_prop_user(propname))
2551 				return (EINVAL);
2552 
2553 			type = PROP_TYPE_STRING;
2554 		} else if (prop == ZFS_PROP_VOLSIZE ||
2555 		    prop == ZFS_PROP_VERSION) {
2556 			return (EINVAL);
2557 		} else {
2558 			type = zfs_prop_get_type(prop);
2559 		}
2560 
2561 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2562 
2563 		switch (type) {
2564 		case PROP_TYPE_STRING:
2565 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2566 			break;
2567 		case PROP_TYPE_NUMBER:
2568 		case PROP_TYPE_INDEX:
2569 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2570 			break;
2571 		default:
2572 			nvlist_free(dummy);
2573 			return (EINVAL);
2574 		}
2575 
2576 		pair = nvlist_next_nvpair(dummy, NULL);
2577 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2578 		nvlist_free(dummy);
2579 		if (err != -1)
2580 			return (err); /* special property already handled */
2581 	} else {
2582 		/*
2583 		 * Only check this in the non-received case. We want to allow
2584 		 * 'inherit -S' to revert non-inheritable properties like quota
2585 		 * and reservation to the received or default values even though
2586 		 * they are not considered inheritable.
2587 		 */
2588 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2589 			return (EINVAL);
2590 	}
2591 
2592 	/* the property name has been validated by zfs_secpolicy_inherit() */
2593 	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2594 }
2595 
2596 static int
2597 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2598 {
2599 	nvlist_t *props;
2600 	spa_t *spa;
2601 	int error;
2602 	nvpair_t *pair;
2603 
2604 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2605 	    zc->zc_iflags, &props))
2606 		return (error);
2607 
2608 	/*
2609 	 * If the only property is the configfile, then just do a spa_lookup()
2610 	 * to handle the faulted case.
2611 	 */
2612 	pair = nvlist_next_nvpair(props, NULL);
2613 	if (pair != NULL && strcmp(nvpair_name(pair),
2614 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2615 	    nvlist_next_nvpair(props, pair) == NULL) {
2616 		mutex_enter(&spa_namespace_lock);
2617 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2618 			spa_configfile_set(spa, props, B_FALSE);
2619 			spa_config_sync(spa, B_FALSE, B_TRUE);
2620 		}
2621 		mutex_exit(&spa_namespace_lock);
2622 		if (spa != NULL) {
2623 			nvlist_free(props);
2624 			return (0);
2625 		}
2626 	}
2627 
2628 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2629 		nvlist_free(props);
2630 		return (error);
2631 	}
2632 
2633 	error = spa_prop_set(spa, props);
2634 
2635 	nvlist_free(props);
2636 	spa_close(spa, FTAG);
2637 
2638 	return (error);
2639 }
2640 
2641 static int
2642 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2643 {
2644 	spa_t *spa;
2645 	int error;
2646 	nvlist_t *nvp = NULL;
2647 
2648 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2649 		/*
2650 		 * If the pool is faulted, there may be properties we can still
2651 		 * get (such as altroot and cachefile), so attempt to get them
2652 		 * anyway.
2653 		 */
2654 		mutex_enter(&spa_namespace_lock);
2655 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2656 			error = spa_prop_get(spa, &nvp);
2657 		mutex_exit(&spa_namespace_lock);
2658 	} else {
2659 		error = spa_prop_get(spa, &nvp);
2660 		spa_close(spa, FTAG);
2661 	}
2662 
2663 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2664 		error = put_nvlist(zc, nvp);
2665 	else
2666 		error = EFAULT;
2667 
2668 	nvlist_free(nvp);
2669 	return (error);
2670 }
2671 
2672 /*
2673  * inputs:
2674  * zc_name		name of filesystem
2675  * zc_nvlist_src{_size}	nvlist of delegated permissions
2676  * zc_perm_action	allow/unallow flag
2677  *
2678  * outputs:		none
2679  */
2680 static int
2681 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2682 {
2683 	int error;
2684 	nvlist_t *fsaclnv = NULL;
2685 
2686 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2687 	    zc->zc_iflags, &fsaclnv)) != 0)
2688 		return (error);
2689 
2690 	/*
2691 	 * Verify nvlist is constructed correctly
2692 	 */
2693 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2694 		nvlist_free(fsaclnv);
2695 		return (EINVAL);
2696 	}
2697 
2698 	/*
2699 	 * If we don't have PRIV_SYS_MOUNT, then validate
2700 	 * that user is allowed to hand out each permission in
2701 	 * the nvlist(s)
2702 	 */
2703 
2704 	error = secpolicy_zfs(CRED());
2705 	if (error) {
2706 		if (zc->zc_perm_action == B_FALSE) {
2707 			error = dsl_deleg_can_allow(zc->zc_name,
2708 			    fsaclnv, CRED());
2709 		} else {
2710 			error = dsl_deleg_can_unallow(zc->zc_name,
2711 			    fsaclnv, CRED());
2712 		}
2713 	}
2714 
2715 	if (error == 0)
2716 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2717 
2718 	nvlist_free(fsaclnv);
2719 	return (error);
2720 }
2721 
2722 /*
2723  * inputs:
2724  * zc_name		name of filesystem
2725  *
2726  * outputs:
2727  * zc_nvlist_src{_size}	nvlist of delegated permissions
2728  */
2729 static int
2730 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2731 {
2732 	nvlist_t *nvp;
2733 	int error;
2734 
2735 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2736 		error = put_nvlist(zc, nvp);
2737 		nvlist_free(nvp);
2738 	}
2739 
2740 	return (error);
2741 }
2742 
2743 /*
2744  * Search the vfs list for a specified resource.  Returns a pointer to it
2745  * or NULL if no suitable entry is found. The caller of this routine
2746  * is responsible for releasing the returned vfs pointer.
2747  */
2748 static vfs_t *
2749 zfs_get_vfs(const char *resource)
2750 {
2751 	struct vfs *vfsp;
2752 	struct vfs *vfs_found = NULL;
2753 
2754 	vfs_list_read_lock();
2755 	vfsp = rootvfs;
2756 	do {
2757 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2758 			VFS_HOLD(vfsp);
2759 			vfs_found = vfsp;
2760 			break;
2761 		}
2762 		vfsp = vfsp->vfs_next;
2763 	} while (vfsp != rootvfs);
2764 	vfs_list_unlock();
2765 	return (vfs_found);
2766 }
2767 
2768 /* ARGSUSED */
2769 static void
2770 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2771 {
2772 	zfs_creat_t *zct = arg;
2773 
2774 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2775 }
2776 
2777 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2778 
2779 /*
2780  * inputs:
2781  * createprops		list of properties requested by creator
2782  * default_zplver	zpl version to use if unspecified in createprops
2783  * fuids_ok		fuids allowed in this version of the spa?
2784  * os			parent objset pointer (NULL if root fs)
2785  *
2786  * outputs:
2787  * zplprops	values for the zplprops we attach to the master node object
2788  * is_ci	true if requested file system will be purely case-insensitive
2789  *
2790  * Determine the settings for utf8only, normalization and
2791  * casesensitivity.  Specific values may have been requested by the
2792  * creator and/or we can inherit values from the parent dataset.  If
2793  * the file system is of too early a vintage, a creator can not
2794  * request settings for these properties, even if the requested
2795  * setting is the default value.  We don't actually want to create dsl
2796  * properties for these, so remove them from the source nvlist after
2797  * processing.
2798  */
2799 static int
2800 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2801     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2802     nvlist_t *zplprops, boolean_t *is_ci)
2803 {
2804 	uint64_t sense = ZFS_PROP_UNDEFINED;
2805 	uint64_t norm = ZFS_PROP_UNDEFINED;
2806 	uint64_t u8 = ZFS_PROP_UNDEFINED;
2807 
2808 	ASSERT(zplprops != NULL);
2809 
2810 	/*
2811 	 * Pull out creator prop choices, if any.
2812 	 */
2813 	if (createprops) {
2814 		(void) nvlist_lookup_uint64(createprops,
2815 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2816 		(void) nvlist_lookup_uint64(createprops,
2817 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2818 		(void) nvlist_remove_all(createprops,
2819 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2820 		(void) nvlist_lookup_uint64(createprops,
2821 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2822 		(void) nvlist_remove_all(createprops,
2823 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2824 		(void) nvlist_lookup_uint64(createprops,
2825 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2826 		(void) nvlist_remove_all(createprops,
2827 		    zfs_prop_to_name(ZFS_PROP_CASE));
2828 	}
2829 
2830 	/*
2831 	 * If the zpl version requested is whacky or the file system
2832 	 * or pool is version is too "young" to support normalization
2833 	 * and the creator tried to set a value for one of the props,
2834 	 * error out.
2835 	 */
2836 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2837 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2838 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2839 	    (zplver < ZPL_VERSION_NORMALIZATION &&
2840 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2841 	    sense != ZFS_PROP_UNDEFINED)))
2842 		return (ENOTSUP);
2843 
2844 	/*
2845 	 * Put the version in the zplprops
2846 	 */
2847 	VERIFY(nvlist_add_uint64(zplprops,
2848 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2849 
2850 	if (norm == ZFS_PROP_UNDEFINED)
2851 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2852 	VERIFY(nvlist_add_uint64(zplprops,
2853 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2854 
2855 	/*
2856 	 * If we're normalizing, names must always be valid UTF-8 strings.
2857 	 */
2858 	if (norm)
2859 		u8 = 1;
2860 	if (u8 == ZFS_PROP_UNDEFINED)
2861 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2862 	VERIFY(nvlist_add_uint64(zplprops,
2863 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2864 
2865 	if (sense == ZFS_PROP_UNDEFINED)
2866 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2867 	VERIFY(nvlist_add_uint64(zplprops,
2868 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2869 
2870 	if (is_ci)
2871 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
2872 
2873 	return (0);
2874 }
2875 
2876 static int
2877 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2878     nvlist_t *zplprops, boolean_t *is_ci)
2879 {
2880 	boolean_t fuids_ok, sa_ok;
2881 	uint64_t zplver = ZPL_VERSION;
2882 	objset_t *os = NULL;
2883 	char parentname[MAXNAMELEN];
2884 	char *cp;
2885 	spa_t *spa;
2886 	uint64_t spa_vers;
2887 	int error;
2888 
2889 	(void) strlcpy(parentname, dataset, sizeof (parentname));
2890 	cp = strrchr(parentname, '/');
2891 	ASSERT(cp != NULL);
2892 	cp[0] = '\0';
2893 
2894 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2895 		return (error);
2896 
2897 	spa_vers = spa_version(spa);
2898 	spa_close(spa, FTAG);
2899 
2900 	zplver = zfs_zpl_version_map(spa_vers);
2901 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2902 	sa_ok = (zplver >= ZPL_VERSION_SA);
2903 
2904 	/*
2905 	 * Open parent object set so we can inherit zplprop values.
2906 	 */
2907 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2908 		return (error);
2909 
2910 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2911 	    zplprops, is_ci);
2912 	dmu_objset_rele(os, FTAG);
2913 	return (error);
2914 }
2915 
2916 static int
2917 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2918     nvlist_t *zplprops, boolean_t *is_ci)
2919 {
2920 	boolean_t fuids_ok;
2921 	boolean_t sa_ok;
2922 	uint64_t zplver = ZPL_VERSION;
2923 	int error;
2924 
2925 	zplver = zfs_zpl_version_map(spa_vers);
2926 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2927 	sa_ok = (zplver >= ZPL_VERSION_SA);
2928 
2929 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2930 	    createprops, zplprops, is_ci);
2931 	return (error);
2932 }
2933 
2934 /*
2935  * inputs:
2936  * zc_objset_type	type of objset to create (fs vs zvol)
2937  * zc_name		name of new objset
2938  * zc_value		name of snapshot to clone from (may be empty)
2939  * zc_nvlist_src{_size}	nvlist of properties to apply
2940  *
2941  * outputs: none
2942  */
2943 static int
2944 zfs_ioc_create(zfs_cmd_t *zc)
2945 {
2946 	objset_t *clone;
2947 	int error = 0;
2948 	zfs_creat_t zct;
2949 	nvlist_t *nvprops = NULL;
2950 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2951 	dmu_objset_type_t type = zc->zc_objset_type;
2952 
2953 	switch (type) {
2954 
2955 	case DMU_OST_ZFS:
2956 		cbfunc = zfs_create_cb;
2957 		break;
2958 
2959 	case DMU_OST_ZVOL:
2960 		cbfunc = zvol_create_cb;
2961 		break;
2962 
2963 	default:
2964 		cbfunc = NULL;
2965 		break;
2966 	}
2967 	if (strchr(zc->zc_name, '@') ||
2968 	    strchr(zc->zc_name, '%'))
2969 		return (EINVAL);
2970 
2971 	if (zc->zc_nvlist_src != NULL &&
2972 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2973 	    zc->zc_iflags, &nvprops)) != 0)
2974 		return (error);
2975 
2976 	zct.zct_zplprops = NULL;
2977 	zct.zct_props = nvprops;
2978 
2979 	if (zc->zc_value[0] != '\0') {
2980 		/*
2981 		 * We're creating a clone of an existing snapshot.
2982 		 */
2983 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2984 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2985 			nvlist_free(nvprops);
2986 			return (EINVAL);
2987 		}
2988 
2989 		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2990 		if (error) {
2991 			nvlist_free(nvprops);
2992 			return (error);
2993 		}
2994 
2995 		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2996 		dmu_objset_rele(clone, FTAG);
2997 		if (error) {
2998 			nvlist_free(nvprops);
2999 			return (error);
3000 		}
3001 	} else {
3002 		boolean_t is_insensitive = B_FALSE;
3003 
3004 		if (cbfunc == NULL) {
3005 			nvlist_free(nvprops);
3006 			return (EINVAL);
3007 		}
3008 
3009 		if (type == DMU_OST_ZVOL) {
3010 			uint64_t volsize, volblocksize;
3011 
3012 			if (nvprops == NULL ||
3013 			    nvlist_lookup_uint64(nvprops,
3014 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
3015 			    &volsize) != 0) {
3016 				nvlist_free(nvprops);
3017 				return (EINVAL);
3018 			}
3019 
3020 			if ((error = nvlist_lookup_uint64(nvprops,
3021 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3022 			    &volblocksize)) != 0 && error != ENOENT) {
3023 				nvlist_free(nvprops);
3024 				return (EINVAL);
3025 			}
3026 
3027 			if (error != 0)
3028 				volblocksize = zfs_prop_default_numeric(
3029 				    ZFS_PROP_VOLBLOCKSIZE);
3030 
3031 			if ((error = zvol_check_volblocksize(
3032 			    volblocksize)) != 0 ||
3033 			    (error = zvol_check_volsize(volsize,
3034 			    volblocksize)) != 0) {
3035 				nvlist_free(nvprops);
3036 				return (error);
3037 			}
3038 		} else if (type == DMU_OST_ZFS) {
3039 			int error;
3040 
3041 			/*
3042 			 * We have to have normalization and
3043 			 * case-folding flags correct when we do the
3044 			 * file system creation, so go figure them out
3045 			 * now.
3046 			 */
3047 			VERIFY(nvlist_alloc(&zct.zct_zplprops,
3048 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3049 			error = zfs_fill_zplprops(zc->zc_name, nvprops,
3050 			    zct.zct_zplprops, &is_insensitive);
3051 			if (error != 0) {
3052 				nvlist_free(nvprops);
3053 				nvlist_free(zct.zct_zplprops);
3054 				return (error);
3055 			}
3056 		}
3057 		error = dmu_objset_create(zc->zc_name, type,
3058 		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3059 		nvlist_free(zct.zct_zplprops);
3060 	}
3061 
3062 	/*
3063 	 * It would be nice to do this atomically.
3064 	 */
3065 	if (error == 0) {
3066 		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
3067 		    nvprops, NULL);
3068 		if (error != 0)
3069 			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
3070 	}
3071 	nvlist_free(nvprops);
3072 	return (error);
3073 }
3074 
3075 /*
3076  * inputs:
3077  * zc_name	name of filesystem
3078  * zc_value	short name of snapshot
3079  * zc_cookie	recursive flag
3080  * zc_nvlist_src[_size] property list
3081  *
3082  * outputs:
3083  * zc_value	short snapname (i.e. part after the '@')
3084  */
3085 static int
3086 zfs_ioc_snapshot(zfs_cmd_t *zc)
3087 {
3088 	nvlist_t *nvprops = NULL;
3089 	int error;
3090 	boolean_t recursive = zc->zc_cookie;
3091 
3092 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3093 		return (EINVAL);
3094 
3095 	if (zc->zc_nvlist_src != NULL &&
3096 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3097 	    zc->zc_iflags, &nvprops)) != 0)
3098 		return (error);
3099 
3100 	error = zfs_check_userprops(zc->zc_name, nvprops);
3101 	if (error)
3102 		goto out;
3103 
3104 	if (!nvlist_empty(nvprops) &&
3105 	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
3106 		error = ENOTSUP;
3107 		goto out;
3108 	}
3109 
3110 	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL,
3111 	    nvprops, recursive, B_FALSE, -1);
3112 
3113 out:
3114 	nvlist_free(nvprops);
3115 	return (error);
3116 }
3117 
3118 int
3119 zfs_unmount_snap(const char *name, void *arg)
3120 {
3121 	vfs_t *vfsp = NULL;
3122 
3123 	if (arg) {
3124 		char *snapname = arg;
3125 		char *fullname = kmem_asprintf("%s@%s", name, snapname);
3126 		vfsp = zfs_get_vfs(fullname);
3127 		strfree(fullname);
3128 	} else if (strchr(name, '@')) {
3129 		vfsp = zfs_get_vfs(name);
3130 	}
3131 
3132 	if (vfsp) {
3133 		/*
3134 		 * Always force the unmount for snapshots.
3135 		 */
3136 		int flag = MS_FORCE;
3137 		int err;
3138 
3139 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
3140 			VFS_RELE(vfsp);
3141 			return (err);
3142 		}
3143 		VFS_RELE(vfsp);
3144 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
3145 			return (err);
3146 	}
3147 	return (0);
3148 }
3149 
3150 /*
3151  * inputs:
3152  * zc_name		name of filesystem, snaps must be under it
3153  * zc_nvlist_src[_size]	full names of snapshots to destroy
3154  * zc_defer_destroy	mark for deferred destroy
3155  *
3156  * outputs:
3157  * zc_name		on failure, name of failed snapshot
3158  */
3159 static int
3160 zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc)
3161 {
3162 	int err, len;
3163 	nvlist_t *nvl;
3164 	nvpair_t *pair;
3165 
3166 	if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3167 	    zc->zc_iflags, &nvl)) != 0)
3168 		return (err);
3169 
3170 	len = strlen(zc->zc_name);
3171 	for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
3172 	    pair = nvlist_next_nvpair(nvl, pair)) {
3173 		const char *name = nvpair_name(pair);
3174 		/*
3175 		 * The snap name must be underneath the zc_name.  This ensures
3176 		 * that our permission checks were legitimate.
3177 		 */
3178 		if (strncmp(zc->zc_name, name, len) != 0 ||
3179 		    (name[len] != '@' && name[len] != '/')) {
3180 			nvlist_free(nvl);
3181 			return (EINVAL);
3182 		}
3183 
3184 		(void) zfs_unmount_snap(name, NULL);
3185 	}
3186 
3187 	err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy,
3188 	    zc->zc_name);
3189 	nvlist_free(nvl);
3190 	return (err);
3191 }
3192 
3193 /*
3194  * inputs:
3195  * zc_name		name of dataset to destroy
3196  * zc_objset_type	type of objset
3197  * zc_defer_destroy	mark for deferred destroy
3198  *
3199  * outputs:		none
3200  */
3201 static int
3202 zfs_ioc_destroy(zfs_cmd_t *zc)
3203 {
3204 	int err;
3205 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
3206 		err = zfs_unmount_snap(zc->zc_name, NULL);
3207 		if (err)
3208 			return (err);
3209 	}
3210 
3211 	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
3212 	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3213 		(void) zvol_remove_minor(zc->zc_name);
3214 	return (err);
3215 }
3216 
/*
 * inputs:
 * zc_name	name of dataset to rollback (to most recent snapshot)
 *
 * outputs:	none
 *
 * Roll the dataset back by cloning its most recent snapshot under the
 * temporary name "<zc_name>/%rollback", swapping that clone with the
 * dataset via dsl_dataset_clone_swap(), and then destroying the clone.
 *
 * Returns EINVAL if zc_name is a snapshot or has no previous snapshot,
 * EBUSY if the dataset cannot be owned for the swap, or the error from
 * whichever step failed.
 */
static int
zfs_ioc_rollback(zfs_cmd_t *zc)
{
	dsl_dataset_t *ds, *clone;
	int error;
	zfsvfs_t *zfsvfs;
	char *clone_name;

	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
	if (error)
		return (error);

	/* must not be a snapshot */
	if (dsl_dataset_is_snapshot(ds)) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/* must have a most recent snapshot */
	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/*
	 * Create clone of most recent snapshot.
	 */
	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
	if (error)
		goto out;

	/*
	 * NOTE(review): if taking ownership fails, the clone created just
	 * above is not destroyed on this path -- confirm that it is cleaned
	 * up elsewhere.
	 */
	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
	if (error)
		goto out;

	/*
	 * Do clone swap.
	 *
	 * If getzfsvfs() finds a zfsvfs for the dataset, the file system is
	 * suspended around the swap and resumed afterwards; otherwise the
	 * swap is done directly.  In both cases ds is set to NULL once it
	 * has been disowned so that the common exit path below does not
	 * release it a second time.
	 */
	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
		error = zfs_suspend_fs(zfsvfs);
		if (error == 0) {
			int resume_err;

			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
				error = dsl_dataset_clone_swap(clone, ds,
				    B_TRUE);
				dsl_dataset_disown(ds, FTAG);
				ds = NULL;
			} else {
				error = EBUSY;
			}
			/*
			 * Always resume; a swap error takes precedence over
			 * a resume error in the value returned.
			 */
			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
			error = error ? error : resume_err;
		}
		VFS_RELE(zfsvfs->z_vfs);
	} else {
		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
			dsl_dataset_disown(ds, FTAG);
			ds = NULL;
		} else {
			error = EBUSY;
		}
	}

	/*
	 * Destroy clone (which also closes it).  The result is ignored; the
	 * value returned reflects the swap/resume outcome.
	 */
	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);

out:
	strfree(clone_name);
	/* ds is still non-NULL here only if it was never disowned above. */
	if (ds)
		dsl_dataset_rele(ds, FTAG);
	return (error);
}
3300 
3301 /*
3302  * inputs:
3303  * zc_name	old name of dataset
3304  * zc_value	new name of dataset
3305  * zc_cookie	recursive flag (only valid for snapshots)
3306  *
3307  * outputs:	none
3308  */
3309 static int
3310 zfs_ioc_rename(zfs_cmd_t *zc)
3311 {
3312 	boolean_t recursive = zc->zc_cookie & 1;
3313 
3314 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3315 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3316 	    strchr(zc->zc_value, '%'))
3317 		return (EINVAL);
3318 
3319 	/*
3320 	 * Unmount snapshot unless we're doing a recursive rename,
3321 	 * in which case the dataset code figures out which snapshots
3322 	 * to unmount.
3323 	 */
3324 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3325 	    zc->zc_objset_type == DMU_OST_ZFS) {
3326 		int err = zfs_unmount_snap(zc->zc_name, NULL);
3327 		if (err)
3328 			return (err);
3329 	}
3330 	if (zc->zc_objset_type == DMU_OST_ZVOL)
3331 		(void) zvol_remove_minor(zc->zc_name);
3332 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3333 }
3334 
3335 static int
3336 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3337 {
3338 	const char *propname = nvpair_name(pair);
3339 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3340 	zfs_prop_t prop = zfs_name_to_prop(propname);
3341 	uint64_t intval;
3342 	int err;
3343 
3344 	if (prop == ZPROP_INVAL) {
3345 		if (zfs_prop_user(propname)) {
3346 			if (err = zfs_secpolicy_write_perms(dsname,
3347 			    ZFS_DELEG_PERM_USERPROP, cr))
3348 				return (err);
3349 			return (0);
3350 		}
3351 
3352 		if (!issnap && zfs_prop_userquota(propname)) {
3353 			const char *perm = NULL;
3354 			const char *uq_prefix =
3355 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3356 			const char *gq_prefix =
3357 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3358 
3359 			if (strncmp(propname, uq_prefix,
3360 			    strlen(uq_prefix)) == 0) {
3361 				perm = ZFS_DELEG_PERM_USERQUOTA;
3362 			} else if (strncmp(propname, gq_prefix,
3363 			    strlen(gq_prefix)) == 0) {
3364 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3365 			} else {
3366 				/* USERUSED and GROUPUSED are read-only */
3367 				return (EINVAL);
3368 			}
3369 
3370 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3371 				return (err);
3372 			return (0);
3373 		}
3374 
3375 		return (EINVAL);
3376 	}
3377 
3378 	if (issnap)
3379 		return (EINVAL);
3380 
3381 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3382 		/*
3383 		 * dsl_prop_get_all_impl() returns properties in this
3384 		 * format.
3385 		 */
3386 		nvlist_t *attrs;
3387 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3388 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3389 		    &pair) == 0);
3390 	}
3391 
3392 	/*
3393 	 * Check that this value is valid for this pool version
3394 	 */
3395 	switch (prop) {
3396 	case ZFS_PROP_COMPRESSION:
3397 		/*
3398 		 * If the user specified gzip compression, make sure
3399 		 * the SPA supports it. We ignore any errors here since
3400 		 * we'll catch them later.
3401 		 */
3402 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3403 		    nvpair_value_uint64(pair, &intval) == 0) {
3404 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3405 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3406 			    zfs_earlier_version(dsname,
3407 			    SPA_VERSION_GZIP_COMPRESSION)) {
3408 				return (ENOTSUP);
3409 			}
3410 
3411 			if (intval == ZIO_COMPRESS_ZLE &&
3412 			    zfs_earlier_version(dsname,
3413 			    SPA_VERSION_ZLE_COMPRESSION))
3414 				return (ENOTSUP);
3415 
3416 			/*
3417 			 * If this is a bootable dataset then
3418 			 * verify that the compression algorithm
3419 			 * is supported for booting. We must return
3420 			 * something other than ENOTSUP since it
3421 			 * implies a downrev pool version.
3422 			 */
3423 			if (zfs_is_bootfs(dsname) &&
3424 			    !BOOTFS_COMPRESS_VALID(intval)) {
3425 				return (ERANGE);
3426 			}
3427 		}
3428 		break;
3429 
3430 	case ZFS_PROP_COPIES:
3431 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3432 			return (ENOTSUP);
3433 		break;
3434 
3435 	case ZFS_PROP_DEDUP:
3436 		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3437 			return (ENOTSUP);
3438 		break;
3439 
3440 	case ZFS_PROP_SHARESMB:
3441 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3442 			return (ENOTSUP);
3443 		break;
3444 
3445 	case ZFS_PROP_ACLINHERIT:
3446 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3447 		    nvpair_value_uint64(pair, &intval) == 0) {
3448 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3449 			    zfs_earlier_version(dsname,
3450 			    SPA_VERSION_PASSTHROUGH_X))
3451 				return (ENOTSUP);
3452 		}
3453 		break;
3454 	}
3455 
3456 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3457 }
3458 
3459 /*
3460  * Removes properties from the given props list that fail permission checks
3461  * needed to clear them and to restore them in case of a receive error. For each
3462  * property, make sure we have both set and inherit permissions.
3463  *
3464  * Returns the first error encountered if any permission checks fail. If the
3465  * caller provides a non-NULL errlist, it also gives the complete list of names
3466  * of all the properties that failed a permission check along with the
3467  * corresponding error numbers. The caller is responsible for freeing the
3468  * returned errlist.
3469  *
3470  * If every property checks out successfully, zero is returned and the list
3471  * pointed at by errlist is NULL.
3472  */
3473 static int
3474 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3475 {
3476 	zfs_cmd_t *zc;
3477 	nvpair_t *pair, *next_pair;
3478 	nvlist_t *errors;
3479 	int err, rv = 0;
3480 
3481 	if (props == NULL)
3482 		return (0);
3483 
3484 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3485 
3486 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3487 	(void) strcpy(zc->zc_name, dataset);
3488 	pair = nvlist_next_nvpair(props, NULL);
3489 	while (pair != NULL) {
3490 		next_pair = nvlist_next_nvpair(props, pair);
3491 
3492 		(void) strcpy(zc->zc_value, nvpair_name(pair));
3493 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3494 		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3495 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3496 			VERIFY(nvlist_add_int32(errors,
3497 			    zc->zc_value, err) == 0);
3498 		}
3499 		pair = next_pair;
3500 	}
3501 	kmem_free(zc, sizeof (zfs_cmd_t));
3502 
3503 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3504 		nvlist_free(errors);
3505 		errors = NULL;
3506 	} else {
3507 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
3508 	}
3509 
3510 	if (errlist == NULL)
3511 		nvlist_free(errors);
3512 	else
3513 		*errlist = errors;
3514 
3515 	return (rv);
3516 }
3517 
3518 static boolean_t
3519 propval_equals(nvpair_t *p1, nvpair_t *p2)
3520 {
3521 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3522 		/* dsl_prop_get_all_impl() format */
3523 		nvlist_t *attrs;
3524 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3525 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3526 		    &p1) == 0);
3527 	}
3528 
3529 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3530 		nvlist_t *attrs;
3531 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3532 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3533 		    &p2) == 0);
3534 	}
3535 
3536 	if (nvpair_type(p1) != nvpair_type(p2))
3537 		return (B_FALSE);
3538 
3539 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
3540 		char *valstr1, *valstr2;
3541 
3542 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3543 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3544 		return (strcmp(valstr1, valstr2) == 0);
3545 	} else {
3546 		uint64_t intval1, intval2;
3547 
3548 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3549 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3550 		return (intval1 == intval2);
3551 	}
3552 }
3553 
3554 /*
3555  * Remove properties from props if they are not going to change (as determined
3556  * by comparison with origprops). Remove them from origprops as well, since we
3557  * do not need to clear or restore properties that won't change.
3558  */
3559 static void
3560 props_reduce(nvlist_t *props, nvlist_t *origprops)
3561 {
3562 	nvpair_t *pair, *next_pair;
3563 
3564 	if (origprops == NULL)
3565 		return; /* all props need to be received */
3566 
3567 	pair = nvlist_next_nvpair(props, NULL);
3568 	while (pair != NULL) {
3569 		const char *propname = nvpair_name(pair);
3570 		nvpair_t *match;
3571 
3572 		next_pair = nvlist_next_nvpair(props, pair);
3573 
3574 		if ((nvlist_lookup_nvpair(origprops, propname,
3575 		    &match) != 0) || !propval_equals(pair, match))
3576 			goto next; /* need to set received value */
3577 
3578 		/* don't clear the existing received value */
3579 		(void) nvlist_remove_nvpair(origprops, match);
3580 		/* don't bother receiving the property */
3581 		(void) nvlist_remove_nvpair(props, pair);
3582 next:
3583 		pair = next_pair;
3584 	}
3585 }
3586 
/*
 * DEBUG-only fault injection knob: when set, the next zfs_ioc_recv() call
 * clears it and forces its result to an error after the stream has been
 * processed, which exercises the property-restore path.
 */
#ifdef	DEBUG
static boolean_t zfs_ioc_recv_inject_err;
#endif
3590 
/*
 * Receive a send stream from a file descriptor into the snapshot named by
 * zc_value, optionally applying a set of received properties first.  If the
 * receive fails, the previously received properties are restored where
 * possible; failures to clear or restore are reported through the
 * ZPROP_ERR_* flags in zc_obj.
 *
 * inputs:
 * zc_name		name of containing filesystem
 * zc_nvlist_src{_size}	nvlist of properties to apply
 * zc_value		name of snapshot to create
 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
 * zc_cookie		file descriptor to recv from
 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
 * zc_guid		force flag
 * zc_cleanup_fd	cleanup-on-exit file descriptor
 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
 *
 * outputs:
 * zc_cookie		number of bytes read
 * zc_nvlist_dst{_size} error for each unapplied received property
 * zc_obj		zprop_errflags_t
 * zc_action_handle	handle for this guid/ds mapping
 */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
	file_t *fp;
	objset_t *os;
	dmu_recv_cookie_t drc;
	boolean_t force = (boolean_t)zc->zc_guid;
	int fd;
	int error = 0;
	int props_error = 0;
	nvlist_t *errors;
	offset_t off;
	nvlist_t *props = NULL; /* sent properties */
	nvlist_t *origprops = NULL; /* existing properties */
	objset_t *origin = NULL;
	char *tosnap;
	char tofs[ZFS_MAXNAMELEN];
	boolean_t first_recvd_props = B_FALSE;

	/* zc_value must be a valid "fs@snap" name containing no '%'. */
	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
	    strchr(zc->zc_value, '@') == NULL ||
	    strchr(zc->zc_value, '%'))
		return (EINVAL);

	/* Split "fs@snap" in place: tofs holds the fs, tosnap the snapshot. */
	(void) strcpy(tofs, zc->zc_value);
	tosnap = strchr(tofs, '@');
	*tosnap++ = '\0';

	if (zc->zc_nvlist_src != NULL &&
	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props)) != 0)
		return (error);

	fd = zc->zc_cookie;
	fp = getf(fd);
	if (fp == NULL) {
		nvlist_free(props);
		return (EBADF);
	}

	/* From here on every exit goes through 'out', which frees 'errors'. */
	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/*
	 * This block only runs when properties were sent and the target
	 * filesystem can already be held.
	 */
	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
		    !dsl_prop_get_hasrecvd(os)) {
			first_recvd_props = B_TRUE;
		}

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties, so stash
		 * away the existing ones.
		 */
		if (dsl_prop_get_received(os, &origprops) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(props, origprops);
			/* Record which existing props we may not clear. */
			if (zfs_check_clearable(tofs, origprops,
			    &errlist) != 0)
				(void) nvlist_merge(errors, errlist, 0);
			nvlist_free(errlist);
		}

		dmu_objset_rele(os, FTAG);
	}

	/* A non-empty zc_string names the clone origin; hold it for begin. */
	if (zc->zc_string[0]) {
		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
		if (error)
			goto out;
	}

	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
	    &zc->zc_begin_record, force, origin, &drc);
	if (origin)
		dmu_objset_rele(origin, FTAG);
	if (error)
		goto out;

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (props) {
		nvlist_t *errlist;

		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
			if (drc.drc_newfs) {
				if (spa_version(os->os_spa) >=
				    SPA_VERSION_RECVD_PROPS)
					first_recvd_props = B_TRUE;
			} else if (origprops != NULL) {
				if (clear_received_props(os, tofs, origprops,
				    first_recvd_props ? NULL : props) != 0)
					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
			} else {
				/* Existing props were never stashed. */
				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
			}
			dsl_prop_set_hasrecvd(os);
		} else if (!drc.drc_newfs) {
			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		}

		/* Per-property failures are collected, not treated as fatal. */
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
		    props, &errlist);
		(void) nvlist_merge(errors, errlist, 0);
		nvlist_free(errlist);
	}

	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
		/*
		 * Caller made zc->zc_nvlist_dst less than the minimum expected
		 * size or supplied an invalid address.
		 */
		props_error = EINVAL;
	}

	/* Receive starting at the descriptor's current file offset. */
	off = fp->f_offset;
	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
	    &zc->zc_action_handle);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			int end_err;

			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc);
			if (error == 0)
				error = zfs_resume_fs(zfsvfs, tofs);
			/* A suspend/resume error wins over the recv_end one. */
			error = error ? error : end_err;
			VFS_RELE(zfsvfs->z_vfs);
		} else {
			error = dmu_recv_end(&drc);
		}
	}

	/* Report bytes consumed and advance the descriptor's offset. */
	zc->zc_cookie = off - fp->f_offset;
	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
		fp->f_offset = off;

#ifdef	DEBUG
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif
	/*
	 * On error, restore the original props.
	 */
	if (error && props) {
		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
			if (clear_received_props(os, tofs, props, NULL) != 0) {
				/*
				 * We failed to clear the received properties.
				 * Since we may have left a $recvd value on the
				 * system, we can't clear the $hasrecvd flag.
				 */
				zc->zc_obj |= ZPROP_ERR_NORESTORE;
			} else if (first_recvd_props) {
				dsl_prop_unset_hasrecvd(os);
			}
			dmu_objset_rele(os, FTAG);
		} else if (!drc.drc_newfs) {
			/* We failed to clear the received properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		if (origprops == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in
		 * the first new-style receive.
		 */
		if (origprops != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origprops, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}
	}
out:
	nvlist_free(props);
	nvlist_free(origprops);
	nvlist_free(errors);
	releasef(fd);

	/* A stream error takes precedence over an error-list copyout error. */
	if (error == 0)
		error = props_error;

	return (error);
}
3825 
3826 /*
3827  * inputs:
3828  * zc_name	name of snapshot to send
3829  * zc_cookie	file descriptor to send stream to
3830  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
3831  * zc_sendobj	objsetid of snapshot to send
3832  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
3833  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
3834  *		output size in zc_objset_type.
3835  *
3836  * outputs: none
3837  */
3838 static int
3839 zfs_ioc_send(zfs_cmd_t *zc)
3840 {
3841 	objset_t *fromsnap = NULL;
3842 	objset_t *tosnap;
3843 	int error;
3844 	offset_t off;
3845 	dsl_dataset_t *ds;
3846 	dsl_dataset_t *dsfrom = NULL;
3847 	spa_t *spa;
3848 	dsl_pool_t *dp;
3849 	boolean_t estimate = (zc->zc_guid != 0);
3850 
3851 	error = spa_open(zc->zc_name, &spa, FTAG);
3852 	if (error)
3853 		return (error);
3854 
3855 	dp = spa_get_dsl(spa);
3856 	rw_enter(&dp->dp_config_rwlock, RW_READER);
3857 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
3858 	rw_exit(&dp->dp_config_rwlock);
3859 	if (error) {
3860 		spa_close(spa, FTAG);
3861 		return (error);
3862 	}
3863 
3864 	error = dmu_objset_from_ds(ds, &tosnap);
3865 	if (error) {
3866 		dsl_dataset_rele(ds, FTAG);
3867 		spa_close(spa, FTAG);
3868 		return (error);
3869 	}
3870 
3871 	if (zc->zc_fromobj != 0) {
3872 		rw_enter(&dp->dp_config_rwlock, RW_READER);
3873 		error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
3874 		rw_exit(&dp->dp_config_rwlock);
3875 		spa_close(spa, FTAG);
3876 		if (error) {
3877 			dsl_dataset_rele(ds, FTAG);
3878 			return (error);
3879 		}
3880 		error = dmu_objset_from_ds(dsfrom, &fromsnap);
3881 		if (error) {
3882 			dsl_dataset_rele(dsfrom, FTAG);
3883 			dsl_dataset_rele(ds, FTAG);
3884 			return (error);
3885 		}
3886 	} else {
3887 		spa_close(spa, FTAG);
3888 	}
3889 
3890 	if (estimate) {
3891 		error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
3892 		    &zc->zc_objset_type);
3893 	} else {
3894 		file_t *fp = getf(zc->zc_cookie);
3895 		if (fp == NULL) {
3896 			dsl_dataset_rele(ds, FTAG);
3897 			if (dsfrom)
3898 				dsl_dataset_rele(dsfrom, FTAG);
3899 			return (EBADF);
3900 		}
3901 
3902 		off = fp->f_offset;
3903 		error = dmu_send(tosnap, fromsnap, zc->zc_obj,
3904 		    zc->zc_cookie, fp->f_vnode, &off);
3905 
3906 		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3907 			fp->f_offset = off;
3908 		releasef(zc->zc_cookie);
3909 	}
3910 	if (dsfrom)
3911 		dsl_dataset_rele(dsfrom, FTAG);
3912 	dsl_dataset_rele(ds, FTAG);
3913 	return (error);
3914 }
3915 
3916 /*
3917  * inputs:
3918  * zc_name	name of snapshot on which to report progress
3919  * zc_cookie	file descriptor of send stream
3920  *
3921  * outputs:
3922  * zc_cookie	number of bytes written in send stream thus far
3923  */
3924 static int
3925 zfs_ioc_send_progress(zfs_cmd_t *zc)
3926 {
3927 	dsl_dataset_t *ds;
3928 	dmu_sendarg_t *dsp = NULL;
3929 	int error;
3930 
3931 	if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
3932 		return (error);
3933 
3934 	mutex_enter(&ds->ds_sendstream_lock);
3935 
3936 	/*
3937 	 * Iterate over all the send streams currently active on this dataset.
3938 	 * If there's one which matches the specified file descriptor _and_ the
3939 	 * stream was started by the current process, return the progress of
3940 	 * that stream.
3941 	 */
3942 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
3943 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
3944 		if (dsp->dsa_outfd == zc->zc_cookie &&
3945 		    dsp->dsa_proc == curproc)
3946 			break;
3947 	}
3948 
3949 	if (dsp != NULL)
3950 		zc->zc_cookie = *(dsp->dsa_off);
3951 	else
3952 		error = ENOENT;
3953 
3954 	mutex_exit(&ds->ds_sendstream_lock);
3955 	dsl_dataset_rele(ds, FTAG);
3956 	return (error);
3957 }
3958 
3959 static int
3960 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3961 {
3962 	int id, error;
3963 
3964 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3965 	    &zc->zc_inject_record);
3966 
3967 	if (error == 0)
3968 		zc->zc_guid = (uint64_t)id;
3969 
3970 	return (error);
3971 }
3972 
3973 static int
3974 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3975 {
3976 	return (zio_clear_fault((int)zc->zc_guid));
3977 }
3978 
3979 static int
3980 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3981 {
3982 	int id = (int)zc->zc_guid;
3983 	int error;
3984 
3985 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3986 	    &zc->zc_inject_record);
3987 
3988 	zc->zc_guid = id;
3989 
3990 	return (error);
3991 }
3992 
3993 static int
3994 zfs_ioc_error_log(zfs_cmd_t *zc)
3995 {
3996 	spa_t *spa;
3997 	int error;
3998 	size_t count = (size_t)zc->zc_nvlist_dst_size;
3999 
4000 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4001 		return (error);
4002 
4003 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4004 	    &count);
4005 	if (error == 0)
4006 		zc->zc_nvlist_dst_size = count;
4007 	else
4008 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4009 
4010 	spa_close(spa, FTAG);
4011 
4012 	return (error);
4013 }
4014 
4015 static int
4016 zfs_ioc_clear(zfs_cmd_t *zc)
4017 {
4018 	spa_t *spa;
4019 	vdev_t *vd;
4020 	int error;
4021 
4022 	/*
4023 	 * On zpool clear we also fix up missing slogs
4024 	 */
4025 	mutex_enter(&spa_namespace_lock);
4026 	spa = spa_lookup(zc->zc_name);
4027 	if (spa == NULL) {
4028 		mutex_exit(&spa_namespace_lock);
4029 		return (EIO);
4030 	}
4031 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4032 		/* we need to let spa_open/spa_load clear the chains */
4033 		spa_set_log_state(spa, SPA_LOG_CLEAR);
4034 	}
4035 	spa->spa_last_open_failed = 0;
4036 	mutex_exit(&spa_namespace_lock);
4037 
4038 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4039 		error = spa_open(zc->zc_name, &spa, FTAG);
4040 	} else {
4041 		nvlist_t *policy;
4042 		nvlist_t *config = NULL;
4043 
4044 		if (zc->zc_nvlist_src == NULL)
4045 			return (EINVAL);
4046 
4047 		if ((error = get_nvlist(zc->zc_nvlist_src,
4048 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4049 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4050 			    policy, &config);
4051 			if (config != NULL) {
4052 				int err;
4053 
4054 				if ((err = put_nvlist(zc, config)) != 0)
4055 					error = err;
4056 				nvlist_free(config);
4057 			}
4058 			nvlist_free(policy);
4059 		}
4060 	}
4061 
4062 	if (error)
4063 		return (error);
4064 
4065 	spa_vdev_state_enter(spa, SCL_NONE);
4066 
4067 	if (zc->zc_guid == 0) {
4068 		vd = NULL;
4069 	} else {
4070 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4071 		if (vd == NULL) {
4072 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4073 			spa_close(spa, FTAG);
4074 			return (ENODEV);
4075 		}
4076 	}
4077 
4078 	vdev_clear(spa, vd);
4079 
4080 	(void) spa_vdev_state_exit(spa, NULL, 0);
4081 
4082 	/*
4083 	 * Resume any suspended I/Os.
4084 	 */
4085 	if (zio_resume(spa) != 0)
4086 		error = EIO;
4087 
4088 	spa_close(spa, FTAG);
4089 
4090 	return (error);
4091 }
4092 
4093 static int
4094 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4095 {
4096 	spa_t *spa;
4097 	int error;
4098 
4099 	error = spa_open(zc->zc_name, &spa, FTAG);
4100 	if (error)
4101 		return (error);
4102 
4103 	spa_vdev_state_enter(spa, SCL_NONE);
4104 	vdev_reopen(spa->spa_root_vdev);
4105 	(void) spa_vdev_state_exit(spa, NULL, 0);
4106 	spa_close(spa, FTAG);
4107 	return (0);
4108 }
4109 /*
4110  * inputs:
4111  * zc_name	name of filesystem
4112  * zc_value	name of origin snapshot
4113  *
4114  * outputs:
4115  * zc_string	name of conflicting snapshot, if there is one
4116  */
4117 static int
4118 zfs_ioc_promote(zfs_cmd_t *zc)
4119 {
4120 	char *cp;
4121 
4122 	/*
4123 	 * We don't need to unmount *all* the origin fs's snapshots, but
4124 	 * it's easier.
4125 	 */
4126 	cp = strchr(zc->zc_value, '@');
4127 	if (cp)
4128 		*cp = '\0';
4129 	(void) dmu_objset_find(zc->zc_value,
4130 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
4131 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4132 }
4133 
4134 /*
4135  * Retrieve a single {user|group}{used|quota}@... property.
4136  *
4137  * inputs:
4138  * zc_name	name of filesystem
4139  * zc_objset_type zfs_userquota_prop_t
4140  * zc_value	domain name (eg. "S-1-234-567-89")
4141  * zc_guid	RID/UID/GID
4142  *
4143  * outputs:
4144  * zc_cookie	property value
4145  */
4146 static int
4147 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4148 {
4149 	zfsvfs_t *zfsvfs;
4150 	int error;
4151 
4152 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4153 		return (EINVAL);
4154 
4155 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4156 	if (error)
4157 		return (error);
4158 
4159 	error = zfs_userspace_one(zfsvfs,
4160 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4161 	zfsvfs_rele(zfsvfs, FTAG);
4162 
4163 	return (error);
4164 }
4165 
4166 /*
4167  * inputs:
4168  * zc_name		name of filesystem
4169  * zc_cookie		zap cursor
4170  * zc_objset_type	zfs_userquota_prop_t
4171  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4172  *
4173  * outputs:
4174  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
4175  * zc_cookie	zap cursor
4176  */
4177 static int
4178 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4179 {
4180 	zfsvfs_t *zfsvfs;
4181 	int bufsize = zc->zc_nvlist_dst_size;
4182 
4183 	if (bufsize <= 0)
4184 		return (ENOMEM);
4185 
4186 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4187 	if (error)
4188 		return (error);
4189 
4190 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
4191 
4192 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4193 	    buf, &zc->zc_nvlist_dst_size);
4194 
4195 	if (error == 0) {
4196 		error = xcopyout(buf,
4197 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
4198 		    zc->zc_nvlist_dst_size);
4199 	}
4200 	kmem_free(buf, bufsize);
4201 	zfsvfs_rele(zfsvfs, FTAG);
4202 
4203 	return (error);
4204 }
4205 
4206 /*
4207  * inputs:
4208  * zc_name		name of filesystem
4209  *
4210  * outputs:
4211  * none
4212  */
4213 static int
4214 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4215 {
4216 	objset_t *os;
4217 	int error = 0;
4218 	zfsvfs_t *zfsvfs;
4219 
4220 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4221 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4222 			/*
4223 			 * If userused is not enabled, it may be because the
4224 			 * objset needs to be closed & reopened (to grow the
4225 			 * objset_phys_t).  Suspend/resume the fs will do that.
4226 			 */
4227 			error = zfs_suspend_fs(zfsvfs);
4228 			if (error == 0)
4229 				error = zfs_resume_fs(zfsvfs, zc->zc_name);
4230 		}
4231 		if (error == 0)
4232 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4233 		VFS_RELE(zfsvfs->z_vfs);
4234 	} else {
4235 		/* XXX kind of reading contents without owning */
4236 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4237 		if (error)
4238 			return (error);
4239 
4240 		error = dmu_objset_userspace_upgrade(os);
4241 		dmu_objset_rele(os, FTAG);
4242 	}
4243 
4244 	return (error);
4245 }
4246 
/*
 * We don't want a hard dependency on a few special symbols in the sharefs,
 * nfs and smbsrv modules.  Instead, the modules are opened and the symbols
 * resolved on demand, the first time a file system is shared.
 * None of sharefs, nfs or smbsrv is an unloadable module.
 */
/* Entry points resolved at run time; NULL until resolved. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the NFS / SMB entry points have been resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles; zfs_share_lock is held while the state above is set up. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
kmutex_t zfs_share_lock;
4265 
4266 static int
4267 zfs_init_sharefs()
4268 {
4269 	int error;
4270 
4271 	ASSERT(MUTEX_HELD(&zfs_share_lock));
4272 	/* Both NFS and SMB shares also require sharetab support. */
4273 	if (sharefs_mod == NULL && ((sharefs_mod =
4274 	    ddi_modopen("fs/sharefs",
4275 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
4276 		return (ENOSYS);
4277 	}
4278 	if (zshare_fs == NULL && ((zshare_fs =
4279 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4280 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4281 		return (ENOSYS);
4282 	}
4283 	return (0);
4284 }
4285 
4286 static int
4287 zfs_ioc_share(zfs_cmd_t *zc)
4288 {
4289 	int error;
4290 	int opcode;
4291 
4292 	switch (zc->zc_share.z_sharetype) {
4293 	case ZFS_SHARE_NFS:
4294 	case ZFS_UNSHARE_NFS:
4295 		if (zfs_nfsshare_inited == 0) {
4296 			mutex_enter(&zfs_share_lock);
4297 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4298 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4299 				mutex_exit(&zfs_share_lock);
4300 				return (ENOSYS);
4301 			}
4302 			if (znfsexport_fs == NULL &&
4303 			    ((znfsexport_fs = (int (*)(void *))
4304 			    ddi_modsym(nfs_mod,
4305 			    "nfs_export", &error)) == NULL)) {
4306 				mutex_exit(&zfs_share_lock);
4307 				return (ENOSYS);
4308 			}
4309 			error = zfs_init_sharefs();
4310 			if (error) {
4311 				mutex_exit(&zfs_share_lock);
4312 				return (ENOSYS);
4313 			}
4314 			zfs_nfsshare_inited = 1;
4315 			mutex_exit(&zfs_share_lock);
4316 		}
4317 		break;
4318 	case ZFS_SHARE_SMB:
4319 	case ZFS_UNSHARE_SMB:
4320 		if (zfs_smbshare_inited == 0) {
4321 			mutex_enter(&zfs_share_lock);
4322 			if (smbsrv_mod == NULL && ((smbsrv_mod =
4323 			    ddi_modopen("drv/smbsrv",
4324 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4325 				mutex_exit(&zfs_share_lock);
4326 				return (ENOSYS);
4327 			}
4328 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4329 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4330 			    "smb_server_share", &error)) == NULL)) {
4331 				mutex_exit(&zfs_share_lock);
4332 				return (ENOSYS);
4333 			}
4334 			error = zfs_init_sharefs();
4335 			if (error) {
4336 				mutex_exit(&zfs_share_lock);
4337 				return (ENOSYS);
4338 			}
4339 			zfs_smbshare_inited = 1;
4340 			mutex_exit(&zfs_share_lock);
4341 		}
4342 		break;
4343 	default:
4344 		return (EINVAL);
4345 	}
4346 
4347 	switch (zc->zc_share.z_sharetype) {
4348 	case ZFS_SHARE_NFS:
4349 	case ZFS_UNSHARE_NFS:
4350 		if (error =
4351 		    znfsexport_fs((void *)
4352 		    (uintptr_t)zc->zc_share.z_exportdata))
4353 			return (error);
4354 		break;
4355 	case ZFS_SHARE_SMB:
4356 	case ZFS_UNSHARE_SMB:
4357 		if (error = zsmbexport_fs((void *)
4358 		    (uintptr_t)zc->zc_share.z_exportdata,
4359 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4360 		    B_TRUE: B_FALSE)) {
4361 			return (error);
4362 		}
4363 		break;
4364 	}
4365 
4366 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4367 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4368 	    SHAREFS_ADD : SHAREFS_REMOVE;
4369 
4370 	/*
4371 	 * Add or remove share from sharetab
4372 	 */
4373 	error = zshare_fs(opcode,
4374 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
4375 	    zc->zc_share.z_sharemax);
4376 
4377 	return (error);
4378 
4379 }
4380 
/*
 * Single-entry ACL that zfs_ioc_smb_acl() attaches to the files it creates
 * in the shares directory.  The entry names ACE_ALL_PERMS and ACE_EVERYONE;
 * presumably it grants all permissions to everyone -- confirm against the
 * ace_t field layout.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4384 
4385 /*
4386  * inputs:
4387  * zc_name		name of containing filesystem
4388  * zc_obj		object # beyond which we want next in-use object #
4389  *
4390  * outputs:
4391  * zc_obj		next in-use object #
4392  */
4393 static int
4394 zfs_ioc_next_obj(zfs_cmd_t *zc)
4395 {
4396 	objset_t *os = NULL;
4397 	int error;
4398 
4399 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4400 	if (error)
4401 		return (error);
4402 
4403 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4404 	    os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
4405 
4406 	dmu_objset_rele(os, FTAG);
4407 	return (error);
4408 }
4409 
4410 /*
4411  * inputs:
4412  * zc_name		name of filesystem
4413  * zc_value		prefix name for snapshot
4414  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4415  *
4416  * outputs:
4417  */
4418 static int
4419 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
4420 {
4421 	char *snap_name;
4422 	int error;
4423 
4424 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
4425 	    (u_longlong_t)ddi_get_lbolt64());
4426 
4427 	if (strlen(snap_name) >= MAXNAMELEN) {
4428 		strfree(snap_name);
4429 		return (E2BIG);
4430 	}
4431 
4432 	error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name,
4433 	    NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd);
4434 	if (error != 0) {
4435 		strfree(snap_name);
4436 		return (error);
4437 	}
4438 
4439 	(void) strcpy(zc->zc_value, snap_name);
4440 	strfree(snap_name);
4441 	return (0);
4442 }
4443 
4444 /*
4445  * inputs:
4446  * zc_name		name of "to" snapshot
4447  * zc_value		name of "from" snapshot
4448  * zc_cookie		file descriptor to write diff data on
4449  *
4450  * outputs:
4451  * dmu_diff_record_t's to the file descriptor
4452  */
4453 static int
4454 zfs_ioc_diff(zfs_cmd_t *zc)
4455 {
4456 	objset_t *fromsnap;
4457 	objset_t *tosnap;
4458 	file_t *fp;
4459 	offset_t off;
4460 	int error;
4461 
4462 	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
4463 	if (error)
4464 		return (error);
4465 
4466 	error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
4467 	if (error) {
4468 		dmu_objset_rele(tosnap, FTAG);
4469 		return (error);
4470 	}
4471 
4472 	fp = getf(zc->zc_cookie);
4473 	if (fp == NULL) {
4474 		dmu_objset_rele(fromsnap, FTAG);
4475 		dmu_objset_rele(tosnap, FTAG);
4476 		return (EBADF);
4477 	}
4478 
4479 	off = fp->f_offset;
4480 
4481 	error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off);
4482 
4483 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4484 		fp->f_offset = off;
4485 	releasef(zc->zc_cookie);
4486 
4487 	dmu_objset_rele(fromsnap, FTAG);
4488 	dmu_objset_rele(tosnap, FTAG);
4489 	return (error);
4490 }
4491 
4492 /*
4493  * Remove all ACL files in shares dir
4494  */
4495 static int
4496 zfs_smb_acl_purge(znode_t *dzp)
4497 {
4498 	zap_cursor_t	zc;
4499 	zap_attribute_t	zap;
4500 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4501 	int error;
4502 
4503 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4504 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4505 	    zap_cursor_advance(&zc)) {
4506 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4507 		    NULL, 0)) != 0)
4508 			break;
4509 	}
4510 	zap_cursor_fini(&zc);
4511 	return (error);
4512 }
4513 
4514 static int
4515 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4516 {
4517 	vnode_t *vp;
4518 	znode_t *dzp;
4519 	vnode_t *resourcevp = NULL;
4520 	znode_t *sharedir;
4521 	zfsvfs_t *zfsvfs;
4522 	nvlist_t *nvlist;
4523 	char *src, *target;
4524 	vattr_t vattr;
4525 	vsecattr_t vsec;
4526 	int error = 0;
4527 
4528 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4529 	    NO_FOLLOW, NULL, &vp)) != 0)
4530 		return (error);
4531 
4532 	/* Now make sure mntpnt and dataset are ZFS */
4533 
4534 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4535 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4536 	    zc->zc_name) != 0)) {
4537 		VN_RELE(vp);
4538 		return (EINVAL);
4539 	}
4540 
4541 	dzp = VTOZ(vp);
4542 	zfsvfs = dzp->z_zfsvfs;
4543 	ZFS_ENTER(zfsvfs);
4544 
4545 	/*
4546 	 * Create share dir if its missing.
4547 	 */
4548 	mutex_enter(&zfsvfs->z_lock);
4549 	if (zfsvfs->z_shares_dir == 0) {
4550 		dmu_tx_t *tx;
4551 
4552 		tx = dmu_tx_create(zfsvfs->z_os);
4553 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4554 		    ZFS_SHARES_DIR);
4555 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4556 		error = dmu_tx_assign(tx, TXG_WAIT);
4557 		if (error) {
4558 			dmu_tx_abort(tx);
4559 		} else {
4560 			error = zfs_create_share_dir(zfsvfs, tx);
4561 			dmu_tx_commit(tx);
4562 		}
4563 		if (error) {
4564 			mutex_exit(&zfsvfs->z_lock);
4565 			VN_RELE(vp);
4566 			ZFS_EXIT(zfsvfs);
4567 			return (error);
4568 		}
4569 	}
4570 	mutex_exit(&zfsvfs->z_lock);
4571 
4572 	ASSERT(zfsvfs->z_shares_dir);
4573 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4574 		VN_RELE(vp);
4575 		ZFS_EXIT(zfsvfs);
4576 		return (error);
4577 	}
4578 
4579 	switch (zc->zc_cookie) {
4580 	case ZFS_SMB_ACL_ADD:
4581 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4582 		vattr.va_type = VREG;
4583 		vattr.va_mode = S_IFREG|0777;
4584 		vattr.va_uid = 0;
4585 		vattr.va_gid = 0;
4586 
4587 		vsec.vsa_mask = VSA_ACE;
4588 		vsec.vsa_aclentp = &full_access;
4589 		vsec.vsa_aclentsz = sizeof (full_access);
4590 		vsec.vsa_aclcnt = 1;
4591 
4592 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4593 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4594 		if (resourcevp)
4595 			VN_RELE(resourcevp);
4596 		break;
4597 
4598 	case ZFS_SMB_ACL_REMOVE:
4599 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4600 		    NULL, 0);
4601 		break;
4602 
4603 	case ZFS_SMB_ACL_RENAME:
4604 		if ((error = get_nvlist(zc->zc_nvlist_src,
4605 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4606 			VN_RELE(vp);
4607 			ZFS_EXIT(zfsvfs);
4608 			return (error);
4609 		}
4610 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4611 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4612 		    &target)) {
4613 			VN_RELE(vp);
4614 			VN_RELE(ZTOV(sharedir));
4615 			ZFS_EXIT(zfsvfs);
4616 			nvlist_free(nvlist);
4617 			return (error);
4618 		}
4619 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4620 		    kcred, NULL, 0);
4621 		nvlist_free(nvlist);
4622 		break;
4623 
4624 	case ZFS_SMB_ACL_PURGE:
4625 		error = zfs_smb_acl_purge(sharedir);
4626 		break;
4627 
4628 	default:
4629 		error = EINVAL;
4630 		break;
4631 	}
4632 
4633 	VN_RELE(vp);
4634 	VN_RELE(ZTOV(sharedir));
4635 
4636 	ZFS_EXIT(zfsvfs);
4637 
4638 	return (error);
4639 }
4640 
4641 /*
4642  * inputs:
4643  * zc_name		name of filesystem
4644  * zc_value		short name of snap
4645  * zc_string		user-supplied tag for this hold
4646  * zc_cookie		recursive flag
4647  * zc_temphold		set if hold is temporary
4648  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4649  * zc_sendobj		if non-zero, the objid for zc_name@zc_value
4650  * zc_createtxg		if zc_sendobj is non-zero, snap must have zc_createtxg
4651  *
4652  * outputs:		none
4653  */
4654 static int
4655 zfs_ioc_hold(zfs_cmd_t *zc)
4656 {
4657 	boolean_t recursive = zc->zc_cookie;
4658 	spa_t *spa;
4659 	dsl_pool_t *dp;
4660 	dsl_dataset_t *ds;
4661 	int error;
4662 	minor_t minor = 0;
4663 
4664 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4665 		return (EINVAL);
4666 
4667 	if (zc->zc_sendobj == 0) {
4668 		return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4669 		    zc->zc_string, recursive, zc->zc_temphold,
4670 		    zc->zc_cleanup_fd));
4671 	}
4672 
4673 	if (recursive)
4674 		return (EINVAL);
4675 
4676 	error = spa_open(zc->zc_name, &spa, FTAG);
4677 	if (error)
4678 		return (error);
4679 
4680 	dp = spa_get_dsl(spa);
4681 	rw_enter(&dp->dp_config_rwlock, RW_READER);
4682 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4683 	rw_exit(&dp->dp_config_rwlock);
4684 	spa_close(spa, FTAG);
4685 	if (error)
4686 		return (error);
4687 
4688 	/*
4689 	 * Until we have a hold on this snapshot, it's possible that
4690 	 * zc_sendobj could've been destroyed and reused as part
4691 	 * of a later txg.  Make sure we're looking at the right object.
4692 	 */
4693 	if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
4694 		dsl_dataset_rele(ds, FTAG);
4695 		return (ENOENT);
4696 	}
4697 
4698 	if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
4699 		error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4700 		if (error) {
4701 			dsl_dataset_rele(ds, FTAG);
4702 			return (error);
4703 		}
4704 	}
4705 
4706 	error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
4707 	    zc->zc_temphold);
4708 	if (minor != 0) {
4709 		if (error == 0) {
4710 			dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
4711 			    minor);
4712 		}
4713 		zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4714 	}
4715 	dsl_dataset_rele(ds, FTAG);
4716 
4717 	return (error);
4718 }
4719 
4720 /*
4721  * inputs:
4722  * zc_name	name of dataset from which we're releasing a user hold
4723  * zc_value	short name of snap
4724  * zc_string	user-supplied tag for this hold
4725  * zc_cookie	recursive flag
4726  *
4727  * outputs:	none
4728  */
4729 static int
4730 zfs_ioc_release(zfs_cmd_t *zc)
4731 {
4732 	boolean_t recursive = zc->zc_cookie;
4733 
4734 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4735 		return (EINVAL);
4736 
4737 	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4738 	    zc->zc_string, recursive));
4739 }
4740 
4741 /*
4742  * inputs:
4743  * zc_name		name of filesystem
4744  *
4745  * outputs:
4746  * zc_nvlist_src{_size}	nvlist of snapshot holds
4747  */
4748 static int
4749 zfs_ioc_get_holds(zfs_cmd_t *zc)
4750 {
4751 	nvlist_t *nvp;
4752 	int error;
4753 
4754 	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4755 		error = put_nvlist(zc, nvp);
4756 		nvlist_free(nvp);
4757 	}
4758 
4759 	return (error);
4760 }
4761 
4762 /*
4763  * inputs:
4764  * zc_name		name of new filesystem or snapshot
4765  * zc_value		full name of old snapshot
4766  *
4767  * outputs:
4768  * zc_cookie		space in bytes
4769  * zc_objset_type	compressed space in bytes
4770  * zc_perm_action	uncompressed space in bytes
4771  */
4772 static int
4773 zfs_ioc_space_written(zfs_cmd_t *zc)
4774 {
4775 	int error;
4776 	dsl_dataset_t *new, *old;
4777 
4778 	error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4779 	if (error != 0)
4780 		return (error);
4781 	error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4782 	if (error != 0) {
4783 		dsl_dataset_rele(new, FTAG);
4784 		return (error);
4785 	}
4786 
4787 	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
4788 	    &zc->zc_objset_type, &zc->zc_perm_action);
4789 	dsl_dataset_rele(old, FTAG);
4790 	dsl_dataset_rele(new, FTAG);
4791 	return (error);
4792 }
4793 
4794 /*
4795  * inputs:
4796  * zc_name		full name of last snapshot
4797  * zc_value		full name of first snapshot
4798  *
4799  * outputs:
4800  * zc_cookie		space in bytes
4801  * zc_objset_type	compressed space in bytes
4802  * zc_perm_action	uncompressed space in bytes
4803  */
4804 static int
4805 zfs_ioc_space_snaps(zfs_cmd_t *zc)
4806 {
4807 	int error;
4808 	dsl_dataset_t *new, *old;
4809 
4810 	error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4811 	if (error != 0)
4812 		return (error);
4813 	error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4814 	if (error != 0) {
4815 		dsl_dataset_rele(new, FTAG);
4816 		return (error);
4817 	}
4818 
4819 	error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie,
4820 	    &zc->zc_objset_type, &zc->zc_perm_action);
4821 	dsl_dataset_rele(old, FTAG);
4822 	dsl_dataset_rele(new, FTAG);
4823 	return (error);
4824 }
4825 
4826 /*
4827  * pool create, destroy, and export don't log the history as part of
4828  * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
4829  * do the logging of those commands.
4830  */
4831 static zfs_ioc_vec_t zfs_ioc_vec[] = {
4832 	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4833 	    POOL_CHECK_NONE },
4834 	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4835 	    POOL_CHECK_NONE },
4836 	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4837 	    POOL_CHECK_NONE },
4838 	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4839 	    POOL_CHECK_NONE },
4840 	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
4841 	    POOL_CHECK_NONE },
4842 	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
4843 	    POOL_CHECK_NONE },
4844 	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
4845 	    POOL_CHECK_NONE },
4846 	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4847 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4848 	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
4849 	    POOL_CHECK_READONLY },
4850 	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
4851 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4852 	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4853 	    POOL_CHECK_NONE },
4854 	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4855 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4856 	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4857 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4858 	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
4859 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4860 	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4861 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4862 	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4863 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4864 	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4865 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4866 	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4867 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4868 	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4869 	    POOL_CHECK_SUSPENDED },
4870 	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4871 	    POOL_CHECK_NONE },
4872 	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4873 	    POOL_CHECK_SUSPENDED },
4874 	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4875 	    POOL_CHECK_SUSPENDED },
4876 	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE,
4877 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4878 	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE,
4879 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4880 	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
4881 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4882 	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
4883 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4884 	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE,
4885 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4886 	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
4887 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4888 	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE,
4889 	    POOL_CHECK_NONE },
4890 	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
4891 	    POOL_CHECK_NONE },
4892 	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
4893 	    POOL_CHECK_NONE },
4894 	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
4895 	    POOL_CHECK_NONE },
4896 	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
4897 	    POOL_CHECK_NONE },
4898 	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4899 	    POOL_CHECK_NONE },
4900 	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
4901 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4902 	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
4903 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4904 	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
4905 	    POOL_CHECK_NONE },
4906 	{ zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
4907 	    POOL_CHECK_SUSPENDED },
4908 	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
4909 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4910 	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
4911 	    POOL_CHECK_NONE },
4912 	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
4913 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4914 	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4915 	    POOL_CHECK_NONE },
4916 	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE,
4917 	    POOL_CHECK_NONE },
4918 	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
4919 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4920 	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
4921 	    POOL_CHECK_NONE },
4922 	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME,
4923 	    B_FALSE, POOL_CHECK_NONE },
4924 	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME,
4925 	    B_FALSE, POOL_CHECK_NONE },
4926 	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
4927 	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4928 	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE,
4929 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4930 	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
4931 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4932 	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4933 	    POOL_CHECK_SUSPENDED },
4934 	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4935 	    POOL_CHECK_NONE },
4936 	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4937 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4938 	{ zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4939 	    POOL_CHECK_NONE },
4940 	{ zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
4941 	    POOL_CHECK_NONE },
4942 	{ zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
4943 	    B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4944 	{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
4945 	    POOL_CHECK_SUSPENDED },
4946 	{ zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4947 	    POOL_CHECK_SUSPENDED },
4948 	{ zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4949 	    POOL_CHECK_SUSPENDED },
4950 	{ zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive,
4951 	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4952 	{ zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4953 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
4954 	{ zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4955 	    POOL_CHECK_SUSPENDED },
4956 	{ zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4957 	    POOL_CHECK_NONE }
4958 };
4959 
4960 int
4961 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
4962     zfs_ioc_poolcheck_t check)
4963 {
4964 	spa_t *spa;
4965 	int error;
4966 
4967 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
4968 
4969 	if (check & POOL_CHECK_NONE)
4970 		return (0);
4971 
4972 	error = spa_open(name, &spa, FTAG);
4973 	if (error == 0) {
4974 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
4975 			error = EAGAIN;
4976 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
4977 			error = EROFS;
4978 		spa_close(spa, FTAG);
4979 	}
4980 	return (error);
4981 }
4982 
4983 /*
4984  * Find a free minor number.
4985  */
4986 minor_t
4987 zfsdev_minor_alloc(void)
4988 {
4989 	static minor_t last_minor;
4990 	minor_t m;
4991 
4992 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4993 
4994 	for (m = last_minor + 1; m != last_minor; m++) {
4995 		if (m > ZFSDEV_MAX_MINOR)
4996 			m = 1;
4997 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
4998 			last_minor = m;
4999 			return (m);
5000 		}
5001 	}
5002 
5003 	return (0);
5004 }
5005 
5006 static int
5007 zfs_ctldev_init(dev_t *devp)
5008 {
5009 	minor_t minor;
5010 	zfs_soft_state_t *zs;
5011 
5012 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5013 	ASSERT(getminor(*devp) == 0);
5014 
5015 	minor = zfsdev_minor_alloc();
5016 	if (minor == 0)
5017 		return (ENXIO);
5018 
5019 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5020 		return (EAGAIN);
5021 
5022 	*devp = makedevice(getemajor(*devp), minor);
5023 
5024 	zs = ddi_get_soft_state(zfsdev_state, minor);
5025 	zs->zss_type = ZSST_CTLDEV;
5026 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5027 
5028 	return (0);
5029 }
5030 
5031 static void
5032 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
5033 {
5034 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5035 
5036 	zfs_onexit_destroy(zo);
5037 	ddi_soft_state_free(zfsdev_state, minor);
5038 }
5039 
5040 void *
5041 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5042 {
5043 	zfs_soft_state_t *zp;
5044 
5045 	zp = ddi_get_soft_state(zfsdev_state, minor);
5046 	if (zp == NULL || zp->zss_type != which)
5047 		return (NULL);
5048 
5049 	return (zp->zss_data);
5050 }
5051 
5052 static int
5053 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
5054 {
5055 	int error = 0;
5056 
5057 	if (getminor(*devp) != 0)
5058 		return (zvol_open(devp, flag, otyp, cr));
5059 
5060 	/* This is the control device. Allocate a new minor if requested. */
5061 	if (flag & FEXCL) {
5062 		mutex_enter(&zfsdev_state_lock);
5063 		error = zfs_ctldev_init(devp);
5064 		mutex_exit(&zfsdev_state_lock);
5065 	}
5066 
5067 	return (error);
5068 }
5069 
5070 static int
5071 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
5072 {
5073 	zfs_onexit_t *zo;
5074 	minor_t minor = getminor(dev);
5075 
5076 	if (minor == 0)
5077 		return (0);
5078 
5079 	mutex_enter(&zfsdev_state_lock);
5080 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
5081 	if (zo == NULL) {
5082 		mutex_exit(&zfsdev_state_lock);
5083 		return (zvol_close(dev, flag, otyp, cr));
5084 	}
5085 	zfs_ctldev_destroy(zo, minor);
5086 	mutex_exit(&zfsdev_state_lock);
5087 
5088 	return (0);
5089 }
5090 
5091 static int
5092 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
5093 {
5094 	zfs_cmd_t *zc;
5095 	uint_t vec;
5096 	int error, rc;
5097 	minor_t minor = getminor(dev);
5098 
5099 	if (minor != 0 &&
5100 	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
5101 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
5102 
5103 	vec = cmd - ZFS_IOC;
5104 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
5105 
5106 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
5107 		return (EINVAL);
5108 
5109 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
5110 
5111 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
5112 	if (error != 0)
5113 		error = EFAULT;
5114 
5115 	if ((error == 0) && !(flag & FKIOCTL))
5116 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
5117 
5118 	/*
5119 	 * Ensure that all pool/dataset names are valid before we pass down to
5120 	 * the lower layers.
5121 	 */
5122 	if (error == 0) {
5123 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5124 		zc->zc_iflags = flag & FKIOCTL;
5125 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
5126 		case POOL_NAME:
5127 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
5128 				error = EINVAL;
5129 			error = pool_status_check(zc->zc_name,
5130 			    zfs_ioc_vec[vec].zvec_namecheck,
5131 			    zfs_ioc_vec[vec].zvec_pool_check);
5132 			break;
5133 
5134 		case DATASET_NAME:
5135 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
5136 				error = EINVAL;
5137 			error = pool_status_check(zc->zc_name,
5138 			    zfs_ioc_vec[vec].zvec_namecheck,
5139 			    zfs_ioc_vec[vec].zvec_pool_check);
5140 			break;
5141 
5142 		case NO_NAME:
5143 			break;
5144 		}
5145 	}
5146 
5147 	if (error == 0)
5148 		error = zfs_ioc_vec[vec].zvec_func(zc);
5149 
5150 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
5151 	if (error == 0) {
5152 		if (rc != 0)
5153 			error = EFAULT;
5154 		if (zfs_ioc_vec[vec].zvec_his_log)
5155 			zfs_log_history(zc);
5156 	}
5157 
5158 	kmem_free(zc, sizeof (zfs_cmd_t));
5159 	return (error);
5160 }
5161 
5162 static int
5163 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5164 {
5165 	if (cmd != DDI_ATTACH)
5166 		return (DDI_FAILURE);
5167 
5168 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
5169 	    DDI_PSEUDO, 0) == DDI_FAILURE)
5170 		return (DDI_FAILURE);
5171 
5172 	zfs_dip = dip;
5173 
5174 	ddi_report_dev(dip);
5175 
5176 	return (DDI_SUCCESS);
5177 }
5178 
5179 static int
5180 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5181 {
5182 	if (spa_busy() || zfs_busy() || zvol_busy())
5183 		return (DDI_FAILURE);
5184 
5185 	if (cmd != DDI_DETACH)
5186 		return (DDI_FAILURE);
5187 
5188 	zfs_dip = NULL;
5189 
5190 	ddi_prop_remove_all(dip);
5191 	ddi_remove_minor_node(dip, NULL);
5192 
5193 	return (DDI_SUCCESS);
5194 }
5195 
5196 /*ARGSUSED*/
5197 static int
5198 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
5199 {
5200 	switch (infocmd) {
5201 	case DDI_INFO_DEVT2DEVINFO:
5202 		*result = zfs_dip;
5203 		return (DDI_SUCCESS);
5204 
5205 	case DDI_INFO_DEVT2INSTANCE:
5206 		*result = (void *)0;
5207 		return (DDI_SUCCESS);
5208 	}
5209 
5210 	return (DDI_FAILURE);
5211 }
5212 
5213 /*
5214  * OK, so this is a little weird.
5215  *
5216  * /dev/zfs is the control node, i.e. minor 0.
5217  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
5218  *
5219  * /dev/zfs has basically nothing to do except serve up ioctls,
5220  * so most of the standard driver entry points are in zvol.c.
5221  */
5222 static struct cb_ops zfs_cb_ops = {
5223 	zfsdev_open,	/* open */
5224 	zfsdev_close,	/* close */
5225 	zvol_strategy,	/* strategy */
5226 	nodev,		/* print */
5227 	zvol_dump,	/* dump */
5228 	zvol_read,	/* read */
5229 	zvol_write,	/* write */
5230 	zfsdev_ioctl,	/* ioctl */
5231 	nodev,		/* devmap */
5232 	nodev,		/* mmap */
5233 	nodev,		/* segmap */
5234 	nochpoll,	/* poll */
5235 	ddi_prop_op,	/* prop_op */
5236 	NULL,		/* streamtab */
5237 	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
5238 	CB_REV,		/* version */
5239 	nodev,		/* async read */
5240 	nodev,		/* async write */
5241 };
5242 
5243 static struct dev_ops zfs_dev_ops = {
5244 	DEVO_REV,	/* version */
5245 	0,		/* refcnt */
5246 	zfs_info,	/* info */
5247 	nulldev,	/* identify */
5248 	nulldev,	/* probe */
5249 	zfs_attach,	/* attach */
5250 	zfs_detach,	/* detach */
5251 	nodev,		/* reset */
5252 	&zfs_cb_ops,	/* driver operations */
5253 	NULL,		/* no bus operations */
5254 	NULL,		/* power */
5255 	ddi_quiesce_not_needed,	/* quiesce */
5256 };
5257 
5258 static struct modldrv zfs_modldrv = {
5259 	&mod_driverops,
5260 	"ZFS storage pool",
5261 	&zfs_dev_ops
5262 };
5263 
5264 static struct modlinkage modlinkage = {
5265 	MODREV_1,
5266 	(void *)&zfs_modlfs,
5267 	(void *)&zfs_modldrv,
5268 	NULL
5269 };
5270 
5271 
5272 uint_t zfs_fsyncer_key;
5273 extern uint_t rrw_tsd_key;
5274 
5275 int
5276 _init(void)
5277 {
5278 	int error;
5279 
5280 	spa_init(FREAD | FWRITE);
5281 	zfs_init();
5282 	zvol_init();
5283 
5284 	if ((error = mod_install(&modlinkage)) != 0) {
5285 		zvol_fini();
5286 		zfs_fini();
5287 		spa_fini();
5288 		return (error);
5289 	}
5290 
5291 	tsd_create(&zfs_fsyncer_key, NULL);
5292 	tsd_create(&rrw_tsd_key, NULL);
5293 
5294 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
5295 	ASSERT(error == 0);
5296 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5297 
5298 	return (0);
5299 }
5300 
5301 int
5302 _fini(void)
5303 {
5304 	int error;
5305 
5306 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
5307 		return (EBUSY);
5308 
5309 	if ((error = mod_remove(&modlinkage)) != 0)
5310 		return (error);
5311 
5312 	zvol_fini();
5313 	zfs_fini();
5314 	spa_fini();
5315 	if (zfs_nfsshare_inited)
5316 		(void) ddi_modclose(nfs_mod);
5317 	if (zfs_smbshare_inited)
5318 		(void) ddi_modclose(smbsrv_mod);
5319 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
5320 		(void) ddi_modclose(sharefs_mod);
5321 
5322 	tsd_destroy(&zfs_fsyncer_key);
5323 	ldi_ident_release(zfs_li);
5324 	zfs_li = NULL;
5325 	mutex_destroy(&zfs_share_lock);
5326 
5327 	return (error);
5328 }
5329 
5330 int
5331 _info(struct modinfo *modinfop)
5332 {
5333 	return (mod_info(&modlinkage, modinfop));
5334 }
5335