xref: /titanic_41/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision 989f28072d20c73ae0955d6a1e3e2fc74831cb39)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Portions Copyright 2011 Martin Matuska
25  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
26  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
27  * Copyright (c) 2012 by Delphix. All rights reserved.
28  */
29 
30 /*
31  * ZFS ioctls.
32  *
33  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
34  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
35  *
36  * There are two ways that we handle ioctls: the legacy way where almost
37  * all of the logic is in the ioctl callback, and the new way where most
38  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
39  *
40  * Non-legacy ioctls should be registered by calling
41  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
42  * from userland by lzc_ioctl().
43  *
44  * The registration arguments are as follows:
45  *
46  * const char *name
47  *   The name of the ioctl.  This is used for history logging.  If the
48  *   ioctl returns successfully (the callback returns 0), and allow_log
49  *   is true, then a history log entry will be recorded with the input &
50  *   output nvlists.  The log entry can be printed with "zpool history -i".
51  *
52  * zfs_ioc_t ioc
53  *   The ioctl request number, which userland will pass to ioctl(2).
54  *   The ioctl numbers can change from release to release, because
55  *   the caller (libzfs) must be matched to the kernel.
56  *
57  * zfs_secpolicy_func_t *secpolicy
58  *   This function will be called before the zfs_ioc_func_t, to
59  *   determine if this operation is permitted.  It should return EPERM
60  *   on failure, and 0 on success.  Checks include determining if the
61  *   dataset is visible in this zone, and if the user has either all
62  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
63  *   to do this operation on this dataset with "zfs allow".
64  *
65  * zfs_ioc_namecheck_t namecheck
66  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
67  *   name, a dataset name, or nothing.  If the name is not well-formed,
68  *   the ioctl will fail and the callback will not be called.
69  *   Therefore, the callback can assume that the name is well-formed
70  *   (e.g. is null-terminated, doesn't have more than one '@' character,
71  *   doesn't have invalid characters).
72  *
73  * zfs_ioc_poolcheck_t pool_check
74  *   This specifies requirements on the pool state.  If the pool does
75  *   not meet them (is suspended or is readonly), the ioctl will fail
76  *   and the callback will not be called.  If any checks are specified
77  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
78  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
79  *   POOL_CHECK_READONLY).
80  *
81  * boolean_t smush_outnvlist
82  *   If smush_outnvlist is true, then the output is presumed to be a
83  *   list of errors, and it will be "smushed" down to fit into the
84  *   caller's buffer, by removing some entries and replacing them with a
85  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
86  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
87  *   outnvlist does not fit into the userland-provided buffer, then the
88  *   ioctl will fail with ENOMEM.
89  *
90  * zfs_ioc_func_t *func
91  *   The callback function that will perform the operation.
92  *
93  *   The callback should return 0 on success, or an error number on
94  *   failure.  If the function fails, the userland ioctl will return -1,
95  *   and errno will be set to the callback's return value.  The callback
96  *   will be called with the following arguments:
97  *
98  *   const char *name
99  *     The name of the pool or dataset to operate on, from
100  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
101  *     expected type (pool, dataset, or none).
102  *
103  *   nvlist_t *innvl
104  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
105  *     NULL if no input nvlist was provided.  Changes to this nvlist are
106  *     ignored.  If the input nvlist could not be deserialized, the
107  *     ioctl will fail and the callback will not be called.
108  *
109  *   nvlist_t *outnvl
110  *     The output nvlist, initially empty.  The callback can fill it in,
111  *     and it will be returned to userland by serializing it into
112  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
113  *     fails (e.g. because the caller didn't supply a large enough
114  *     buffer), then the overall ioctl will fail.  See the
115  *     'smush_outnvlist' argument above for additional behaviors.
116  *
117  *     There are two typical uses of the output nvlist:
118  *       - To return state, e.g. property values.  In this case,
119  *         smush_outnvlist should be false.  If the buffer was not large
120  *         enough, the caller will reallocate a larger buffer and try
121  *         the ioctl again.
122  *
123  *       - To return multiple errors from an ioctl which makes on-disk
124  *         changes.  In this case, smush_outnvlist should be true.
125  *         Ioctls which make on-disk modifications should generally not
126  *         use the outnvl if they succeed, because the caller can not
127  *         distinguish between the operation failing, and
128  *         deserialization failing.
129  */
130 
131 #include <sys/types.h>
132 #include <sys/param.h>
133 #include <sys/errno.h>
134 #include <sys/uio.h>
135 #include <sys/buf.h>
136 #include <sys/modctl.h>
137 #include <sys/open.h>
138 #include <sys/file.h>
139 #include <sys/kmem.h>
140 #include <sys/conf.h>
141 #include <sys/cmn_err.h>
142 #include <sys/stat.h>
143 #include <sys/zfs_ioctl.h>
144 #include <sys/zfs_vfsops.h>
145 #include <sys/zfs_znode.h>
146 #include <sys/zap.h>
147 #include <sys/spa.h>
148 #include <sys/spa_impl.h>
149 #include <sys/vdev.h>
150 #include <sys/priv_impl.h>
151 #include <sys/dmu.h>
152 #include <sys/dsl_dir.h>
153 #include <sys/dsl_dataset.h>
154 #include <sys/dsl_prop.h>
155 #include <sys/dsl_deleg.h>
156 #include <sys/dmu_objset.h>
157 #include <sys/dmu_impl.h>
158 #include <sys/ddi.h>
159 #include <sys/sunddi.h>
160 #include <sys/sunldi.h>
161 #include <sys/policy.h>
162 #include <sys/zone.h>
163 #include <sys/nvpair.h>
164 #include <sys/pathname.h>
165 #include <sys/mount.h>
166 #include <sys/sdt.h>
167 #include <sys/fs/zfs.h>
168 #include <sys/zfs_ctldir.h>
169 #include <sys/zfs_dir.h>
170 #include <sys/zfs_onexit.h>
171 #include <sys/zvol.h>
172 #include <sys/dsl_scan.h>
173 #include <sharefs/share.h>
174 #include <sys/dmu_objset.h>
175 
176 #include "zfs_namecheck.h"
177 #include "zfs_prop.h"
178 #include "zfs_deleg.h"
179 #include "zfs_comutil.h"
180 
181 extern struct modlfs zfs_modlfs;
182 
183 extern void zfs_init(void);
184 extern void zfs_fini(void);
185 
186 ldi_ident_t zfs_li = NULL;
187 dev_info_t *zfs_dip;
188 
189 uint_t zfs_fsyncer_key;
190 extern uint_t rrw_tsd_key;
191 static uint_t zfs_allow_log_key;
192 
/*
 * Legacy ioctl handler: it is handed only the zfs_cmd_t.  Per the file
 * header, legacy ioctls keep almost all of their logic in the callback.
 */
193 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
/*
 * New-style ioctl handler; the file header describes its arguments as
 * (name, innvl, outnvl).
 */
194 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
/*
 * Security policy callback.  Per the file header it is called before the
 * ioctl handler to decide whether the operation is permitted.
 */
195 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
196 
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name (see the 'namecheck'
 * registration argument in the file header): nothing, a pool name, or a
 * dataset name.
 */
197 typedef enum {
198 	NO_NAME,
199 	POOL_NAME,
200 	DATASET_NAME
201 } zfs_ioc_namecheck_t;
202 
/*
 * Pool-state requirements for an ioctl (see the 'pool_check' registration
 * argument in the file header).  The values are distinct bits so that checks
 * can be or-ed together; note that POOL_CHECK_NONE is itself bit 0 rather
 * than the value zero.
 */
203 typedef enum {
204 	POOL_CHECK_NONE		= 1 << 0,
205 	POOL_CHECK_SUSPENDED	= 1 << 1,
206 	POOL_CHECK_READONLY	= 1 << 2,
207 } zfs_ioc_poolcheck_t;
208 
/*
 * Per-ioctl description.  The members correspond to the registration
 * arguments documented in the file header: a legacy or a new-style handler,
 * the security policy callback, the name and pool checks, whether the call
 * may be logged, whether the output nvlist may be "smushed", and the ioctl
 * name used for history logging.
 */
209 typedef struct zfs_ioc_vec {
210 	zfs_ioc_legacy_func_t	*zvec_legacy_func;
211 	zfs_ioc_func_t		*zvec_func;
212 	zfs_secpolicy_func_t	*zvec_secpolicy;
213 	zfs_ioc_namecheck_t	zvec_namecheck;
214 	boolean_t		zvec_allow_log;
215 	zfs_ioc_poolcheck_t	zvec_pool_check;
216 	boolean_t		zvec_smush_outnvlist;
217 	const char		*zvec_name;
218 } zfs_ioc_vec_t;
219 
/*
 * Delegated-permission names for the user/group space properties.  The
 * userspace security policies in this file index it with zc_objset_type
 * after checking that value against ZFS_NUM_USERQUOTA_PROPS.
 */
220 /* This array is indexed by zfs_userquota_prop_t */
221 static const char *userquota_perms[] = {
222 	ZFS_DELEG_PERM_USERUSED,
223 	ZFS_DELEG_PERM_USERQUOTA,
224 	ZFS_DELEG_PERM_GROUPUSED,
225 	ZFS_DELEG_PERM_GROUPQUOTA,
226 };
227 
228 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
229 static int zfs_check_settable(const char *name, nvpair_t *property,
230     cred_t *cr);
231 static int zfs_check_clearable(char *dataset, nvlist_t *props,
232     nvlist_t **errors);
233 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
234     boolean_t *);
235 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
236 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
237 
238 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
239 void
240 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
241 {
242 	const char *newfile;
243 	char buf[512];
244 	va_list adx;
245 
246 	/*
247 	 * Get rid of annoying "../common/" prefix to filename.
248 	 */
249 	newfile = strrchr(file, '/');
250 	if (newfile != NULL) {
251 		newfile = newfile + 1; /* Get rid of leading / */
252 	} else {
253 		newfile = file;
254 	}
255 
256 	va_start(adx, fmt);
257 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
258 	va_end(adx);
259 
260 	/*
261 	 * To get this data, use the zfs-dprintf probe as so:
262 	 * dtrace -q -n 'zfs-dprintf \
263 	 *	/stringof(arg0) == "dbuf.c"/ \
264 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
265 	 * arg0 = file name
266 	 * arg1 = function name
267 	 * arg2 = line number
268 	 * arg3 = message
269 	 */
270 	DTRACE_PROBE4(zfs__dprintf,
271 	    char *, newfile, char *, func, int, line, char *, buf);
272 }
273 
274 static void
275 history_str_free(char *buf)
276 {
277 	kmem_free(buf, HIS_MAX_RECORD_LEN);
278 }
279 
280 static char *
281 history_str_get(zfs_cmd_t *zc)
282 {
283 	char *buf;
284 
285 	if (zc->zc_history == NULL)
286 		return (NULL);
287 
288 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
289 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
290 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
291 		history_str_free(buf);
292 		return (NULL);
293 	}
294 
295 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
296 
297 	return (buf);
298 }
299 
300 /*
301  * Check to see if the named dataset is currently defined as bootable
302  */
303 static boolean_t
304 zfs_is_bootfs(const char *name)
305 {
306 	objset_t *os;
307 
308 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
309 		boolean_t ret;
310 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
311 		dmu_objset_rele(os, FTAG);
312 		return (ret);
313 	}
314 	return (B_FALSE);
315 }
316 
317 /*
318  * zfs_earlier_version
319  *
320  *	Return non-zero if the spa version is less than requested version.
321  */
322 static int
323 zfs_earlier_version(const char *name, int version)
324 {
325 	spa_t *spa;
326 
327 	if (spa_open(name, &spa, FTAG) == 0) {
328 		if (spa_version(spa) < version) {
329 			spa_close(spa, FTAG);
330 			return (1);
331 		}
332 		spa_close(spa, FTAG);
333 	}
334 	return (0);
335 }
336 
337 /*
338  * zpl_earlier_version
339  *
340  * Return TRUE if the ZPL version is less than requested version.
341  */
342 static boolean_t
343 zpl_earlier_version(const char *name, int version)
344 {
345 	objset_t *os;
346 	boolean_t rc = B_TRUE;
347 
348 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
349 		uint64_t zplversion;
350 
351 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
352 			dmu_objset_rele(os, FTAG);
353 			return (B_TRUE);
354 		}
355 		/* XXX reading from non-owned objset */
356 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
357 			rc = zplversion < version;
358 		dmu_objset_rele(os, FTAG);
359 	}
360 	return (rc);
361 }
362 
363 static void
364 zfs_log_history(zfs_cmd_t *zc)
365 {
366 	spa_t *spa;
367 	char *buf;
368 
369 	if ((buf = history_str_get(zc)) == NULL)
370 		return;
371 
372 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
373 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
374 			(void) spa_history_log(spa, buf);
375 		spa_close(spa, FTAG);
376 	}
377 	history_str_free(buf);
378 }
379 
380 /*
381  * Policy for top-level read operations (list pools).  Requires no privileges,
382  * and can be used in the local zone, as there is no associated dataset.
383  */
384 /* ARGSUSED */
385 static int
386 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
387 {
388 	return (0);
389 }
390 
391 /*
392  * Policy for dataset read operations (list children, get statistics).  Requires
393  * no privileges, but must be visible in the local zone.
394  */
395 /* ARGSUSED */
396 static int
397 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
398 {
399 	if (INGLOBALZONE(curproc) ||
400 	    zone_dataset_visible(zc->zc_name, NULL))
401 		return (0);
402 
403 	return (ENOENT);
404 }
405 
406 static int
407 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
408 {
409 	int writable = 1;
410 
411 	/*
412 	 * The dataset must be visible by this zone -- check this first
413 	 * so they don't see EPERM on something they shouldn't know about.
414 	 */
415 	if (!INGLOBALZONE(curproc) &&
416 	    !zone_dataset_visible(dataset, &writable))
417 		return (ENOENT);
418 
419 	if (INGLOBALZONE(curproc)) {
420 		/*
421 		 * If the fs is zoned, only root can access it from the
422 		 * global zone.
423 		 */
424 		if (secpolicy_zfs(cr) && zoned)
425 			return (EPERM);
426 	} else {
427 		/*
428 		 * If we are in a local zone, the 'zoned' property must be set.
429 		 */
430 		if (!zoned)
431 			return (EPERM);
432 
433 		/* must be writable by this zone */
434 		if (!writable)
435 			return (EPERM);
436 	}
437 	return (0);
438 }
439 
440 static int
441 zfs_dozonecheck(const char *dataset, cred_t *cr)
442 {
443 	uint64_t zoned;
444 
445 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
446 		return (ENOENT);
447 
448 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
449 }
450 
451 static int
452 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
453 {
454 	uint64_t zoned;
455 
456 	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
457 	if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
458 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
459 		return (ENOENT);
460 	}
461 	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
462 
463 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
464 }
465 
466 static int
467 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
468 {
469 	int error;
470 	dsl_dataset_t *ds;
471 
472 	error = dsl_dataset_hold(name, FTAG, &ds);
473 	if (error != 0)
474 		return (error);
475 
476 	error = zfs_dozonecheck_ds(name, ds, cr);
477 	if (error == 0) {
478 		error = secpolicy_zfs(cr);
479 		if (error)
480 			error = dsl_deleg_access_impl(ds, perm, cr);
481 	}
482 
483 	dsl_dataset_rele(ds, FTAG);
484 	return (error);
485 }
486 
487 static int
488 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
489     const char *perm, cred_t *cr)
490 {
491 	int error;
492 
493 	error = zfs_dozonecheck_ds(name, ds, cr);
494 	if (error == 0) {
495 		error = secpolicy_zfs(cr);
496 		if (error)
497 			error = dsl_deleg_access_impl(ds, perm, cr);
498 	}
499 	return (error);
500 }
501 
502 /*
503  * Policy for setting the security label property.
504  *
505  * Returns 0 for success, non-zero for access and other errors.
506  */
507 static int
508 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
509 {
510 	char		ds_hexsl[MAXNAMELEN];
511 	bslabel_t	ds_sl, new_sl;
512 	boolean_t	new_default = FALSE;
513 	uint64_t	zoned;
514 	int		needed_priv = -1;
515 	int		error;
516 
517 	/* First get the existing dataset label. */
518 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
519 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
520 	if (error)
521 		return (EPERM);
522 
523 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
524 		new_default = TRUE;
525 
526 	/* The label must be translatable */
527 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
528 		return (EINVAL);
529 
530 	/*
531 	 * In a non-global zone, disallow attempts to set a label that
532 	 * doesn't match that of the zone; otherwise no other checks
533 	 * are needed.
534 	 */
535 	if (!INGLOBALZONE(curproc)) {
536 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
537 			return (EPERM);
538 		return (0);
539 	}
540 
541 	/*
542 	 * For global-zone datasets (i.e., those whose zoned property is
543 	 * "off", verify that the specified new label is valid for the
544 	 * global zone.
545 	 */
546 	if (dsl_prop_get_integer(name,
547 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
548 		return (EPERM);
549 	if (!zoned) {
550 		if (zfs_check_global_label(name, strval) != 0)
551 			return (EPERM);
552 	}
553 
554 	/*
555 	 * If the existing dataset label is nondefault, check if the
556 	 * dataset is mounted (label cannot be changed while mounted).
557 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
558 	 * mounted (or isn't a dataset, doesn't exist, ...).
559 	 */
560 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
561 		objset_t *os;
562 		static char *setsl_tag = "setsl_tag";
563 
564 		/*
565 		 * Try to own the dataset; abort if there is any error,
566 		 * (e.g., already mounted, in use, or other error).
567 		 */
568 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
569 		    setsl_tag, &os);
570 		if (error)
571 			return (EPERM);
572 
573 		dmu_objset_disown(os, setsl_tag);
574 
575 		if (new_default) {
576 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
577 			goto out_check;
578 		}
579 
580 		if (hexstr_to_label(strval, &new_sl) != 0)
581 			return (EPERM);
582 
583 		if (blstrictdom(&ds_sl, &new_sl))
584 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
585 		else if (blstrictdom(&new_sl, &ds_sl))
586 			needed_priv = PRIV_FILE_UPGRADE_SL;
587 	} else {
588 		/* dataset currently has a default label */
589 		if (!new_default)
590 			needed_priv = PRIV_FILE_UPGRADE_SL;
591 	}
592 
593 out_check:
594 	if (needed_priv != -1)
595 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
596 	return (0);
597 }
598 
599 static int
600 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
601     cred_t *cr)
602 {
603 	char *strval;
604 
605 	/*
606 	 * Check permissions for special properties.
607 	 */
608 	switch (prop) {
609 	case ZFS_PROP_ZONED:
610 		/*
611 		 * Disallow setting of 'zoned' from within a local zone.
612 		 */
613 		if (!INGLOBALZONE(curproc))
614 			return (EPERM);
615 		break;
616 
617 	case ZFS_PROP_QUOTA:
618 		if (!INGLOBALZONE(curproc)) {
619 			uint64_t zoned;
620 			char setpoint[MAXNAMELEN];
621 			/*
622 			 * Unprivileged users are allowed to modify the
623 			 * quota on things *under* (ie. contained by)
624 			 * the thing they own.
625 			 */
626 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
627 			    setpoint))
628 				return (EPERM);
629 			if (!zoned || strlen(dsname) <= strlen(setpoint))
630 				return (EPERM);
631 		}
632 		break;
633 
634 	case ZFS_PROP_MLSLABEL:
635 		if (!is_system_labeled())
636 			return (EPERM);
637 
638 		if (nvpair_value_string(propval, &strval) == 0) {
639 			int err;
640 
641 			err = zfs_set_slabel_policy(dsname, strval, CRED());
642 			if (err != 0)
643 				return (err);
644 		}
645 		break;
646 	}
647 
648 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
649 }
650 
651 /* ARGSUSED */
652 static int
653 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
654 {
655 	int error;
656 
657 	error = zfs_dozonecheck(zc->zc_name, cr);
658 	if (error)
659 		return (error);
660 
661 	/*
662 	 * permission to set permissions will be evaluated later in
663 	 * dsl_deleg_can_allow()
664 	 */
665 	return (0);
666 }
667 
668 /* ARGSUSED */
669 static int
670 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
671 {
672 	return (zfs_secpolicy_write_perms(zc->zc_name,
673 	    ZFS_DELEG_PERM_ROLLBACK, cr));
674 }
675 
676 /* ARGSUSED */
677 static int
678 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
679 {
680 	spa_t *spa;
681 	dsl_pool_t *dp;
682 	dsl_dataset_t *ds;
683 	char *cp;
684 	int error;
685 
686 	/*
687 	 * Generate the current snapshot name from the given objsetid, then
688 	 * use that name for the secpolicy/zone checks.
689 	 */
690 	cp = strchr(zc->zc_name, '@');
691 	if (cp == NULL)
692 		return (EINVAL);
693 	error = spa_open(zc->zc_name, &spa, FTAG);
694 	if (error)
695 		return (error);
696 
697 	dp = spa_get_dsl(spa);
698 	rw_enter(&dp->dp_config_rwlock, RW_READER);
699 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
700 	rw_exit(&dp->dp_config_rwlock);
701 	spa_close(spa, FTAG);
702 	if (error)
703 		return (error);
704 
705 	dsl_dataset_name(ds, zc->zc_name);
706 
707 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
708 	    ZFS_DELEG_PERM_SEND, cr);
709 	dsl_dataset_rele(ds, FTAG);
710 
711 	return (error);
712 }
713 
714 /* ARGSUSED */
715 static int
716 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
717 {
718 	return (zfs_secpolicy_write_perms(zc->zc_name,
719 	    ZFS_DELEG_PERM_SEND, cr));
720 }
721 
722 /* ARGSUSED */
723 static int
724 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
725 {
726 	vnode_t *vp;
727 	int error;
728 
729 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
730 	    NO_FOLLOW, NULL, &vp)) != 0)
731 		return (error);
732 
733 	/* Now make sure mntpnt and dataset are ZFS */
734 
735 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
736 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
737 	    zc->zc_name) != 0)) {
738 		VN_RELE(vp);
739 		return (EPERM);
740 	}
741 
742 	VN_RELE(vp);
743 	return (dsl_deleg_access(zc->zc_name,
744 	    ZFS_DELEG_PERM_SHARE, cr));
745 }
746 
747 int
748 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
749 {
750 	if (!INGLOBALZONE(curproc))
751 		return (EPERM);
752 
753 	if (secpolicy_nfs(cr) == 0) {
754 		return (0);
755 	} else {
756 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
757 	}
758 }
759 
760 int
761 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
762 {
763 	if (!INGLOBALZONE(curproc))
764 		return (EPERM);
765 
766 	if (secpolicy_smb(cr) == 0) {
767 		return (0);
768 	} else {
769 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
770 	}
771 }
772 
/*
 * Compute the parent of a dataset or snapshot name.
 *
 *	datasetname	name to examine
 *	parent		buffer that receives the parent name
 *	parentsize	size of 'parent' in bytes
 *
 * For "a/b@snap" the parent is "a/b"; for "a/b" it is "a".  A name that is
 * too long for 'parent' is truncated before the parent is derived.
 *
 * Returns 0 on success, ENOENT if the name has neither an '@' nor a '/',
 * and EINVAL if 'parentsize' is not positive.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (EINVAL);

	/*
	 * strncpy() does not terminate the destination when the source is
	 * at least 'parentsize' bytes long, so terminate it explicitly;
	 * otherwise the searches below could run off the end of 'parent'.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);

	cp[0] = '\0';
	return (0);
}
794 
795 int
796 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
797 {
798 	int error;
799 
800 	if ((error = zfs_secpolicy_write_perms(name,
801 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
802 		return (error);
803 
804 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
805 }
806 
807 /* ARGSUSED */
808 static int
809 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
810 {
811 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
812 }
813 
814 /*
815  * Destroying snapshots with delegated permissions requires
816  * descendant mount and destroy permissions.
817  */
818 /* ARGSUSED */
819 static int
820 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
821 {
822 	nvlist_t *snaps;
823 	nvpair_t *pair, *nextpair;
824 	int error = 0;
825 
826 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
827 		return (EINVAL);
828 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
829 	    pair = nextpair) {
830 		dsl_dataset_t *ds;
831 
832 		nextpair = nvlist_next_nvpair(snaps, pair);
833 		error = dsl_dataset_hold(nvpair_name(pair), FTAG, &ds);
834 		if (error == 0) {
835 			dsl_dataset_rele(ds, FTAG);
836 		} else if (error == ENOENT) {
837 			/*
838 			 * Ignore any snapshots that don't exist (we consider
839 			 * them "already destroyed").  Remove the name from the
840 			 * nvl here in case the snapshot is created between
841 			 * now and when we try to destroy it (in which case
842 			 * we don't want to destroy it since we haven't
843 			 * checked for permission).
844 			 */
845 			fnvlist_remove_nvpair(snaps, pair);
846 			error = 0;
847 			continue;
848 		} else {
849 			break;
850 		}
851 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
852 		if (error != 0)
853 			break;
854 	}
855 
856 	return (error);
857 }
858 
859 int
860 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
861 {
862 	char	parentname[MAXNAMELEN];
863 	int	error;
864 
865 	if ((error = zfs_secpolicy_write_perms(from,
866 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
867 		return (error);
868 
869 	if ((error = zfs_secpolicy_write_perms(from,
870 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
871 		return (error);
872 
873 	if ((error = zfs_get_parent(to, parentname,
874 	    sizeof (parentname))) != 0)
875 		return (error);
876 
877 	if ((error = zfs_secpolicy_write_perms(parentname,
878 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
879 		return (error);
880 
881 	if ((error = zfs_secpolicy_write_perms(parentname,
882 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
883 		return (error);
884 
885 	return (error);
886 }
887 
888 /* ARGSUSED */
889 static int
890 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
891 {
892 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
893 }
894 
895 /* ARGSUSED */
896 static int
897 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
898 {
899 	char	parentname[MAXNAMELEN];
900 	objset_t *clone;
901 	int error;
902 
903 	error = zfs_secpolicy_write_perms(zc->zc_name,
904 	    ZFS_DELEG_PERM_PROMOTE, cr);
905 	if (error)
906 		return (error);
907 
908 	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
909 
910 	if (error == 0) {
911 		dsl_dataset_t *pclone = NULL;
912 		dsl_dir_t *dd;
913 		dd = clone->os_dsl_dataset->ds_dir;
914 
915 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
916 		error = dsl_dataset_hold_obj(dd->dd_pool,
917 		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
918 		rw_exit(&dd->dd_pool->dp_config_rwlock);
919 		if (error) {
920 			dmu_objset_rele(clone, FTAG);
921 			return (error);
922 		}
923 
924 		error = zfs_secpolicy_write_perms(zc->zc_name,
925 		    ZFS_DELEG_PERM_MOUNT, cr);
926 
927 		dsl_dataset_name(pclone, parentname);
928 		dmu_objset_rele(clone, FTAG);
929 		dsl_dataset_rele(pclone, FTAG);
930 		if (error == 0)
931 			error = zfs_secpolicy_write_perms(parentname,
932 			    ZFS_DELEG_PERM_PROMOTE, cr);
933 	}
934 	return (error);
935 }
936 
937 /* ARGSUSED */
938 static int
939 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
940 {
941 	int error;
942 
943 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
944 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
945 		return (error);
946 
947 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
948 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
949 		return (error);
950 
951 	return (zfs_secpolicy_write_perms(zc->zc_name,
952 	    ZFS_DELEG_PERM_CREATE, cr));
953 }
954 
955 int
956 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
957 {
958 	return (zfs_secpolicy_write_perms(name,
959 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
960 }
961 
962 /*
963  * Check for permission to create each snapshot in the nvlist.
964  */
965 /* ARGSUSED */
966 static int
967 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
968 {
969 	nvlist_t *snaps;
970 	int error;
971 	nvpair_t *pair;
972 
973 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
974 		return (EINVAL);
975 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
976 	    pair = nvlist_next_nvpair(snaps, pair)) {
977 		char *name = nvpair_name(pair);
978 		char *atp = strchr(name, '@');
979 
980 		if (atp == NULL) {
981 			error = EINVAL;
982 			break;
983 		}
984 		*atp = '\0';
985 		error = zfs_secpolicy_snapshot_perms(name, cr);
986 		*atp = '@';
987 		if (error != 0)
988 			break;
989 	}
990 	return (error);
991 }
992 
993 /* ARGSUSED */
994 static int
995 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
996 {
997 	/*
998 	 * Even root must have a proper TSD so that we know what pool
999 	 * to log to.
1000 	 */
1001 	if (tsd_get(zfs_allow_log_key) == NULL)
1002 		return (EPERM);
1003 	return (0);
1004 }
1005 
1006 static int
1007 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1008 {
1009 	char	parentname[MAXNAMELEN];
1010 	int	error;
1011 	char	*origin;
1012 
1013 	if ((error = zfs_get_parent(zc->zc_name, parentname,
1014 	    sizeof (parentname))) != 0)
1015 		return (error);
1016 
1017 	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1018 	    (error = zfs_secpolicy_write_perms(origin,
1019 	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1020 		return (error);
1021 
1022 	if ((error = zfs_secpolicy_write_perms(parentname,
1023 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1024 		return (error);
1025 
1026 	return (zfs_secpolicy_write_perms(parentname,
1027 	    ZFS_DELEG_PERM_MOUNT, cr));
1028 }
1029 
1030 /*
1031  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1032  * SYS_CONFIG privilege, which is not available in a local zone.
1033  */
1034 /* ARGSUSED */
1035 static int
1036 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1037 {
1038 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1039 		return (EPERM);
1040 
1041 	return (0);
1042 }
1043 
1044 /*
1045  * Policy for object to name lookups.
1046  */
1047 /* ARGSUSED */
1048 static int
1049 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1050 {
1051 	int error;
1052 
1053 	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1054 		return (0);
1055 
1056 	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1057 	return (error);
1058 }
1059 
1060 /*
1061  * Policy for fault injection.  Requires all privileges.
1062  */
1063 /* ARGSUSED */
1064 static int
1065 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1066 {
1067 	return (secpolicy_zinject(cr));
1068 }
1069 
1070 /* ARGSUSED */
1071 static int
1072 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1073 {
1074 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1075 
1076 	if (prop == ZPROP_INVAL) {
1077 		if (!zfs_prop_user(zc->zc_value))
1078 			return (EINVAL);
1079 		return (zfs_secpolicy_write_perms(zc->zc_name,
1080 		    ZFS_DELEG_PERM_USERPROP, cr));
1081 	} else {
1082 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1083 		    NULL, cr));
1084 	}
1085 }
1086 
1087 static int
1088 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1089 {
1090 	int err = zfs_secpolicy_read(zc, innvl, cr);
1091 	if (err)
1092 		return (err);
1093 
1094 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1095 		return (EINVAL);
1096 
1097 	if (zc->zc_value[0] == 0) {
1098 		/*
1099 		 * They are asking about a posix uid/gid.  If it's
1100 		 * themself, allow it.
1101 		 */
1102 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1103 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1104 			if (zc->zc_guid == crgetuid(cr))
1105 				return (0);
1106 		} else {
1107 			if (groupmember(zc->zc_guid, cr))
1108 				return (0);
1109 		}
1110 	}
1111 
1112 	return (zfs_secpolicy_write_perms(zc->zc_name,
1113 	    userquota_perms[zc->zc_objset_type], cr));
1114 }
1115 
1116 static int
1117 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1118 {
1119 	int err = zfs_secpolicy_read(zc, innvl, cr);
1120 	if (err)
1121 		return (err);
1122 
1123 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1124 		return (EINVAL);
1125 
1126 	return (zfs_secpolicy_write_perms(zc->zc_name,
1127 	    userquota_perms[zc->zc_objset_type], cr));
1128 }
1129 
1130 /* ARGSUSED */
1131 static int
1132 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1133 {
1134 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1135 	    NULL, cr));
1136 }
1137 
1138 /* ARGSUSED */
1139 static int
1140 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1141 {
1142 	return (zfs_secpolicy_write_perms(zc->zc_name,
1143 	    ZFS_DELEG_PERM_HOLD, cr));
1144 }
1145 
1146 /* ARGSUSED */
1147 static int
1148 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1149 {
1150 	return (zfs_secpolicy_write_perms(zc->zc_name,
1151 	    ZFS_DELEG_PERM_RELEASE, cr));
1152 }
1153 
1154 /*
1155  * Policy for allowing temporary snapshots to be taken or released
1156  */
1157 static int
1158 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1159 {
1160 	/*
1161 	 * A temporary snapshot is the same as a snapshot,
1162 	 * hold, destroy and release all rolled into one.
1163 	 * Delegated diff alone is sufficient that we allow this.
1164 	 */
1165 	int error;
1166 
1167 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1168 	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1169 		return (0);
1170 
1171 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1172 	if (!error)
1173 		error = zfs_secpolicy_hold(zc, innvl, cr);
1174 	if (!error)
1175 		error = zfs_secpolicy_release(zc, innvl, cr);
1176 	if (!error)
1177 		error = zfs_secpolicy_destroy(zc, innvl, cr);
1178 	return (error);
1179 }
1180 
1181 /*
1182  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1183  */
1184 static int
1185 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1186 {
1187 	char *packed;
1188 	int error;
1189 	nvlist_t *list = NULL;
1190 
1191 	/*
1192 	 * Read in and unpack the user-supplied nvlist.
1193 	 */
1194 	if (size == 0)
1195 		return (EINVAL);
1196 
1197 	packed = kmem_alloc(size, KM_SLEEP);
1198 
1199 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1200 	    iflag)) != 0) {
1201 		kmem_free(packed, size);
1202 		return (error);
1203 	}
1204 
1205 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1206 		kmem_free(packed, size);
1207 		return (error);
1208 	}
1209 
1210 	kmem_free(packed, size);
1211 
1212 	*nvp = list;
1213 	return (0);
1214 }
1215 
1216 /*
1217  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1218  * Entries will be removed from the end of the nvlist, and one int32 entry
1219  * named "N_MORE_ERRORS" will be added indicating how many entries were
1220  * removed.
1221  */
1222 static int
1223 nvlist_smush(nvlist_t *errors, size_t max)
1224 {
1225 	size_t size;
1226 
1227 	size = fnvlist_size(errors);
1228 
1229 	if (size > max) {
1230 		nvpair_t *more_errors;
1231 		int n = 0;
1232 
1233 		if (max < 1024)
1234 			return (ENOMEM);
1235 
1236 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1237 		more_errors = nvlist_prev_nvpair(errors, NULL);
1238 
1239 		do {
1240 			nvpair_t *pair = nvlist_prev_nvpair(errors,
1241 			    more_errors);
1242 			fnvlist_remove_nvpair(errors, pair);
1243 			n++;
1244 			size = fnvlist_size(errors);
1245 		} while (size > max);
1246 
1247 		fnvlist_remove_nvpair(errors, more_errors);
1248 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1249 		ASSERT3U(fnvlist_size(errors), <=, max);
1250 	}
1251 
1252 	return (0);
1253 }
1254 
1255 static int
1256 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1257 {
1258 	char *packed = NULL;
1259 	int error = 0;
1260 	size_t size;
1261 
1262 	size = fnvlist_size(nvl);
1263 
1264 	if (size > zc->zc_nvlist_dst_size) {
1265 		error = ENOMEM;
1266 	} else {
1267 		packed = fnvlist_pack(nvl, &size);
1268 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1269 		    size, zc->zc_iflags) != 0)
1270 			error = EFAULT;
1271 		fnvlist_pack_free(packed, size);
1272 	}
1273 
1274 	zc->zc_nvlist_dst_size = size;
1275 	zc->zc_nvlist_dst_filled = B_TRUE;
1276 	return (error);
1277 }
1278 
1279 static int
1280 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1281 {
1282 	objset_t *os;
1283 	int error;
1284 
1285 	error = dmu_objset_hold(dsname, FTAG, &os);
1286 	if (error)
1287 		return (error);
1288 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1289 		dmu_objset_rele(os, FTAG);
1290 		return (EINVAL);
1291 	}
1292 
1293 	mutex_enter(&os->os_user_ptr_lock);
1294 	*zfvp = dmu_objset_get_user(os);
1295 	if (*zfvp) {
1296 		VFS_HOLD((*zfvp)->z_vfs);
1297 	} else {
1298 		error = ESRCH;
1299 	}
1300 	mutex_exit(&os->os_user_ptr_lock);
1301 	dmu_objset_rele(os, FTAG);
1302 	return (error);
1303 }
1304 
1305 /*
1306  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1307  * case its z_vfs will be NULL, and it will be opened as the owner.
1308  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1309  * which prevents all vnode ops from running.
1310  */
1311 static int
1312 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1313 {
1314 	int error = 0;
1315 
1316 	if (getzfsvfs(name, zfvp) != 0)
1317 		error = zfsvfs_create(name, zfvp);
1318 	if (error == 0) {
1319 		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1320 		    RW_READER, tag);
1321 		if ((*zfvp)->z_unmounted) {
1322 			/*
1323 			 * XXX we could probably try again, since the unmounting
1324 			 * thread should be just about to disassociate the
1325 			 * objset from the zfsvfs.
1326 			 */
1327 			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1328 			return (EBUSY);
1329 		}
1330 	}
1331 	return (error);
1332 }
1333 
1334 static void
1335 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1336 {
1337 	rrw_exit(&zfsvfs->z_teardown_lock, tag);
1338 
1339 	if (zfsvfs->z_vfs) {
1340 		VFS_RELE(zfsvfs->z_vfs);
1341 	} else {
1342 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1343 		zfsvfs_free(zfsvfs);
1344 	}
1345 }
1346 
1347 static int
1348 zfs_ioc_pool_create(zfs_cmd_t *zc)
1349 {
1350 	int error;
1351 	nvlist_t *config, *props = NULL;
1352 	nvlist_t *rootprops = NULL;
1353 	nvlist_t *zplprops = NULL;
1354 
1355 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1356 	    zc->zc_iflags, &config))
1357 		return (error);
1358 
1359 	if (zc->zc_nvlist_src_size != 0 && (error =
1360 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1361 	    zc->zc_iflags, &props))) {
1362 		nvlist_free(config);
1363 		return (error);
1364 	}
1365 
1366 	if (props) {
1367 		nvlist_t *nvl = NULL;
1368 		uint64_t version = SPA_VERSION;
1369 
1370 		(void) nvlist_lookup_uint64(props,
1371 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1372 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1373 			error = EINVAL;
1374 			goto pool_props_bad;
1375 		}
1376 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1377 		if (nvl) {
1378 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1379 			if (error != 0) {
1380 				nvlist_free(config);
1381 				nvlist_free(props);
1382 				return (error);
1383 			}
1384 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1385 		}
1386 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1387 		error = zfs_fill_zplprops_root(version, rootprops,
1388 		    zplprops, NULL);
1389 		if (error)
1390 			goto pool_props_bad;
1391 	}
1392 
1393 	error = spa_create(zc->zc_name, config, props, zplprops);
1394 
1395 	/*
1396 	 * Set the remaining root properties
1397 	 */
1398 	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1399 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1400 		(void) spa_destroy(zc->zc_name);
1401 
1402 pool_props_bad:
1403 	nvlist_free(rootprops);
1404 	nvlist_free(zplprops);
1405 	nvlist_free(config);
1406 	nvlist_free(props);
1407 
1408 	return (error);
1409 }
1410 
1411 static int
1412 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1413 {
1414 	int error;
1415 	zfs_log_history(zc);
1416 	error = spa_destroy(zc->zc_name);
1417 	if (error == 0)
1418 		zvol_remove_minors(zc->zc_name);
1419 	return (error);
1420 }
1421 
1422 static int
1423 zfs_ioc_pool_import(zfs_cmd_t *zc)
1424 {
1425 	nvlist_t *config, *props = NULL;
1426 	uint64_t guid;
1427 	int error;
1428 
1429 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1430 	    zc->zc_iflags, &config)) != 0)
1431 		return (error);
1432 
1433 	if (zc->zc_nvlist_src_size != 0 && (error =
1434 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1435 	    zc->zc_iflags, &props))) {
1436 		nvlist_free(config);
1437 		return (error);
1438 	}
1439 
1440 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1441 	    guid != zc->zc_guid)
1442 		error = EINVAL;
1443 	else
1444 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1445 
1446 	if (zc->zc_nvlist_dst != 0) {
1447 		int err;
1448 
1449 		if ((err = put_nvlist(zc, config)) != 0)
1450 			error = err;
1451 	}
1452 
1453 	nvlist_free(config);
1454 
1455 	if (props)
1456 		nvlist_free(props);
1457 
1458 	return (error);
1459 }
1460 
1461 static int
1462 zfs_ioc_pool_export(zfs_cmd_t *zc)
1463 {
1464 	int error;
1465 	boolean_t force = (boolean_t)zc->zc_cookie;
1466 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1467 
1468 	zfs_log_history(zc);
1469 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1470 	if (error == 0)
1471 		zvol_remove_minors(zc->zc_name);
1472 	return (error);
1473 }
1474 
1475 static int
1476 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1477 {
1478 	nvlist_t *configs;
1479 	int error;
1480 
1481 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1482 		return (EEXIST);
1483 
1484 	error = put_nvlist(zc, configs);
1485 
1486 	nvlist_free(configs);
1487 
1488 	return (error);
1489 }
1490 
1491 /*
1492  * inputs:
1493  * zc_name		name of the pool
1494  *
1495  * outputs:
1496  * zc_cookie		real errno
1497  * zc_nvlist_dst	config nvlist
1498  * zc_nvlist_dst_size	size of config nvlist
1499  */
1500 static int
1501 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1502 {
1503 	nvlist_t *config;
1504 	int error;
1505 	int ret = 0;
1506 
1507 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1508 	    sizeof (zc->zc_value));
1509 
1510 	if (config != NULL) {
1511 		ret = put_nvlist(zc, config);
1512 		nvlist_free(config);
1513 
1514 		/*
1515 		 * The config may be present even if 'error' is non-zero.
1516 		 * In this case we return success, and preserve the real errno
1517 		 * in 'zc_cookie'.
1518 		 */
1519 		zc->zc_cookie = error;
1520 	} else {
1521 		ret = error;
1522 	}
1523 
1524 	return (ret);
1525 }
1526 
1527 /*
1528  * Try to import the given pool, returning pool stats as appropriate so that
1529  * user land knows which devices are available and overall pool health.
1530  */
1531 static int
1532 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1533 {
1534 	nvlist_t *tryconfig, *config;
1535 	int error;
1536 
1537 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1538 	    zc->zc_iflags, &tryconfig)) != 0)
1539 		return (error);
1540 
1541 	config = spa_tryimport(tryconfig);
1542 
1543 	nvlist_free(tryconfig);
1544 
1545 	if (config == NULL)
1546 		return (EINVAL);
1547 
1548 	error = put_nvlist(zc, config);
1549 	nvlist_free(config);
1550 
1551 	return (error);
1552 }
1553 
1554 /*
1555  * inputs:
1556  * zc_name              name of the pool
1557  * zc_cookie            scan func (pool_scan_func_t)
1558  */
1559 static int
1560 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1561 {
1562 	spa_t *spa;
1563 	int error;
1564 
1565 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1566 		return (error);
1567 
1568 	if (zc->zc_cookie == POOL_SCAN_NONE)
1569 		error = spa_scan_stop(spa);
1570 	else
1571 		error = spa_scan(spa, zc->zc_cookie);
1572 
1573 	spa_close(spa, FTAG);
1574 
1575 	return (error);
1576 }
1577 
1578 static int
1579 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1580 {
1581 	spa_t *spa;
1582 	int error;
1583 
1584 	error = spa_open(zc->zc_name, &spa, FTAG);
1585 	if (error == 0) {
1586 		spa_freeze(spa);
1587 		spa_close(spa, FTAG);
1588 	}
1589 	return (error);
1590 }
1591 
1592 static int
1593 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1594 {
1595 	spa_t *spa;
1596 	int error;
1597 
1598 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1599 		return (error);
1600 
1601 	if (zc->zc_cookie < spa_version(spa) ||
1602 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1603 		spa_close(spa, FTAG);
1604 		return (EINVAL);
1605 	}
1606 
1607 	spa_upgrade(spa, zc->zc_cookie);
1608 	spa_close(spa, FTAG);
1609 
1610 	return (error);
1611 }
1612 
1613 static int
1614 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1615 {
1616 	spa_t *spa;
1617 	char *hist_buf;
1618 	uint64_t size;
1619 	int error;
1620 
1621 	if ((size = zc->zc_history_len) == 0)
1622 		return (EINVAL);
1623 
1624 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1625 		return (error);
1626 
1627 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1628 		spa_close(spa, FTAG);
1629 		return (ENOTSUP);
1630 	}
1631 
1632 	hist_buf = kmem_alloc(size, KM_SLEEP);
1633 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1634 	    &zc->zc_history_len, hist_buf)) == 0) {
1635 		error = ddi_copyout(hist_buf,
1636 		    (void *)(uintptr_t)zc->zc_history,
1637 		    zc->zc_history_len, zc->zc_iflags);
1638 	}
1639 
1640 	spa_close(spa, FTAG);
1641 	kmem_free(hist_buf, size);
1642 	return (error);
1643 }
1644 
1645 static int
1646 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1647 {
1648 	spa_t *spa;
1649 	int error;
1650 
1651 	error = spa_open(zc->zc_name, &spa, FTAG);
1652 	if (error == 0) {
1653 		error = spa_change_guid(spa);
1654 		spa_close(spa, FTAG);
1655 	}
1656 	return (error);
1657 }
1658 
1659 static int
1660 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1661 {
1662 	int error;
1663 
1664 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1665 		return (error);
1666 
1667 	return (0);
1668 }
1669 
1670 /*
1671  * inputs:
1672  * zc_name		name of filesystem
1673  * zc_obj		object to find
1674  *
1675  * outputs:
1676  * zc_value		name of object
1677  */
1678 static int
1679 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1680 {
1681 	objset_t *os;
1682 	int error;
1683 
1684 	/* XXX reading from objset not owned */
1685 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1686 		return (error);
1687 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1688 		dmu_objset_rele(os, FTAG);
1689 		return (EINVAL);
1690 	}
1691 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1692 	    sizeof (zc->zc_value));
1693 	dmu_objset_rele(os, FTAG);
1694 
1695 	return (error);
1696 }
1697 
1698 /*
1699  * inputs:
1700  * zc_name		name of filesystem
1701  * zc_obj		object to find
1702  *
1703  * outputs:
1704  * zc_stat		stats on object
1705  * zc_value		path to object
1706  */
1707 static int
1708 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1709 {
1710 	objset_t *os;
1711 	int error;
1712 
1713 	/* XXX reading from objset not owned */
1714 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1715 		return (error);
1716 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1717 		dmu_objset_rele(os, FTAG);
1718 		return (EINVAL);
1719 	}
1720 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1721 	    sizeof (zc->zc_value));
1722 	dmu_objset_rele(os, FTAG);
1723 
1724 	return (error);
1725 }
1726 
1727 static int
1728 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1729 {
1730 	spa_t *spa;
1731 	int error;
1732 	nvlist_t *config, **l2cache, **spares;
1733 	uint_t nl2cache = 0, nspares = 0;
1734 
1735 	error = spa_open(zc->zc_name, &spa, FTAG);
1736 	if (error != 0)
1737 		return (error);
1738 
1739 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1740 	    zc->zc_iflags, &config);
1741 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1742 	    &l2cache, &nl2cache);
1743 
1744 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1745 	    &spares, &nspares);
1746 
1747 	/*
1748 	 * A root pool with concatenated devices is not supported.
1749 	 * Thus, can not add a device to a root pool.
1750 	 *
1751 	 * Intent log device can not be added to a rootpool because
1752 	 * during mountroot, zil is replayed, a seperated log device
1753 	 * can not be accessed during the mountroot time.
1754 	 *
1755 	 * l2cache and spare devices are ok to be added to a rootpool.
1756 	 */
1757 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1758 		nvlist_free(config);
1759 		spa_close(spa, FTAG);
1760 		return (EDOM);
1761 	}
1762 
1763 	if (error == 0) {
1764 		error = spa_vdev_add(spa, config);
1765 		nvlist_free(config);
1766 	}
1767 	spa_close(spa, FTAG);
1768 	return (error);
1769 }
1770 
1771 /*
1772  * inputs:
1773  * zc_name		name of the pool
1774  * zc_nvlist_conf	nvlist of devices to remove
1775  * zc_cookie		to stop the remove?
1776  */
1777 static int
1778 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1779 {
1780 	spa_t *spa;
1781 	int error;
1782 
1783 	error = spa_open(zc->zc_name, &spa, FTAG);
1784 	if (error != 0)
1785 		return (error);
1786 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1787 	spa_close(spa, FTAG);
1788 	return (error);
1789 }
1790 
1791 static int
1792 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1793 {
1794 	spa_t *spa;
1795 	int error;
1796 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1797 
1798 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1799 		return (error);
1800 	switch (zc->zc_cookie) {
1801 	case VDEV_STATE_ONLINE:
1802 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1803 		break;
1804 
1805 	case VDEV_STATE_OFFLINE:
1806 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1807 		break;
1808 
1809 	case VDEV_STATE_FAULTED:
1810 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1811 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1812 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1813 
1814 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1815 		break;
1816 
1817 	case VDEV_STATE_DEGRADED:
1818 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1819 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1820 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1821 
1822 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1823 		break;
1824 
1825 	default:
1826 		error = EINVAL;
1827 	}
1828 	zc->zc_cookie = newstate;
1829 	spa_close(spa, FTAG);
1830 	return (error);
1831 }
1832 
1833 static int
1834 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1835 {
1836 	spa_t *spa;
1837 	int replacing = zc->zc_cookie;
1838 	nvlist_t *config;
1839 	int error;
1840 
1841 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1842 		return (error);
1843 
1844 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1845 	    zc->zc_iflags, &config)) == 0) {
1846 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1847 		nvlist_free(config);
1848 	}
1849 
1850 	spa_close(spa, FTAG);
1851 	return (error);
1852 }
1853 
1854 static int
1855 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1856 {
1857 	spa_t *spa;
1858 	int error;
1859 
1860 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1861 		return (error);
1862 
1863 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1864 
1865 	spa_close(spa, FTAG);
1866 	return (error);
1867 }
1868 
1869 static int
1870 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1871 {
1872 	spa_t *spa;
1873 	nvlist_t *config, *props = NULL;
1874 	int error;
1875 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1876 
1877 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1878 		return (error);
1879 
1880 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1881 	    zc->zc_iflags, &config)) {
1882 		spa_close(spa, FTAG);
1883 		return (error);
1884 	}
1885 
1886 	if (zc->zc_nvlist_src_size != 0 && (error =
1887 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1888 	    zc->zc_iflags, &props))) {
1889 		spa_close(spa, FTAG);
1890 		nvlist_free(config);
1891 		return (error);
1892 	}
1893 
1894 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1895 
1896 	spa_close(spa, FTAG);
1897 
1898 	nvlist_free(config);
1899 	nvlist_free(props);
1900 
1901 	return (error);
1902 }
1903 
1904 static int
1905 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1906 {
1907 	spa_t *spa;
1908 	char *path = zc->zc_value;
1909 	uint64_t guid = zc->zc_guid;
1910 	int error;
1911 
1912 	error = spa_open(zc->zc_name, &spa, FTAG);
1913 	if (error != 0)
1914 		return (error);
1915 
1916 	error = spa_vdev_setpath(spa, guid, path);
1917 	spa_close(spa, FTAG);
1918 	return (error);
1919 }
1920 
1921 static int
1922 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1923 {
1924 	spa_t *spa;
1925 	char *fru = zc->zc_value;
1926 	uint64_t guid = zc->zc_guid;
1927 	int error;
1928 
1929 	error = spa_open(zc->zc_name, &spa, FTAG);
1930 	if (error != 0)
1931 		return (error);
1932 
1933 	error = spa_vdev_setfru(spa, guid, fru);
1934 	spa_close(spa, FTAG);
1935 	return (error);
1936 }
1937 
1938 static int
1939 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1940 {
1941 	int error = 0;
1942 	nvlist_t *nv;
1943 
1944 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1945 
1946 	if (zc->zc_nvlist_dst != 0 &&
1947 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1948 		dmu_objset_stats(os, nv);
1949 		/*
1950 		 * NB: zvol_get_stats() will read the objset contents,
1951 		 * which we aren't supposed to do with a
1952 		 * DS_MODE_USER hold, because it could be
1953 		 * inconsistent.  So this is a bit of a workaround...
1954 		 * XXX reading with out owning
1955 		 */
1956 		if (!zc->zc_objset_stats.dds_inconsistent &&
1957 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
1958 			error = zvol_get_stats(os, nv);
1959 			if (error == EIO)
1960 				return (error);
1961 			VERIFY0(error);
1962 		}
1963 		error = put_nvlist(zc, nv);
1964 		nvlist_free(nv);
1965 	}
1966 
1967 	return (error);
1968 }
1969 
1970 /*
1971  * inputs:
1972  * zc_name		name of filesystem
1973  * zc_nvlist_dst_size	size of buffer for property nvlist
1974  *
1975  * outputs:
1976  * zc_objset_stats	stats
1977  * zc_nvlist_dst	property nvlist
1978  * zc_nvlist_dst_size	size of property nvlist
1979  */
1980 static int
1981 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1982 {
1983 	objset_t *os = NULL;
1984 	int error;
1985 
1986 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1987 		return (error);
1988 
1989 	error = zfs_ioc_objset_stats_impl(zc, os);
1990 
1991 	dmu_objset_rele(os, FTAG);
1992 
1993 	return (error);
1994 }
1995 
1996 /*
1997  * inputs:
1998  * zc_name		name of filesystem
1999  * zc_nvlist_dst_size	size of buffer for property nvlist
2000  *
2001  * outputs:
2002  * zc_nvlist_dst	received property nvlist
2003  * zc_nvlist_dst_size	size of received property nvlist
2004  *
2005  * Gets received properties (distinct from local properties on or after
2006  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2007  * local property values.
2008  */
2009 static int
2010 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2011 {
2012 	objset_t *os = NULL;
2013 	int error;
2014 	nvlist_t *nv;
2015 
2016 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
2017 		return (error);
2018 
2019 	/*
2020 	 * Without this check, we would return local property values if the
2021 	 * caller has not already received properties on or after
2022 	 * SPA_VERSION_RECVD_PROPS.
2023 	 */
2024 	if (!dsl_prop_get_hasrecvd(os)) {
2025 		dmu_objset_rele(os, FTAG);
2026 		return (ENOTSUP);
2027 	}
2028 
2029 	if (zc->zc_nvlist_dst != 0 &&
2030 	    (error = dsl_prop_get_received(os, &nv)) == 0) {
2031 		error = put_nvlist(zc, nv);
2032 		nvlist_free(nv);
2033 	}
2034 
2035 	dmu_objset_rele(os, FTAG);
2036 	return (error);
2037 }
2038 
2039 static int
2040 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2041 {
2042 	uint64_t value;
2043 	int error;
2044 
2045 	/*
2046 	 * zfs_get_zplprop() will either find a value or give us
2047 	 * the default value (if there is one).
2048 	 */
2049 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2050 		return (error);
2051 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2052 	return (0);
2053 }
2054 
2055 /*
2056  * inputs:
2057  * zc_name		name of filesystem
2058  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2059  *
2060  * outputs:
2061  * zc_nvlist_dst	zpl property nvlist
2062  * zc_nvlist_dst_size	size of zpl property nvlist
2063  */
2064 static int
2065 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2066 {
2067 	objset_t *os;
2068 	int err;
2069 
2070 	/* XXX reading without owning */
2071 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2072 		return (err);
2073 
2074 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2075 
2076 	/*
2077 	 * NB: nvl_add_zplprop() will read the objset contents,
2078 	 * which we aren't supposed to do with a DS_MODE_USER
2079 	 * hold, because it could be inconsistent.
2080 	 */
2081 	if (zc->zc_nvlist_dst != NULL &&
2082 	    !zc->zc_objset_stats.dds_inconsistent &&
2083 	    dmu_objset_type(os) == DMU_OST_ZFS) {
2084 		nvlist_t *nv;
2085 
2086 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2087 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2088 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2089 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2090 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2091 			err = put_nvlist(zc, nv);
2092 		nvlist_free(nv);
2093 	} else {
2094 		err = ENOENT;
2095 	}
2096 	dmu_objset_rele(os, FTAG);
2097 	return (err);
2098 }
2099 
2100 static boolean_t
2101 dataset_name_hidden(const char *name)
2102 {
2103 	/*
2104 	 * Skip over datasets that are not visible in this zone,
2105 	 * internal datasets (which have a $ in their name), and
2106 	 * temporary datasets (which have a % in their name).
2107 	 */
2108 	if (strchr(name, '$') != NULL)
2109 		return (B_TRUE);
2110 	if (strchr(name, '%') != NULL)
2111 		return (B_TRUE);
2112 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2113 		return (B_TRUE);
2114 	return (B_FALSE);
2115 }
2116 
2117 /*
2118  * inputs:
2119  * zc_name		name of filesystem
2120  * zc_cookie		zap cursor
2121  * zc_nvlist_dst_size	size of buffer for property nvlist
2122  *
2123  * outputs:
2124  * zc_name		name of next filesystem
2125  * zc_cookie		zap cursor
2126  * zc_objset_stats	stats
2127  * zc_nvlist_dst	property nvlist
2128  * zc_nvlist_dst_size	size of property nvlist
2129  */
2130 static int
2131 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2132 {
2133 	objset_t *os;
2134 	int error;
2135 	char *p;
2136 	size_t orig_len = strlen(zc->zc_name);
2137 
2138 top:
2139 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2140 		if (error == ENOENT)
2141 			error = ESRCH;
2142 		return (error);
2143 	}
2144 
2145 	p = strrchr(zc->zc_name, '/');
2146 	if (p == NULL || p[1] != '\0')
2147 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2148 	p = zc->zc_name + strlen(zc->zc_name);
2149 
2150 	/*
2151 	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
2152 	 * but is not declared void because its called by dmu_objset_find().
2153 	 */
2154 	if (zc->zc_cookie == 0) {
2155 		uint64_t cookie = 0;
2156 		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
2157 
2158 		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
2159 			if (!dataset_name_hidden(zc->zc_name))
2160 				(void) dmu_objset_prefetch(zc->zc_name, NULL);
2161 		}
2162 	}
2163 
2164 	do {
2165 		error = dmu_dir_list_next(os,
2166 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2167 		    NULL, &zc->zc_cookie);
2168 		if (error == ENOENT)
2169 			error = ESRCH;
2170 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2171 	dmu_objset_rele(os, FTAG);
2172 
2173 	/*
2174 	 * If it's an internal dataset (ie. with a '$' in its name),
2175 	 * don't try to get stats for it, otherwise we'll return ENOENT.
2176 	 */
2177 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2178 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2179 		if (error == ENOENT) {
2180 			/* We lost a race with destroy, get the next one. */
2181 			zc->zc_name[orig_len] = '\0';
2182 			goto top;
2183 		}
2184 	}
2185 	return (error);
2186 }
2187 
2188 /*
2189  * inputs:
2190  * zc_name		name of filesystem
2191  * zc_cookie		zap cursor
2192  * zc_nvlist_dst_size	size of buffer for property nvlist
2193  *
2194  * outputs:
2195  * zc_name		name of next snapshot
2196  * zc_objset_stats	stats
2197  * zc_nvlist_dst	property nvlist
2198  * zc_nvlist_dst_size	size of property nvlist
2199  */
2200 static int
2201 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2202 {
2203 	objset_t *os;
2204 	int error;
2205 
2206 top:
2207 	if (zc->zc_cookie == 0)
2208 		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
2209 		    NULL, DS_FIND_SNAPSHOTS);
2210 
2211 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2212 	if (error)
2213 		return (error == ENOENT ? ESRCH : error);
2214 
2215 	/*
2216 	 * A dataset name of maximum length cannot have any snapshots,
2217 	 * so exit immediately.
2218 	 */
2219 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2220 		dmu_objset_rele(os, FTAG);
2221 		return (ESRCH);
2222 	}
2223 
2224 	error = dmu_snapshot_list_next(os,
2225 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2226 	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2227 	    NULL);
2228 
2229 	if (error == 0) {
2230 		dsl_dataset_t *ds;
2231 		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2232 
2233 		/*
2234 		 * Since we probably don't have a hold on this snapshot,
2235 		 * it's possible that the objsetid could have been destroyed
2236 		 * and reused for a new objset. It's OK if this happens during
2237 		 * a zfs send operation, since the new createtxg will be
2238 		 * beyond the range we're interested in.
2239 		 */
2240 		rw_enter(&dp->dp_config_rwlock, RW_READER);
2241 		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2242 		rw_exit(&dp->dp_config_rwlock);
2243 		if (error) {
2244 			if (error == ENOENT) {
2245 				/* Racing with destroy, get the next one. */
2246 				*strchr(zc->zc_name, '@') = '\0';
2247 				dmu_objset_rele(os, FTAG);
2248 				goto top;
2249 			}
2250 		} else {
2251 			objset_t *ossnap;
2252 
2253 			error = dmu_objset_from_ds(ds, &ossnap);
2254 			if (error == 0)
2255 				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2256 			dsl_dataset_rele(ds, FTAG);
2257 		}
2258 	} else if (error == ENOENT) {
2259 		error = ESRCH;
2260 	}
2261 
2262 	dmu_objset_rele(os, FTAG);
2263 	/* if we failed, undo the @ that we tacked on to zc_name */
2264 	if (error)
2265 		*strchr(zc->zc_name, '@') = '\0';
2266 	return (error);
2267 }
2268 
2269 static int
2270 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2271 {
2272 	const char *propname = nvpair_name(pair);
2273 	uint64_t *valary;
2274 	unsigned int vallen;
2275 	const char *domain;
2276 	char *dash;
2277 	zfs_userquota_prop_t type;
2278 	uint64_t rid;
2279 	uint64_t quota;
2280 	zfsvfs_t *zfsvfs;
2281 	int err;
2282 
2283 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2284 		nvlist_t *attrs;
2285 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2286 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2287 		    &pair) != 0)
2288 			return (EINVAL);
2289 	}
2290 
2291 	/*
2292 	 * A correctly constructed propname is encoded as
2293 	 * userquota@<rid>-<domain>.
2294 	 */
2295 	if ((dash = strchr(propname, '-')) == NULL ||
2296 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2297 	    vallen != 3)
2298 		return (EINVAL);
2299 
2300 	domain = dash + 1;
2301 	type = valary[0];
2302 	rid = valary[1];
2303 	quota = valary[2];
2304 
2305 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2306 	if (err == 0) {
2307 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2308 		zfsvfs_rele(zfsvfs, FTAG);
2309 	}
2310 
2311 	return (err);
2312 }
2313 
2314 /*
2315  * If the named property is one that has a special function to set its value,
2316  * return 0 on success and a positive error code on failure; otherwise if it is
2317  * not one of the special properties handled by this function, return -1.
2318  *
2319  * XXX: It would be better for callers of the property interface if we handled
2320  * these special cases in dsl_prop.c (in the dsl layer).
2321  */
2322 static int
2323 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2324     nvpair_t *pair)
2325 {
2326 	const char *propname = nvpair_name(pair);
2327 	zfs_prop_t prop = zfs_name_to_prop(propname);
2328 	uint64_t intval;
2329 	int err;
2330 
2331 	if (prop == ZPROP_INVAL) {
2332 		if (zfs_prop_userquota(propname))
2333 			return (zfs_prop_set_userquota(dsname, pair));
2334 		return (-1);
2335 	}
2336 
2337 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2338 		nvlist_t *attrs;
2339 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2340 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2341 		    &pair) == 0);
2342 	}
2343 
2344 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2345 		return (-1);
2346 
2347 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2348 
2349 	switch (prop) {
2350 	case ZFS_PROP_QUOTA:
2351 		err = dsl_dir_set_quota(dsname, source, intval);
2352 		break;
2353 	case ZFS_PROP_REFQUOTA:
2354 		err = dsl_dataset_set_quota(dsname, source, intval);
2355 		break;
2356 	case ZFS_PROP_RESERVATION:
2357 		err = dsl_dir_set_reservation(dsname, source, intval);
2358 		break;
2359 	case ZFS_PROP_REFRESERVATION:
2360 		err = dsl_dataset_set_reservation(dsname, source, intval);
2361 		break;
2362 	case ZFS_PROP_VOLSIZE:
2363 		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
2364 		    intval);
2365 		break;
2366 	case ZFS_PROP_VERSION:
2367 	{
2368 		zfsvfs_t *zfsvfs;
2369 
2370 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2371 			break;
2372 
2373 		err = zfs_set_version(zfsvfs, intval);
2374 		zfsvfs_rele(zfsvfs, FTAG);
2375 
2376 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2377 			zfs_cmd_t *zc;
2378 
2379 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2380 			(void) strcpy(zc->zc_name, dsname);
2381 			(void) zfs_ioc_userspace_upgrade(zc);
2382 			kmem_free(zc, sizeof (zfs_cmd_t));
2383 		}
2384 		break;
2385 	}
2386 
2387 	default:
2388 		err = -1;
2389 	}
2390 
2391 	return (err);
2392 }
2393 
2394 /*
2395  * This function is best effort. If it fails to set any of the given properties,
2396  * it continues to set as many as it can and returns the last error
2397  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2398  * with the list of names of all the properties that failed along with the
2399  * corresponding error numbers.
2400  *
2401  * If every property is set successfully, zero is returned and errlist is not
2402  * modified.
2403  */
2404 int
2405 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2406     nvlist_t *errlist)
2407 {
2408 	nvpair_t *pair;
2409 	nvpair_t *propval;
2410 	int rv = 0;
2411 	uint64_t intval;
2412 	char *strval;
2413 	nvlist_t *genericnvl = fnvlist_alloc();
2414 	nvlist_t *retrynvl = fnvlist_alloc();
2415 
2416 retry:
2417 	pair = NULL;
2418 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2419 		const char *propname = nvpair_name(pair);
2420 		zfs_prop_t prop = zfs_name_to_prop(propname);
2421 		int err = 0;
2422 
2423 		/* decode the property value */
2424 		propval = pair;
2425 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2426 			nvlist_t *attrs;
2427 			attrs = fnvpair_value_nvlist(pair);
2428 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2429 			    &propval) != 0)
2430 				err = EINVAL;
2431 		}
2432 
2433 		/* Validate value type */
2434 		if (err == 0 && prop == ZPROP_INVAL) {
2435 			if (zfs_prop_user(propname)) {
2436 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2437 					err = EINVAL;
2438 			} else if (zfs_prop_userquota(propname)) {
2439 				if (nvpair_type(propval) !=
2440 				    DATA_TYPE_UINT64_ARRAY)
2441 					err = EINVAL;
2442 			} else {
2443 				err = EINVAL;
2444 			}
2445 		} else if (err == 0) {
2446 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2447 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2448 					err = EINVAL;
2449 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2450 				const char *unused;
2451 
2452 				intval = fnvpair_value_uint64(propval);
2453 
2454 				switch (zfs_prop_get_type(prop)) {
2455 				case PROP_TYPE_NUMBER:
2456 					break;
2457 				case PROP_TYPE_STRING:
2458 					err = EINVAL;
2459 					break;
2460 				case PROP_TYPE_INDEX:
2461 					if (zfs_prop_index_to_string(prop,
2462 					    intval, &unused) != 0)
2463 						err = EINVAL;
2464 					break;
2465 				default:
2466 					cmn_err(CE_PANIC,
2467 					    "unknown property type");
2468 				}
2469 			} else {
2470 				err = EINVAL;
2471 			}
2472 		}
2473 
2474 		/* Validate permissions */
2475 		if (err == 0)
2476 			err = zfs_check_settable(dsname, pair, CRED());
2477 
2478 		if (err == 0) {
2479 			err = zfs_prop_set_special(dsname, source, pair);
2480 			if (err == -1) {
2481 				/*
2482 				 * For better performance we build up a list of
2483 				 * properties to set in a single transaction.
2484 				 */
2485 				err = nvlist_add_nvpair(genericnvl, pair);
2486 			} else if (err != 0 && nvl != retrynvl) {
2487 				/*
2488 				 * This may be a spurious error caused by
2489 				 * receiving quota and reservation out of order.
2490 				 * Try again in a second pass.
2491 				 */
2492 				err = nvlist_add_nvpair(retrynvl, pair);
2493 			}
2494 		}
2495 
2496 		if (err != 0) {
2497 			if (errlist != NULL)
2498 				fnvlist_add_int32(errlist, propname, err);
2499 			rv = err;
2500 		}
2501 	}
2502 
2503 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2504 		nvl = retrynvl;
2505 		goto retry;
2506 	}
2507 
2508 	if (!nvlist_empty(genericnvl) &&
2509 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2510 		/*
2511 		 * If this fails, we still want to set as many properties as we
2512 		 * can, so try setting them individually.
2513 		 */
2514 		pair = NULL;
2515 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2516 			const char *propname = nvpair_name(pair);
2517 			int err = 0;
2518 
2519 			propval = pair;
2520 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2521 				nvlist_t *attrs;
2522 				attrs = fnvpair_value_nvlist(pair);
2523 				propval = fnvlist_lookup_nvpair(attrs,
2524 				    ZPROP_VALUE);
2525 			}
2526 
2527 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2528 				strval = fnvpair_value_string(propval);
2529 				err = dsl_prop_set(dsname, propname, source, 1,
2530 				    strlen(strval) + 1, strval);
2531 			} else {
2532 				intval = fnvpair_value_uint64(propval);
2533 				err = dsl_prop_set(dsname, propname, source, 8,
2534 				    1, &intval);
2535 			}
2536 
2537 			if (err != 0) {
2538 				if (errlist != NULL) {
2539 					fnvlist_add_int32(errlist, propname,
2540 					    err);
2541 				}
2542 				rv = err;
2543 			}
2544 		}
2545 	}
2546 	nvlist_free(genericnvl);
2547 	nvlist_free(retrynvl);
2548 
2549 	return (rv);
2550 }
2551 
2552 /*
2553  * Check that all the properties are valid user properties.
2554  */
2555 static int
2556 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2557 {
2558 	nvpair_t *pair = NULL;
2559 	int error = 0;
2560 
2561 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2562 		const char *propname = nvpair_name(pair);
2563 		char *valstr;
2564 
2565 		if (!zfs_prop_user(propname) ||
2566 		    nvpair_type(pair) != DATA_TYPE_STRING)
2567 			return (EINVAL);
2568 
2569 		if (error = zfs_secpolicy_write_perms(fsname,
2570 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2571 			return (error);
2572 
2573 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2574 			return (ENAMETOOLONG);
2575 
2576 		VERIFY(nvpair_value_string(pair, &valstr) == 0);
2577 		if (strlen(valstr) >= ZAP_MAXVALUELEN)
2578 			return (E2BIG);
2579 	}
2580 	return (0);
2581 }
2582 
2583 static void
2584 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2585 {
2586 	nvpair_t *pair;
2587 
2588 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2589 
2590 	pair = NULL;
2591 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2592 		if (nvlist_exists(skipped, nvpair_name(pair)))
2593 			continue;
2594 
2595 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2596 	}
2597 }
2598 
2599 static int
2600 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2601     nvlist_t *skipped)
2602 {
2603 	int err = 0;
2604 	nvlist_t *cleared_props = NULL;
2605 	props_skip(props, skipped, &cleared_props);
2606 	if (!nvlist_empty(cleared_props)) {
2607 		/*
2608 		 * Acts on local properties until the dataset has received
2609 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2610 		 */
2611 		zprop_source_t flags = (ZPROP_SRC_NONE |
2612 		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2613 		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2614 	}
2615 	nvlist_free(cleared_props);
2616 	return (err);
2617 }
2618 
2619 /*
2620  * inputs:
2621  * zc_name		name of filesystem
2622  * zc_value		name of property to set
2623  * zc_nvlist_src{_size}	nvlist of properties to apply
2624  * zc_cookie		received properties flag
2625  *
2626  * outputs:
2627  * zc_nvlist_dst{_size} error for each unapplied received property
2628  */
2629 static int
2630 zfs_ioc_set_prop(zfs_cmd_t *zc)
2631 {
2632 	nvlist_t *nvl;
2633 	boolean_t received = zc->zc_cookie;
2634 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2635 	    ZPROP_SRC_LOCAL);
2636 	nvlist_t *errors;
2637 	int error;
2638 
2639 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2640 	    zc->zc_iflags, &nvl)) != 0)
2641 		return (error);
2642 
2643 	if (received) {
2644 		nvlist_t *origprops;
2645 		objset_t *os;
2646 
2647 		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2648 			if (dsl_prop_get_received(os, &origprops) == 0) {
2649 				(void) clear_received_props(os,
2650 				    zc->zc_name, origprops, nvl);
2651 				nvlist_free(origprops);
2652 			}
2653 
2654 			dsl_prop_set_hasrecvd(os);
2655 			dmu_objset_rele(os, FTAG);
2656 		}
2657 	}
2658 
2659 	errors = fnvlist_alloc();
2660 	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2661 
2662 	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2663 		(void) put_nvlist(zc, errors);
2664 	}
2665 
2666 	nvlist_free(errors);
2667 	nvlist_free(nvl);
2668 	return (error);
2669 }
2670 
2671 /*
2672  * inputs:
2673  * zc_name		name of filesystem
2674  * zc_value		name of property to inherit
2675  * zc_cookie		revert to received value if TRUE
2676  *
2677  * outputs:		none
2678  */
2679 static int
2680 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2681 {
2682 	const char *propname = zc->zc_value;
2683 	zfs_prop_t prop = zfs_name_to_prop(propname);
2684 	boolean_t received = zc->zc_cookie;
2685 	zprop_source_t source = (received
2686 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2687 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2688 
2689 	if (received) {
2690 		nvlist_t *dummy;
2691 		nvpair_t *pair;
2692 		zprop_type_t type;
2693 		int err;
2694 
2695 		/*
2696 		 * zfs_prop_set_special() expects properties in the form of an
2697 		 * nvpair with type info.
2698 		 */
2699 		if (prop == ZPROP_INVAL) {
2700 			if (!zfs_prop_user(propname))
2701 				return (EINVAL);
2702 
2703 			type = PROP_TYPE_STRING;
2704 		} else if (prop == ZFS_PROP_VOLSIZE ||
2705 		    prop == ZFS_PROP_VERSION) {
2706 			return (EINVAL);
2707 		} else {
2708 			type = zfs_prop_get_type(prop);
2709 		}
2710 
2711 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2712 
2713 		switch (type) {
2714 		case PROP_TYPE_STRING:
2715 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2716 			break;
2717 		case PROP_TYPE_NUMBER:
2718 		case PROP_TYPE_INDEX:
2719 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2720 			break;
2721 		default:
2722 			nvlist_free(dummy);
2723 			return (EINVAL);
2724 		}
2725 
2726 		pair = nvlist_next_nvpair(dummy, NULL);
2727 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2728 		nvlist_free(dummy);
2729 		if (err != -1)
2730 			return (err); /* special property already handled */
2731 	} else {
2732 		/*
2733 		 * Only check this in the non-received case. We want to allow
2734 		 * 'inherit -S' to revert non-inheritable properties like quota
2735 		 * and reservation to the received or default values even though
2736 		 * they are not considered inheritable.
2737 		 */
2738 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2739 			return (EINVAL);
2740 	}
2741 
2742 	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2743 	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2744 }
2745 
2746 static int
2747 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2748 {
2749 	nvlist_t *props;
2750 	spa_t *spa;
2751 	int error;
2752 	nvpair_t *pair;
2753 
2754 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2755 	    zc->zc_iflags, &props))
2756 		return (error);
2757 
2758 	/*
2759 	 * If the only property is the configfile, then just do a spa_lookup()
2760 	 * to handle the faulted case.
2761 	 */
2762 	pair = nvlist_next_nvpair(props, NULL);
2763 	if (pair != NULL && strcmp(nvpair_name(pair),
2764 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2765 	    nvlist_next_nvpair(props, pair) == NULL) {
2766 		mutex_enter(&spa_namespace_lock);
2767 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2768 			spa_configfile_set(spa, props, B_FALSE);
2769 			spa_config_sync(spa, B_FALSE, B_TRUE);
2770 		}
2771 		mutex_exit(&spa_namespace_lock);
2772 		if (spa != NULL) {
2773 			nvlist_free(props);
2774 			return (0);
2775 		}
2776 	}
2777 
2778 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2779 		nvlist_free(props);
2780 		return (error);
2781 	}
2782 
2783 	error = spa_prop_set(spa, props);
2784 
2785 	nvlist_free(props);
2786 	spa_close(spa, FTAG);
2787 
2788 	return (error);
2789 }
2790 
2791 static int
2792 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2793 {
2794 	spa_t *spa;
2795 	int error;
2796 	nvlist_t *nvp = NULL;
2797 
2798 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2799 		/*
2800 		 * If the pool is faulted, there may be properties we can still
2801 		 * get (such as altroot and cachefile), so attempt to get them
2802 		 * anyway.
2803 		 */
2804 		mutex_enter(&spa_namespace_lock);
2805 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2806 			error = spa_prop_get(spa, &nvp);
2807 		mutex_exit(&spa_namespace_lock);
2808 	} else {
2809 		error = spa_prop_get(spa, &nvp);
2810 		spa_close(spa, FTAG);
2811 	}
2812 
2813 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2814 		error = put_nvlist(zc, nvp);
2815 	else
2816 		error = EFAULT;
2817 
2818 	nvlist_free(nvp);
2819 	return (error);
2820 }
2821 
2822 /*
2823  * inputs:
2824  * zc_name		name of filesystem
2825  * zc_nvlist_src{_size}	nvlist of delegated permissions
2826  * zc_perm_action	allow/unallow flag
2827  *
2828  * outputs:		none
2829  */
2830 static int
2831 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2832 {
2833 	int error;
2834 	nvlist_t *fsaclnv = NULL;
2835 
2836 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2837 	    zc->zc_iflags, &fsaclnv)) != 0)
2838 		return (error);
2839 
2840 	/*
2841 	 * Verify nvlist is constructed correctly
2842 	 */
2843 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2844 		nvlist_free(fsaclnv);
2845 		return (EINVAL);
2846 	}
2847 
2848 	/*
2849 	 * If we don't have PRIV_SYS_MOUNT, then validate
2850 	 * that user is allowed to hand out each permission in
2851 	 * the nvlist(s)
2852 	 */
2853 
2854 	error = secpolicy_zfs(CRED());
2855 	if (error) {
2856 		if (zc->zc_perm_action == B_FALSE) {
2857 			error = dsl_deleg_can_allow(zc->zc_name,
2858 			    fsaclnv, CRED());
2859 		} else {
2860 			error = dsl_deleg_can_unallow(zc->zc_name,
2861 			    fsaclnv, CRED());
2862 		}
2863 	}
2864 
2865 	if (error == 0)
2866 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2867 
2868 	nvlist_free(fsaclnv);
2869 	return (error);
2870 }
2871 
2872 /*
2873  * inputs:
2874  * zc_name		name of filesystem
2875  *
2876  * outputs:
2877  * zc_nvlist_src{_size}	nvlist of delegated permissions
2878  */
2879 static int
2880 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2881 {
2882 	nvlist_t *nvp;
2883 	int error;
2884 
2885 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2886 		error = put_nvlist(zc, nvp);
2887 		nvlist_free(nvp);
2888 	}
2889 
2890 	return (error);
2891 }
2892 
2893 /*
2894  * Search the vfs list for a specified resource.  Returns a pointer to it
2895  * or NULL if no suitable entry is found. The caller of this routine
2896  * is responsible for releasing the returned vfs pointer.
2897  */
2898 static vfs_t *
2899 zfs_get_vfs(const char *resource)
2900 {
2901 	struct vfs *vfsp;
2902 	struct vfs *vfs_found = NULL;
2903 
2904 	vfs_list_read_lock();
2905 	vfsp = rootvfs;
2906 	do {
2907 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2908 			VFS_HOLD(vfsp);
2909 			vfs_found = vfsp;
2910 			break;
2911 		}
2912 		vfsp = vfsp->vfs_next;
2913 	} while (vfsp != rootvfs);
2914 	vfs_list_unlock();
2915 	return (vfs_found);
2916 }
2917 
2918 /* ARGSUSED */
2919 static void
2920 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2921 {
2922 	zfs_creat_t *zct = arg;
2923 
2924 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2925 }
2926 
2927 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2928 
2929 /*
2930  * inputs:
2931  * createprops		list of properties requested by creator
2932  * default_zplver	zpl version to use if unspecified in createprops
2933  * fuids_ok		fuids allowed in this version of the spa?
2934  * os			parent objset pointer (NULL if root fs)
2935  *
2936  * outputs:
2937  * zplprops	values for the zplprops we attach to the master node object
2938  * is_ci	true if requested file system will be purely case-insensitive
2939  *
2940  * Determine the settings for utf8only, normalization and
2941  * casesensitivity.  Specific values may have been requested by the
2942  * creator and/or we can inherit values from the parent dataset.  If
2943  * the file system is of too early a vintage, a creator can not
2944  * request settings for these properties, even if the requested
2945  * setting is the default value.  We don't actually want to create dsl
2946  * properties for these, so remove them from the source nvlist after
2947  * processing.
2948  */
2949 static int
2950 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2951     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2952     nvlist_t *zplprops, boolean_t *is_ci)
2953 {
2954 	uint64_t sense = ZFS_PROP_UNDEFINED;
2955 	uint64_t norm = ZFS_PROP_UNDEFINED;
2956 	uint64_t u8 = ZFS_PROP_UNDEFINED;
2957 
2958 	ASSERT(zplprops != NULL);
2959 
2960 	/*
2961 	 * Pull out creator prop choices, if any.
2962 	 */
2963 	if (createprops) {
2964 		(void) nvlist_lookup_uint64(createprops,
2965 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2966 		(void) nvlist_lookup_uint64(createprops,
2967 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2968 		(void) nvlist_remove_all(createprops,
2969 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2970 		(void) nvlist_lookup_uint64(createprops,
2971 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2972 		(void) nvlist_remove_all(createprops,
2973 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2974 		(void) nvlist_lookup_uint64(createprops,
2975 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2976 		(void) nvlist_remove_all(createprops,
2977 		    zfs_prop_to_name(ZFS_PROP_CASE));
2978 	}
2979 
2980 	/*
2981 	 * If the zpl version requested is whacky or the file system
2982 	 * or pool is version is too "young" to support normalization
2983 	 * and the creator tried to set a value for one of the props,
2984 	 * error out.
2985 	 */
2986 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2987 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2988 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2989 	    (zplver < ZPL_VERSION_NORMALIZATION &&
2990 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2991 	    sense != ZFS_PROP_UNDEFINED)))
2992 		return (ENOTSUP);
2993 
2994 	/*
2995 	 * Put the version in the zplprops
2996 	 */
2997 	VERIFY(nvlist_add_uint64(zplprops,
2998 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2999 
3000 	if (norm == ZFS_PROP_UNDEFINED)
3001 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3002 	VERIFY(nvlist_add_uint64(zplprops,
3003 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3004 
3005 	/*
3006 	 * If we're normalizing, names must always be valid UTF-8 strings.
3007 	 */
3008 	if (norm)
3009 		u8 = 1;
3010 	if (u8 == ZFS_PROP_UNDEFINED)
3011 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3012 	VERIFY(nvlist_add_uint64(zplprops,
3013 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3014 
3015 	if (sense == ZFS_PROP_UNDEFINED)
3016 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3017 	VERIFY(nvlist_add_uint64(zplprops,
3018 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3019 
3020 	if (is_ci)
3021 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3022 
3023 	return (0);
3024 }
3025 
3026 static int
3027 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3028     nvlist_t *zplprops, boolean_t *is_ci)
3029 {
3030 	boolean_t fuids_ok, sa_ok;
3031 	uint64_t zplver = ZPL_VERSION;
3032 	objset_t *os = NULL;
3033 	char parentname[MAXNAMELEN];
3034 	char *cp;
3035 	spa_t *spa;
3036 	uint64_t spa_vers;
3037 	int error;
3038 
3039 	(void) strlcpy(parentname, dataset, sizeof (parentname));
3040 	cp = strrchr(parentname, '/');
3041 	ASSERT(cp != NULL);
3042 	cp[0] = '\0';
3043 
3044 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3045 		return (error);
3046 
3047 	spa_vers = spa_version(spa);
3048 	spa_close(spa, FTAG);
3049 
3050 	zplver = zfs_zpl_version_map(spa_vers);
3051 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3052 	sa_ok = (zplver >= ZPL_VERSION_SA);
3053 
3054 	/*
3055 	 * Open parent object set so we can inherit zplprop values.
3056 	 */
3057 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3058 		return (error);
3059 
3060 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3061 	    zplprops, is_ci);
3062 	dmu_objset_rele(os, FTAG);
3063 	return (error);
3064 }
3065 
3066 static int
3067 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3068     nvlist_t *zplprops, boolean_t *is_ci)
3069 {
3070 	boolean_t fuids_ok;
3071 	boolean_t sa_ok;
3072 	uint64_t zplver = ZPL_VERSION;
3073 	int error;
3074 
3075 	zplver = zfs_zpl_version_map(spa_vers);
3076 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3077 	sa_ok = (zplver >= ZPL_VERSION_SA);
3078 
3079 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3080 	    createprops, zplprops, is_ci);
3081 	return (error);
3082 }
3083 
3084 /*
3085  * innvl: {
3086  *     "type" -> dmu_objset_type_t (int32)
3087  *     (optional) "props" -> { prop -> value }
3088  * }
3089  *
3090  * outnvl: propname -> error code (int32)
3091  */
3092 static int
3093 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3094 {
3095 	int error = 0;
3096 	zfs_creat_t zct = { 0 };
3097 	nvlist_t *nvprops = NULL;
3098 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3099 	int32_t type32;
3100 	dmu_objset_type_t type;
3101 	boolean_t is_insensitive = B_FALSE;
3102 
3103 	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3104 		return (EINVAL);
3105 	type = type32;
3106 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3107 
3108 	switch (type) {
3109 	case DMU_OST_ZFS:
3110 		cbfunc = zfs_create_cb;
3111 		break;
3112 
3113 	case DMU_OST_ZVOL:
3114 		cbfunc = zvol_create_cb;
3115 		break;
3116 
3117 	default:
3118 		cbfunc = NULL;
3119 		break;
3120 	}
3121 	if (strchr(fsname, '@') ||
3122 	    strchr(fsname, '%'))
3123 		return (EINVAL);
3124 
3125 	zct.zct_props = nvprops;
3126 
3127 	if (cbfunc == NULL)
3128 		return (EINVAL);
3129 
3130 	if (type == DMU_OST_ZVOL) {
3131 		uint64_t volsize, volblocksize;
3132 
3133 		if (nvprops == NULL)
3134 			return (EINVAL);
3135 		if (nvlist_lookup_uint64(nvprops,
3136 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3137 			return (EINVAL);
3138 
3139 		if ((error = nvlist_lookup_uint64(nvprops,
3140 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3141 		    &volblocksize)) != 0 && error != ENOENT)
3142 			return (EINVAL);
3143 
3144 		if (error != 0)
3145 			volblocksize = zfs_prop_default_numeric(
3146 			    ZFS_PROP_VOLBLOCKSIZE);
3147 
3148 		if ((error = zvol_check_volblocksize(
3149 		    volblocksize)) != 0 ||
3150 		    (error = zvol_check_volsize(volsize,
3151 		    volblocksize)) != 0)
3152 			return (error);
3153 	} else if (type == DMU_OST_ZFS) {
3154 		int error;
3155 
3156 		/*
3157 		 * We have to have normalization and
3158 		 * case-folding flags correct when we do the
3159 		 * file system creation, so go figure them out
3160 		 * now.
3161 		 */
3162 		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3163 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3164 		error = zfs_fill_zplprops(fsname, nvprops,
3165 		    zct.zct_zplprops, &is_insensitive);
3166 		if (error != 0) {
3167 			nvlist_free(zct.zct_zplprops);
3168 			return (error);
3169 		}
3170 	}
3171 
3172 	error = dmu_objset_create(fsname, type,
3173 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3174 	nvlist_free(zct.zct_zplprops);
3175 
3176 	/*
3177 	 * It would be nice to do this atomically.
3178 	 */
3179 	if (error == 0) {
3180 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3181 		    nvprops, outnvl);
3182 		if (error != 0)
3183 			(void) dmu_objset_destroy(fsname, B_FALSE);
3184 	}
3185 	return (error);
3186 }
3187 
3188 /*
3189  * innvl: {
3190  *     "origin" -> name of origin snapshot
3191  *     (optional) "props" -> { prop -> value }
3192  * }
3193  *
3194  * outnvl: propname -> error code (int32)
3195  */
3196 static int
3197 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3198 {
3199 	int error = 0;
3200 	nvlist_t *nvprops = NULL;
3201 	char *origin_name;
3202 	dsl_dataset_t *origin;
3203 
3204 	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3205 		return (EINVAL);
3206 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3207 
3208 	if (strchr(fsname, '@') ||
3209 	    strchr(fsname, '%'))
3210 		return (EINVAL);
3211 
3212 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3213 		return (EINVAL);
3214 
3215 	error = dsl_dataset_hold(origin_name, FTAG, &origin);
3216 	if (error)
3217 		return (error);
3218 
3219 	error = dmu_objset_clone(fsname, origin, 0);
3220 	dsl_dataset_rele(origin, FTAG);
3221 	if (error)
3222 		return (error);
3223 
3224 	/*
3225 	 * It would be nice to do this atomically.
3226 	 */
3227 	if (error == 0) {
3228 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3229 		    nvprops, outnvl);
3230 		if (error != 0)
3231 			(void) dmu_objset_destroy(fsname, B_FALSE);
3232 	}
3233 	return (error);
3234 }
3235 
3236 /*
3237  * innvl: {
3238  *     "snaps" -> { snapshot1, snapshot2 }
3239  *     (optional) "props" -> { prop -> value (string) }
3240  * }
3241  *
3242  * outnvl: snapshot -> error code (int32)
3243  *
3244  */
3245 static int
3246 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3247 {
3248 	nvlist_t *snaps;
3249 	nvlist_t *props = NULL;
3250 	int error, poollen;
3251 	nvpair_t *pair;
3252 
3253 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3254 	if ((error = zfs_check_userprops(poolname, props)) != 0)
3255 		return (error);
3256 
3257 	if (!nvlist_empty(props) &&
3258 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3259 		return (ENOTSUP);
3260 
3261 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3262 		return (EINVAL);
3263 	poollen = strlen(poolname);
3264 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3265 	    pair = nvlist_next_nvpair(snaps, pair)) {
3266 		const char *name = nvpair_name(pair);
3267 		const char *cp = strchr(name, '@');
3268 
3269 		/*
3270 		 * The snap name must contain an @, and the part after it must
3271 		 * contain only valid characters.
3272 		 */
3273 		if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0)
3274 			return (EINVAL);
3275 
3276 		/*
3277 		 * The snap must be in the specified pool.
3278 		 */
3279 		if (strncmp(name, poolname, poollen) != 0 ||
3280 		    (name[poollen] != '/' && name[poollen] != '@'))
3281 			return (EXDEV);
3282 
3283 		/* This must be the only snap of this fs. */
3284 		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3285 		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3286 			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3287 			    == 0) {
3288 				return (EXDEV);
3289 			}
3290 		}
3291 	}
3292 
3293 	error = dmu_objset_snapshot(snaps, props, outnvl);
3294 	return (error);
3295 }
3296 
3297 /*
3298  * innvl: "message" -> string
3299  */
3300 /* ARGSUSED */
3301 static int
3302 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3303 {
3304 	char *message;
3305 	spa_t *spa;
3306 	int error;
3307 	char *poolname;
3308 
3309 	/*
3310 	 * The poolname in the ioctl is not set, we get it from the TSD,
3311 	 * which was set at the end of the last successful ioctl that allows
3312 	 * logging.  The secpolicy func already checked that it is set.
3313 	 * Only one log ioctl is allowed after each successful ioctl, so
3314 	 * we clear the TSD here.
3315 	 */
3316 	poolname = tsd_get(zfs_allow_log_key);
3317 	(void) tsd_set(zfs_allow_log_key, NULL);
3318 	error = spa_open(poolname, &spa, FTAG);
3319 	strfree(poolname);
3320 	if (error != 0)
3321 		return (error);
3322 
3323 	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3324 		spa_close(spa, FTAG);
3325 		return (EINVAL);
3326 	}
3327 
3328 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3329 		spa_close(spa, FTAG);
3330 		return (ENOTSUP);
3331 	}
3332 
3333 	error = spa_history_log(spa, message);
3334 	spa_close(spa, FTAG);
3335 	return (error);
3336 }
3337 
3338 /* ARGSUSED */
3339 int
3340 zfs_unmount_snap(const char *name, void *arg)
3341 {
3342 	vfs_t *vfsp;
3343 	int err;
3344 
3345 	if (strchr(name, '@') == NULL)
3346 		return (0);
3347 
3348 	vfsp = zfs_get_vfs(name);
3349 	if (vfsp == NULL)
3350 		return (0);
3351 
3352 	if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
3353 		VFS_RELE(vfsp);
3354 		return (err);
3355 	}
3356 	VFS_RELE(vfsp);
3357 
3358 	/*
3359 	 * Always force the unmount for snapshots.
3360 	 */
3361 	return (dounmount(vfsp, MS_FORCE, kcred));
3362 }
3363 
3364 /*
3365  * innvl: {
3366  *     "snaps" -> { snapshot1, snapshot2 }
3367  *     (optional boolean) "defer"
3368  * }
3369  *
3370  * outnvl: snapshot -> error code (int32)
3371  *
3372  */
3373 static int
3374 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3375 {
3376 	int poollen;
3377 	nvlist_t *snaps;
3378 	nvpair_t *pair;
3379 	boolean_t defer;
3380 
3381 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3382 		return (EINVAL);
3383 	defer = nvlist_exists(innvl, "defer");
3384 
3385 	poollen = strlen(poolname);
3386 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3387 	    pair = nvlist_next_nvpair(snaps, pair)) {
3388 		const char *name = nvpair_name(pair);
3389 
3390 		/*
3391 		 * The snap must be in the specified pool.
3392 		 */
3393 		if (strncmp(name, poolname, poollen) != 0 ||
3394 		    (name[poollen] != '/' && name[poollen] != '@'))
3395 			return (EXDEV);
3396 
3397 		/*
3398 		 * Ignore failures to unmount; dmu_snapshots_destroy_nvl()
3399 		 * will deal with this gracefully (by filling in outnvl).
3400 		 */
3401 		(void) zfs_unmount_snap(name, NULL);
3402 	}
3403 
3404 	return (dmu_snapshots_destroy_nvl(snaps, defer, outnvl));
3405 }
3406 
3407 /*
3408  * inputs:
3409  * zc_name		name of dataset to destroy
3410  * zc_objset_type	type of objset
3411  * zc_defer_destroy	mark for deferred destroy
3412  *
3413  * outputs:		none
3414  */
3415 static int
3416 zfs_ioc_destroy(zfs_cmd_t *zc)
3417 {
3418 	int err;
3419 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
3420 		err = zfs_unmount_snap(zc->zc_name, NULL);
3421 		if (err)
3422 			return (err);
3423 	}
3424 
3425 	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
3426 	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3427 		(void) zvol_remove_minor(zc->zc_name);
3428 	return (err);
3429 }
3430 
3431 /*
3432  * inputs:
3433  * zc_name	name of dataset to rollback (to most recent snapshot)
3434  *
3435  * outputs:	none
3436  */
3437 static int
3438 zfs_ioc_rollback(zfs_cmd_t *zc)
3439 {
3440 	dsl_dataset_t *ds, *clone;
3441 	int error;
3442 	zfsvfs_t *zfsvfs;
3443 	char *clone_name;
3444 
3445 	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
3446 	if (error)
3447 		return (error);
3448 
3449 	/* must not be a snapshot */
3450 	if (dsl_dataset_is_snapshot(ds)) {
3451 		dsl_dataset_rele(ds, FTAG);
3452 		return (EINVAL);
3453 	}
3454 
3455 	/* must have a most recent snapshot */
3456 	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
3457 		dsl_dataset_rele(ds, FTAG);
3458 		return (EINVAL);
3459 	}
3460 
3461 	/*
3462 	 * Create clone of most recent snapshot.
3463 	 */
3464 	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
3465 	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
3466 	if (error)
3467 		goto out;
3468 
3469 	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
3470 	if (error)
3471 		goto out;
3472 
3473 	/*
3474 	 * Do clone swap.
3475 	 */
3476 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3477 		error = zfs_suspend_fs(zfsvfs);
3478 		if (error == 0) {
3479 			int resume_err;
3480 
3481 			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3482 				error = dsl_dataset_clone_swap(clone, ds,
3483 				    B_TRUE);
3484 				dsl_dataset_disown(ds, FTAG);
3485 				ds = NULL;
3486 			} else {
3487 				error = EBUSY;
3488 			}
3489 			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
3490 			error = error ? error : resume_err;
3491 		}
3492 		VFS_RELE(zfsvfs->z_vfs);
3493 	} else {
3494 		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3495 			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
3496 			dsl_dataset_disown(ds, FTAG);
3497 			ds = NULL;
3498 		} else {
3499 			error = EBUSY;
3500 		}
3501 	}
3502 
3503 	/*
3504 	 * Destroy clone (which also closes it).
3505 	 */
3506 	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
3507 
3508 out:
3509 	strfree(clone_name);
3510 	if (ds)
3511 		dsl_dataset_rele(ds, FTAG);
3512 	return (error);
3513 }
3514 
3515 /*
3516  * inputs:
3517  * zc_name	old name of dataset
3518  * zc_value	new name of dataset
3519  * zc_cookie	recursive flag (only valid for snapshots)
3520  *
3521  * outputs:	none
3522  */
3523 static int
3524 zfs_ioc_rename(zfs_cmd_t *zc)
3525 {
3526 	boolean_t recursive = zc->zc_cookie & 1;
3527 
3528 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3529 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3530 	    strchr(zc->zc_value, '%'))
3531 		return (EINVAL);
3532 
3533 	/*
3534 	 * Unmount snapshot unless we're doing a recursive rename,
3535 	 * in which case the dataset code figures out which snapshots
3536 	 * to unmount.
3537 	 */
3538 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3539 	    zc->zc_objset_type == DMU_OST_ZFS) {
3540 		int err = zfs_unmount_snap(zc->zc_name, NULL);
3541 		if (err)
3542 			return (err);
3543 	}
3544 	if (zc->zc_objset_type == DMU_OST_ZVOL)
3545 		(void) zvol_remove_minor(zc->zc_name);
3546 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3547 }
3548 
3549 static int
3550 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3551 {
3552 	const char *propname = nvpair_name(pair);
3553 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3554 	zfs_prop_t prop = zfs_name_to_prop(propname);
3555 	uint64_t intval;
3556 	int err;
3557 
3558 	if (prop == ZPROP_INVAL) {
3559 		if (zfs_prop_user(propname)) {
3560 			if (err = zfs_secpolicy_write_perms(dsname,
3561 			    ZFS_DELEG_PERM_USERPROP, cr))
3562 				return (err);
3563 			return (0);
3564 		}
3565 
3566 		if (!issnap && zfs_prop_userquota(propname)) {
3567 			const char *perm = NULL;
3568 			const char *uq_prefix =
3569 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3570 			const char *gq_prefix =
3571 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3572 
3573 			if (strncmp(propname, uq_prefix,
3574 			    strlen(uq_prefix)) == 0) {
3575 				perm = ZFS_DELEG_PERM_USERQUOTA;
3576 			} else if (strncmp(propname, gq_prefix,
3577 			    strlen(gq_prefix)) == 0) {
3578 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3579 			} else {
3580 				/* USERUSED and GROUPUSED are read-only */
3581 				return (EINVAL);
3582 			}
3583 
3584 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3585 				return (err);
3586 			return (0);
3587 		}
3588 
3589 		return (EINVAL);
3590 	}
3591 
3592 	if (issnap)
3593 		return (EINVAL);
3594 
3595 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3596 		/*
3597 		 * dsl_prop_get_all_impl() returns properties in this
3598 		 * format.
3599 		 */
3600 		nvlist_t *attrs;
3601 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3602 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3603 		    &pair) == 0);
3604 	}
3605 
3606 	/*
3607 	 * Check that this value is valid for this pool version
3608 	 */
3609 	switch (prop) {
3610 	case ZFS_PROP_COMPRESSION:
3611 		/*
3612 		 * If the user specified gzip compression, make sure
3613 		 * the SPA supports it. We ignore any errors here since
3614 		 * we'll catch them later.
3615 		 */
3616 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3617 		    nvpair_value_uint64(pair, &intval) == 0) {
3618 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3619 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3620 			    zfs_earlier_version(dsname,
3621 			    SPA_VERSION_GZIP_COMPRESSION)) {
3622 				return (ENOTSUP);
3623 			}
3624 
3625 			if (intval == ZIO_COMPRESS_ZLE &&
3626 			    zfs_earlier_version(dsname,
3627 			    SPA_VERSION_ZLE_COMPRESSION))
3628 				return (ENOTSUP);
3629 
3630 			/*
3631 			 * If this is a bootable dataset then
3632 			 * verify that the compression algorithm
3633 			 * is supported for booting. We must return
3634 			 * something other than ENOTSUP since it
3635 			 * implies a downrev pool version.
3636 			 */
3637 			if (zfs_is_bootfs(dsname) &&
3638 			    !BOOTFS_COMPRESS_VALID(intval)) {
3639 				return (ERANGE);
3640 			}
3641 		}
3642 		break;
3643 
3644 	case ZFS_PROP_COPIES:
3645 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3646 			return (ENOTSUP);
3647 		break;
3648 
3649 	case ZFS_PROP_DEDUP:
3650 		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3651 			return (ENOTSUP);
3652 		break;
3653 
3654 	case ZFS_PROP_SHARESMB:
3655 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3656 			return (ENOTSUP);
3657 		break;
3658 
3659 	case ZFS_PROP_ACLINHERIT:
3660 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3661 		    nvpair_value_uint64(pair, &intval) == 0) {
3662 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3663 			    zfs_earlier_version(dsname,
3664 			    SPA_VERSION_PASSTHROUGH_X))
3665 				return (ENOTSUP);
3666 		}
3667 		break;
3668 	}
3669 
3670 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3671 }
3672 
3673 /*
3674  * Removes properties from the given props list that fail permission checks
3675  * needed to clear them and to restore them in case of a receive error. For each
3676  * property, make sure we have both set and inherit permissions.
3677  *
3678  * Returns the first error encountered if any permission checks fail. If the
3679  * caller provides a non-NULL errlist, it also gives the complete list of names
3680  * of all the properties that failed a permission check along with the
3681  * corresponding error numbers. The caller is responsible for freeing the
3682  * returned errlist.
3683  *
3684  * If every property checks out successfully, zero is returned and the list
3685  * pointed at by errlist is NULL.
3686  */
3687 static int
3688 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3689 {
3690 	zfs_cmd_t *zc;
3691 	nvpair_t *pair, *next_pair;
3692 	nvlist_t *errors;
3693 	int err, rv = 0;
3694 
3695 	if (props == NULL)
3696 		return (0);
3697 
3698 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3699 
3700 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3701 	(void) strcpy(zc->zc_name, dataset);
3702 	pair = nvlist_next_nvpair(props, NULL);
3703 	while (pair != NULL) {
3704 		next_pair = nvlist_next_nvpair(props, pair);
3705 
3706 		(void) strcpy(zc->zc_value, nvpair_name(pair));
3707 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3708 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
3709 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3710 			VERIFY(nvlist_add_int32(errors,
3711 			    zc->zc_value, err) == 0);
3712 		}
3713 		pair = next_pair;
3714 	}
3715 	kmem_free(zc, sizeof (zfs_cmd_t));
3716 
3717 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3718 		nvlist_free(errors);
3719 		errors = NULL;
3720 	} else {
3721 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
3722 	}
3723 
3724 	if (errlist == NULL)
3725 		nvlist_free(errors);
3726 	else
3727 		*errlist = errors;
3728 
3729 	return (rv);
3730 }
3731 
3732 static boolean_t
3733 propval_equals(nvpair_t *p1, nvpair_t *p2)
3734 {
3735 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3736 		/* dsl_prop_get_all_impl() format */
3737 		nvlist_t *attrs;
3738 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3739 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3740 		    &p1) == 0);
3741 	}
3742 
3743 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3744 		nvlist_t *attrs;
3745 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3746 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3747 		    &p2) == 0);
3748 	}
3749 
3750 	if (nvpair_type(p1) != nvpair_type(p2))
3751 		return (B_FALSE);
3752 
3753 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
3754 		char *valstr1, *valstr2;
3755 
3756 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3757 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3758 		return (strcmp(valstr1, valstr2) == 0);
3759 	} else {
3760 		uint64_t intval1, intval2;
3761 
3762 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3763 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3764 		return (intval1 == intval2);
3765 	}
3766 }
3767 
3768 /*
3769  * Remove properties from props if they are not going to change (as determined
3770  * by comparison with origprops). Remove them from origprops as well, since we
3771  * do not need to clear or restore properties that won't change.
3772  */
3773 static void
3774 props_reduce(nvlist_t *props, nvlist_t *origprops)
3775 {
3776 	nvpair_t *pair, *next_pair;
3777 
3778 	if (origprops == NULL)
3779 		return; /* all props need to be received */
3780 
3781 	pair = nvlist_next_nvpair(props, NULL);
3782 	while (pair != NULL) {
3783 		const char *propname = nvpair_name(pair);
3784 		nvpair_t *match;
3785 
3786 		next_pair = nvlist_next_nvpair(props, pair);
3787 
3788 		if ((nvlist_lookup_nvpair(origprops, propname,
3789 		    &match) != 0) || !propval_equals(pair, match))
3790 			goto next; /* need to set received value */
3791 
3792 		/* don't clear the existing received value */
3793 		(void) nvlist_remove_nvpair(origprops, match);
3794 		/* don't bother receiving the property */
3795 		(void) nvlist_remove_nvpair(props, pair);
3796 next:
3797 		pair = next_pair;
3798 	}
3799 }
3800 
/*
 * DEBUG-only fault injection for zfs_ioc_recv(): when this flag is set,
 * zfs_ioc_recv() clears it and forces its error to 1 after the stream has
 * been processed, so that the restore-on-error code that follows runs
 * (when properties were supplied).
 */
3801 #ifdef	DEBUG
3802 static boolean_t zfs_ioc_recv_inject_err;
3803 #endif
3804 
/*
 * Receive a send stream from a file descriptor into the snapshot named by
 * zc_value.  Received properties, when supplied, are applied before the
 * stream is read; if the receive then fails, the previously received
 * properties that were stashed beforehand are put back.
 */
3805 /*
3806  * inputs:
3807  * zc_name		name of containing filesystem
3808  * zc_nvlist_src{_size}	nvlist of properties to apply
3809  * zc_value		name of snapshot to create
3810  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
3811  * zc_cookie		file descriptor to recv from
3812  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
3813  * zc_guid		force flag
3814  * zc_cleanup_fd	cleanup-on-exit file descriptor
3815  * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
3816  *
3817  * outputs:
3818  * zc_cookie		number of bytes read
3819  * zc_nvlist_dst{_size} error for each unapplied received property
3820  * zc_obj		zprop_errflags_t
3821  * zc_action_handle	handle for this guid/ds mapping
3822  */
3823 static int
3824 zfs_ioc_recv(zfs_cmd_t *zc)
3825 {
3826 	file_t *fp;
3827 	objset_t *os;
3828 	dmu_recv_cookie_t drc;
3829 	boolean_t force = (boolean_t)zc->zc_guid;
3830 	int fd;
3831 	int error = 0;
3832 	int props_error = 0;
3833 	nvlist_t *errors;
3834 	offset_t off;
3835 	nvlist_t *props = NULL; /* sent properties */
3836 	nvlist_t *origprops = NULL; /* existing properties */
3837 	objset_t *origin = NULL;
3838 	char *tosnap;
3839 	char tofs[ZFS_MAXNAMELEN];
3840 	boolean_t first_recvd_props = B_FALSE;
3841 
	/*
	 * zc_value must pass dataset_namecheck(), contain an '@' and
	 * contain no '%'.
	 */
3842 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3843 	    strchr(zc->zc_value, '@') == NULL ||
3844 	    strchr(zc->zc_value, '%'))
3845 		return (EINVAL);
3846 
	/*
	 * Split a private copy of "fs@snap" at the '@': tofs keeps the
	 * filesystem part and tosnap points at the snapshot part.
	 */
3847 	(void) strcpy(tofs, zc->zc_value);
3848 	tosnap = strchr(tofs, '@');
3849 	*tosnap++ = '\0';
3850 
	/* props stays NULL when no source nvlist was passed in. */
3851 	if (zc->zc_nvlist_src != NULL &&
3852 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3853 	    zc->zc_iflags, &props)) != 0)
3854 		return (error);
3855 
	/* The file obtained with getf() is released with releasef() at "out". */
3856 	fd = zc->zc_cookie;
3857 	fp = getf(fd);
3858 	if (fp == NULL) {
3859 		nvlist_free(props);
3860 		return (EBADF);
3861 	}
3862 
	/* Collects per-property errors; copied out to zc_nvlist_dst below. */
3863 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3864 
	/*
	 * This step only runs when properties were sent and tofs can be
	 * held; a failed hold is not treated as an error here.
	 */
3865 	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
3866 		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
3867 		    !dsl_prop_get_hasrecvd(os)) {
3868 			first_recvd_props = B_TRUE;
3869 		}
3870 
3871 		/*
3872 		 * If new received properties are supplied, they are to
3873 		 * completely replace the existing received properties, so stash
3874 		 * away the existing ones.
3875 		 */
3876 		if (dsl_prop_get_received(os, &origprops) == 0) {
3877 			nvlist_t *errlist = NULL;
3878 			/*
3879 			 * Don't bother writing a property if its value won't
3880 			 * change (and avoid the unnecessary security checks).
3881 			 *
3882 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
3883 			 * special case where we blow away all local properties
3884 			 * regardless.
3885 			 */
3886 			if (!first_recvd_props)
3887 				props_reduce(props, origprops);
			/*
			 * Properties we may not clear are removed from
			 * origprops and reported through errors.
			 */
3888 			if (zfs_check_clearable(tofs, origprops,
3889 			    &errlist) != 0)
3890 				(void) nvlist_merge(errors, errlist, 0);
3891 			nvlist_free(errlist);
3892 		}
3893 
3894 		dmu_objset_rele(os, FTAG);
3895 	}
3896 
	/* A non-empty zc_string names the clone origin to hold. */
3897 	if (zc->zc_string[0]) {
3898 		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
3899 		if (error)
3900 			goto out;
3901 	}
3902 
	/* The origin hold is dropped whether or not dmu_recv_begin() succeeds. */
3903 	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
3904 	    &zc->zc_begin_record, force, origin, &drc);
3905 	if (origin)
3906 		dmu_objset_rele(origin, FTAG);
3907 	if (error)
3908 		goto out;
3909 
3910 	/*
3911 	 * Set properties before we receive the stream so that they are applied
3912 	 * to the new data. Note that we must call dmu_recv_stream() if
3913 	 * dmu_recv_begin() succeeds.
3914 	 */
3915 	if (props) {
3916 		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
3917 			if (drc.drc_newfs) {
3918 				if (spa_version(os->os_spa) >=
3919 				    SPA_VERSION_RECVD_PROPS)
3920 					first_recvd_props = B_TRUE;
3921 			} else if (origprops != NULL) {
3922 				if (clear_received_props(os, tofs, origprops,
3923 				    first_recvd_props ? NULL : props) != 0)
3924 					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3925 			} else {
				/* Existing fs, but the old props were never stashed. */
3926 				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3927 			}
3928 			dsl_prop_set_hasrecvd(os);
3929 		} else if (!drc.drc_newfs) {
3930 			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3931 		}
3932 
		/* Failures to set individual properties are recorded in errors. */
3933 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
3934 		    props, errors);
3935 	}
3936 
	/*
	 * Copy the property errors out now.  A failure is only remembered
	 * in props_error; it is returned at "out" if nothing else failed.
	 */
3937 	if (zc->zc_nvlist_dst_size != 0 &&
3938 	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
3939 	    put_nvlist(zc, errors) != 0)) {
3940 		/*
3941 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
3942 		 * size or supplied an invalid address.
3943 		 */
3944 		props_error = EINVAL;
3945 	}
3946 
	/*
	 * dmu_recv_stream() works on a local copy of the file offset; the
	 * file's own offset is only updated via VOP_SEEK() further down.
	 */
3947 	off = fp->f_offset;
3948 	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
3949 	    &zc->zc_action_handle);
3950 
3951 	if (error == 0) {
3952 		zfsvfs_t *zfsvfs = NULL;
3953 
3954 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
3955 			/* online recv */
3956 			int end_err;
3957 
3958 			error = zfs_suspend_fs(zfsvfs);
3959 			/*
3960 			 * If the suspend fails, then the recv_end will
3961 			 * likely also fail, and clean up after itself.
3962 			 */
3963 			end_err = dmu_recv_end(&drc);
			/* Resume only if the suspend worked; first error wins. */
3964 			if (error == 0)
3965 				error = zfs_resume_fs(zfsvfs, tofs);
3966 			error = error ? error : end_err;
3967 			VFS_RELE(zfsvfs->z_vfs);
3968 		} else {
3969 			error = dmu_recv_end(&drc);
3970 		}
3971 	}
3972 
	/* Report the number of bytes consumed, then advance the file offset. */
3973 	zc->zc_cookie = off - fp->f_offset;
3974 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3975 		fp->f_offset = off;
3976 
3977 #ifdef	DEBUG
3978 	if (zfs_ioc_recv_inject_err) {
3979 		zfs_ioc_recv_inject_err = B_FALSE;
3980 		error = 1;
3981 	}
3982 #endif
3983 	/*
3984 	 * On error, restore the original props.
3985 	 */
3986 	if (error && props) {
3987 		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
3988 			if (clear_received_props(os, tofs, props, NULL) != 0) {
3989 				/*
3990 				 * We failed to clear the received properties.
3991 				 * Since we may have left a $recvd value on the
3992 				 * system, we can't clear the $hasrecvd flag.
3993 				 */
3994 				zc->zc_obj |= ZPROP_ERR_NORESTORE;
3995 			} else if (first_recvd_props) {
3996 				dsl_prop_unset_hasrecvd(os);
3997 			}
3998 			dmu_objset_rele(os, FTAG);
3999 		} else if (!drc.drc_newfs) {
4000 			/* We failed to clear the received properties. */
4001 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4002 		}
4003 
4004 		if (origprops == NULL && !drc.drc_newfs) {
4005 			/* We failed to stash the original properties. */
4006 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4007 		}
4008 
4009 		/*
4010 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4011 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4012 		 * explictly if we're restoring local properties cleared in the
4013 		 * first new-style receive.
4014 		 */
4015 		if (origprops != NULL &&
4016 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4017 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4018 		    origprops, NULL) != 0) {
4019 			/*
4020 			 * We stashed the original properties but failed to
4021 			 * restore them.
4022 			 */
4023 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4024 		}
4025 	}
	/* Common cleanup; nvlist_free() is also reached with NULL lists here. */
4026 out:
4027 	nvlist_free(props);
4028 	nvlist_free(origprops);
4029 	nvlist_free(errors);
4030 	releasef(fd);
4031 
	/* A property copy-out failure is reported only if nothing else failed. */
4032 	if (error == 0)
4033 		error = props_error;
4034 
4035 	return (error);
4036 }
4037 
4038 /*
4039  * inputs:
4040  * zc_name	name of snapshot to send
4041  * zc_cookie	file descriptor to send stream to
4042  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4043  * zc_sendobj	objsetid of snapshot to send
4044  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4045  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4046  *		output size in zc_objset_type.
4047  *
4048  * outputs: none
4049  */
4050 static int
4051 zfs_ioc_send(zfs_cmd_t *zc)
4052 {
4053 	objset_t *fromsnap = NULL;
4054 	objset_t *tosnap;
4055 	int error;
4056 	offset_t off;
4057 	dsl_dataset_t *ds;
4058 	dsl_dataset_t *dsfrom = NULL;
4059 	spa_t *spa;
4060 	dsl_pool_t *dp;
4061 	boolean_t estimate = (zc->zc_guid != 0);
4062 
4063 	error = spa_open(zc->zc_name, &spa, FTAG);
4064 	if (error)
4065 		return (error);
4066 
4067 	dp = spa_get_dsl(spa);
4068 	rw_enter(&dp->dp_config_rwlock, RW_READER);
4069 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4070 	rw_exit(&dp->dp_config_rwlock);
4071 	spa_close(spa, FTAG);
4072 	if (error)
4073 		return (error);
4074 
4075 	error = dmu_objset_from_ds(ds, &tosnap);
4076 	if (error) {
4077 		dsl_dataset_rele(ds, FTAG);
4078 		return (error);
4079 	}
4080 
4081 	if (zc->zc_fromobj != 0) {
4082 		rw_enter(&dp->dp_config_rwlock, RW_READER);
4083 		error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
4084 		rw_exit(&dp->dp_config_rwlock);
4085 		if (error) {
4086 			dsl_dataset_rele(ds, FTAG);
4087 			return (error);
4088 		}
4089 		error = dmu_objset_from_ds(dsfrom, &fromsnap);
4090 		if (error) {
4091 			dsl_dataset_rele(dsfrom, FTAG);
4092 			dsl_dataset_rele(ds, FTAG);
4093 			return (error);
4094 		}
4095 	}
4096 
4097 	if (zc->zc_obj) {
4098 		dsl_pool_t *dp = ds->ds_dir->dd_pool;
4099 
4100 		if (fromsnap != NULL) {
4101 			dsl_dataset_rele(dsfrom, FTAG);
4102 			dsl_dataset_rele(ds, FTAG);
4103 			return (EINVAL);
4104 		}
4105 
4106 		if (dsl_dir_is_clone(ds->ds_dir)) {
4107 			rw_enter(&dp->dp_config_rwlock, RW_READER);
4108 			error = dsl_dataset_hold_obj(dp,
4109 			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &dsfrom);
4110 			rw_exit(&dp->dp_config_rwlock);
4111 			if (error) {
4112 				dsl_dataset_rele(ds, FTAG);
4113 				return (error);
4114 			}
4115 			error = dmu_objset_from_ds(dsfrom, &fromsnap);
4116 			if (error) {
4117 				dsl_dataset_rele(dsfrom, FTAG);
4118 				dsl_dataset_rele(ds, FTAG);
4119 				return (error);
4120 			}
4121 		}
4122 	}
4123 
4124 	if (estimate) {
4125 		error = dmu_send_estimate(tosnap, fromsnap,
4126 		    &zc->zc_objset_type);
4127 	} else {
4128 		file_t *fp = getf(zc->zc_cookie);
4129 		if (fp == NULL) {
4130 			dsl_dataset_rele(ds, FTAG);
4131 			if (dsfrom)
4132 				dsl_dataset_rele(dsfrom, FTAG);
4133 			return (EBADF);
4134 		}
4135 
4136 		off = fp->f_offset;
4137 		error = dmu_send(tosnap, fromsnap,
4138 		    zc->zc_cookie, fp->f_vnode, &off);
4139 
4140 		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4141 			fp->f_offset = off;
4142 		releasef(zc->zc_cookie);
4143 	}
4144 	if (dsfrom)
4145 		dsl_dataset_rele(dsfrom, FTAG);
4146 	dsl_dataset_rele(ds, FTAG);
4147 	return (error);
4148 }
4149 
4150 /*
4151  * inputs:
4152  * zc_name	name of snapshot on which to report progress
4153  * zc_cookie	file descriptor of send stream
4154  *
4155  * outputs:
4156  * zc_cookie	number of bytes written in send stream thus far
4157  */
4158 static int
4159 zfs_ioc_send_progress(zfs_cmd_t *zc)
4160 {
4161 	dsl_dataset_t *ds;
4162 	dmu_sendarg_t *dsp = NULL;
4163 	int error;
4164 
4165 	if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
4166 		return (error);
4167 
4168 	mutex_enter(&ds->ds_sendstream_lock);
4169 
4170 	/*
4171 	 * Iterate over all the send streams currently active on this dataset.
4172 	 * If there's one which matches the specified file descriptor _and_ the
4173 	 * stream was started by the current process, return the progress of
4174 	 * that stream.
4175 	 */
4176 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4177 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4178 		if (dsp->dsa_outfd == zc->zc_cookie &&
4179 		    dsp->dsa_proc == curproc)
4180 			break;
4181 	}
4182 
4183 	if (dsp != NULL)
4184 		zc->zc_cookie = *(dsp->dsa_off);
4185 	else
4186 		error = ENOENT;
4187 
4188 	mutex_exit(&ds->ds_sendstream_lock);
4189 	dsl_dataset_rele(ds, FTAG);
4190 	return (error);
4191 }
4192 
4193 static int
4194 zfs_ioc_inject_fault(zfs_cmd_t *zc)
4195 {
4196 	int id, error;
4197 
4198 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4199 	    &zc->zc_inject_record);
4200 
4201 	if (error == 0)
4202 		zc->zc_guid = (uint64_t)id;
4203 
4204 	return (error);
4205 }
4206 
4207 static int
4208 zfs_ioc_clear_fault(zfs_cmd_t *zc)
4209 {
4210 	return (zio_clear_fault((int)zc->zc_guid));
4211 }
4212 
4213 static int
4214 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4215 {
4216 	int id = (int)zc->zc_guid;
4217 	int error;
4218 
4219 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4220 	    &zc->zc_inject_record);
4221 
4222 	zc->zc_guid = id;
4223 
4224 	return (error);
4225 }
4226 
4227 static int
4228 zfs_ioc_error_log(zfs_cmd_t *zc)
4229 {
4230 	spa_t *spa;
4231 	int error;
4232 	size_t count = (size_t)zc->zc_nvlist_dst_size;
4233 
4234 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4235 		return (error);
4236 
4237 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4238 	    &count);
4239 	if (error == 0)
4240 		zc->zc_nvlist_dst_size = count;
4241 	else
4242 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4243 
4244 	spa_close(spa, FTAG);
4245 
4246 	return (error);
4247 }
4248 
4249 static int
4250 zfs_ioc_clear(zfs_cmd_t *zc)
4251 {
4252 	spa_t *spa;
4253 	vdev_t *vd;
4254 	int error;
4255 
4256 	/*
4257 	 * On zpool clear we also fix up missing slogs
4258 	 */
4259 	mutex_enter(&spa_namespace_lock);
4260 	spa = spa_lookup(zc->zc_name);
4261 	if (spa == NULL) {
4262 		mutex_exit(&spa_namespace_lock);
4263 		return (EIO);
4264 	}
4265 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4266 		/* we need to let spa_open/spa_load clear the chains */
4267 		spa_set_log_state(spa, SPA_LOG_CLEAR);
4268 	}
4269 	spa->spa_last_open_failed = 0;
4270 	mutex_exit(&spa_namespace_lock);
4271 
4272 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4273 		error = spa_open(zc->zc_name, &spa, FTAG);
4274 	} else {
4275 		nvlist_t *policy;
4276 		nvlist_t *config = NULL;
4277 
4278 		if (zc->zc_nvlist_src == NULL)
4279 			return (EINVAL);
4280 
4281 		if ((error = get_nvlist(zc->zc_nvlist_src,
4282 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4283 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4284 			    policy, &config);
4285 			if (config != NULL) {
4286 				int err;
4287 
4288 				if ((err = put_nvlist(zc, config)) != 0)
4289 					error = err;
4290 				nvlist_free(config);
4291 			}
4292 			nvlist_free(policy);
4293 		}
4294 	}
4295 
4296 	if (error)
4297 		return (error);
4298 
4299 	spa_vdev_state_enter(spa, SCL_NONE);
4300 
4301 	if (zc->zc_guid == 0) {
4302 		vd = NULL;
4303 	} else {
4304 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4305 		if (vd == NULL) {
4306 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4307 			spa_close(spa, FTAG);
4308 			return (ENODEV);
4309 		}
4310 	}
4311 
4312 	vdev_clear(spa, vd);
4313 
4314 	(void) spa_vdev_state_exit(spa, NULL, 0);
4315 
4316 	/*
4317 	 * Resume any suspended I/Os.
4318 	 */
4319 	if (zio_resume(spa) != 0)
4320 		error = EIO;
4321 
4322 	spa_close(spa, FTAG);
4323 
4324 	return (error);
4325 }
4326 
4327 static int
4328 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4329 {
4330 	spa_t *spa;
4331 	int error;
4332 
4333 	error = spa_open(zc->zc_name, &spa, FTAG);
4334 	if (error)
4335 		return (error);
4336 
4337 	spa_vdev_state_enter(spa, SCL_NONE);
4338 
4339 	/*
4340 	 * If a resilver is already in progress then set the
4341 	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4342 	 * the scan as a side effect of the reopen. Otherwise, let
4343 	 * vdev_open() decided if a resilver is required.
4344 	 */
4345 	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4346 	vdev_reopen(spa->spa_root_vdev);
4347 	spa->spa_scrub_reopen = B_FALSE;
4348 
4349 	(void) spa_vdev_state_exit(spa, NULL, 0);
4350 	spa_close(spa, FTAG);
4351 	return (0);
4352 }
4353 /*
4354  * inputs:
4355  * zc_name	name of filesystem
4356  * zc_value	name of origin snapshot
4357  *
4358  * outputs:
4359  * zc_string	name of conflicting snapshot, if there is one
4360  */
4361 static int
4362 zfs_ioc_promote(zfs_cmd_t *zc)
4363 {
4364 	char *cp;
4365 
4366 	/*
4367 	 * We don't need to unmount *all* the origin fs's snapshots, but
4368 	 * it's easier.
4369 	 */
4370 	cp = strchr(zc->zc_value, '@');
4371 	if (cp)
4372 		*cp = '\0';
4373 	(void) dmu_objset_find(zc->zc_value,
4374 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
4375 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4376 }
4377 
4378 /*
4379  * Retrieve a single {user|group}{used|quota}@... property.
4380  *
4381  * inputs:
4382  * zc_name	name of filesystem
4383  * zc_objset_type zfs_userquota_prop_t
4384  * zc_value	domain name (eg. "S-1-234-567-89")
4385  * zc_guid	RID/UID/GID
4386  *
4387  * outputs:
4388  * zc_cookie	property value
4389  */
4390 static int
4391 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4392 {
4393 	zfsvfs_t *zfsvfs;
4394 	int error;
4395 
4396 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4397 		return (EINVAL);
4398 
4399 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4400 	if (error)
4401 		return (error);
4402 
4403 	error = zfs_userspace_one(zfsvfs,
4404 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4405 	zfsvfs_rele(zfsvfs, FTAG);
4406 
4407 	return (error);
4408 }
4409 
4410 /*
4411  * inputs:
4412  * zc_name		name of filesystem
4413  * zc_cookie		zap cursor
4414  * zc_objset_type	zfs_userquota_prop_t
4415  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4416  *
4417  * outputs:
4418  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
4419  * zc_cookie	zap cursor
4420  */
4421 static int
4422 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4423 {
4424 	zfsvfs_t *zfsvfs;
4425 	int bufsize = zc->zc_nvlist_dst_size;
4426 
4427 	if (bufsize <= 0)
4428 		return (ENOMEM);
4429 
4430 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4431 	if (error)
4432 		return (error);
4433 
4434 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
4435 
4436 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4437 	    buf, &zc->zc_nvlist_dst_size);
4438 
4439 	if (error == 0) {
4440 		error = xcopyout(buf,
4441 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
4442 		    zc->zc_nvlist_dst_size);
4443 	}
4444 	kmem_free(buf, bufsize);
4445 	zfsvfs_rele(zfsvfs, FTAG);
4446 
4447 	return (error);
4448 }
4449 
4450 /*
4451  * inputs:
4452  * zc_name		name of filesystem
4453  *
4454  * outputs:
4455  * none
4456  */
4457 static int
4458 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4459 {
4460 	objset_t *os;
4461 	int error = 0;
4462 	zfsvfs_t *zfsvfs;
4463 
4464 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4465 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4466 			/*
4467 			 * If userused is not enabled, it may be because the
4468 			 * objset needs to be closed & reopened (to grow the
4469 			 * objset_phys_t).  Suspend/resume the fs will do that.
4470 			 */
4471 			error = zfs_suspend_fs(zfsvfs);
4472 			if (error == 0)
4473 				error = zfs_resume_fs(zfsvfs, zc->zc_name);
4474 		}
4475 		if (error == 0)
4476 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4477 		VFS_RELE(zfsvfs->z_vfs);
4478 	} else {
4479 		/* XXX kind of reading contents without owning */
4480 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4481 		if (error)
4482 			return (error);
4483 
4484 		error = dmu_objset_userspace_upgrade(os);
4485 		dmu_objset_rele(os, FTAG);
4486 	}
4487 
4488 	return (error);
4489 }
4490 
/*
 * We don't want to have a hard dependency
 * against some special symbols in sharefs,
 * nfs, and smbsrv.  Determine them if needed when
 * the first file system is shared.
 * Neither sharefs, nfs nor smbsrv are unloadable modules.
 */
/* Entry points resolved with ddi_modsym() the first time they are needed. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the corresponding protocol has been fully initialized. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles returned by ddi_modopen(); NULL until opened. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and entry points above are being set up. */
kmutex_t zfs_share_lock;
4509 
4510 static int
4511 zfs_init_sharefs()
4512 {
4513 	int error;
4514 
4515 	ASSERT(MUTEX_HELD(&zfs_share_lock));
4516 	/* Both NFS and SMB shares also require sharetab support. */
4517 	if (sharefs_mod == NULL && ((sharefs_mod =
4518 	    ddi_modopen("fs/sharefs",
4519 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
4520 		return (ENOSYS);
4521 	}
4522 	if (zshare_fs == NULL && ((zshare_fs =
4523 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4524 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4525 		return (ENOSYS);
4526 	}
4527 	return (0);
4528 }
4529 
4530 static int
4531 zfs_ioc_share(zfs_cmd_t *zc)
4532 {
4533 	int error;
4534 	int opcode;
4535 
4536 	switch (zc->zc_share.z_sharetype) {
4537 	case ZFS_SHARE_NFS:
4538 	case ZFS_UNSHARE_NFS:
4539 		if (zfs_nfsshare_inited == 0) {
4540 			mutex_enter(&zfs_share_lock);
4541 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4542 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4543 				mutex_exit(&zfs_share_lock);
4544 				return (ENOSYS);
4545 			}
4546 			if (znfsexport_fs == NULL &&
4547 			    ((znfsexport_fs = (int (*)(void *))
4548 			    ddi_modsym(nfs_mod,
4549 			    "nfs_export", &error)) == NULL)) {
4550 				mutex_exit(&zfs_share_lock);
4551 				return (ENOSYS);
4552 			}
4553 			error = zfs_init_sharefs();
4554 			if (error) {
4555 				mutex_exit(&zfs_share_lock);
4556 				return (ENOSYS);
4557 			}
4558 			zfs_nfsshare_inited = 1;
4559 			mutex_exit(&zfs_share_lock);
4560 		}
4561 		break;
4562 	case ZFS_SHARE_SMB:
4563 	case ZFS_UNSHARE_SMB:
4564 		if (zfs_smbshare_inited == 0) {
4565 			mutex_enter(&zfs_share_lock);
4566 			if (smbsrv_mod == NULL && ((smbsrv_mod =
4567 			    ddi_modopen("drv/smbsrv",
4568 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4569 				mutex_exit(&zfs_share_lock);
4570 				return (ENOSYS);
4571 			}
4572 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4573 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4574 			    "smb_server_share", &error)) == NULL)) {
4575 				mutex_exit(&zfs_share_lock);
4576 				return (ENOSYS);
4577 			}
4578 			error = zfs_init_sharefs();
4579 			if (error) {
4580 				mutex_exit(&zfs_share_lock);
4581 				return (ENOSYS);
4582 			}
4583 			zfs_smbshare_inited = 1;
4584 			mutex_exit(&zfs_share_lock);
4585 		}
4586 		break;
4587 	default:
4588 		return (EINVAL);
4589 	}
4590 
4591 	switch (zc->zc_share.z_sharetype) {
4592 	case ZFS_SHARE_NFS:
4593 	case ZFS_UNSHARE_NFS:
4594 		if (error =
4595 		    znfsexport_fs((void *)
4596 		    (uintptr_t)zc->zc_share.z_exportdata))
4597 			return (error);
4598 		break;
4599 	case ZFS_SHARE_SMB:
4600 	case ZFS_UNSHARE_SMB:
4601 		if (error = zsmbexport_fs((void *)
4602 		    (uintptr_t)zc->zc_share.z_exportdata,
4603 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4604 		    B_TRUE: B_FALSE)) {
4605 			return (error);
4606 		}
4607 		break;
4608 	}
4609 
4610 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4611 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4612 	    SHAREFS_ADD : SHAREFS_REMOVE;
4613 
4614 	/*
4615 	 * Add or remove share from sharetab
4616 	 */
4617 	error = zshare_fs(opcode,
4618 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
4619 	    zc->zc_share.z_sharemax);
4620 
4621 	return (error);
4622 
4623 }
4624 
/*
 * One-entry ACE list built from ACE_ALL_PERMS and ACE_EVERYONE; it is
 * passed as the ACL of the file that zfs_ioc_smb_acl() creates for
 * ZFS_SMB_ACL_ADD.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4628 
4629 /*
4630  * inputs:
4631  * zc_name		name of containing filesystem
4632  * zc_obj		object # beyond which we want next in-use object #
4633  *
4634  * outputs:
4635  * zc_obj		next in-use object #
4636  */
4637 static int
4638 zfs_ioc_next_obj(zfs_cmd_t *zc)
4639 {
4640 	objset_t *os = NULL;
4641 	int error;
4642 
4643 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4644 	if (error)
4645 		return (error);
4646 
4647 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4648 	    os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
4649 
4650 	dmu_objset_rele(os, FTAG);
4651 	return (error);
4652 }
4653 
4654 /*
4655  * inputs:
4656  * zc_name		name of filesystem
4657  * zc_value		prefix name for snapshot
4658  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4659  *
4660  * outputs:
4661  * zc_value		short name of new snapshot
4662  */
4663 static int
4664 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
4665 {
4666 	char *snap_name;
4667 	int error;
4668 
4669 	snap_name = kmem_asprintf("%s@%s-%016llx", zc->zc_name, zc->zc_value,
4670 	    (u_longlong_t)ddi_get_lbolt64());
4671 
4672 	if (strlen(snap_name) >= MAXPATHLEN) {
4673 		strfree(snap_name);
4674 		return (E2BIG);
4675 	}
4676 
4677 	error = dmu_objset_snapshot_tmp(snap_name, "%temp", zc->zc_cleanup_fd);
4678 	if (error != 0) {
4679 		strfree(snap_name);
4680 		return (error);
4681 	}
4682 
4683 	(void) strcpy(zc->zc_value, strchr(snap_name, '@') + 1);
4684 	strfree(snap_name);
4685 	return (0);
4686 }
4687 
4688 /*
4689  * inputs:
4690  * zc_name		name of "to" snapshot
4691  * zc_value		name of "from" snapshot
4692  * zc_cookie		file descriptor to write diff data on
4693  *
4694  * outputs:
4695  * dmu_diff_record_t's to the file descriptor
4696  */
4697 static int
4698 zfs_ioc_diff(zfs_cmd_t *zc)
4699 {
4700 	objset_t *fromsnap;
4701 	objset_t *tosnap;
4702 	file_t *fp;
4703 	offset_t off;
4704 	int error;
4705 
4706 	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
4707 	if (error)
4708 		return (error);
4709 
4710 	error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
4711 	if (error) {
4712 		dmu_objset_rele(tosnap, FTAG);
4713 		return (error);
4714 	}
4715 
4716 	fp = getf(zc->zc_cookie);
4717 	if (fp == NULL) {
4718 		dmu_objset_rele(fromsnap, FTAG);
4719 		dmu_objset_rele(tosnap, FTAG);
4720 		return (EBADF);
4721 	}
4722 
4723 	off = fp->f_offset;
4724 
4725 	error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off);
4726 
4727 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4728 		fp->f_offset = off;
4729 	releasef(zc->zc_cookie);
4730 
4731 	dmu_objset_rele(fromsnap, FTAG);
4732 	dmu_objset_rele(tosnap, FTAG);
4733 	return (error);
4734 }
4735 
4736 /*
4737  * Remove all ACL files in shares dir
4738  */
4739 static int
4740 zfs_smb_acl_purge(znode_t *dzp)
4741 {
4742 	zap_cursor_t	zc;
4743 	zap_attribute_t	zap;
4744 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4745 	int error;
4746 
4747 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4748 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4749 	    zap_cursor_advance(&zc)) {
4750 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4751 		    NULL, 0)) != 0)
4752 			break;
4753 	}
4754 	zap_cursor_fini(&zc);
4755 	return (error);
4756 }
4757 
4758 static int
4759 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4760 {
4761 	vnode_t *vp;
4762 	znode_t *dzp;
4763 	vnode_t *resourcevp = NULL;
4764 	znode_t *sharedir;
4765 	zfsvfs_t *zfsvfs;
4766 	nvlist_t *nvlist;
4767 	char *src, *target;
4768 	vattr_t vattr;
4769 	vsecattr_t vsec;
4770 	int error = 0;
4771 
4772 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4773 	    NO_FOLLOW, NULL, &vp)) != 0)
4774 		return (error);
4775 
4776 	/* Now make sure mntpnt and dataset are ZFS */
4777 
4778 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4779 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4780 	    zc->zc_name) != 0)) {
4781 		VN_RELE(vp);
4782 		return (EINVAL);
4783 	}
4784 
4785 	dzp = VTOZ(vp);
4786 	zfsvfs = dzp->z_zfsvfs;
4787 	ZFS_ENTER(zfsvfs);
4788 
4789 	/*
4790 	 * Create share dir if its missing.
4791 	 */
4792 	mutex_enter(&zfsvfs->z_lock);
4793 	if (zfsvfs->z_shares_dir == 0) {
4794 		dmu_tx_t *tx;
4795 
4796 		tx = dmu_tx_create(zfsvfs->z_os);
4797 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4798 		    ZFS_SHARES_DIR);
4799 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4800 		error = dmu_tx_assign(tx, TXG_WAIT);
4801 		if (error) {
4802 			dmu_tx_abort(tx);
4803 		} else {
4804 			error = zfs_create_share_dir(zfsvfs, tx);
4805 			dmu_tx_commit(tx);
4806 		}
4807 		if (error) {
4808 			mutex_exit(&zfsvfs->z_lock);
4809 			VN_RELE(vp);
4810 			ZFS_EXIT(zfsvfs);
4811 			return (error);
4812 		}
4813 	}
4814 	mutex_exit(&zfsvfs->z_lock);
4815 
4816 	ASSERT(zfsvfs->z_shares_dir);
4817 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4818 		VN_RELE(vp);
4819 		ZFS_EXIT(zfsvfs);
4820 		return (error);
4821 	}
4822 
4823 	switch (zc->zc_cookie) {
4824 	case ZFS_SMB_ACL_ADD:
4825 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4826 		vattr.va_type = VREG;
4827 		vattr.va_mode = S_IFREG|0777;
4828 		vattr.va_uid = 0;
4829 		vattr.va_gid = 0;
4830 
4831 		vsec.vsa_mask = VSA_ACE;
4832 		vsec.vsa_aclentp = &full_access;
4833 		vsec.vsa_aclentsz = sizeof (full_access);
4834 		vsec.vsa_aclcnt = 1;
4835 
4836 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4837 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4838 		if (resourcevp)
4839 			VN_RELE(resourcevp);
4840 		break;
4841 
4842 	case ZFS_SMB_ACL_REMOVE:
4843 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4844 		    NULL, 0);
4845 		break;
4846 
4847 	case ZFS_SMB_ACL_RENAME:
4848 		if ((error = get_nvlist(zc->zc_nvlist_src,
4849 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4850 			VN_RELE(vp);
4851 			ZFS_EXIT(zfsvfs);
4852 			return (error);
4853 		}
4854 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4855 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4856 		    &target)) {
4857 			VN_RELE(vp);
4858 			VN_RELE(ZTOV(sharedir));
4859 			ZFS_EXIT(zfsvfs);
4860 			nvlist_free(nvlist);
4861 			return (error);
4862 		}
4863 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4864 		    kcred, NULL, 0);
4865 		nvlist_free(nvlist);
4866 		break;
4867 
4868 	case ZFS_SMB_ACL_PURGE:
4869 		error = zfs_smb_acl_purge(sharedir);
4870 		break;
4871 
4872 	default:
4873 		error = EINVAL;
4874 		break;
4875 	}
4876 
4877 	VN_RELE(vp);
4878 	VN_RELE(ZTOV(sharedir));
4879 
4880 	ZFS_EXIT(zfsvfs);
4881 
4882 	return (error);
4883 }
4884 
4885 /*
4886  * inputs:
4887  * zc_name		name of filesystem
4888  * zc_value		short name of snap
4889  * zc_string		user-supplied tag for this hold
4890  * zc_cookie		recursive flag
4891  * zc_temphold		set if hold is temporary
4892  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4893  * zc_sendobj		if non-zero, the objid for zc_name@zc_value
4894  * zc_createtxg		if zc_sendobj is non-zero, snap must have zc_createtxg
4895  *
4896  * outputs:		none
4897  */
4898 static int
4899 zfs_ioc_hold(zfs_cmd_t *zc)
4900 {
4901 	boolean_t recursive = zc->zc_cookie;
4902 	spa_t *spa;
4903 	dsl_pool_t *dp;
4904 	dsl_dataset_t *ds;
4905 	int error;
4906 	minor_t minor = 0;
4907 
4908 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4909 		return (EINVAL);
4910 
4911 	if (zc->zc_sendobj == 0) {
4912 		return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4913 		    zc->zc_string, recursive, zc->zc_temphold,
4914 		    zc->zc_cleanup_fd));
4915 	}
4916 
4917 	if (recursive)
4918 		return (EINVAL);
4919 
4920 	error = spa_open(zc->zc_name, &spa, FTAG);
4921 	if (error)
4922 		return (error);
4923 
4924 	dp = spa_get_dsl(spa);
4925 	rw_enter(&dp->dp_config_rwlock, RW_READER);
4926 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4927 	rw_exit(&dp->dp_config_rwlock);
4928 	spa_close(spa, FTAG);
4929 	if (error)
4930 		return (error);
4931 
4932 	/*
4933 	 * Until we have a hold on this snapshot, it's possible that
4934 	 * zc_sendobj could've been destroyed and reused as part
4935 	 * of a later txg.  Make sure we're looking at the right object.
4936 	 */
4937 	if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
4938 		dsl_dataset_rele(ds, FTAG);
4939 		return (ENOENT);
4940 	}
4941 
4942 	if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
4943 		error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4944 		if (error) {
4945 			dsl_dataset_rele(ds, FTAG);
4946 			return (error);
4947 		}
4948 	}
4949 
4950 	error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
4951 	    zc->zc_temphold);
4952 	if (minor != 0) {
4953 		if (error == 0) {
4954 			dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
4955 			    minor);
4956 		}
4957 		zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4958 	}
4959 	dsl_dataset_rele(ds, FTAG);
4960 
4961 	return (error);
4962 }
4963 
4964 /*
4965  * inputs:
4966  * zc_name	name of dataset from which we're releasing a user hold
4967  * zc_value	short name of snap
4968  * zc_string	user-supplied tag for this hold
4969  * zc_cookie	recursive flag
4970  *
4971  * outputs:	none
4972  */
4973 static int
4974 zfs_ioc_release(zfs_cmd_t *zc)
4975 {
4976 	boolean_t recursive = zc->zc_cookie;
4977 
4978 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4979 		return (EINVAL);
4980 
4981 	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4982 	    zc->zc_string, recursive));
4983 }
4984 
4985 /*
4986  * inputs:
4987  * zc_name		name of filesystem
4988  *
4989  * outputs:
4990  * zc_nvlist_src{_size}	nvlist of snapshot holds
4991  */
4992 static int
4993 zfs_ioc_get_holds(zfs_cmd_t *zc)
4994 {
4995 	nvlist_t *nvp;
4996 	int error;
4997 
4998 	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4999 		error = put_nvlist(zc, nvp);
5000 		nvlist_free(nvp);
5001 	}
5002 
5003 	return (error);
5004 }
5005 
5006 /*
5007  * inputs:
5008  * zc_name		name of new filesystem or snapshot
5009  * zc_value		full name of old snapshot
5010  *
5011  * outputs:
5012  * zc_cookie		space in bytes
5013  * zc_objset_type	compressed space in bytes
5014  * zc_perm_action	uncompressed space in bytes
5015  */
5016 static int
5017 zfs_ioc_space_written(zfs_cmd_t *zc)
5018 {
5019 	int error;
5020 	dsl_dataset_t *new, *old;
5021 
5022 	error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
5023 	if (error != 0)
5024 		return (error);
5025 	error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
5026 	if (error != 0) {
5027 		dsl_dataset_rele(new, FTAG);
5028 		return (error);
5029 	}
5030 
5031 	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5032 	    &zc->zc_objset_type, &zc->zc_perm_action);
5033 	dsl_dataset_rele(old, FTAG);
5034 	dsl_dataset_rele(new, FTAG);
5035 	return (error);
5036 }
5037 /*
5038  * innvl: {
5039  *     "firstsnap" -> snapshot name
5040  * }
5041  *
5042  * outnvl: {
5043  *     "used" -> space in bytes
5044  *     "compressed" -> compressed space in bytes
5045  *     "uncompressed" -> uncompressed space in bytes
5046  * }
5047  */
5048 static int
5049 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5050 {
5051 	int error;
5052 	dsl_dataset_t *new, *old;
5053 	char *firstsnap;
5054 	uint64_t used, comp, uncomp;
5055 
5056 	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5057 		return (EINVAL);
5058 
5059 	error = dsl_dataset_hold(lastsnap, FTAG, &new);
5060 	if (error != 0)
5061 		return (error);
5062 	error = dsl_dataset_hold(firstsnap, FTAG, &old);
5063 	if (error != 0) {
5064 		dsl_dataset_rele(new, FTAG);
5065 		return (error);
5066 	}
5067 
5068 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5069 	dsl_dataset_rele(old, FTAG);
5070 	dsl_dataset_rele(new, FTAG);
5071 	fnvlist_add_uint64(outnvl, "used", used);
5072 	fnvlist_add_uint64(outnvl, "compressed", comp);
5073 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5074 	return (error);
5075 }
5076 
5077 /*
5078  * innvl: {
5079  *     "fd" -> file descriptor to write stream to (int32)
5080  *     (optional) "fromsnap" -> full snap name to send an incremental from
5081  * }
5082  *
5083  * outnvl is unused
5084  */
5085 /* ARGSUSED */
5086 static int
5087 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5088 {
5089 	objset_t *fromsnap = NULL;
5090 	objset_t *tosnap;
5091 	int error;
5092 	offset_t off;
5093 	char *fromname;
5094 	int fd;
5095 
5096 	error = nvlist_lookup_int32(innvl, "fd", &fd);
5097 	if (error != 0)
5098 		return (EINVAL);
5099 
5100 	error = dmu_objset_hold(snapname, FTAG, &tosnap);
5101 	if (error)
5102 		return (error);
5103 
5104 	error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
5105 	if (error == 0) {
5106 		error = dmu_objset_hold(fromname, FTAG, &fromsnap);
5107 		if (error) {
5108 			dmu_objset_rele(tosnap, FTAG);
5109 			return (error);
5110 		}
5111 	}
5112 
5113 	file_t *fp = getf(fd);
5114 	if (fp == NULL) {
5115 		dmu_objset_rele(tosnap, FTAG);
5116 		if (fromsnap != NULL)
5117 			dmu_objset_rele(fromsnap, FTAG);
5118 		return (EBADF);
5119 	}
5120 
5121 	off = fp->f_offset;
5122 	error = dmu_send(tosnap, fromsnap, fd, fp->f_vnode, &off);
5123 
5124 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5125 		fp->f_offset = off;
5126 	releasef(fd);
5127 	if (fromsnap != NULL)
5128 		dmu_objset_rele(fromsnap, FTAG);
5129 	dmu_objset_rele(tosnap, FTAG);
5130 	return (error);
5131 }
5132 
5133 /*
5134  * Determine approximately how large a zfs send stream will be -- the number
5135  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5136  *
5137  * innvl: {
5138  *     (optional) "fromsnap" -> full snap name to send an incremental from
5139  * }
5140  *
5141  * outnvl: {
5142  *     "space" -> bytes of space (uint64)
5143  * }
5144  */
5145 static int
5146 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5147 {
5148 	objset_t *fromsnap = NULL;
5149 	objset_t *tosnap;
5150 	int error;
5151 	char *fromname;
5152 	uint64_t space;
5153 
5154 	error = dmu_objset_hold(snapname, FTAG, &tosnap);
5155 	if (error)
5156 		return (error);
5157 
5158 	error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
5159 	if (error == 0) {
5160 		error = dmu_objset_hold(fromname, FTAG, &fromsnap);
5161 		if (error) {
5162 			dmu_objset_rele(tosnap, FTAG);
5163 			return (error);
5164 		}
5165 	}
5166 
5167 	error = dmu_send_estimate(tosnap, fromsnap, &space);
5168 	fnvlist_add_uint64(outnvl, "space", space);
5169 
5170 	if (fromsnap != NULL)
5171 		dmu_objset_rele(fromsnap, FTAG);
5172 	dmu_objset_rele(tosnap, FTAG);
5173 	return (error);
5174 }
5175 
5176 
/*
 * Table of registered ioctls; the entry for ioctl "ioc" lives at index
 * (ioc - ZFS_IOC_FIRST).
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5178 
5179 static void
5180 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5181     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5182     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5183 {
5184 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5185 
5186 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5187 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5188 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5189 	ASSERT3P(vec->zvec_func, ==, NULL);
5190 
5191 	vec->zvec_legacy_func = func;
5192 	vec->zvec_secpolicy = secpolicy;
5193 	vec->zvec_namecheck = namecheck;
5194 	vec->zvec_allow_log = log_history;
5195 	vec->zvec_pool_check = pool_check;
5196 }
5197 
5198 /*
5199  * See the block comment at the beginning of this file for details on
5200  * each argument to this function.
5201  */
5202 static void
5203 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5204     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5205     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5206     boolean_t allow_log)
5207 {
5208 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5209 
5210 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5211 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5212 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5213 	ASSERT3P(vec->zvec_func, ==, NULL);
5214 
5215 	/* if we are logging, the name must be valid */
5216 	ASSERT(!allow_log || namecheck != NO_NAME);
5217 
5218 	vec->zvec_name = name;
5219 	vec->zvec_func = func;
5220 	vec->zvec_secpolicy = secpolicy;
5221 	vec->zvec_namecheck = namecheck;
5222 	vec->zvec_pool_check = pool_check;
5223 	vec->zvec_smush_outnvlist = smush_outnvlist;
5224 	vec->zvec_allow_log = allow_log;
5225 }
5226 
5227 static void
5228 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5229     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5230     zfs_ioc_poolcheck_t pool_check)
5231 {
5232 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5233 	    POOL_NAME, log_history, pool_check);
5234 }
5235 
5236 static void
5237 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5238     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5239 {
5240 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5241 	    DATASET_NAME, B_FALSE, pool_check);
5242 }
5243 
5244 static void
5245 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5246 {
5247 	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5248 	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5249 }
5250 
5251 static void
5252 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5253     zfs_secpolicy_func_t *secpolicy)
5254 {
5255 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5256 	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
5257 }
5258 
5259 static void
5260 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5261     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5262 {
5263 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5264 	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5265 }
5266 
5267 static void
5268 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5269 {
5270 	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5271 	    zfs_secpolicy_read);
5272 }
5273 
5274 static void
5275 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5276 	zfs_secpolicy_func_t *secpolicy)
5277 {
5278 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5279 	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5280 }
5281 
/*
 * Populate zfs_ioc_vec by registering every ioctl handler together with
 * its security policy, name check, pool check and logging settings.
 */
static void
zfs_ioctl_init(void)
{
	/* IOCTLS that use the new (nvlist based) function signature */

	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	/* Pool-named ioctls that modify the pool (logged). */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/* ioctls registered with NO_NAME and no pool check. */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

	/* Pool-named ioctls that are not logged. */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	/* Pool-named ioctls that are logged but skip the read-only check. */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

	/* Dataset-named, unlogged ioctls guarded by zfs_secpolicy_read. */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* Same registration, but each with its own security policy. */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/* Dataset-named, logged ioctls that modify a dataset. */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_ROLLBACK, zfs_ioc_rollback,
	    zfs_secpolicy_rollback);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_HOLD, zfs_ioc_hold,
	    zfs_secpolicy_hold);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RELEASE, zfs_ioc_release,
	    zfs_secpolicy_release);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* Dataset-named ioctls that are never logged. */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
5458 
5459 int
5460 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
5461     zfs_ioc_poolcheck_t check)
5462 {
5463 	spa_t *spa;
5464 	int error;
5465 
5466 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
5467 
5468 	if (check & POOL_CHECK_NONE)
5469 		return (0);
5470 
5471 	error = spa_open(name, &spa, FTAG);
5472 	if (error == 0) {
5473 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
5474 			error = EAGAIN;
5475 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
5476 			error = EROFS;
5477 		spa_close(spa, FTAG);
5478 	}
5479 	return (error);
5480 }
5481 
5482 /*
5483  * Find a free minor number.
5484  */
5485 minor_t
5486 zfsdev_minor_alloc(void)
5487 {
5488 	static minor_t last_minor;
5489 	minor_t m;
5490 
5491 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5492 
5493 	for (m = last_minor + 1; m != last_minor; m++) {
5494 		if (m > ZFSDEV_MAX_MINOR)
5495 			m = 1;
5496 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
5497 			last_minor = m;
5498 			return (m);
5499 		}
5500 	}
5501 
5502 	return (0);
5503 }
5504 
5505 static int
5506 zfs_ctldev_init(dev_t *devp)
5507 {
5508 	minor_t minor;
5509 	zfs_soft_state_t *zs;
5510 
5511 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5512 	ASSERT(getminor(*devp) == 0);
5513 
5514 	minor = zfsdev_minor_alloc();
5515 	if (minor == 0)
5516 		return (ENXIO);
5517 
5518 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5519 		return (EAGAIN);
5520 
5521 	*devp = makedevice(getemajor(*devp), minor);
5522 
5523 	zs = ddi_get_soft_state(zfsdev_state, minor);
5524 	zs->zss_type = ZSST_CTLDEV;
5525 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5526 
5527 	return (0);
5528 }
5529 
5530 static void
5531 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
5532 {
5533 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5534 
5535 	zfs_onexit_destroy(zo);
5536 	ddi_soft_state_free(zfsdev_state, minor);
5537 }
5538 
5539 void *
5540 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5541 {
5542 	zfs_soft_state_t *zp;
5543 
5544 	zp = ddi_get_soft_state(zfsdev_state, minor);
5545 	if (zp == NULL || zp->zss_type != which)
5546 		return (NULL);
5547 
5548 	return (zp->zss_data);
5549 }
5550 
5551 static int
5552 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
5553 {
5554 	int error = 0;
5555 
5556 	if (getminor(*devp) != 0)
5557 		return (zvol_open(devp, flag, otyp, cr));
5558 
5559 	/* This is the control device. Allocate a new minor if requested. */
5560 	if (flag & FEXCL) {
5561 		mutex_enter(&zfsdev_state_lock);
5562 		error = zfs_ctldev_init(devp);
5563 		mutex_exit(&zfsdev_state_lock);
5564 	}
5565 
5566 	return (error);
5567 }
5568 
5569 static int
5570 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
5571 {
5572 	zfs_onexit_t *zo;
5573 	minor_t minor = getminor(dev);
5574 
5575 	if (minor == 0)
5576 		return (0);
5577 
5578 	mutex_enter(&zfsdev_state_lock);
5579 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
5580 	if (zo == NULL) {
5581 		mutex_exit(&zfsdev_state_lock);
5582 		return (zvol_close(dev, flag, otyp, cr));
5583 	}
5584 	zfs_ctldev_destroy(zo, minor);
5585 	mutex_exit(&zfsdev_state_lock);
5586 
5587 	return (0);
5588 }
5589 
5590 static int
5591 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
5592 {
5593 	zfs_cmd_t *zc;
5594 	uint_t vecnum;
5595 	int error, rc, len;
5596 	minor_t minor = getminor(dev);
5597 	const zfs_ioc_vec_t *vec;
5598 	char *saved_poolname = NULL;
5599 	nvlist_t *innvl = NULL;
5600 
5601 	if (minor != 0 &&
5602 	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
5603 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
5604 
5605 	vecnum = cmd - ZFS_IOC_FIRST;
5606 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
5607 
5608 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
5609 		return (EINVAL);
5610 	vec = &zfs_ioc_vec[vecnum];
5611 
5612 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
5613 
5614 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
5615 	if (error != 0) {
5616 		error = EFAULT;
5617 		goto out;
5618 	}
5619 
5620 	zc->zc_iflags = flag & FKIOCTL;
5621 	if (zc->zc_nvlist_src_size != 0) {
5622 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5623 		    zc->zc_iflags, &innvl);
5624 		if (error != 0)
5625 			goto out;
5626 	}
5627 
5628 	/*
5629 	 * Ensure that all pool/dataset names are valid before we pass down to
5630 	 * the lower layers.
5631 	 */
5632 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5633 	switch (vec->zvec_namecheck) {
5634 	case POOL_NAME:
5635 		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
5636 			error = EINVAL;
5637 		else
5638 			error = pool_status_check(zc->zc_name,
5639 			    vec->zvec_namecheck, vec->zvec_pool_check);
5640 		break;
5641 
5642 	case DATASET_NAME:
5643 		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
5644 			error = EINVAL;
5645 		else
5646 			error = pool_status_check(zc->zc_name,
5647 			    vec->zvec_namecheck, vec->zvec_pool_check);
5648 		break;
5649 
5650 	case NO_NAME:
5651 		break;
5652 	}
5653 
5654 
5655 	if (error == 0 && !(flag & FKIOCTL))
5656 		error = vec->zvec_secpolicy(zc, innvl, cr);
5657 
5658 	if (error != 0)
5659 		goto out;
5660 
5661 	/* legacy ioctls can modify zc_name */
5662 	len = strcspn(zc->zc_name, "/@") + 1;
5663 	saved_poolname = kmem_alloc(len, KM_SLEEP);
5664 	(void) strlcpy(saved_poolname, zc->zc_name, len);
5665 
5666 	if (vec->zvec_func != NULL) {
5667 		nvlist_t *outnvl;
5668 		int puterror = 0;
5669 		spa_t *spa;
5670 		nvlist_t *lognv = NULL;
5671 
5672 		ASSERT(vec->zvec_legacy_func == NULL);
5673 
5674 		/*
5675 		 * Add the innvl to the lognv before calling the func,
5676 		 * in case the func changes the innvl.
5677 		 */
5678 		if (vec->zvec_allow_log) {
5679 			lognv = fnvlist_alloc();
5680 			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
5681 			    vec->zvec_name);
5682 			if (!nvlist_empty(innvl)) {
5683 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
5684 				    innvl);
5685 			}
5686 		}
5687 
5688 		outnvl = fnvlist_alloc();
5689 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
5690 
5691 		if (error == 0 && vec->zvec_allow_log &&
5692 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
5693 			if (!nvlist_empty(outnvl)) {
5694 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
5695 				    outnvl);
5696 			}
5697 			(void) spa_history_log_nvl(spa, lognv);
5698 			spa_close(spa, FTAG);
5699 		}
5700 		fnvlist_free(lognv);
5701 
5702 		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
5703 			int smusherror = 0;
5704 			if (vec->zvec_smush_outnvlist) {
5705 				smusherror = nvlist_smush(outnvl,
5706 				    zc->zc_nvlist_dst_size);
5707 			}
5708 			if (smusherror == 0)
5709 				puterror = put_nvlist(zc, outnvl);
5710 		}
5711 
5712 		if (puterror != 0)
5713 			error = puterror;
5714 
5715 		nvlist_free(outnvl);
5716 	} else {
5717 		error = vec->zvec_legacy_func(zc);
5718 	}
5719 
5720 out:
5721 	nvlist_free(innvl);
5722 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
5723 	if (error == 0 && rc != 0)
5724 		error = EFAULT;
5725 	if (error == 0 && vec->zvec_allow_log) {
5726 		char *s = tsd_get(zfs_allow_log_key);
5727 		if (s != NULL)
5728 			strfree(s);
5729 		(void) tsd_set(zfs_allow_log_key, saved_poolname);
5730 	} else {
5731 		if (saved_poolname != NULL)
5732 			strfree(saved_poolname);
5733 	}
5734 
5735 	kmem_free(zc, sizeof (zfs_cmd_t));
5736 	return (error);
5737 }
5738 
5739 static int
5740 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5741 {
5742 	if (cmd != DDI_ATTACH)
5743 		return (DDI_FAILURE);
5744 
5745 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
5746 	    DDI_PSEUDO, 0) == DDI_FAILURE)
5747 		return (DDI_FAILURE);
5748 
5749 	zfs_dip = dip;
5750 
5751 	ddi_report_dev(dip);
5752 
5753 	return (DDI_SUCCESS);
5754 }
5755 
5756 static int
5757 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5758 {
5759 	if (spa_busy() || zfs_busy() || zvol_busy())
5760 		return (DDI_FAILURE);
5761 
5762 	if (cmd != DDI_DETACH)
5763 		return (DDI_FAILURE);
5764 
5765 	zfs_dip = NULL;
5766 
5767 	ddi_prop_remove_all(dip);
5768 	ddi_remove_minor_node(dip, NULL);
5769 
5770 	return (DDI_SUCCESS);
5771 }
5772 
5773 /*ARGSUSED*/
5774 static int
5775 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
5776 {
5777 	switch (infocmd) {
5778 	case DDI_INFO_DEVT2DEVINFO:
5779 		*result = zfs_dip;
5780 		return (DDI_SUCCESS);
5781 
5782 	case DDI_INFO_DEVT2INSTANCE:
5783 		*result = (void *)0;
5784 		return (DDI_SUCCESS);
5785 	}
5786 
5787 	return (DDI_FAILURE);
5788 }
5789 
5790 /*
5791  * OK, so this is a little weird.
5792  *
5793  * /dev/zfs is the control node, i.e. minor 0.
5794  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
5795  *
5796  * /dev/zfs has basically nothing to do except serve up ioctls,
5797  * so most of the standard driver entry points are in zvol.c.
5798  */
5799 static struct cb_ops zfs_cb_ops = {
5800 	zfsdev_open,	/* open */
5801 	zfsdev_close,	/* close */
5802 	zvol_strategy,	/* strategy */
5803 	nodev,		/* print */
5804 	zvol_dump,	/* dump */
5805 	zvol_read,	/* read */
5806 	zvol_write,	/* write */
5807 	zfsdev_ioctl,	/* ioctl */
5808 	nodev,		/* devmap */
5809 	nodev,		/* mmap */
5810 	nodev,		/* segmap */
5811 	nochpoll,	/* poll */
5812 	ddi_prop_op,	/* prop_op */
5813 	NULL,		/* streamtab */
5814 	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
5815 	CB_REV,		/* version */
5816 	nodev,		/* async read */
5817 	nodev,		/* async write */
5818 };
5819 
5820 static struct dev_ops zfs_dev_ops = {
5821 	DEVO_REV,	/* version */
5822 	0,		/* refcnt */
5823 	zfs_info,	/* info */
5824 	nulldev,	/* identify */
5825 	nulldev,	/* probe */
5826 	zfs_attach,	/* attach */
5827 	zfs_detach,	/* detach */
5828 	nodev,		/* reset */
5829 	&zfs_cb_ops,	/* driver operations */
5830 	NULL,		/* no bus operations */
5831 	NULL,		/* power */
5832 	ddi_quiesce_not_needed,	/* quiesce */
5833 };
5834 
5835 static struct modldrv zfs_modldrv = {
5836 	&mod_driverops,
5837 	"ZFS storage pool",
5838 	&zfs_dev_ops
5839 };
5840 
5841 static struct modlinkage modlinkage = {
5842 	MODREV_1,
5843 	(void *)&zfs_modlfs,
5844 	(void *)&zfs_modldrv,
5845 	NULL
5846 };
5847 
/*
 * Destructor registered for zfs_allow_log_key.  The value stored under
 * that key is the pool name saved by zfsdev_ioctl(); release it with
 * strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
	strfree((char *)arg);
}
5854 
5855 int
5856 _init(void)
5857 {
5858 	int error;
5859 
5860 	spa_init(FREAD | FWRITE);
5861 	zfs_init();
5862 	zvol_init();
5863 	zfs_ioctl_init();
5864 
5865 	if ((error = mod_install(&modlinkage)) != 0) {
5866 		zvol_fini();
5867 		zfs_fini();
5868 		spa_fini();
5869 		return (error);
5870 	}
5871 
5872 	tsd_create(&zfs_fsyncer_key, NULL);
5873 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
5874 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
5875 
5876 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
5877 	ASSERT(error == 0);
5878 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5879 
5880 	return (0);
5881 }
5882 
5883 int
5884 _fini(void)
5885 {
5886 	int error;
5887 
5888 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
5889 		return (EBUSY);
5890 
5891 	if ((error = mod_remove(&modlinkage)) != 0)
5892 		return (error);
5893 
5894 	zvol_fini();
5895 	zfs_fini();
5896 	spa_fini();
5897 	if (zfs_nfsshare_inited)
5898 		(void) ddi_modclose(nfs_mod);
5899 	if (zfs_smbshare_inited)
5900 		(void) ddi_modclose(smbsrv_mod);
5901 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
5902 		(void) ddi_modclose(sharefs_mod);
5903 
5904 	tsd_destroy(&zfs_fsyncer_key);
5905 	ldi_ident_release(zfs_li);
5906 	zfs_li = NULL;
5907 	mutex_destroy(&zfs_share_lock);
5908 
5909 	return (error);
5910 }
5911 
5912 int
5913 _info(struct modinfo *modinfop)
5914 {
5915 	return (mod_info(&modlinkage, modinfop));
5916 }
5917