xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision dd328bf6d39366b8d7bde6a36114538fc14332dd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25  * Portions Copyright 2011 Martin Matuska
26  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
27  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
28  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
29  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
30  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
31  * Copyright (c) 2013 Steven Hartland. All rights reserved.
32  * Copyright (c) 2014 Integros [integros.com]
33  * Copyright 2016 Toomas Soome <tsoome@me.com>
34  * Copyright 2017 RackTop Systems.
35  * Copyright (c) 2017 Datto Inc.
36  */
37 
38 /*
39  * ZFS ioctls.
40  *
41  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
42  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
43  *
44  * There are two ways that we handle ioctls: the legacy way where almost
45  * all of the logic is in the ioctl callback, and the new way where most
46  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
47  *
48  * Non-legacy ioctls should be registered by calling
49  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
50  * from userland by lzc_ioctl().
51  *
52  * The registration arguments are as follows:
53  *
54  * const char *name
55  *   The name of the ioctl.  This is used for history logging.  If the
56  *   ioctl returns successfully (the callback returns 0), and allow_log
57  *   is true, then a history log entry will be recorded with the input &
58  *   output nvlists.  The log entry can be printed with "zpool history -i".
59  *
60  * zfs_ioc_t ioc
61  *   The ioctl request number, which userland will pass to ioctl(2).
62  *   The ioctl numbers can change from release to release, because
63  *   the caller (libzfs) must be matched to the kernel.
64  *
65  * zfs_secpolicy_func_t *secpolicy
66  *   This function will be called before the zfs_ioc_func_t, to
67  *   determine if this operation is permitted.  It should return EPERM
68  *   on failure, and 0 on success.  Checks include determining if the
69  *   dataset is visible in this zone, and if the user has either all
70  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
71  *   to do this operation on this dataset with "zfs allow".
72  *
73  * zfs_ioc_namecheck_t namecheck
74  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
75  *   name, a dataset name, or nothing.  If the name is not well-formed,
76  *   the ioctl will fail and the callback will not be called.
77  *   Therefore, the callback can assume that the name is well-formed
78  *   (e.g. is null-terminated, doesn't have more than one '@' character,
79  *   doesn't have invalid characters).
80  *
81  * zfs_ioc_poolcheck_t pool_check
82  *   This specifies requirements on the pool state.  If the pool does
83  *   not meet them (is suspended or is readonly), the ioctl will fail
84  *   and the callback will not be called.  If any checks are specified
85  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
86  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
87  *   POOL_CHECK_READONLY).
88  *
89  * boolean_t smush_outnvlist
90  *   If smush_outnvlist is true, then the output is presumed to be a
91  *   list of errors, and it will be "smushed" down to fit into the
92  *   caller's buffer, by removing some entries and replacing them with a
93  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
94  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
95  *   outnvlist does not fit into the userland-provided buffer, then the
96  *   ioctl will fail with ENOMEM.
97  *
98  * zfs_ioc_func_t *func
99  *   The callback function that will perform the operation.
100  *
101  *   The callback should return 0 on success, or an error number on
102  *   failure.  If the function fails, the userland ioctl will return -1,
103  *   and errno will be set to the callback's return value.  The callback
104  *   will be called with the following arguments:
105  *
106  *   const char *name
107  *     The name of the pool or dataset to operate on, from
108  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
109  *     expected type (pool, dataset, or none).
110  *
111  *   nvlist_t *innvl
112  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
113  *     NULL if no input nvlist was provided.  Changes to this nvlist are
114  *     ignored.  If the input nvlist could not be deserialized, the
115  *     ioctl will fail and the callback will not be called.
116  *
117  *   nvlist_t *outnvl
118  *     The output nvlist, initially empty.  The callback can fill it in,
119  *     and it will be returned to userland by serializing it into
120  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
121  *     fails (e.g. because the caller didn't supply a large enough
122  *     buffer), then the overall ioctl will fail.  See the
123  *     'smush_outnvlist' argument above for additional behaviors.
124  *
125  *     There are two typical uses of the output nvlist:
126  *       - To return state, e.g. property values.  In this case,
127  *         smush_outnvlist should be false.  If the buffer was not large
128  *         enough, the caller will reallocate a larger buffer and try
129  *         the ioctl again.
130  *
131  *       - To return multiple errors from an ioctl which makes on-disk
132  *         changes.  In this case, smush_outnvlist should be true.
133  *         Ioctls which make on-disk modifications should generally not
134  *         use the outnvl if they succeed, because the caller can not
135  *         distinguish between the operation failing, and
136  *         deserialization failing.
137  */
138 
139 #include <sys/types.h>
140 #include <sys/param.h>
141 #include <sys/errno.h>
142 #include <sys/uio.h>
143 #include <sys/buf.h>
144 #include <sys/modctl.h>
145 #include <sys/open.h>
146 #include <sys/file.h>
147 #include <sys/kmem.h>
148 #include <sys/conf.h>
149 #include <sys/cmn_err.h>
150 #include <sys/stat.h>
151 #include <sys/zfs_ioctl.h>
152 #include <sys/zfs_vfsops.h>
153 #include <sys/zfs_znode.h>
154 #include <sys/zap.h>
155 #include <sys/spa.h>
156 #include <sys/spa_impl.h>
157 #include <sys/vdev.h>
158 #include <sys/priv_impl.h>
159 #include <sys/dmu.h>
160 #include <sys/dsl_dir.h>
161 #include <sys/dsl_dataset.h>
162 #include <sys/dsl_prop.h>
163 #include <sys/dsl_deleg.h>
164 #include <sys/dmu_objset.h>
165 #include <sys/dmu_impl.h>
166 #include <sys/dmu_tx.h>
167 #include <sys/ddi.h>
168 #include <sys/sunddi.h>
169 #include <sys/sunldi.h>
170 #include <sys/policy.h>
171 #include <sys/zone.h>
172 #include <sys/nvpair.h>
173 #include <sys/pathname.h>
174 #include <sys/mount.h>
175 #include <sys/sdt.h>
176 #include <sys/fs/zfs.h>
177 #include <sys/zfs_ctldir.h>
178 #include <sys/zfs_dir.h>
179 #include <sys/zfs_onexit.h>
180 #include <sys/zvol.h>
181 #include <sys/dsl_scan.h>
182 #include <sharefs/share.h>
183 #include <sys/dmu_objset.h>
184 #include <sys/dmu_recv.h>
185 #include <sys/dmu_send.h>
186 #include <sys/dsl_destroy.h>
187 #include <sys/dsl_bookmark.h>
188 #include <sys/dsl_userhold.h>
189 #include <sys/zfeature.h>
190 #include <sys/zcp.h>
191 #include <sys/zio_checksum.h>
192 #include <sys/vdev_removal.h>
193 #include <sys/vdev_impl.h>
194 #include <sys/vdev_initialize.h>
195 
196 #include "zfs_namecheck.h"
197 #include "zfs_prop.h"
198 #include "zfs_deleg.h"
199 #include "zfs_comutil.h"
200 
201 #include "lua.h"
202 #include "lauxlib.h"
203 
204 extern struct modlfs zfs_modlfs;
205 
206 extern void zfs_init(void);
207 extern void zfs_fini(void);
208 
209 ldi_ident_t zfs_li = NULL;
210 dev_info_t *zfs_dip;
211 
212 uint_t zfs_fsyncer_key;
213 extern uint_t rrw_tsd_key;
214 static uint_t zfs_allow_log_key;
215 
216 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
217 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
218 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
219 
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name: nothing, a pool
 * name, or a dataset name.
 */
typedef enum {
	NO_NAME = 0,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;
225 
/*
 * Requirements on the pool state for an ioctl.  These are bit flags, so
 * several checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
 * POOL_CHECK_READONLY).
 */
typedef enum {
	POOL_CHECK_NONE		= 0x1,	/* no pool-state requirement */
	POOL_CHECK_SUSPENDED	= 0x2,	/* fail if the pool is suspended */
	POOL_CHECK_READONLY	= 0x4,	/* fail if the pool is readonly */
} zfs_ioc_poolcheck_t;
231 
232 typedef struct zfs_ioc_vec {
233 	zfs_ioc_legacy_func_t	*zvec_legacy_func;
234 	zfs_ioc_func_t		*zvec_func;
235 	zfs_secpolicy_func_t	*zvec_secpolicy;
236 	zfs_ioc_namecheck_t	zvec_namecheck;
237 	boolean_t		zvec_allow_log;
238 	zfs_ioc_poolcheck_t	zvec_pool_check;
239 	boolean_t		zvec_smush_outnvlist;
240 	const char		*zvec_name;
241 } zfs_ioc_vec_t;
242 
243 /* This array is indexed by zfs_userquota_prop_t */
244 static const char *userquota_perms[] = {
245 	ZFS_DELEG_PERM_USERUSED,
246 	ZFS_DELEG_PERM_USERQUOTA,
247 	ZFS_DELEG_PERM_GROUPUSED,
248 	ZFS_DELEG_PERM_GROUPQUOTA,
249 };
250 
251 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
252 static int zfs_check_settable(const char *name, nvpair_t *property,
253     cred_t *cr);
254 static int zfs_check_clearable(char *dataset, nvlist_t *props,
255     nvlist_t **errors);
256 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
257     boolean_t *);
258 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
259 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
260 
261 static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
262 
263 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
264 void
265 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
266 {
267 	const char *newfile;
268 	char buf[512];
269 	va_list adx;
270 
271 	/*
272 	 * Get rid of annoying "../common/" prefix to filename.
273 	 */
274 	newfile = strrchr(file, '/');
275 	if (newfile != NULL) {
276 		newfile = newfile + 1; /* Get rid of leading / */
277 	} else {
278 		newfile = file;
279 	}
280 
281 	va_start(adx, fmt);
282 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
283 	va_end(adx);
284 
285 	/*
286 	 * To get this data, use the zfs-dprintf probe as so:
287 	 * dtrace -q -n 'zfs-dprintf \
288 	 *	/stringof(arg0) == "dbuf.c"/ \
289 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
290 	 * arg0 = file name
291 	 * arg1 = function name
292 	 * arg2 = line number
293 	 * arg3 = message
294 	 */
295 	DTRACE_PROBE4(zfs__dprintf,
296 	    char *, newfile, char *, func, int, line, char *, buf);
297 }
298 
299 static void
300 history_str_free(char *buf)
301 {
302 	kmem_free(buf, HIS_MAX_RECORD_LEN);
303 }
304 
305 static char *
306 history_str_get(zfs_cmd_t *zc)
307 {
308 	char *buf;
309 
310 	if (zc->zc_history == 0)
311 		return (NULL);
312 
313 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
314 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
315 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
316 		history_str_free(buf);
317 		return (NULL);
318 	}
319 
320 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
321 
322 	return (buf);
323 }
324 
325 /*
326  * Check to see if the named dataset is currently defined as bootable
327  */
328 static boolean_t
329 zfs_is_bootfs(const char *name)
330 {
331 	objset_t *os;
332 
333 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
334 		boolean_t ret;
335 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
336 		dmu_objset_rele(os, FTAG);
337 		return (ret);
338 	}
339 	return (B_FALSE);
340 }
341 
342 /*
343  * Return non-zero if the spa version is less than requested version.
344  */
345 static int
346 zfs_earlier_version(const char *name, int version)
347 {
348 	spa_t *spa;
349 
350 	if (spa_open(name, &spa, FTAG) == 0) {
351 		if (spa_version(spa) < version) {
352 			spa_close(spa, FTAG);
353 			return (1);
354 		}
355 		spa_close(spa, FTAG);
356 	}
357 	return (0);
358 }
359 
360 /*
361  * Return TRUE if the ZPL version is less than requested version.
362  */
363 static boolean_t
364 zpl_earlier_version(const char *name, int version)
365 {
366 	objset_t *os;
367 	boolean_t rc = B_TRUE;
368 
369 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
370 		uint64_t zplversion;
371 
372 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
373 			dmu_objset_rele(os, FTAG);
374 			return (B_TRUE);
375 		}
376 		/* XXX reading from non-owned objset */
377 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
378 			rc = zplversion < version;
379 		dmu_objset_rele(os, FTAG);
380 	}
381 	return (rc);
382 }
383 
384 static void
385 zfs_log_history(zfs_cmd_t *zc)
386 {
387 	spa_t *spa;
388 	char *buf;
389 
390 	if ((buf = history_str_get(zc)) == NULL)
391 		return;
392 
393 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
394 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
395 			(void) spa_history_log(spa, buf);
396 		spa_close(spa, FTAG);
397 	}
398 	history_str_free(buf);
399 }
400 
401 /*
402  * Policy for top-level read operations (list pools).  Requires no privileges,
403  * and can be used in the local zone, as there is no associated dataset.
404  */
405 /* ARGSUSED */
406 static int
407 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
408 {
409 	return (0);
410 }
411 
412 /*
413  * Policy for dataset read operations (list children, get statistics).  Requires
414  * no privileges, but must be visible in the local zone.
415  */
416 /* ARGSUSED */
417 static int
418 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
419 {
420 	if (INGLOBALZONE(curproc) ||
421 	    zone_dataset_visible(zc->zc_name, NULL))
422 		return (0);
423 
424 	return (SET_ERROR(ENOENT));
425 }
426 
427 static int
428 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
429 {
430 	int writable = 1;
431 
432 	/*
433 	 * The dataset must be visible by this zone -- check this first
434 	 * so they don't see EPERM on something they shouldn't know about.
435 	 */
436 	if (!INGLOBALZONE(curproc) &&
437 	    !zone_dataset_visible(dataset, &writable))
438 		return (SET_ERROR(ENOENT));
439 
440 	if (INGLOBALZONE(curproc)) {
441 		/*
442 		 * If the fs is zoned, only root can access it from the
443 		 * global zone.
444 		 */
445 		if (secpolicy_zfs(cr) && zoned)
446 			return (SET_ERROR(EPERM));
447 	} else {
448 		/*
449 		 * If we are in a local zone, the 'zoned' property must be set.
450 		 */
451 		if (!zoned)
452 			return (SET_ERROR(EPERM));
453 
454 		/* must be writable by this zone */
455 		if (!writable)
456 			return (SET_ERROR(EPERM));
457 	}
458 	return (0);
459 }
460 
461 static int
462 zfs_dozonecheck(const char *dataset, cred_t *cr)
463 {
464 	uint64_t zoned;
465 
466 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
467 		return (SET_ERROR(ENOENT));
468 
469 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
470 }
471 
472 static int
473 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
474 {
475 	uint64_t zoned;
476 
477 	if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
478 		return (SET_ERROR(ENOENT));
479 
480 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
481 }
482 
483 static int
484 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
485     const char *perm, cred_t *cr)
486 {
487 	int error;
488 
489 	error = zfs_dozonecheck_ds(name, ds, cr);
490 	if (error == 0) {
491 		error = secpolicy_zfs(cr);
492 		if (error != 0)
493 			error = dsl_deleg_access_impl(ds, perm, cr);
494 	}
495 	return (error);
496 }
497 
498 static int
499 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
500 {
501 	int error;
502 	dsl_dataset_t *ds;
503 	dsl_pool_t *dp;
504 
505 	/*
506 	 * First do a quick check for root in the global zone, which
507 	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
508 	 * will get to handle nonexistent datasets.
509 	 */
510 	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
511 		return (0);
512 
513 	error = dsl_pool_hold(name, FTAG, &dp);
514 	if (error != 0)
515 		return (error);
516 
517 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
518 	if (error != 0) {
519 		dsl_pool_rele(dp, FTAG);
520 		return (error);
521 	}
522 
523 	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
524 
525 	dsl_dataset_rele(ds, FTAG);
526 	dsl_pool_rele(dp, FTAG);
527 	return (error);
528 }
529 
530 /*
531  * Policy for setting the security label property.
532  *
533  * Returns 0 for success, non-zero for access and other errors.
534  */
535 static int
536 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
537 {
538 	char		ds_hexsl[MAXNAMELEN];
539 	bslabel_t	ds_sl, new_sl;
540 	boolean_t	new_default = FALSE;
541 	uint64_t	zoned;
542 	int		needed_priv = -1;
543 	int		error;
544 
545 	/* First get the existing dataset label. */
546 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
547 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
548 	if (error != 0)
549 		return (SET_ERROR(EPERM));
550 
551 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
552 		new_default = TRUE;
553 
554 	/* The label must be translatable */
555 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
556 		return (SET_ERROR(EINVAL));
557 
558 	/*
559 	 * In a non-global zone, disallow attempts to set a label that
560 	 * doesn't match that of the zone; otherwise no other checks
561 	 * are needed.
562 	 */
563 	if (!INGLOBALZONE(curproc)) {
564 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
565 			return (SET_ERROR(EPERM));
566 		return (0);
567 	}
568 
569 	/*
570 	 * For global-zone datasets (i.e., those whose zoned property is
571 	 * "off", verify that the specified new label is valid for the
572 	 * global zone.
573 	 */
574 	if (dsl_prop_get_integer(name,
575 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
576 		return (SET_ERROR(EPERM));
577 	if (!zoned) {
578 		if (zfs_check_global_label(name, strval) != 0)
579 			return (SET_ERROR(EPERM));
580 	}
581 
582 	/*
583 	 * If the existing dataset label is nondefault, check if the
584 	 * dataset is mounted (label cannot be changed while mounted).
585 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
586 	 * mounted (or isn't a dataset, doesn't exist, ...).
587 	 */
588 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
589 		objset_t *os;
590 		static char *setsl_tag = "setsl_tag";
591 
592 		/*
593 		 * Try to own the dataset; abort if there is any error,
594 		 * (e.g., already mounted, in use, or other error).
595 		 */
596 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
597 		    setsl_tag, &os);
598 		if (error != 0)
599 			return (SET_ERROR(EPERM));
600 
601 		dmu_objset_disown(os, setsl_tag);
602 
603 		if (new_default) {
604 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
605 			goto out_check;
606 		}
607 
608 		if (hexstr_to_label(strval, &new_sl) != 0)
609 			return (SET_ERROR(EPERM));
610 
611 		if (blstrictdom(&ds_sl, &new_sl))
612 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
613 		else if (blstrictdom(&new_sl, &ds_sl))
614 			needed_priv = PRIV_FILE_UPGRADE_SL;
615 	} else {
616 		/* dataset currently has a default label */
617 		if (!new_default)
618 			needed_priv = PRIV_FILE_UPGRADE_SL;
619 	}
620 
621 out_check:
622 	if (needed_priv != -1)
623 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
624 	return (0);
625 }
626 
627 static int
628 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
629     cred_t *cr)
630 {
631 	char *strval;
632 
633 	/*
634 	 * Check permissions for special properties.
635 	 */
636 	switch (prop) {
637 	case ZFS_PROP_ZONED:
638 		/*
639 		 * Disallow setting of 'zoned' from within a local zone.
640 		 */
641 		if (!INGLOBALZONE(curproc))
642 			return (SET_ERROR(EPERM));
643 		break;
644 
645 	case ZFS_PROP_QUOTA:
646 	case ZFS_PROP_FILESYSTEM_LIMIT:
647 	case ZFS_PROP_SNAPSHOT_LIMIT:
648 		if (!INGLOBALZONE(curproc)) {
649 			uint64_t zoned;
650 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
651 			/*
652 			 * Unprivileged users are allowed to modify the
653 			 * limit on things *under* (ie. contained by)
654 			 * the thing they own.
655 			 */
656 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
657 			    setpoint))
658 				return (SET_ERROR(EPERM));
659 			if (!zoned || strlen(dsname) <= strlen(setpoint))
660 				return (SET_ERROR(EPERM));
661 		}
662 		break;
663 
664 	case ZFS_PROP_MLSLABEL:
665 		if (!is_system_labeled())
666 			return (SET_ERROR(EPERM));
667 
668 		if (nvpair_value_string(propval, &strval) == 0) {
669 			int err;
670 
671 			err = zfs_set_slabel_policy(dsname, strval, CRED());
672 			if (err != 0)
673 				return (err);
674 		}
675 		break;
676 	}
677 
678 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
679 }
680 
681 /* ARGSUSED */
682 static int
683 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
684 {
685 	int error;
686 
687 	error = zfs_dozonecheck(zc->zc_name, cr);
688 	if (error != 0)
689 		return (error);
690 
691 	/*
692 	 * permission to set permissions will be evaluated later in
693 	 * dsl_deleg_can_allow()
694 	 */
695 	return (0);
696 }
697 
698 /* ARGSUSED */
699 static int
700 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
701 {
702 	return (zfs_secpolicy_write_perms(zc->zc_name,
703 	    ZFS_DELEG_PERM_ROLLBACK, cr));
704 }
705 
706 /* ARGSUSED */
707 static int
708 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
709 {
710 	dsl_pool_t *dp;
711 	dsl_dataset_t *ds;
712 	char *cp;
713 	int error;
714 
715 	/*
716 	 * Generate the current snapshot name from the given objsetid, then
717 	 * use that name for the secpolicy/zone checks.
718 	 */
719 	cp = strchr(zc->zc_name, '@');
720 	if (cp == NULL)
721 		return (SET_ERROR(EINVAL));
722 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
723 	if (error != 0)
724 		return (error);
725 
726 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
727 	if (error != 0) {
728 		dsl_pool_rele(dp, FTAG);
729 		return (error);
730 	}
731 
732 	dsl_dataset_name(ds, zc->zc_name);
733 
734 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
735 	    ZFS_DELEG_PERM_SEND, cr);
736 	dsl_dataset_rele(ds, FTAG);
737 	dsl_pool_rele(dp, FTAG);
738 
739 	return (error);
740 }
741 
742 /* ARGSUSED */
743 static int
744 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
745 {
746 	return (zfs_secpolicy_write_perms(zc->zc_name,
747 	    ZFS_DELEG_PERM_SEND, cr));
748 }
749 
750 /* ARGSUSED */
751 static int
752 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
753 {
754 	vnode_t *vp;
755 	int error;
756 
757 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
758 	    NO_FOLLOW, NULL, &vp)) != 0)
759 		return (error);
760 
761 	/* Now make sure mntpnt and dataset are ZFS */
762 
763 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
764 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
765 	    zc->zc_name) != 0)) {
766 		VN_RELE(vp);
767 		return (SET_ERROR(EPERM));
768 	}
769 
770 	VN_RELE(vp);
771 	return (dsl_deleg_access(zc->zc_name,
772 	    ZFS_DELEG_PERM_SHARE, cr));
773 }
774 
775 int
776 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
777 {
778 	if (!INGLOBALZONE(curproc))
779 		return (SET_ERROR(EPERM));
780 
781 	if (secpolicy_nfs(cr) == 0) {
782 		return (0);
783 	} else {
784 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
785 	}
786 }
787 
788 int
789 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
790 {
791 	if (!INGLOBALZONE(curproc))
792 		return (SET_ERROR(EPERM));
793 
794 	if (secpolicy_smb(cr) == 0) {
795 		return (0);
796 	} else {
797 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
798 	}
799 }
800 
/*
 * Compute the parent of 'datasetname' by removing the trailing "@bla" or
 * "/bla" component, and store it in 'parent'.
 *
 * datasetname - name of a snapshot or a non-root dataset
 * parent      - output buffer
 * parentsize  - size of 'parent' in bytes
 *
 * A name that does not fit is truncated to parentsize - 1 bytes; the
 * result is always NUL-terminated.
 *
 * Returns 0 on success, ENOENT if the name contains neither '@' nor '/',
 * and EINVAL if parentsize is not positive.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() leaves the destination unterminated when the source
	 * is parentsize bytes or longer, so terminate explicitly before
	 * scanning the copy.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (SET_ERROR(ENOENT));

	cp[0] = '\0';
	return (0);
}
822 
823 int
824 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
825 {
826 	int error;
827 
828 	if ((error = zfs_secpolicy_write_perms(name,
829 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
830 		return (error);
831 
832 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
833 }
834 
835 /* ARGSUSED */
836 static int
837 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
838 {
839 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
840 }
841 
842 /*
843  * Destroying snapshots with delegated permissions requires
844  * descendant mount and destroy permissions.
845  */
846 /* ARGSUSED */
847 static int
848 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
849 {
850 	nvlist_t *snaps;
851 	nvpair_t *pair, *nextpair;
852 	int error = 0;
853 
854 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
855 		return (SET_ERROR(EINVAL));
856 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
857 	    pair = nextpair) {
858 		nextpair = nvlist_next_nvpair(snaps, pair);
859 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
860 		if (error == ENOENT) {
861 			/*
862 			 * Ignore any snapshots that don't exist (we consider
863 			 * them "already destroyed").  Remove the name from the
864 			 * nvl here in case the snapshot is created between
865 			 * now and when we try to destroy it (in which case
866 			 * we don't want to destroy it since we haven't
867 			 * checked for permission).
868 			 */
869 			fnvlist_remove_nvpair(snaps, pair);
870 			error = 0;
871 		}
872 		if (error != 0)
873 			break;
874 	}
875 
876 	return (error);
877 }
878 
879 int
880 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
881 {
882 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
883 	int	error;
884 
885 	if ((error = zfs_secpolicy_write_perms(from,
886 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
887 		return (error);
888 
889 	if ((error = zfs_secpolicy_write_perms(from,
890 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
891 		return (error);
892 
893 	if ((error = zfs_get_parent(to, parentname,
894 	    sizeof (parentname))) != 0)
895 		return (error);
896 
897 	if ((error = zfs_secpolicy_write_perms(parentname,
898 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
899 		return (error);
900 
901 	if ((error = zfs_secpolicy_write_perms(parentname,
902 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
903 		return (error);
904 
905 	return (error);
906 }
907 
908 /* ARGSUSED */
909 static int
910 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
911 {
912 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
913 }
914 
915 /* ARGSUSED */
916 static int
917 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
918 {
919 	dsl_pool_t *dp;
920 	dsl_dataset_t *clone;
921 	int error;
922 
923 	error = zfs_secpolicy_write_perms(zc->zc_name,
924 	    ZFS_DELEG_PERM_PROMOTE, cr);
925 	if (error != 0)
926 		return (error);
927 
928 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
929 	if (error != 0)
930 		return (error);
931 
932 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
933 
934 	if (error == 0) {
935 		char parentname[ZFS_MAX_DATASET_NAME_LEN];
936 		dsl_dataset_t *origin = NULL;
937 		dsl_dir_t *dd;
938 		dd = clone->ds_dir;
939 
940 		error = dsl_dataset_hold_obj(dd->dd_pool,
941 		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
942 		if (error != 0) {
943 			dsl_dataset_rele(clone, FTAG);
944 			dsl_pool_rele(dp, FTAG);
945 			return (error);
946 		}
947 
948 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
949 		    ZFS_DELEG_PERM_MOUNT, cr);
950 
951 		dsl_dataset_name(origin, parentname);
952 		if (error == 0) {
953 			error = zfs_secpolicy_write_perms_ds(parentname, origin,
954 			    ZFS_DELEG_PERM_PROMOTE, cr);
955 		}
956 		dsl_dataset_rele(clone, FTAG);
957 		dsl_dataset_rele(origin, FTAG);
958 	}
959 	dsl_pool_rele(dp, FTAG);
960 	return (error);
961 }
962 
963 /* ARGSUSED */
964 static int
965 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
966 {
967 	int error;
968 
969 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
970 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
971 		return (error);
972 
973 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
974 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
975 		return (error);
976 
977 	return (zfs_secpolicy_write_perms(zc->zc_name,
978 	    ZFS_DELEG_PERM_CREATE, cr));
979 }
980 
981 int
982 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
983 {
984 	return (zfs_secpolicy_write_perms(name,
985 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
986 }
987 
988 /*
989  * Check for permission to create each snapshot in the nvlist.
990  */
991 /* ARGSUSED */
992 static int
993 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
994 {
995 	nvlist_t *snaps;
996 	int error = 0;
997 	nvpair_t *pair;
998 
999 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1000 		return (SET_ERROR(EINVAL));
1001 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1002 	    pair = nvlist_next_nvpair(snaps, pair)) {
1003 		char *name = nvpair_name(pair);
1004 		char *atp = strchr(name, '@');
1005 
1006 		if (atp == NULL) {
1007 			error = SET_ERROR(EINVAL);
1008 			break;
1009 		}
1010 		*atp = '\0';
1011 		error = zfs_secpolicy_snapshot_perms(name, cr);
1012 		*atp = '@';
1013 		if (error != 0)
1014 			break;
1015 	}
1016 	return (error);
1017 }
1018 
1019 /*
1020  * Check for permission to create each snapshot in the nvlist.
1021  */
1022 /* ARGSUSED */
1023 static int
1024 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1025 {
1026 	int error = 0;
1027 
1028 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1029 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1030 		char *name = nvpair_name(pair);
1031 		char *hashp = strchr(name, '#');
1032 
1033 		if (hashp == NULL) {
1034 			error = SET_ERROR(EINVAL);
1035 			break;
1036 		}
1037 		*hashp = '\0';
1038 		error = zfs_secpolicy_write_perms(name,
1039 		    ZFS_DELEG_PERM_BOOKMARK, cr);
1040 		*hashp = '#';
1041 		if (error != 0)
1042 			break;
1043 	}
1044 	return (error);
1045 }
1046 
1047 /* ARGSUSED */
1048 static int
1049 zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1050 {
1051 	return (zfs_secpolicy_write_perms(zc->zc_name,
1052 	    ZFS_DELEG_PERM_REMAP, cr));
1053 }
1054 
1055 /* ARGSUSED */
1056 static int
1057 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1058 {
1059 	nvpair_t *pair, *nextpair;
1060 	int error = 0;
1061 
1062 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1063 	    pair = nextpair) {
1064 		char *name = nvpair_name(pair);
1065 		char *hashp = strchr(name, '#');
1066 		nextpair = nvlist_next_nvpair(innvl, pair);
1067 
1068 		if (hashp == NULL) {
1069 			error = SET_ERROR(EINVAL);
1070 			break;
1071 		}
1072 
1073 		*hashp = '\0';
1074 		error = zfs_secpolicy_write_perms(name,
1075 		    ZFS_DELEG_PERM_DESTROY, cr);
1076 		*hashp = '#';
1077 		if (error == ENOENT) {
1078 			/*
1079 			 * Ignore any filesystems that don't exist (we consider
1080 			 * their bookmarks "already destroyed").  Remove
1081 			 * the name from the nvl here in case the filesystem
1082 			 * is created between now and when we try to destroy
1083 			 * the bookmark (in which case we don't want to
1084 			 * destroy it since we haven't checked for permission).
1085 			 */
1086 			fnvlist_remove_nvpair(innvl, pair);
1087 			error = 0;
1088 		}
1089 		if (error != 0)
1090 			break;
1091 	}
1092 
1093 	return (error);
1094 }
1095 
1096 /* ARGSUSED */
1097 static int
1098 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1099 {
1100 	/*
1101 	 * Even root must have a proper TSD so that we know what pool
1102 	 * to log to.
1103 	 */
1104 	if (tsd_get(zfs_allow_log_key) == NULL)
1105 		return (SET_ERROR(EPERM));
1106 	return (0);
1107 }
1108 
1109 static int
1110 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1111 {
1112 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1113 	int	error;
1114 	char	*origin;
1115 
1116 	if ((error = zfs_get_parent(zc->zc_name, parentname,
1117 	    sizeof (parentname))) != 0)
1118 		return (error);
1119 
1120 	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1121 	    (error = zfs_secpolicy_write_perms(origin,
1122 	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1123 		return (error);
1124 
1125 	if ((error = zfs_secpolicy_write_perms(parentname,
1126 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1127 		return (error);
1128 
1129 	return (zfs_secpolicy_write_perms(parentname,
1130 	    ZFS_DELEG_PERM_MOUNT, cr));
1131 }
1132 
1133 /*
1134  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1135  * SYS_CONFIG privilege, which is not available in a local zone.
1136  */
1137 /* ARGSUSED */
1138 static int
1139 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1140 {
1141 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1142 		return (SET_ERROR(EPERM));
1143 
1144 	return (0);
1145 }
1146 
1147 /*
1148  * Policy for object to name lookups.
1149  */
1150 /* ARGSUSED */
1151 static int
1152 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1153 {
1154 	int error;
1155 
1156 	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1157 		return (0);
1158 
1159 	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1160 	return (error);
1161 }
1162 
1163 /*
1164  * Policy for fault injection.  Requires all privileges.
1165  */
1166 /* ARGSUSED */
1167 static int
1168 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1169 {
1170 	return (secpolicy_zinject(cr));
1171 }
1172 
1173 /* ARGSUSED */
1174 static int
1175 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1176 {
1177 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1178 
1179 	if (prop == ZPROP_INVAL) {
1180 		if (!zfs_prop_user(zc->zc_value))
1181 			return (SET_ERROR(EINVAL));
1182 		return (zfs_secpolicy_write_perms(zc->zc_name,
1183 		    ZFS_DELEG_PERM_USERPROP, cr));
1184 	} else {
1185 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1186 		    NULL, cr));
1187 	}
1188 }
1189 
1190 static int
1191 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1192 {
1193 	int err = zfs_secpolicy_read(zc, innvl, cr);
1194 	if (err)
1195 		return (err);
1196 
1197 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1198 		return (SET_ERROR(EINVAL));
1199 
1200 	if (zc->zc_value[0] == 0) {
1201 		/*
1202 		 * They are asking about a posix uid/gid.  If it's
1203 		 * themself, allow it.
1204 		 */
1205 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1206 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1207 			if (zc->zc_guid == crgetuid(cr))
1208 				return (0);
1209 		} else {
1210 			if (groupmember(zc->zc_guid, cr))
1211 				return (0);
1212 		}
1213 	}
1214 
1215 	return (zfs_secpolicy_write_perms(zc->zc_name,
1216 	    userquota_perms[zc->zc_objset_type], cr));
1217 }
1218 
1219 static int
1220 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1221 {
1222 	int err = zfs_secpolicy_read(zc, innvl, cr);
1223 	if (err)
1224 		return (err);
1225 
1226 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1227 		return (SET_ERROR(EINVAL));
1228 
1229 	return (zfs_secpolicy_write_perms(zc->zc_name,
1230 	    userquota_perms[zc->zc_objset_type], cr));
1231 }
1232 
1233 /* ARGSUSED */
1234 static int
1235 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1236 {
1237 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1238 	    NULL, cr));
1239 }
1240 
1241 /* ARGSUSED */
1242 static int
1243 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1244 {
1245 	nvpair_t *pair;
1246 	nvlist_t *holds;
1247 	int error;
1248 
1249 	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1250 	if (error != 0)
1251 		return (SET_ERROR(EINVAL));
1252 
1253 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1254 	    pair = nvlist_next_nvpair(holds, pair)) {
1255 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1256 		error = dmu_fsname(nvpair_name(pair), fsname);
1257 		if (error != 0)
1258 			return (error);
1259 		error = zfs_secpolicy_write_perms(fsname,
1260 		    ZFS_DELEG_PERM_HOLD, cr);
1261 		if (error != 0)
1262 			return (error);
1263 	}
1264 	return (0);
1265 }
1266 
1267 /* ARGSUSED */
1268 static int
1269 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1270 {
1271 	nvpair_t *pair;
1272 	int error;
1273 
1274 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1275 	    pair = nvlist_next_nvpair(innvl, pair)) {
1276 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1277 		error = dmu_fsname(nvpair_name(pair), fsname);
1278 		if (error != 0)
1279 			return (error);
1280 		error = zfs_secpolicy_write_perms(fsname,
1281 		    ZFS_DELEG_PERM_RELEASE, cr);
1282 		if (error != 0)
1283 			return (error);
1284 	}
1285 	return (0);
1286 }
1287 
1288 /*
1289  * Policy for allowing temporary snapshots to be taken or released
1290  */
1291 static int
1292 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1293 {
1294 	/*
1295 	 * A temporary snapshot is the same as a snapshot,
1296 	 * hold, destroy and release all rolled into one.
1297 	 * Delegated diff alone is sufficient that we allow this.
1298 	 */
1299 	int error;
1300 
1301 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1302 	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1303 		return (0);
1304 
1305 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1306 	if (error == 0)
1307 		error = zfs_secpolicy_hold(zc, innvl, cr);
1308 	if (error == 0)
1309 		error = zfs_secpolicy_release(zc, innvl, cr);
1310 	if (error == 0)
1311 		error = zfs_secpolicy_destroy(zc, innvl, cr);
1312 	return (error);
1313 }
1314 
1315 /*
1316  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1317  */
1318 static int
1319 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1320 {
1321 	char *packed;
1322 	int error;
1323 	nvlist_t *list = NULL;
1324 
1325 	/*
1326 	 * Read in and unpack the user-supplied nvlist.
1327 	 */
1328 	if (size == 0)
1329 		return (SET_ERROR(EINVAL));
1330 
1331 	packed = kmem_alloc(size, KM_SLEEP);
1332 
1333 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1334 	    iflag)) != 0) {
1335 		kmem_free(packed, size);
1336 		return (SET_ERROR(EFAULT));
1337 	}
1338 
1339 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1340 		kmem_free(packed, size);
1341 		return (error);
1342 	}
1343 
1344 	kmem_free(packed, size);
1345 
1346 	*nvp = list;
1347 	return (0);
1348 }
1349 
1350 /*
1351  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1352  * Entries will be removed from the end of the nvlist, and one int32 entry
1353  * named "N_MORE_ERRORS" will be added indicating how many entries were
1354  * removed.
1355  */
1356 static int
1357 nvlist_smush(nvlist_t *errors, size_t max)
1358 {
1359 	size_t size;
1360 
1361 	size = fnvlist_size(errors);
1362 
1363 	if (size > max) {
1364 		nvpair_t *more_errors;
1365 		int n = 0;
1366 
1367 		if (max < 1024)
1368 			return (SET_ERROR(ENOMEM));
1369 
1370 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1371 		more_errors = nvlist_prev_nvpair(errors, NULL);
1372 
1373 		do {
1374 			nvpair_t *pair = nvlist_prev_nvpair(errors,
1375 			    more_errors);
1376 			fnvlist_remove_nvpair(errors, pair);
1377 			n++;
1378 			size = fnvlist_size(errors);
1379 		} while (size > max);
1380 
1381 		fnvlist_remove_nvpair(errors, more_errors);
1382 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1383 		ASSERT3U(fnvlist_size(errors), <=, max);
1384 	}
1385 
1386 	return (0);
1387 }
1388 
1389 static int
1390 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1391 {
1392 	char *packed = NULL;
1393 	int error = 0;
1394 	size_t size;
1395 
1396 	size = fnvlist_size(nvl);
1397 
1398 	if (size > zc->zc_nvlist_dst_size) {
1399 		error = SET_ERROR(ENOMEM);
1400 	} else {
1401 		packed = fnvlist_pack(nvl, &size);
1402 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1403 		    size, zc->zc_iflags) != 0)
1404 			error = SET_ERROR(EFAULT);
1405 		fnvlist_pack_free(packed, size);
1406 	}
1407 
1408 	zc->zc_nvlist_dst_size = size;
1409 	zc->zc_nvlist_dst_filled = B_TRUE;
1410 	return (error);
1411 }
1412 
1413 int
1414 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1415 {
1416 	int error = 0;
1417 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1418 		return (SET_ERROR(EINVAL));
1419 	}
1420 
1421 	mutex_enter(&os->os_user_ptr_lock);
1422 	*zfvp = dmu_objset_get_user(os);
1423 	if (*zfvp) {
1424 		VFS_HOLD((*zfvp)->z_vfs);
1425 	} else {
1426 		error = SET_ERROR(ESRCH);
1427 	}
1428 	mutex_exit(&os->os_user_ptr_lock);
1429 	return (error);
1430 }
1431 
1432 int
1433 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1434 {
1435 	objset_t *os;
1436 	int error;
1437 
1438 	error = dmu_objset_hold(dsname, FTAG, &os);
1439 	if (error != 0)
1440 		return (error);
1441 
1442 	error = getzfsvfs_impl(os, zfvp);
1443 	dmu_objset_rele(os, FTAG);
1444 	return (error);
1445 }
1446 
1447 /*
1448  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1449  * case its z_vfs will be NULL, and it will be opened as the owner.
1450  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1451  * which prevents all vnode ops from running.
1452  */
1453 static int
1454 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1455 {
1456 	int error = 0;
1457 
1458 	if (getzfsvfs(name, zfvp) != 0)
1459 		error = zfsvfs_create(name, zfvp);
1460 	if (error == 0) {
1461 		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1462 		    RW_READER, tag);
1463 		if ((*zfvp)->z_unmounted) {
1464 			/*
1465 			 * XXX we could probably try again, since the unmounting
1466 			 * thread should be just about to disassociate the
1467 			 * objset from the zfsvfs.
1468 			 */
1469 			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1470 			return (SET_ERROR(EBUSY));
1471 		}
1472 	}
1473 	return (error);
1474 }
1475 
1476 static void
1477 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1478 {
1479 	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1480 
1481 	if (zfsvfs->z_vfs) {
1482 		VFS_RELE(zfsvfs->z_vfs);
1483 	} else {
1484 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1485 		zfsvfs_free(zfsvfs);
1486 	}
1487 }
1488 
1489 static int
1490 zfs_ioc_pool_create(zfs_cmd_t *zc)
1491 {
1492 	int error;
1493 	nvlist_t *config, *props = NULL;
1494 	nvlist_t *rootprops = NULL;
1495 	nvlist_t *zplprops = NULL;
1496 	char *spa_name = zc->zc_name;
1497 
1498 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1499 	    zc->zc_iflags, &config))
1500 		return (error);
1501 
1502 	if (zc->zc_nvlist_src_size != 0 && (error =
1503 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1504 	    zc->zc_iflags, &props))) {
1505 		nvlist_free(config);
1506 		return (error);
1507 	}
1508 
1509 	if (props) {
1510 		nvlist_t *nvl = NULL;
1511 		uint64_t version = SPA_VERSION;
1512 		char *tname;
1513 
1514 		(void) nvlist_lookup_uint64(props,
1515 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1516 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1517 			error = SET_ERROR(EINVAL);
1518 			goto pool_props_bad;
1519 		}
1520 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1521 		if (nvl) {
1522 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1523 			if (error != 0) {
1524 				nvlist_free(config);
1525 				nvlist_free(props);
1526 				return (error);
1527 			}
1528 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1529 		}
1530 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1531 		error = zfs_fill_zplprops_root(version, rootprops,
1532 		    zplprops, NULL);
1533 		if (error != 0)
1534 			goto pool_props_bad;
1535 
1536 		if (nvlist_lookup_string(props,
1537 		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1538 			spa_name = tname;
1539 	}
1540 
1541 	error = spa_create(zc->zc_name, config, props, zplprops);
1542 
1543 	/*
1544 	 * Set the remaining root properties
1545 	 */
1546 	if (!error && (error = zfs_set_prop_nvlist(spa_name,
1547 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1548 		(void) spa_destroy(spa_name);
1549 
1550 pool_props_bad:
1551 	nvlist_free(rootprops);
1552 	nvlist_free(zplprops);
1553 	nvlist_free(config);
1554 	nvlist_free(props);
1555 
1556 	return (error);
1557 }
1558 
1559 static int
1560 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1561 {
1562 	int error;
1563 	zfs_log_history(zc);
1564 	error = spa_destroy(zc->zc_name);
1565 	if (error == 0)
1566 		zvol_remove_minors(zc->zc_name);
1567 	return (error);
1568 }
1569 
1570 static int
1571 zfs_ioc_pool_import(zfs_cmd_t *zc)
1572 {
1573 	nvlist_t *config, *props = NULL;
1574 	uint64_t guid;
1575 	int error;
1576 
1577 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1578 	    zc->zc_iflags, &config)) != 0)
1579 		return (error);
1580 
1581 	if (zc->zc_nvlist_src_size != 0 && (error =
1582 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1583 	    zc->zc_iflags, &props))) {
1584 		nvlist_free(config);
1585 		return (error);
1586 	}
1587 
1588 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1589 	    guid != zc->zc_guid)
1590 		error = SET_ERROR(EINVAL);
1591 	else
1592 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1593 
1594 	if (zc->zc_nvlist_dst != 0) {
1595 		int err;
1596 
1597 		if ((err = put_nvlist(zc, config)) != 0)
1598 			error = err;
1599 	}
1600 
1601 	nvlist_free(config);
1602 
1603 	nvlist_free(props);
1604 
1605 	return (error);
1606 }
1607 
1608 static int
1609 zfs_ioc_pool_export(zfs_cmd_t *zc)
1610 {
1611 	int error;
1612 	boolean_t force = (boolean_t)zc->zc_cookie;
1613 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1614 
1615 	zfs_log_history(zc);
1616 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1617 	if (error == 0)
1618 		zvol_remove_minors(zc->zc_name);
1619 	return (error);
1620 }
1621 
1622 static int
1623 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1624 {
1625 	nvlist_t *configs;
1626 	int error;
1627 
1628 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1629 		return (SET_ERROR(EEXIST));
1630 
1631 	error = put_nvlist(zc, configs);
1632 
1633 	nvlist_free(configs);
1634 
1635 	return (error);
1636 }
1637 
1638 /*
1639  * inputs:
1640  * zc_name		name of the pool
1641  *
1642  * outputs:
1643  * zc_cookie		real errno
1644  * zc_nvlist_dst	config nvlist
1645  * zc_nvlist_dst_size	size of config nvlist
1646  */
1647 static int
1648 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1649 {
1650 	nvlist_t *config;
1651 	int error;
1652 	int ret = 0;
1653 
1654 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1655 	    sizeof (zc->zc_value));
1656 
1657 	if (config != NULL) {
1658 		ret = put_nvlist(zc, config);
1659 		nvlist_free(config);
1660 
1661 		/*
1662 		 * The config may be present even if 'error' is non-zero.
1663 		 * In this case we return success, and preserve the real errno
1664 		 * in 'zc_cookie'.
1665 		 */
1666 		zc->zc_cookie = error;
1667 	} else {
1668 		ret = error;
1669 	}
1670 
1671 	return (ret);
1672 }
1673 
1674 /*
1675  * Try to import the given pool, returning pool stats as appropriate so that
1676  * user land knows which devices are available and overall pool health.
1677  */
1678 static int
1679 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1680 {
1681 	nvlist_t *tryconfig, *config;
1682 	int error;
1683 
1684 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1685 	    zc->zc_iflags, &tryconfig)) != 0)
1686 		return (error);
1687 
1688 	config = spa_tryimport(tryconfig);
1689 
1690 	nvlist_free(tryconfig);
1691 
1692 	if (config == NULL)
1693 		return (SET_ERROR(EINVAL));
1694 
1695 	error = put_nvlist(zc, config);
1696 	nvlist_free(config);
1697 
1698 	return (error);
1699 }
1700 
1701 /*
1702  * inputs:
1703  * zc_name              name of the pool
1704  * zc_cookie            scan func (pool_scan_func_t)
1705  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1706  */
1707 static int
1708 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1709 {
1710 	spa_t *spa;
1711 	int error;
1712 
1713 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1714 		return (error);
1715 
1716 	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1717 		return (SET_ERROR(EINVAL));
1718 
1719 	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1720 		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1721 	else if (zc->zc_cookie == POOL_SCAN_NONE)
1722 		error = spa_scan_stop(spa);
1723 	else
1724 		error = spa_scan(spa, zc->zc_cookie);
1725 
1726 	spa_close(spa, FTAG);
1727 
1728 	return (error);
1729 }
1730 
1731 static int
1732 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1733 {
1734 	spa_t *spa;
1735 	int error;
1736 
1737 	error = spa_open(zc->zc_name, &spa, FTAG);
1738 	if (error == 0) {
1739 		spa_freeze(spa);
1740 		spa_close(spa, FTAG);
1741 	}
1742 	return (error);
1743 }
1744 
1745 static int
1746 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1747 {
1748 	spa_t *spa;
1749 	int error;
1750 
1751 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1752 		return (error);
1753 
1754 	if (zc->zc_cookie < spa_version(spa) ||
1755 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1756 		spa_close(spa, FTAG);
1757 		return (SET_ERROR(EINVAL));
1758 	}
1759 
1760 	spa_upgrade(spa, zc->zc_cookie);
1761 	spa_close(spa, FTAG);
1762 
1763 	return (error);
1764 }
1765 
1766 static int
1767 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1768 {
1769 	spa_t *spa;
1770 	char *hist_buf;
1771 	uint64_t size;
1772 	int error;
1773 
1774 	if ((size = zc->zc_history_len) == 0)
1775 		return (SET_ERROR(EINVAL));
1776 
1777 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1778 		return (error);
1779 
1780 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1781 		spa_close(spa, FTAG);
1782 		return (SET_ERROR(ENOTSUP));
1783 	}
1784 
1785 	hist_buf = kmem_alloc(size, KM_SLEEP);
1786 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1787 	    &zc->zc_history_len, hist_buf)) == 0) {
1788 		error = ddi_copyout(hist_buf,
1789 		    (void *)(uintptr_t)zc->zc_history,
1790 		    zc->zc_history_len, zc->zc_iflags);
1791 	}
1792 
1793 	spa_close(spa, FTAG);
1794 	kmem_free(hist_buf, size);
1795 	return (error);
1796 }
1797 
1798 static int
1799 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1800 {
1801 	spa_t *spa;
1802 	int error;
1803 
1804 	error = spa_open(zc->zc_name, &spa, FTAG);
1805 	if (error == 0) {
1806 		error = spa_change_guid(spa);
1807 		spa_close(spa, FTAG);
1808 	}
1809 	return (error);
1810 }
1811 
1812 static int
1813 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1814 {
1815 	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1816 }
1817 
1818 /*
1819  * inputs:
1820  * zc_name		name of filesystem
1821  * zc_obj		object to find
1822  *
1823  * outputs:
1824  * zc_value		name of object
1825  */
1826 static int
1827 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1828 {
1829 	objset_t *os;
1830 	int error;
1831 
1832 	/* XXX reading from objset not owned */
1833 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1834 		return (error);
1835 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1836 		dmu_objset_rele(os, FTAG);
1837 		return (SET_ERROR(EINVAL));
1838 	}
1839 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1840 	    sizeof (zc->zc_value));
1841 	dmu_objset_rele(os, FTAG);
1842 
1843 	return (error);
1844 }
1845 
1846 /*
1847  * inputs:
1848  * zc_name		name of filesystem
1849  * zc_obj		object to find
1850  *
1851  * outputs:
1852  * zc_stat		stats on object
1853  * zc_value		path to object
1854  */
1855 static int
1856 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1857 {
1858 	objset_t *os;
1859 	int error;
1860 
1861 	/* XXX reading from objset not owned */
1862 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1863 		return (error);
1864 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1865 		dmu_objset_rele(os, FTAG);
1866 		return (SET_ERROR(EINVAL));
1867 	}
1868 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1869 	    sizeof (zc->zc_value));
1870 	dmu_objset_rele(os, FTAG);
1871 
1872 	return (error);
1873 }
1874 
1875 static int
1876 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1877 {
1878 	spa_t *spa;
1879 	int error;
1880 	nvlist_t *config, **l2cache, **spares;
1881 	uint_t nl2cache = 0, nspares = 0;
1882 
1883 	error = spa_open(zc->zc_name, &spa, FTAG);
1884 	if (error != 0)
1885 		return (error);
1886 
1887 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1888 	    zc->zc_iflags, &config);
1889 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1890 	    &l2cache, &nl2cache);
1891 
1892 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1893 	    &spares, &nspares);
1894 
1895 	/*
1896 	 * A root pool with concatenated devices is not supported.
1897 	 * Thus, can not add a device to a root pool.
1898 	 *
1899 	 * Intent log device can not be added to a rootpool because
1900 	 * during mountroot, zil is replayed, a seperated log device
1901 	 * can not be accessed during the mountroot time.
1902 	 *
1903 	 * l2cache and spare devices are ok to be added to a rootpool.
1904 	 */
1905 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1906 		nvlist_free(config);
1907 		spa_close(spa, FTAG);
1908 		return (SET_ERROR(EDOM));
1909 	}
1910 
1911 	if (error == 0) {
1912 		error = spa_vdev_add(spa, config);
1913 		nvlist_free(config);
1914 	}
1915 	spa_close(spa, FTAG);
1916 	return (error);
1917 }
1918 
1919 /*
1920  * inputs:
1921  * zc_name		name of the pool
1922  * zc_guid		guid of vdev to remove
1923  * zc_cookie		cancel removal
1924  */
1925 static int
1926 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1927 {
1928 	spa_t *spa;
1929 	int error;
1930 
1931 	error = spa_open(zc->zc_name, &spa, FTAG);
1932 	if (error != 0)
1933 		return (error);
1934 	if (zc->zc_cookie != 0) {
1935 		error = spa_vdev_remove_cancel(spa);
1936 	} else {
1937 		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1938 	}
1939 	spa_close(spa, FTAG);
1940 	return (error);
1941 }
1942 
1943 static int
1944 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1945 {
1946 	spa_t *spa;
1947 	int error;
1948 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1949 
1950 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1951 		return (error);
1952 	switch (zc->zc_cookie) {
1953 	case VDEV_STATE_ONLINE:
1954 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1955 		break;
1956 
1957 	case VDEV_STATE_OFFLINE:
1958 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1959 		break;
1960 
1961 	case VDEV_STATE_FAULTED:
1962 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1963 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1964 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1965 
1966 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1967 		break;
1968 
1969 	case VDEV_STATE_DEGRADED:
1970 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1971 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1972 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1973 
1974 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1975 		break;
1976 
1977 	default:
1978 		error = SET_ERROR(EINVAL);
1979 	}
1980 	zc->zc_cookie = newstate;
1981 	spa_close(spa, FTAG);
1982 	return (error);
1983 }
1984 
1985 static int
1986 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1987 {
1988 	spa_t *spa;
1989 	int replacing = zc->zc_cookie;
1990 	nvlist_t *config;
1991 	int error;
1992 
1993 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1994 		return (error);
1995 
1996 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1997 	    zc->zc_iflags, &config)) == 0) {
1998 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1999 		nvlist_free(config);
2000 	}
2001 
2002 	spa_close(spa, FTAG);
2003 	return (error);
2004 }
2005 
2006 static int
2007 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2008 {
2009 	spa_t *spa;
2010 	int error;
2011 
2012 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2013 		return (error);
2014 
2015 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2016 
2017 	spa_close(spa, FTAG);
2018 	return (error);
2019 }
2020 
2021 static int
2022 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2023 {
2024 	spa_t *spa;
2025 	nvlist_t *config, *props = NULL;
2026 	int error;
2027 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2028 
2029 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2030 		return (error);
2031 
2032 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2033 	    zc->zc_iflags, &config)) {
2034 		spa_close(spa, FTAG);
2035 		return (error);
2036 	}
2037 
2038 	if (zc->zc_nvlist_src_size != 0 && (error =
2039 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2040 	    zc->zc_iflags, &props))) {
2041 		spa_close(spa, FTAG);
2042 		nvlist_free(config);
2043 		return (error);
2044 	}
2045 
2046 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2047 
2048 	spa_close(spa, FTAG);
2049 
2050 	nvlist_free(config);
2051 	nvlist_free(props);
2052 
2053 	return (error);
2054 }
2055 
2056 static int
2057 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2058 {
2059 	spa_t *spa;
2060 	char *path = zc->zc_value;
2061 	uint64_t guid = zc->zc_guid;
2062 	int error;
2063 
2064 	error = spa_open(zc->zc_name, &spa, FTAG);
2065 	if (error != 0)
2066 		return (error);
2067 
2068 	error = spa_vdev_setpath(spa, guid, path);
2069 	spa_close(spa, FTAG);
2070 	return (error);
2071 }
2072 
2073 static int
2074 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2075 {
2076 	spa_t *spa;
2077 	char *fru = zc->zc_value;
2078 	uint64_t guid = zc->zc_guid;
2079 	int error;
2080 
2081 	error = spa_open(zc->zc_name, &spa, FTAG);
2082 	if (error != 0)
2083 		return (error);
2084 
2085 	error = spa_vdev_setfru(spa, guid, fru);
2086 	spa_close(spa, FTAG);
2087 	return (error);
2088 }
2089 
2090 static int
2091 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2092 {
2093 	int error = 0;
2094 	nvlist_t *nv;
2095 
2096 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2097 
2098 	if (zc->zc_nvlist_dst != 0 &&
2099 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2100 		dmu_objset_stats(os, nv);
2101 		/*
2102 		 * NB: zvol_get_stats() will read the objset contents,
2103 		 * which we aren't supposed to do with a
2104 		 * DS_MODE_USER hold, because it could be
2105 		 * inconsistent.  So this is a bit of a workaround...
2106 		 * XXX reading with out owning
2107 		 */
2108 		if (!zc->zc_objset_stats.dds_inconsistent &&
2109 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2110 			error = zvol_get_stats(os, nv);
2111 			if (error == EIO)
2112 				return (error);
2113 			VERIFY0(error);
2114 		}
2115 		error = put_nvlist(zc, nv);
2116 		nvlist_free(nv);
2117 	}
2118 
2119 	return (error);
2120 }
2121 
2122 /*
2123  * inputs:
2124  * zc_name		name of filesystem
2125  * zc_nvlist_dst_size	size of buffer for property nvlist
2126  *
2127  * outputs:
2128  * zc_objset_stats	stats
2129  * zc_nvlist_dst	property nvlist
2130  * zc_nvlist_dst_size	size of property nvlist
2131  */
2132 static int
2133 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2134 {
2135 	objset_t *os;
2136 	int error;
2137 
2138 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2139 	if (error == 0) {
2140 		error = zfs_ioc_objset_stats_impl(zc, os);
2141 		dmu_objset_rele(os, FTAG);
2142 	}
2143 
2144 	return (error);
2145 }
2146 
2147 /*
2148  * inputs:
2149  * zc_name		name of filesystem
2150  * zc_nvlist_dst_size	size of buffer for property nvlist
2151  *
2152  * outputs:
2153  * zc_nvlist_dst	received property nvlist
2154  * zc_nvlist_dst_size	size of received property nvlist
2155  *
2156  * Gets received properties (distinct from local properties on or after
2157  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2158  * local property values.
2159  */
2160 static int
2161 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2162 {
2163 	int error = 0;
2164 	nvlist_t *nv;
2165 
2166 	/*
2167 	 * Without this check, we would return local property values if the
2168 	 * caller has not already received properties on or after
2169 	 * SPA_VERSION_RECVD_PROPS.
2170 	 */
2171 	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2172 		return (SET_ERROR(ENOTSUP));
2173 
2174 	if (zc->zc_nvlist_dst != 0 &&
2175 	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2176 		error = put_nvlist(zc, nv);
2177 		nvlist_free(nv);
2178 	}
2179 
2180 	return (error);
2181 }
2182 
2183 static int
2184 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2185 {
2186 	uint64_t value;
2187 	int error;
2188 
2189 	/*
2190 	 * zfs_get_zplprop() will either find a value or give us
2191 	 * the default value (if there is one).
2192 	 */
2193 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2194 		return (error);
2195 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2196 	return (0);
2197 }
2198 
2199 /*
2200  * inputs:
2201  * zc_name		name of filesystem
2202  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2203  *
2204  * outputs:
2205  * zc_nvlist_dst	zpl property nvlist
2206  * zc_nvlist_dst_size	size of zpl property nvlist
2207  */
2208 static int
2209 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2210 {
2211 	objset_t *os;
2212 	int err;
2213 
2214 	/* XXX reading without owning */
2215 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2216 		return (err);
2217 
2218 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2219 
2220 	/*
2221 	 * NB: nvl_add_zplprop() will read the objset contents,
2222 	 * which we aren't supposed to do with a DS_MODE_USER
2223 	 * hold, because it could be inconsistent.
2224 	 */
2225 	if (zc->zc_nvlist_dst != 0 &&
2226 	    !zc->zc_objset_stats.dds_inconsistent &&
2227 	    dmu_objset_type(os) == DMU_OST_ZFS) {
2228 		nvlist_t *nv;
2229 
2230 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2231 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2232 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2233 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2234 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2235 			err = put_nvlist(zc, nv);
2236 		nvlist_free(nv);
2237 	} else {
2238 		err = SET_ERROR(ENOENT);
2239 	}
2240 	dmu_objset_rele(os, FTAG);
2241 	return (err);
2242 }
2243 
2244 static boolean_t
2245 dataset_name_hidden(const char *name)
2246 {
2247 	/*
2248 	 * Skip over datasets that are not visible in this zone,
2249 	 * internal datasets (which have a $ in their name), and
2250 	 * temporary datasets (which have a % in their name).
2251 	 */
2252 	if (strchr(name, '$') != NULL)
2253 		return (B_TRUE);
2254 	if (strchr(name, '%') != NULL)
2255 		return (B_TRUE);
2256 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2257 		return (B_TRUE);
2258 	return (B_FALSE);
2259 }
2260 
2261 /*
2262  * inputs:
2263  * zc_name		name of filesystem
2264  * zc_cookie		zap cursor
2265  * zc_nvlist_dst_size	size of buffer for property nvlist
2266  *
2267  * outputs:
2268  * zc_name		name of next filesystem
2269  * zc_cookie		zap cursor
2270  * zc_objset_stats	stats
2271  * zc_nvlist_dst	property nvlist
2272  * zc_nvlist_dst_size	size of property nvlist
2273  */
2274 static int
2275 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2276 {
2277 	objset_t *os;
2278 	int error;
2279 	char *p;
2280 	size_t orig_len = strlen(zc->zc_name);
2281 
2282 top:
2283 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2284 		if (error == ENOENT)
2285 			error = SET_ERROR(ESRCH);
2286 		return (error);
2287 	}
2288 
2289 	p = strrchr(zc->zc_name, '/');
2290 	if (p == NULL || p[1] != '\0')
2291 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2292 	p = zc->zc_name + strlen(zc->zc_name);
2293 
2294 	do {
2295 		error = dmu_dir_list_next(os,
2296 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2297 		    NULL, &zc->zc_cookie);
2298 		if (error == ENOENT)
2299 			error = SET_ERROR(ESRCH);
2300 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2301 	dmu_objset_rele(os, FTAG);
2302 
2303 	/*
2304 	 * If it's an internal dataset (ie. with a '$' in its name),
2305 	 * don't try to get stats for it, otherwise we'll return ENOENT.
2306 	 */
2307 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2308 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2309 		if (error == ENOENT) {
2310 			/* We lost a race with destroy, get the next one. */
2311 			zc->zc_name[orig_len] = '\0';
2312 			goto top;
2313 		}
2314 	}
2315 	return (error);
2316 }
2317 
2318 /*
2319  * inputs:
2320  * zc_name		name of filesystem
2321  * zc_cookie		zap cursor
2322  * zc_nvlist_dst_size	size of buffer for property nvlist
2323  * zc_simple		when set, only name is requested
2324  *
2325  * outputs:
2326  * zc_name		name of next snapshot
2327  * zc_objset_stats	stats
2328  * zc_nvlist_dst	property nvlist
2329  * zc_nvlist_dst_size	size of property nvlist
2330  */
2331 static int
2332 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2333 {
2334 	objset_t *os;
2335 	int error;
2336 
2337 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2338 	if (error != 0) {
2339 		return (error == ENOENT ? ESRCH : error);
2340 	}
2341 
2342 	/*
2343 	 * A dataset name of maximum length cannot have any snapshots,
2344 	 * so exit immediately.
2345 	 */
2346 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2347 	    ZFS_MAX_DATASET_NAME_LEN) {
2348 		dmu_objset_rele(os, FTAG);
2349 		return (SET_ERROR(ESRCH));
2350 	}
2351 
2352 	error = dmu_snapshot_list_next(os,
2353 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2354 	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2355 	    NULL);
2356 
2357 	if (error == 0 && !zc->zc_simple) {
2358 		dsl_dataset_t *ds;
2359 		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2360 
2361 		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2362 		if (error == 0) {
2363 			objset_t *ossnap;
2364 
2365 			error = dmu_objset_from_ds(ds, &ossnap);
2366 			if (error == 0)
2367 				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2368 			dsl_dataset_rele(ds, FTAG);
2369 		}
2370 	} else if (error == ENOENT) {
2371 		error = SET_ERROR(ESRCH);
2372 	}
2373 
2374 	dmu_objset_rele(os, FTAG);
2375 	/* if we failed, undo the @ that we tacked on to zc_name */
2376 	if (error != 0)
2377 		*strchr(zc->zc_name, '@') = '\0';
2378 	return (error);
2379 }
2380 
2381 static int
2382 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2383 {
2384 	const char *propname = nvpair_name(pair);
2385 	uint64_t *valary;
2386 	unsigned int vallen;
2387 	const char *domain;
2388 	char *dash;
2389 	zfs_userquota_prop_t type;
2390 	uint64_t rid;
2391 	uint64_t quota;
2392 	zfsvfs_t *zfsvfs;
2393 	int err;
2394 
2395 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2396 		nvlist_t *attrs;
2397 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2398 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2399 		    &pair) != 0)
2400 			return (SET_ERROR(EINVAL));
2401 	}
2402 
2403 	/*
2404 	 * A correctly constructed propname is encoded as
2405 	 * userquota@<rid>-<domain>.
2406 	 */
2407 	if ((dash = strchr(propname, '-')) == NULL ||
2408 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2409 	    vallen != 3)
2410 		return (SET_ERROR(EINVAL));
2411 
2412 	domain = dash + 1;
2413 	type = valary[0];
2414 	rid = valary[1];
2415 	quota = valary[2];
2416 
2417 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2418 	if (err == 0) {
2419 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2420 		zfsvfs_rele(zfsvfs, FTAG);
2421 	}
2422 
2423 	return (err);
2424 }
2425 
2426 /*
2427  * If the named property is one that has a special function to set its value,
2428  * return 0 on success and a positive error code on failure; otherwise if it is
2429  * not one of the special properties handled by this function, return -1.
2430  *
2431  * XXX: It would be better for callers of the property interface if we handled
2432  * these special cases in dsl_prop.c (in the dsl layer).
2433  */
2434 static int
2435 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2436     nvpair_t *pair)
2437 {
2438 	const char *propname = nvpair_name(pair);
2439 	zfs_prop_t prop = zfs_name_to_prop(propname);
2440 	uint64_t intval;
2441 	int err = -1;
2442 
2443 	if (prop == ZPROP_INVAL) {
2444 		if (zfs_prop_userquota(propname))
2445 			return (zfs_prop_set_userquota(dsname, pair));
2446 		return (-1);
2447 	}
2448 
2449 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2450 		nvlist_t *attrs;
2451 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2452 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2453 		    &pair) == 0);
2454 	}
2455 
2456 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2457 		return (-1);
2458 
2459 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2460 
2461 	switch (prop) {
2462 	case ZFS_PROP_QUOTA:
2463 		err = dsl_dir_set_quota(dsname, source, intval);
2464 		break;
2465 	case ZFS_PROP_REFQUOTA:
2466 		err = dsl_dataset_set_refquota(dsname, source, intval);
2467 		break;
2468 	case ZFS_PROP_FILESYSTEM_LIMIT:
2469 	case ZFS_PROP_SNAPSHOT_LIMIT:
2470 		if (intval == UINT64_MAX) {
2471 			/* clearing the limit, just do it */
2472 			err = 0;
2473 		} else {
2474 			err = dsl_dir_activate_fs_ss_limit(dsname);
2475 		}
2476 		/*
2477 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2478 		 * default path to set the value in the nvlist.
2479 		 */
2480 		if (err == 0)
2481 			err = -1;
2482 		break;
2483 	case ZFS_PROP_RESERVATION:
2484 		err = dsl_dir_set_reservation(dsname, source, intval);
2485 		break;
2486 	case ZFS_PROP_REFRESERVATION:
2487 		err = dsl_dataset_set_refreservation(dsname, source, intval);
2488 		break;
2489 	case ZFS_PROP_VOLSIZE:
2490 		err = zvol_set_volsize(dsname, intval);
2491 		break;
2492 	case ZFS_PROP_VERSION:
2493 	{
2494 		zfsvfs_t *zfsvfs;
2495 
2496 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2497 			break;
2498 
2499 		err = zfs_set_version(zfsvfs, intval);
2500 		zfsvfs_rele(zfsvfs, FTAG);
2501 
2502 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2503 			zfs_cmd_t *zc;
2504 
2505 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2506 			(void) strcpy(zc->zc_name, dsname);
2507 			(void) zfs_ioc_userspace_upgrade(zc);
2508 			kmem_free(zc, sizeof (zfs_cmd_t));
2509 		}
2510 		break;
2511 	}
2512 	default:
2513 		err = -1;
2514 	}
2515 
2516 	return (err);
2517 }
2518 
2519 /*
2520  * This function is best effort. If it fails to set any of the given properties,
2521  * it continues to set as many as it can and returns the last error
2522  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2523  * with the list of names of all the properties that failed along with the
2524  * corresponding error numbers.
2525  *
2526  * If every property is set successfully, zero is returned and errlist is not
2527  * modified.
2528  */
2529 int
2530 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2531     nvlist_t *errlist)
2532 {
2533 	nvpair_t *pair;
2534 	nvpair_t *propval;
2535 	int rv = 0;
2536 	uint64_t intval;
2537 	char *strval;
2538 	nvlist_t *genericnvl = fnvlist_alloc();
2539 	nvlist_t *retrynvl = fnvlist_alloc();
2540 
2541 retry:
2542 	pair = NULL;
2543 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2544 		const char *propname = nvpair_name(pair);
2545 		zfs_prop_t prop = zfs_name_to_prop(propname);
2546 		int err = 0;
2547 
2548 		/* decode the property value */
2549 		propval = pair;
2550 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2551 			nvlist_t *attrs;
2552 			attrs = fnvpair_value_nvlist(pair);
2553 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2554 			    &propval) != 0)
2555 				err = SET_ERROR(EINVAL);
2556 		}
2557 
2558 		/* Validate value type */
2559 		if (err == 0 && prop == ZPROP_INVAL) {
2560 			if (zfs_prop_user(propname)) {
2561 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2562 					err = SET_ERROR(EINVAL);
2563 			} else if (zfs_prop_userquota(propname)) {
2564 				if (nvpair_type(propval) !=
2565 				    DATA_TYPE_UINT64_ARRAY)
2566 					err = SET_ERROR(EINVAL);
2567 			} else {
2568 				err = SET_ERROR(EINVAL);
2569 			}
2570 		} else if (err == 0) {
2571 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2572 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2573 					err = SET_ERROR(EINVAL);
2574 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2575 				const char *unused;
2576 
2577 				intval = fnvpair_value_uint64(propval);
2578 
2579 				switch (zfs_prop_get_type(prop)) {
2580 				case PROP_TYPE_NUMBER:
2581 					break;
2582 				case PROP_TYPE_STRING:
2583 					err = SET_ERROR(EINVAL);
2584 					break;
2585 				case PROP_TYPE_INDEX:
2586 					if (zfs_prop_index_to_string(prop,
2587 					    intval, &unused) != 0)
2588 						err = SET_ERROR(EINVAL);
2589 					break;
2590 				default:
2591 					cmn_err(CE_PANIC,
2592 					    "unknown property type");
2593 				}
2594 			} else {
2595 				err = SET_ERROR(EINVAL);
2596 			}
2597 		}
2598 
2599 		/* Validate permissions */
2600 		if (err == 0)
2601 			err = zfs_check_settable(dsname, pair, CRED());
2602 
2603 		if (err == 0) {
2604 			err = zfs_prop_set_special(dsname, source, pair);
2605 			if (err == -1) {
2606 				/*
2607 				 * For better performance we build up a list of
2608 				 * properties to set in a single transaction.
2609 				 */
2610 				err = nvlist_add_nvpair(genericnvl, pair);
2611 			} else if (err != 0 && nvl != retrynvl) {
2612 				/*
2613 				 * This may be a spurious error caused by
2614 				 * receiving quota and reservation out of order.
2615 				 * Try again in a second pass.
2616 				 */
2617 				err = nvlist_add_nvpair(retrynvl, pair);
2618 			}
2619 		}
2620 
2621 		if (err != 0) {
2622 			if (errlist != NULL)
2623 				fnvlist_add_int32(errlist, propname, err);
2624 			rv = err;
2625 		}
2626 	}
2627 
2628 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2629 		nvl = retrynvl;
2630 		goto retry;
2631 	}
2632 
2633 	if (!nvlist_empty(genericnvl) &&
2634 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2635 		/*
2636 		 * If this fails, we still want to set as many properties as we
2637 		 * can, so try setting them individually.
2638 		 */
2639 		pair = NULL;
2640 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2641 			const char *propname = nvpair_name(pair);
2642 			int err = 0;
2643 
2644 			propval = pair;
2645 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2646 				nvlist_t *attrs;
2647 				attrs = fnvpair_value_nvlist(pair);
2648 				propval = fnvlist_lookup_nvpair(attrs,
2649 				    ZPROP_VALUE);
2650 			}
2651 
2652 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2653 				strval = fnvpair_value_string(propval);
2654 				err = dsl_prop_set_string(dsname, propname,
2655 				    source, strval);
2656 			} else {
2657 				intval = fnvpair_value_uint64(propval);
2658 				err = dsl_prop_set_int(dsname, propname, source,
2659 				    intval);
2660 			}
2661 
2662 			if (err != 0) {
2663 				if (errlist != NULL) {
2664 					fnvlist_add_int32(errlist, propname,
2665 					    err);
2666 				}
2667 				rv = err;
2668 			}
2669 		}
2670 	}
2671 	nvlist_free(genericnvl);
2672 	nvlist_free(retrynvl);
2673 
2674 	return (rv);
2675 }
2676 
2677 /*
2678  * Check that all the properties are valid user properties.
2679  */
2680 static int
2681 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2682 {
2683 	nvpair_t *pair = NULL;
2684 	int error = 0;
2685 
2686 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2687 		const char *propname = nvpair_name(pair);
2688 
2689 		if (!zfs_prop_user(propname) ||
2690 		    nvpair_type(pair) != DATA_TYPE_STRING)
2691 			return (SET_ERROR(EINVAL));
2692 
2693 		if (error = zfs_secpolicy_write_perms(fsname,
2694 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2695 			return (error);
2696 
2697 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2698 			return (SET_ERROR(ENAMETOOLONG));
2699 
2700 		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2701 			return (E2BIG);
2702 	}
2703 	return (0);
2704 }
2705 
2706 static void
2707 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2708 {
2709 	nvpair_t *pair;
2710 
2711 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2712 
2713 	pair = NULL;
2714 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2715 		if (nvlist_exists(skipped, nvpair_name(pair)))
2716 			continue;
2717 
2718 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2719 	}
2720 }
2721 
2722 static int
2723 clear_received_props(const char *dsname, nvlist_t *props,
2724     nvlist_t *skipped)
2725 {
2726 	int err = 0;
2727 	nvlist_t *cleared_props = NULL;
2728 	props_skip(props, skipped, &cleared_props);
2729 	if (!nvlist_empty(cleared_props)) {
2730 		/*
2731 		 * Acts on local properties until the dataset has received
2732 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2733 		 */
2734 		zprop_source_t flags = (ZPROP_SRC_NONE |
2735 		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2736 		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2737 	}
2738 	nvlist_free(cleared_props);
2739 	return (err);
2740 }
2741 
2742 /*
2743  * inputs:
2744  * zc_name		name of filesystem
2745  * zc_value		name of property to set
2746  * zc_nvlist_src{_size}	nvlist of properties to apply
2747  * zc_cookie		received properties flag
2748  *
2749  * outputs:
2750  * zc_nvlist_dst{_size} error for each unapplied received property
2751  */
2752 static int
2753 zfs_ioc_set_prop(zfs_cmd_t *zc)
2754 {
2755 	nvlist_t *nvl;
2756 	boolean_t received = zc->zc_cookie;
2757 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2758 	    ZPROP_SRC_LOCAL);
2759 	nvlist_t *errors;
2760 	int error;
2761 
2762 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2763 	    zc->zc_iflags, &nvl)) != 0)
2764 		return (error);
2765 
2766 	if (received) {
2767 		nvlist_t *origprops;
2768 
2769 		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2770 			(void) clear_received_props(zc->zc_name,
2771 			    origprops, nvl);
2772 			nvlist_free(origprops);
2773 		}
2774 
2775 		error = dsl_prop_set_hasrecvd(zc->zc_name);
2776 	}
2777 
2778 	errors = fnvlist_alloc();
2779 	if (error == 0)
2780 		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2781 
2782 	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
2783 		(void) put_nvlist(zc, errors);
2784 	}
2785 
2786 	nvlist_free(errors);
2787 	nvlist_free(nvl);
2788 	return (error);
2789 }
2790 
2791 /*
2792  * inputs:
2793  * zc_name		name of filesystem
2794  * zc_value		name of property to inherit
2795  * zc_cookie		revert to received value if TRUE
2796  *
2797  * outputs:		none
2798  */
2799 static int
2800 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2801 {
2802 	const char *propname = zc->zc_value;
2803 	zfs_prop_t prop = zfs_name_to_prop(propname);
2804 	boolean_t received = zc->zc_cookie;
2805 	zprop_source_t source = (received
2806 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2807 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2808 
2809 	if (received) {
2810 		nvlist_t *dummy;
2811 		nvpair_t *pair;
2812 		zprop_type_t type;
2813 		int err;
2814 
2815 		/*
2816 		 * zfs_prop_set_special() expects properties in the form of an
2817 		 * nvpair with type info.
2818 		 */
2819 		if (prop == ZPROP_INVAL) {
2820 			if (!zfs_prop_user(propname))
2821 				return (SET_ERROR(EINVAL));
2822 
2823 			type = PROP_TYPE_STRING;
2824 		} else if (prop == ZFS_PROP_VOLSIZE ||
2825 		    prop == ZFS_PROP_VERSION) {
2826 			return (SET_ERROR(EINVAL));
2827 		} else {
2828 			type = zfs_prop_get_type(prop);
2829 		}
2830 
2831 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2832 
2833 		switch (type) {
2834 		case PROP_TYPE_STRING:
2835 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2836 			break;
2837 		case PROP_TYPE_NUMBER:
2838 		case PROP_TYPE_INDEX:
2839 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2840 			break;
2841 		default:
2842 			nvlist_free(dummy);
2843 			return (SET_ERROR(EINVAL));
2844 		}
2845 
2846 		pair = nvlist_next_nvpair(dummy, NULL);
2847 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2848 		nvlist_free(dummy);
2849 		if (err != -1)
2850 			return (err); /* special property already handled */
2851 	} else {
2852 		/*
2853 		 * Only check this in the non-received case. We want to allow
2854 		 * 'inherit -S' to revert non-inheritable properties like quota
2855 		 * and reservation to the received or default values even though
2856 		 * they are not considered inheritable.
2857 		 */
2858 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2859 			return (SET_ERROR(EINVAL));
2860 	}
2861 
2862 	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2863 	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2864 }
2865 
2866 static int
2867 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2868 {
2869 	nvlist_t *props;
2870 	spa_t *spa;
2871 	int error;
2872 	nvpair_t *pair;
2873 
2874 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2875 	    zc->zc_iflags, &props))
2876 		return (error);
2877 
2878 	/*
2879 	 * If the only property is the configfile, then just do a spa_lookup()
2880 	 * to handle the faulted case.
2881 	 */
2882 	pair = nvlist_next_nvpair(props, NULL);
2883 	if (pair != NULL && strcmp(nvpair_name(pair),
2884 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2885 	    nvlist_next_nvpair(props, pair) == NULL) {
2886 		mutex_enter(&spa_namespace_lock);
2887 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2888 			spa_configfile_set(spa, props, B_FALSE);
2889 			spa_write_cachefile(spa, B_FALSE, B_TRUE);
2890 		}
2891 		mutex_exit(&spa_namespace_lock);
2892 		if (spa != NULL) {
2893 			nvlist_free(props);
2894 			return (0);
2895 		}
2896 	}
2897 
2898 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2899 		nvlist_free(props);
2900 		return (error);
2901 	}
2902 
2903 	error = spa_prop_set(spa, props);
2904 
2905 	nvlist_free(props);
2906 	spa_close(spa, FTAG);
2907 
2908 	return (error);
2909 }
2910 
2911 static int
2912 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2913 {
2914 	spa_t *spa;
2915 	int error;
2916 	nvlist_t *nvp = NULL;
2917 
2918 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2919 		/*
2920 		 * If the pool is faulted, there may be properties we can still
2921 		 * get (such as altroot and cachefile), so attempt to get them
2922 		 * anyway.
2923 		 */
2924 		mutex_enter(&spa_namespace_lock);
2925 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2926 			error = spa_prop_get(spa, &nvp);
2927 		mutex_exit(&spa_namespace_lock);
2928 	} else {
2929 		error = spa_prop_get(spa, &nvp);
2930 		spa_close(spa, FTAG);
2931 	}
2932 
2933 	if (error == 0 && zc->zc_nvlist_dst != 0)
2934 		error = put_nvlist(zc, nvp);
2935 	else
2936 		error = SET_ERROR(EFAULT);
2937 
2938 	nvlist_free(nvp);
2939 	return (error);
2940 }
2941 
2942 /*
2943  * inputs:
2944  * zc_name		name of filesystem
2945  * zc_nvlist_src{_size}	nvlist of delegated permissions
2946  * zc_perm_action	allow/unallow flag
2947  *
2948  * outputs:		none
2949  */
2950 static int
2951 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2952 {
2953 	int error;
2954 	nvlist_t *fsaclnv = NULL;
2955 
2956 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2957 	    zc->zc_iflags, &fsaclnv)) != 0)
2958 		return (error);
2959 
2960 	/*
2961 	 * Verify nvlist is constructed correctly
2962 	 */
2963 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2964 		nvlist_free(fsaclnv);
2965 		return (SET_ERROR(EINVAL));
2966 	}
2967 
2968 	/*
2969 	 * If we don't have PRIV_SYS_MOUNT, then validate
2970 	 * that user is allowed to hand out each permission in
2971 	 * the nvlist(s)
2972 	 */
2973 
2974 	error = secpolicy_zfs(CRED());
2975 	if (error != 0) {
2976 		if (zc->zc_perm_action == B_FALSE) {
2977 			error = dsl_deleg_can_allow(zc->zc_name,
2978 			    fsaclnv, CRED());
2979 		} else {
2980 			error = dsl_deleg_can_unallow(zc->zc_name,
2981 			    fsaclnv, CRED());
2982 		}
2983 	}
2984 
2985 	if (error == 0)
2986 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2987 
2988 	nvlist_free(fsaclnv);
2989 	return (error);
2990 }
2991 
2992 /*
2993  * inputs:
2994  * zc_name		name of filesystem
2995  *
2996  * outputs:
2997  * zc_nvlist_src{_size}	nvlist of delegated permissions
2998  */
2999 static int
3000 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3001 {
3002 	nvlist_t *nvp;
3003 	int error;
3004 
3005 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3006 		error = put_nvlist(zc, nvp);
3007 		nvlist_free(nvp);
3008 	}
3009 
3010 	return (error);
3011 }
3012 
3013 /* ARGSUSED */
3014 static void
3015 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3016 {
3017 	zfs_creat_t *zct = arg;
3018 
3019 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3020 }
3021 
3022 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3023 
3024 /*
3025  * inputs:
3026  * os			parent objset pointer (NULL if root fs)
3027  * fuids_ok		fuids allowed in this version of the spa?
3028  * sa_ok		SAs allowed in this version of the spa?
3029  * createprops		list of properties requested by creator
3030  *
3031  * outputs:
3032  * zplprops	values for the zplprops we attach to the master node object
3033  * is_ci	true if requested file system will be purely case-insensitive
3034  *
3035  * Determine the settings for utf8only, normalization and
3036  * casesensitivity.  Specific values may have been requested by the
3037  * creator and/or we can inherit values from the parent dataset.  If
3038  * the file system is of too early a vintage, a creator can not
3039  * request settings for these properties, even if the requested
3040  * setting is the default value.  We don't actually want to create dsl
3041  * properties for these, so remove them from the source nvlist after
3042  * processing.
3043  */
3044 static int
3045 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3046     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3047     nvlist_t *zplprops, boolean_t *is_ci)
3048 {
3049 	uint64_t sense = ZFS_PROP_UNDEFINED;
3050 	uint64_t norm = ZFS_PROP_UNDEFINED;
3051 	uint64_t u8 = ZFS_PROP_UNDEFINED;
3052 
3053 	ASSERT(zplprops != NULL);
3054 
3055 	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3056 		return (SET_ERROR(EINVAL));
3057 
3058 	/*
3059 	 * Pull out creator prop choices, if any.
3060 	 */
3061 	if (createprops) {
3062 		(void) nvlist_lookup_uint64(createprops,
3063 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3064 		(void) nvlist_lookup_uint64(createprops,
3065 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3066 		(void) nvlist_remove_all(createprops,
3067 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3068 		(void) nvlist_lookup_uint64(createprops,
3069 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3070 		(void) nvlist_remove_all(createprops,
3071 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3072 		(void) nvlist_lookup_uint64(createprops,
3073 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3074 		(void) nvlist_remove_all(createprops,
3075 		    zfs_prop_to_name(ZFS_PROP_CASE));
3076 	}
3077 
3078 	/*
3079 	 * If the zpl version requested is whacky or the file system
3080 	 * or pool is version is too "young" to support normalization
3081 	 * and the creator tried to set a value for one of the props,
3082 	 * error out.
3083 	 */
3084 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3085 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3086 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3087 	    (zplver < ZPL_VERSION_NORMALIZATION &&
3088 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3089 	    sense != ZFS_PROP_UNDEFINED)))
3090 		return (SET_ERROR(ENOTSUP));
3091 
3092 	/*
3093 	 * Put the version in the zplprops
3094 	 */
3095 	VERIFY(nvlist_add_uint64(zplprops,
3096 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3097 
3098 	if (norm == ZFS_PROP_UNDEFINED)
3099 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3100 	VERIFY(nvlist_add_uint64(zplprops,
3101 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3102 
3103 	/*
3104 	 * If we're normalizing, names must always be valid UTF-8 strings.
3105 	 */
3106 	if (norm)
3107 		u8 = 1;
3108 	if (u8 == ZFS_PROP_UNDEFINED)
3109 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3110 	VERIFY(nvlist_add_uint64(zplprops,
3111 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3112 
3113 	if (sense == ZFS_PROP_UNDEFINED)
3114 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3115 	VERIFY(nvlist_add_uint64(zplprops,
3116 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3117 
3118 	if (is_ci)
3119 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3120 
3121 	return (0);
3122 }
3123 
3124 static int
3125 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3126     nvlist_t *zplprops, boolean_t *is_ci)
3127 {
3128 	boolean_t fuids_ok, sa_ok;
3129 	uint64_t zplver = ZPL_VERSION;
3130 	objset_t *os = NULL;
3131 	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3132 	char *cp;
3133 	spa_t *spa;
3134 	uint64_t spa_vers;
3135 	int error;
3136 
3137 	(void) strlcpy(parentname, dataset, sizeof (parentname));
3138 	cp = strrchr(parentname, '/');
3139 	ASSERT(cp != NULL);
3140 	cp[0] = '\0';
3141 
3142 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3143 		return (error);
3144 
3145 	spa_vers = spa_version(spa);
3146 	spa_close(spa, FTAG);
3147 
3148 	zplver = zfs_zpl_version_map(spa_vers);
3149 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3150 	sa_ok = (zplver >= ZPL_VERSION_SA);
3151 
3152 	/*
3153 	 * Open parent object set so we can inherit zplprop values.
3154 	 */
3155 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3156 		return (error);
3157 
3158 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3159 	    zplprops, is_ci);
3160 	dmu_objset_rele(os, FTAG);
3161 	return (error);
3162 }
3163 
3164 static int
3165 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3166     nvlist_t *zplprops, boolean_t *is_ci)
3167 {
3168 	boolean_t fuids_ok;
3169 	boolean_t sa_ok;
3170 	uint64_t zplver = ZPL_VERSION;
3171 	int error;
3172 
3173 	zplver = zfs_zpl_version_map(spa_vers);
3174 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3175 	sa_ok = (zplver >= ZPL_VERSION_SA);
3176 
3177 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3178 	    createprops, zplprops, is_ci);
3179 	return (error);
3180 }
3181 
3182 /*
3183  * innvl: {
3184  *     "type" -> dmu_objset_type_t (int32)
3185  *     (optional) "props" -> { prop -> value }
3186  * }
3187  *
3188  * outnvl: propname -> error code (int32)
3189  */
3190 static int
3191 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3192 {
3193 	int error = 0;
3194 	zfs_creat_t zct = { 0 };
3195 	nvlist_t *nvprops = NULL;
3196 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3197 	int32_t type32;
3198 	dmu_objset_type_t type;
3199 	boolean_t is_insensitive = B_FALSE;
3200 
3201 	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3202 		return (SET_ERROR(EINVAL));
3203 	type = type32;
3204 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3205 
3206 	switch (type) {
3207 	case DMU_OST_ZFS:
3208 		cbfunc = zfs_create_cb;
3209 		break;
3210 
3211 	case DMU_OST_ZVOL:
3212 		cbfunc = zvol_create_cb;
3213 		break;
3214 
3215 	default:
3216 		cbfunc = NULL;
3217 		break;
3218 	}
3219 	if (strchr(fsname, '@') ||
3220 	    strchr(fsname, '%'))
3221 		return (SET_ERROR(EINVAL));
3222 
3223 	zct.zct_props = nvprops;
3224 
3225 	if (cbfunc == NULL)
3226 		return (SET_ERROR(EINVAL));
3227 
3228 	if (type == DMU_OST_ZVOL) {
3229 		uint64_t volsize, volblocksize;
3230 
3231 		if (nvprops == NULL)
3232 			return (SET_ERROR(EINVAL));
3233 		if (nvlist_lookup_uint64(nvprops,
3234 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3235 			return (SET_ERROR(EINVAL));
3236 
3237 		if ((error = nvlist_lookup_uint64(nvprops,
3238 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3239 		    &volblocksize)) != 0 && error != ENOENT)
3240 			return (SET_ERROR(EINVAL));
3241 
3242 		if (error != 0)
3243 			volblocksize = zfs_prop_default_numeric(
3244 			    ZFS_PROP_VOLBLOCKSIZE);
3245 
3246 		if ((error = zvol_check_volblocksize(
3247 		    volblocksize)) != 0 ||
3248 		    (error = zvol_check_volsize(volsize,
3249 		    volblocksize)) != 0)
3250 			return (error);
3251 	} else if (type == DMU_OST_ZFS) {
3252 		int error;
3253 
3254 		/*
3255 		 * We have to have normalization and
3256 		 * case-folding flags correct when we do the
3257 		 * file system creation, so go figure them out
3258 		 * now.
3259 		 */
3260 		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3261 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3262 		error = zfs_fill_zplprops(fsname, nvprops,
3263 		    zct.zct_zplprops, &is_insensitive);
3264 		if (error != 0) {
3265 			nvlist_free(zct.zct_zplprops);
3266 			return (error);
3267 		}
3268 	}
3269 
3270 	error = dmu_objset_create(fsname, type,
3271 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3272 	nvlist_free(zct.zct_zplprops);
3273 
3274 	/*
3275 	 * It would be nice to do this atomically.
3276 	 */
3277 	if (error == 0) {
3278 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3279 		    nvprops, outnvl);
3280 		if (error != 0)
3281 			(void) dsl_destroy_head(fsname);
3282 	}
3283 	return (error);
3284 }
3285 
3286 /*
3287  * innvl: {
3288  *     "origin" -> name of origin snapshot
3289  *     (optional) "props" -> { prop -> value }
3290  * }
3291  *
3292  * outnvl: propname -> error code (int32)
3293  */
3294 static int
3295 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3296 {
3297 	int error = 0;
3298 	nvlist_t *nvprops = NULL;
3299 	char *origin_name;
3300 
3301 	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3302 		return (SET_ERROR(EINVAL));
3303 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3304 
3305 	if (strchr(fsname, '@') ||
3306 	    strchr(fsname, '%'))
3307 		return (SET_ERROR(EINVAL));
3308 
3309 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3310 		return (SET_ERROR(EINVAL));
3311 	error = dmu_objset_clone(fsname, origin_name);
3312 	if (error != 0)
3313 		return (error);
3314 
3315 	/*
3316 	 * It would be nice to do this atomically.
3317 	 */
3318 	if (error == 0) {
3319 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3320 		    nvprops, outnvl);
3321 		if (error != 0)
3322 			(void) dsl_destroy_head(fsname);
3323 	}
3324 	return (error);
3325 }
3326 
3327 /* ARGSUSED */
3328 static int
3329 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3330 {
3331 	if (strchr(fsname, '@') ||
3332 	    strchr(fsname, '%'))
3333 		return (SET_ERROR(EINVAL));
3334 
3335 	return (dmu_objset_remap_indirects(fsname));
3336 }
3337 
3338 /*
3339  * innvl: {
3340  *     "snaps" -> { snapshot1, snapshot2 }
3341  *     (optional) "props" -> { prop -> value (string) }
3342  * }
3343  *
3344  * outnvl: snapshot -> error code (int32)
3345  */
3346 static int
3347 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3348 {
3349 	nvlist_t *snaps;
3350 	nvlist_t *props = NULL;
3351 	int error, poollen;
3352 	nvpair_t *pair;
3353 
3354 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3355 	if ((error = zfs_check_userprops(poolname, props)) != 0)
3356 		return (error);
3357 
3358 	if (!nvlist_empty(props) &&
3359 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3360 		return (SET_ERROR(ENOTSUP));
3361 
3362 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3363 		return (SET_ERROR(EINVAL));
3364 	poollen = strlen(poolname);
3365 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3366 	    pair = nvlist_next_nvpair(snaps, pair)) {
3367 		const char *name = nvpair_name(pair);
3368 		const char *cp = strchr(name, '@');
3369 
3370 		/*
3371 		 * The snap name must contain an @, and the part after it must
3372 		 * contain only valid characters.
3373 		 */
3374 		if (cp == NULL ||
3375 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3376 			return (SET_ERROR(EINVAL));
3377 
3378 		/*
3379 		 * The snap must be in the specified pool.
3380 		 */
3381 		if (strncmp(name, poolname, poollen) != 0 ||
3382 		    (name[poollen] != '/' && name[poollen] != '@'))
3383 			return (SET_ERROR(EXDEV));
3384 
3385 		/* This must be the only snap of this fs. */
3386 		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3387 		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3388 			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3389 			    == 0) {
3390 				return (SET_ERROR(EXDEV));
3391 			}
3392 		}
3393 	}
3394 
3395 	error = dsl_dataset_snapshot(snaps, props, outnvl);
3396 	return (error);
3397 }
3398 
3399 /*
3400  * innvl: "message" -> string
3401  */
3402 /* ARGSUSED */
3403 static int
3404 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3405 {
3406 	char *message;
3407 	spa_t *spa;
3408 	int error;
3409 	char *poolname;
3410 
3411 	/*
3412 	 * The poolname in the ioctl is not set, we get it from the TSD,
3413 	 * which was set at the end of the last successful ioctl that allows
3414 	 * logging.  The secpolicy func already checked that it is set.
3415 	 * Only one log ioctl is allowed after each successful ioctl, so
3416 	 * we clear the TSD here.
3417 	 */
3418 	poolname = tsd_get(zfs_allow_log_key);
3419 	(void) tsd_set(zfs_allow_log_key, NULL);
3420 	error = spa_open(poolname, &spa, FTAG);
3421 	strfree(poolname);
3422 	if (error != 0)
3423 		return (error);
3424 
3425 	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3426 		spa_close(spa, FTAG);
3427 		return (SET_ERROR(EINVAL));
3428 	}
3429 
3430 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3431 		spa_close(spa, FTAG);
3432 		return (SET_ERROR(ENOTSUP));
3433 	}
3434 
3435 	error = spa_history_log(spa, message);
3436 	spa_close(spa, FTAG);
3437 	return (error);
3438 }
3439 
3440 /*
3441  * The dp_config_rwlock must not be held when calling this, because the
3442  * unmount may need to write out data.
3443  *
3444  * This function is best-effort.  Callers must deal gracefully if it
3445  * remains mounted (or is remounted after this call).
3446  *
 * Nothing is returned.  The call does nothing if the argument is not a
 * snapshot name or getzfsvfs() finds no filesystem for it; failures to
 * lock the covered vnode or to unmount are silently ignored.
3449  */
3450 void
3451 zfs_unmount_snap(const char *snapname)
3452 {
3453 	vfs_t *vfsp = NULL;
3454 	zfsvfs_t *zfsvfs = NULL;
3455 
3456 	if (strchr(snapname, '@') == NULL)
3457 		return;
3458 
3459 	int err = getzfsvfs(snapname, &zfsvfs);
3460 	if (err != 0) {
3461 		ASSERT3P(zfsvfs, ==, NULL);
3462 		return;
3463 	}
3464 	vfsp = zfsvfs->z_vfs;
3465 
3466 	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3467 
3468 	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3469 	VFS_RELE(vfsp);
3470 	if (err != 0)
3471 		return;
3472 
3473 	/*
3474 	 * Always force the unmount for snapshots.
3475 	 */
3476 	(void) dounmount(vfsp, MS_FORCE, kcred);
3477 }
3478 
/*
 * Callback-shaped wrapper around zfs_unmount_snap().  The arg parameter is
 * ignored and 0 is always returned.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	zfs_unmount_snap(snapname);

	return (0);
}
3486 
3487 /*
3488  * When a clone is destroyed, its origin may also need to be destroyed,
3489  * in which case it must be unmounted.  This routine will do that unmount
3490  * if necessary.
3491  */
3492 void
3493 zfs_destroy_unmount_origin(const char *fsname)
3494 {
3495 	int error;
3496 	objset_t *os;
3497 	dsl_dataset_t *ds;
3498 
3499 	error = dmu_objset_hold(fsname, FTAG, &os);
3500 	if (error != 0)
3501 		return;
3502 	ds = dmu_objset_ds(os);
3503 	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3504 		char originname[ZFS_MAX_DATASET_NAME_LEN];
3505 		dsl_dataset_name(ds->ds_prev, originname);
3506 		dmu_objset_rele(os, FTAG);
3507 		zfs_unmount_snap(originname);
3508 	} else {
3509 		dmu_objset_rele(os, FTAG);
3510 	}
3511 }
3512 
3513 /*
3514  * innvl: {
3515  *     "snaps" -> { snapshot1, snapshot2 }
3516  *     (optional boolean) "defer"
3517  * }
3518  *
3519  * outnvl: snapshot -> error code (int32)
3520  *
3521  */
3522 /* ARGSUSED */
3523 static int
3524 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3525 {
3526 	nvlist_t *snaps;
3527 	nvpair_t *pair;
3528 	boolean_t defer;
3529 
3530 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3531 		return (SET_ERROR(EINVAL));
3532 	defer = nvlist_exists(innvl, "defer");
3533 
3534 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3535 	    pair = nvlist_next_nvpair(snaps, pair)) {
3536 		zfs_unmount_snap(nvpair_name(pair));
3537 	}
3538 
3539 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3540 }
3541 
3542 /*
3543  * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3544  * All bookmarks must be in the same pool.
3545  *
3546  * innvl: {
3547  *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3548  * }
3549  *
3550  * outnvl: bookmark -> error code (int32)
3551  *
3552  */
3553 /* ARGSUSED */
3554 static int
3555 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3556 {
3557 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3558 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3559 		char *snap_name;
3560 
3561 		/*
3562 		 * Verify the snapshot argument.
3563 		 */
3564 		if (nvpair_value_string(pair, &snap_name) != 0)
3565 			return (SET_ERROR(EINVAL));
3566 
3567 
3568 		/* Verify that the keys (bookmarks) are unique */
3569 		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3570 		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3571 			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3572 				return (SET_ERROR(EINVAL));
3573 		}
3574 	}
3575 
3576 	return (dsl_bookmark_create(innvl, outnvl));
3577 }
3578 
3579 /*
3580  * innvl: {
3581  *     property 1, property 2, ...
3582  * }
3583  *
3584  * outnvl: {
3585  *     bookmark name 1 -> { property 1, property 2, ... },
3586  *     bookmark name 2 -> { property 1, property 2, ... }
3587  * }
3588  *
3589  */
3590 static int
3591 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3592 {
3593 	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3594 }
3595 
3596 /*
3597  * innvl: {
3598  *     bookmark name 1, bookmark name 2
3599  * }
3600  *
3601  * outnvl: bookmark -> error code (int32)
3602  *
3603  */
3604 static int
3605 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3606     nvlist_t *outnvl)
3607 {
3608 	int error, poollen;
3609 
3610 	poollen = strlen(poolname);
3611 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3612 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3613 		const char *name = nvpair_name(pair);
3614 		const char *cp = strchr(name, '#');
3615 
3616 		/*
3617 		 * The bookmark name must contain an #, and the part after it
3618 		 * must contain only valid characters.
3619 		 */
3620 		if (cp == NULL ||
3621 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3622 			return (SET_ERROR(EINVAL));
3623 
3624 		/*
3625 		 * The bookmark must be in the specified pool.
3626 		 */
3627 		if (strncmp(name, poolname, poollen) != 0 ||
3628 		    (name[poollen] != '/' && name[poollen] != '#'))
3629 			return (SET_ERROR(EXDEV));
3630 	}
3631 
3632 	error = dsl_bookmark_destroy(innvl, outnvl);
3633 	return (error);
3634 }
3635 
3636 static int
3637 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3638     nvlist_t *outnvl)
3639 {
3640 	char *program;
3641 	uint64_t instrlimit, memlimit;
3642 	boolean_t sync_flag;
3643 	nvpair_t *nvarg = NULL;
3644 
3645 	if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
3646 		return (EINVAL);
3647 	}
3648 	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3649 		sync_flag = B_TRUE;
3650 	}
3651 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3652 		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3653 	}
3654 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3655 		memlimit = ZCP_DEFAULT_MEMLIMIT;
3656 	}
3657 	if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
3658 		return (EINVAL);
3659 	}
3660 
3661 	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3662 		return (EINVAL);
3663 	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3664 		return (EINVAL);
3665 
3666 	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3667 	    nvarg, outnvl));
3668 }
3669 
3670 /*
3671  * innvl: unused
3672  * outnvl: empty
3673  */
3674 /* ARGSUSED */
3675 static int
3676 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3677 {
3678 	return (spa_checkpoint(poolname));
3679 }
3680 
3681 /*
3682  * innvl: unused
3683  * outnvl: empty
3684  */
3685 /* ARGSUSED */
3686 static int
3687 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
3688     nvlist_t *outnvl)
3689 {
3690 	return (spa_checkpoint_discard(poolname));
3691 }
3692 
3693 /*
3694  * inputs:
3695  * zc_name		name of dataset to destroy
3696  * zc_defer_destroy	mark for deferred destroy
3697  *
3698  * outputs:		none
3699  */
3700 static int
3701 zfs_ioc_destroy(zfs_cmd_t *zc)
3702 {
3703 	objset_t *os;
3704 	dmu_objset_type_t ost;
3705 	int err;
3706 
3707 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3708 	if (err != 0)
3709 		return (err);
3710 	ost = dmu_objset_type(os);
3711 	dmu_objset_rele(os, FTAG);
3712 
3713 	if (ost == DMU_OST_ZFS)
3714 		zfs_unmount_snap(zc->zc_name);
3715 
3716 	if (strchr(zc->zc_name, '@'))
3717 		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3718 	else
3719 		err = dsl_destroy_head(zc->zc_name);
3720 	if (ost == DMU_OST_ZVOL && err == 0)
3721 		(void) zvol_remove_minor(zc->zc_name);
3722 	return (err);
3723 }
3724 
3725 /*
3726  * innvl: {
3727  *     vdevs: {
3728  *         guid 1, guid 2, ...
3729  *     },
3730  *     func: POOL_INITIALIZE_{CANCEL|DO|SUSPEND}
3731  * }
3732  *
3733  * outnvl: {
3734  *     [func: EINVAL (if provided command type didn't make sense)],
3735  *     [vdevs: {
3736  *         guid1: errno, (see function body for possible errnos)
3737  *         ...
3738  *     }]
3739  * }
3740  *
3741  */
3742 static int
3743 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3744 {
3745 	spa_t *spa;
3746 	int error;
3747 
3748 	error = spa_open(poolname, &spa, FTAG);
3749 	if (error != 0)
3750 		return (error);
3751 
3752 	uint64_t cmd_type;
3753 	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3754 	    &cmd_type) != 0) {
3755 		spa_close(spa, FTAG);
3756 		return (SET_ERROR(EINVAL));
3757 	}
3758 	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3759 	    cmd_type == POOL_INITIALIZE_DO ||
3760 	    cmd_type == POOL_INITIALIZE_SUSPEND)) {
3761 		spa_close(spa, FTAG);
3762 		return (SET_ERROR(EINVAL));
3763 	}
3764 
3765 	nvlist_t *vdev_guids;
3766 	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3767 	    &vdev_guids) != 0) {
3768 		spa_close(spa, FTAG);
3769 		return (SET_ERROR(EINVAL));
3770 	}
3771 
3772 	nvlist_t *vdev_errlist = fnvlist_alloc();
3773 	int total_errors = 0;
3774 
3775 	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3776 	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3777 		uint64_t vdev_guid = fnvpair_value_uint64(pair);
3778 
3779 		error = spa_vdev_initialize(spa, vdev_guid, cmd_type);
3780 		if (error != 0) {
3781 			char guid_as_str[MAXNAMELEN];
3782 
3783 			(void) snprintf(guid_as_str, sizeof (guid_as_str),
3784 			    "%llu", (unsigned long long)vdev_guid);
3785 			fnvlist_add_int64(vdev_errlist, guid_as_str, error);
3786 			total_errors++;
3787 		}
3788 	}
3789 	if (fnvlist_size(vdev_errlist) > 0) {
3790 		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
3791 		    vdev_errlist);
3792 	}
3793 	fnvlist_free(vdev_errlist);
3794 
3795 	spa_close(spa, FTAG);
3796 	return (total_errors > 0 ? EINVAL : 0);
3797 }
3798 
3799 /*
3800  * fsname is name of dataset to rollback (to most recent snapshot)
3801  *
3802  * innvl may contain name of expected target snapshot
3803  *
3804  * outnvl: "target" -> name of most recent snapshot
3805  * }
3806  */
3807 /* ARGSUSED */
3808 static int
3809 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3810 {
3811 	zfsvfs_t *zfsvfs;
3812 	char *target = NULL;
3813 	int error;
3814 
3815 	(void) nvlist_lookup_string(innvl, "target", &target);
3816 	if (target != NULL) {
3817 		const char *cp = strchr(target, '@');
3818 
3819 		/*
3820 		 * The snap name must contain an @, and the part after it must
3821 		 * contain only valid characters.
3822 		 */
3823 		if (cp == NULL ||
3824 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3825 			return (SET_ERROR(EINVAL));
3826 	}
3827 
3828 	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3829 		dsl_dataset_t *ds;
3830 
3831 		ds = dmu_objset_ds(zfsvfs->z_os);
3832 		error = zfs_suspend_fs(zfsvfs);
3833 		if (error == 0) {
3834 			int resume_err;
3835 
3836 			error = dsl_dataset_rollback(fsname, target, zfsvfs,
3837 			    outnvl);
3838 			resume_err = zfs_resume_fs(zfsvfs, ds);
3839 			error = error ? error : resume_err;
3840 		}
3841 		VFS_RELE(zfsvfs->z_vfs);
3842 	} else {
3843 		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
3844 	}
3845 	return (error);
3846 }
3847 
3848 static int
3849 recursive_unmount(const char *fsname, void *arg)
3850 {
3851 	const char *snapname = arg;
3852 	char fullname[ZFS_MAX_DATASET_NAME_LEN];
3853 
3854 	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3855 	zfs_unmount_snap(fullname);
3856 
3857 	return (0);
3858 }
3859 
3860 /*
3861  * inputs:
3862  * zc_name	old name of dataset
3863  * zc_value	new name of dataset
3864  * zc_cookie	recursive flag (only valid for snapshots)
3865  *
3866  * outputs:	none
3867  */
3868 static int
3869 zfs_ioc_rename(zfs_cmd_t *zc)
3870 {
3871 	objset_t *os;
3872 	dmu_objset_type_t ost;
3873 	boolean_t recursive = zc->zc_cookie & 1;
3874 	char *at;
3875 	int err;
3876 
3877 	/* "zfs rename" from and to ...%recv datasets should both fail */
3878 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
3879 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3880 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
3881 	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3882 	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
3883 		return (SET_ERROR(EINVAL));
3884 
3885 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3886 	if (err != 0)
3887 		return (err);
3888 	ost = dmu_objset_type(os);
3889 	dmu_objset_rele(os, FTAG);
3890 
3891 	at = strchr(zc->zc_name, '@');
3892 	if (at != NULL) {
3893 		/* snaps must be in same fs */
3894 		int error;
3895 
3896 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3897 			return (SET_ERROR(EXDEV));
3898 		*at = '\0';
3899 		if (ost == DMU_OST_ZFS) {
3900 			error = dmu_objset_find(zc->zc_name,
3901 			    recursive_unmount, at + 1,
3902 			    recursive ? DS_FIND_CHILDREN : 0);
3903 			if (error != 0) {
3904 				*at = '@';
3905 				return (error);
3906 			}
3907 		}
3908 		error = dsl_dataset_rename_snapshot(zc->zc_name,
3909 		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3910 		*at = '@';
3911 
3912 		return (error);
3913 	} else {
3914 		if (ost == DMU_OST_ZVOL)
3915 			(void) zvol_remove_minor(zc->zc_name);
3916 		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3917 	}
3918 }
3919 
3920 static int
3921 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3922 {
3923 	const char *propname = nvpair_name(pair);
3924 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3925 	zfs_prop_t prop = zfs_name_to_prop(propname);
3926 	uint64_t intval;
3927 	int err;
3928 
3929 	if (prop == ZPROP_INVAL) {
3930 		if (zfs_prop_user(propname)) {
3931 			if (err = zfs_secpolicy_write_perms(dsname,
3932 			    ZFS_DELEG_PERM_USERPROP, cr))
3933 				return (err);
3934 			return (0);
3935 		}
3936 
3937 		if (!issnap && zfs_prop_userquota(propname)) {
3938 			const char *perm = NULL;
3939 			const char *uq_prefix =
3940 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3941 			const char *gq_prefix =
3942 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3943 
3944 			if (strncmp(propname, uq_prefix,
3945 			    strlen(uq_prefix)) == 0) {
3946 				perm = ZFS_DELEG_PERM_USERQUOTA;
3947 			} else if (strncmp(propname, gq_prefix,
3948 			    strlen(gq_prefix)) == 0) {
3949 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3950 			} else {
3951 				/* USERUSED and GROUPUSED are read-only */
3952 				return (SET_ERROR(EINVAL));
3953 			}
3954 
3955 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3956 				return (err);
3957 			return (0);
3958 		}
3959 
3960 		return (SET_ERROR(EINVAL));
3961 	}
3962 
3963 	if (issnap)
3964 		return (SET_ERROR(EINVAL));
3965 
3966 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3967 		/*
3968 		 * dsl_prop_get_all_impl() returns properties in this
3969 		 * format.
3970 		 */
3971 		nvlist_t *attrs;
3972 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3973 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3974 		    &pair) == 0);
3975 	}
3976 
3977 	/*
3978 	 * Check that this value is valid for this pool version
3979 	 */
3980 	switch (prop) {
3981 	case ZFS_PROP_COMPRESSION:
3982 		/*
3983 		 * If the user specified gzip compression, make sure
3984 		 * the SPA supports it. We ignore any errors here since
3985 		 * we'll catch them later.
3986 		 */
3987 		if (nvpair_value_uint64(pair, &intval) == 0) {
3988 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3989 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3990 			    zfs_earlier_version(dsname,
3991 			    SPA_VERSION_GZIP_COMPRESSION)) {
3992 				return (SET_ERROR(ENOTSUP));
3993 			}
3994 
3995 			if (intval == ZIO_COMPRESS_ZLE &&
3996 			    zfs_earlier_version(dsname,
3997 			    SPA_VERSION_ZLE_COMPRESSION))
3998 				return (SET_ERROR(ENOTSUP));
3999 
4000 			if (intval == ZIO_COMPRESS_LZ4) {
4001 				spa_t *spa;
4002 
4003 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4004 					return (err);
4005 
4006 				if (!spa_feature_is_enabled(spa,
4007 				    SPA_FEATURE_LZ4_COMPRESS)) {
4008 					spa_close(spa, FTAG);
4009 					return (SET_ERROR(ENOTSUP));
4010 				}
4011 				spa_close(spa, FTAG);
4012 			}
4013 
4014 			/*
4015 			 * If this is a bootable dataset then
4016 			 * verify that the compression algorithm
4017 			 * is supported for booting. We must return
4018 			 * something other than ENOTSUP since it
4019 			 * implies a downrev pool version.
4020 			 */
4021 			if (zfs_is_bootfs(dsname) &&
4022 			    !BOOTFS_COMPRESS_VALID(intval)) {
4023 				return (SET_ERROR(ERANGE));
4024 			}
4025 		}
4026 		break;
4027 
4028 	case ZFS_PROP_COPIES:
4029 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4030 			return (SET_ERROR(ENOTSUP));
4031 		break;
4032 
4033 	case ZFS_PROP_RECORDSIZE:
4034 		/* Record sizes above 128k need the feature to be enabled */
4035 		if (nvpair_value_uint64(pair, &intval) == 0 &&
4036 		    intval > SPA_OLD_MAXBLOCKSIZE) {
4037 			spa_t *spa;
4038 
4039 			/*
4040 			 * We don't allow setting the property above 1MB,
4041 			 * unless the tunable has been changed.
4042 			 */
4043 			if (intval > zfs_max_recordsize ||
4044 			    intval > SPA_MAXBLOCKSIZE)
4045 				return (SET_ERROR(ERANGE));
4046 
4047 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4048 				return (err);
4049 
4050 			if (!spa_feature_is_enabled(spa,
4051 			    SPA_FEATURE_LARGE_BLOCKS)) {
4052 				spa_close(spa, FTAG);
4053 				return (SET_ERROR(ENOTSUP));
4054 			}
4055 			spa_close(spa, FTAG);
4056 		}
4057 		break;
4058 
4059 	case ZFS_PROP_DNODESIZE:
4060 		/* Dnode sizes above 512 need the feature to be enabled */
4061 		if (nvpair_value_uint64(pair, &intval) == 0 &&
4062 		    intval != ZFS_DNSIZE_LEGACY) {
4063 			spa_t *spa;
4064 
4065 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4066 				return (err);
4067 
4068 			if (!spa_feature_is_enabled(spa,
4069 			    SPA_FEATURE_LARGE_DNODE)) {
4070 				spa_close(spa, FTAG);
4071 				return (SET_ERROR(ENOTSUP));
4072 			}
4073 			spa_close(spa, FTAG);
4074 		}
4075 		break;
4076 
4077 	case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
4078 		/*
4079 		 * This property could require the allocation classes
4080 		 * feature to be active for setting, however we allow
4081 		 * it so that tests of settable properties succeed.
4082 		 * The CLI will issue a warning in this case.
4083 		 */
4084 		break;
4085 
4086 	case ZFS_PROP_SHARESMB:
4087 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4088 			return (SET_ERROR(ENOTSUP));
4089 		break;
4090 
4091 	case ZFS_PROP_ACLINHERIT:
4092 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4093 		    nvpair_value_uint64(pair, &intval) == 0) {
4094 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4095 			    zfs_earlier_version(dsname,
4096 			    SPA_VERSION_PASSTHROUGH_X))
4097 				return (SET_ERROR(ENOTSUP));
4098 		}
4099 		break;
4100 
4101 	case ZFS_PROP_CHECKSUM:
4102 	case ZFS_PROP_DEDUP:
4103 	{
4104 		spa_feature_t feature;
4105 		spa_t *spa;
4106 
4107 		/* dedup feature version checks */
4108 		if (prop == ZFS_PROP_DEDUP &&
4109 		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4110 			return (SET_ERROR(ENOTSUP));
4111 
4112 		if (nvpair_value_uint64(pair, &intval) != 0)
4113 			return (SET_ERROR(EINVAL));
4114 
4115 		/* check prop value is enabled in features */
4116 		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4117 		if (feature == SPA_FEATURE_NONE)
4118 			break;
4119 
4120 		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4121 			return (err);
4122 
4123 		if (!spa_feature_is_enabled(spa, feature)) {
4124 			spa_close(spa, FTAG);
4125 			return (SET_ERROR(ENOTSUP));
4126 		}
4127 		spa_close(spa, FTAG);
4128 		break;
4129 	}
4130 	}
4131 
4132 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4133 }
4134 
4135 /*
4136  * Checks for a race condition to make sure we don't increment a feature flag
4137  * multiple times.
4138  */
4139 static int
4140 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4141 {
4142 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4143 	spa_feature_t *featurep = arg;
4144 
4145 	if (!spa_feature_is_active(spa, *featurep))
4146 		return (0);
4147 	else
4148 		return (SET_ERROR(EBUSY));
4149 }
4150 
4151 /*
4152  * The callback invoked on feature activation in the sync task caused by
4153  * zfs_prop_activate_feature.
4154  */
4155 static void
4156 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4157 {
4158 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4159 	spa_feature_t *featurep = arg;
4160 
4161 	spa_feature_incr(spa, *featurep, tx);
4162 }
4163 
4164 /*
4165  * Activates a feature on a pool in response to a property setting. This
4166  * creates a new sync task which modifies the pool to reflect the feature
4167  * as being active.
4168  */
4169 static int
4170 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4171 {
4172 	int err;
4173 
4174 	/* EBUSY here indicates that the feature is already active */
4175 	err = dsl_sync_task(spa_name(spa),
4176 	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4177 	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4178 
4179 	if (err != 0 && err != EBUSY)
4180 		return (err);
4181 	else
4182 		return (0);
4183 }
4184 
4185 /*
4186  * Removes properties from the given props list that fail permission checks
4187  * needed to clear them and to restore them in case of a receive error. For each
4188  * property, make sure we have both set and inherit permissions.
4189  *
4190  * Returns the first error encountered if any permission checks fail. If the
4191  * caller provides a non-NULL errlist, it also gives the complete list of names
4192  * of all the properties that failed a permission check along with the
4193  * corresponding error numbers. The caller is responsible for freeing the
4194  * returned errlist.
4195  *
4196  * If every property checks out successfully, zero is returned and the list
4197  * pointed at by errlist is NULL.
4198  */
4199 static int
4200 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4201 {
4202 	zfs_cmd_t *zc;
4203 	nvpair_t *pair, *next_pair;
4204 	nvlist_t *errors;
4205 	int err, rv = 0;
4206 
4207 	if (props == NULL)
4208 		return (0);
4209 
4210 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4211 
4212 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4213 	(void) strcpy(zc->zc_name, dataset);
4214 	pair = nvlist_next_nvpair(props, NULL);
4215 	while (pair != NULL) {
4216 		next_pair = nvlist_next_nvpair(props, pair);
4217 
4218 		(void) strcpy(zc->zc_value, nvpair_name(pair));
4219 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4220 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4221 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4222 			VERIFY(nvlist_add_int32(errors,
4223 			    zc->zc_value, err) == 0);
4224 		}
4225 		pair = next_pair;
4226 	}
4227 	kmem_free(zc, sizeof (zfs_cmd_t));
4228 
4229 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4230 		nvlist_free(errors);
4231 		errors = NULL;
4232 	} else {
4233 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4234 	}
4235 
4236 	if (errlist == NULL)
4237 		nvlist_free(errors);
4238 	else
4239 		*errlist = errors;
4240 
4241 	return (rv);
4242 }
4243 
4244 static boolean_t
4245 propval_equals(nvpair_t *p1, nvpair_t *p2)
4246 {
4247 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4248 		/* dsl_prop_get_all_impl() format */
4249 		nvlist_t *attrs;
4250 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4251 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4252 		    &p1) == 0);
4253 	}
4254 
4255 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4256 		nvlist_t *attrs;
4257 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4258 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4259 		    &p2) == 0);
4260 	}
4261 
4262 	if (nvpair_type(p1) != nvpair_type(p2))
4263 		return (B_FALSE);
4264 
4265 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4266 		char *valstr1, *valstr2;
4267 
4268 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4269 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4270 		return (strcmp(valstr1, valstr2) == 0);
4271 	} else {
4272 		uint64_t intval1, intval2;
4273 
4274 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4275 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4276 		return (intval1 == intval2);
4277 	}
4278 }
4279 
4280 /*
4281  * Remove properties from props if they are not going to change (as determined
4282  * by comparison with origprops). Remove them from origprops as well, since we
4283  * do not need to clear or restore properties that won't change.
4284  */
4285 static void
4286 props_reduce(nvlist_t *props, nvlist_t *origprops)
4287 {
4288 	nvpair_t *pair, *next_pair;
4289 
4290 	if (origprops == NULL)
4291 		return; /* all props need to be received */
4292 
4293 	pair = nvlist_next_nvpair(props, NULL);
4294 	while (pair != NULL) {
4295 		const char *propname = nvpair_name(pair);
4296 		nvpair_t *match;
4297 
4298 		next_pair = nvlist_next_nvpair(props, pair);
4299 
4300 		if ((nvlist_lookup_nvpair(origprops, propname,
4301 		    &match) != 0) || !propval_equals(pair, match))
4302 			goto next; /* need to set received value */
4303 
4304 		/* don't clear the existing received value */
4305 		(void) nvlist_remove_nvpair(origprops, match);
4306 		/* don't bother receiving the property */
4307 		(void) nvlist_remove_nvpair(props, pair);
4308 next:
4309 		pair = next_pair;
4310 	}
4311 }
4312 
4313 /*
4314  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4315  * For example, refquota cannot be set until after the receipt of a dataset,
4316  * because in replication streams, an older/earlier snapshot may exceed the
4317  * refquota.  We want to receive the older/earlier snapshot, but setting
4318  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4319  * the older/earlier snapshot from being received (with EDQUOT).
4320  *
4321  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4322  *
4323  * libzfs will need to be judicious handling errors encountered by props
4324  * extracted by this function.
4325  */
4326 static nvlist_t *
4327 extract_delay_props(nvlist_t *props)
4328 {
4329 	nvlist_t *delayprops;
4330 	nvpair_t *nvp, *tmp;
4331 	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4332 	int i;
4333 
4334 	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4335 
4336 	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4337 	    nvp = nvlist_next_nvpair(props, nvp)) {
4338 		/*
4339 		 * strcmp() is safe because zfs_prop_to_name() always returns
4340 		 * a bounded string.
4341 		 */
4342 		for (i = 0; delayable[i] != 0; i++) {
4343 			if (strcmp(zfs_prop_to_name(delayable[i]),
4344 			    nvpair_name(nvp)) == 0) {
4345 				break;
4346 			}
4347 		}
4348 		if (delayable[i] != 0) {
4349 			tmp = nvlist_prev_nvpair(props, nvp);
4350 			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4351 			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4352 			nvp = tmp;
4353 		}
4354 	}
4355 
4356 	if (nvlist_empty(delayprops)) {
4357 		nvlist_free(delayprops);
4358 		delayprops = NULL;
4359 	}
4360 	return (delayprops);
4361 }
4362 
#ifdef	DEBUG
/*
 * NOTE(review): presumably a DEBUG-only switch for injecting an error into
 * zfs_ioc_recv(); its use is not visible in this part of the file -- confirm.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
4366 
/*
 * Receive a send stream from a file descriptor into the dataset named by
 * zc_value ("fs@snap"), applying any supplied properties as received
 * properties and attempting to restore the previous ones if the receive
 * fails.
 *
 * inputs:
 * zc_name		name of containing filesystem
 * zc_nvlist_src{_size}	nvlist of properties to apply
 * zc_value		name of snapshot to create
 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
 * zc_cookie		file descriptor to recv from
 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
 * zc_guid		force flag
 * zc_cleanup_fd	cleanup-on-exit file descriptor
 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
 * zc_resumable		if data is incomplete assume sender will resume
 *
 * outputs:
 * zc_cookie		number of bytes read
 * zc_nvlist_dst{_size} error for each unapplied received property
 * zc_obj		zprop_errflags_t
 * zc_action_handle	handle for this guid/ds mapping
 */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
	file_t *fp;
	dmu_recv_cookie_t drc;
	boolean_t force = (boolean_t)zc->zc_guid;
	int fd;
	int error = 0;
	int props_error = 0;
	nvlist_t *errors;
	offset_t off;
	nvlist_t *props = NULL; /* sent properties */
	nvlist_t *origprops = NULL; /* existing properties */
	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
	char *origin = NULL;
	char *tosnap;
	char tofs[ZFS_MAX_DATASET_NAME_LEN];
	boolean_t first_recvd_props = B_FALSE;

	/*
	 * zc_value must pass dataset_namecheck(), must contain '@' and must
	 * not contain '%'.
	 */
	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
	    strchr(zc->zc_value, '@') == NULL ||
	    strchr(zc->zc_value, '%'))
		return (SET_ERROR(EINVAL));

	/*
	 * Split the name at '@': tofs keeps the filesystem part and tosnap
	 * points at the snapshot part inside the same buffer.
	 * NOTE(review): the unbounded strcpy() presumably relies on
	 * dataset_namecheck() having limited the length of zc_value to fit
	 * tofs -- confirm.
	 */
	(void) strcpy(tofs, zc->zc_value);
	tosnap = strchr(tofs, '@');
	*tosnap++ = '\0';

	if (zc->zc_nvlist_src != 0 &&
	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props)) != 0)
		return (error);

	/* Hold the stream's file; it is released with releasef() at out:. */
	fd = zc->zc_cookie;
	fp = getf(fd);
	if (fp == NULL) {
		nvlist_free(props);
		return (SET_ERROR(EBADF));
	}

	/* Per-property errors collected here are copied out further down. */
	errors = fnvlist_alloc();

	if (zc->zc_string[0])
		origin = zc->zc_string;

	error = dmu_recv_begin(tofs, tosnap,
	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
	if (error != 0)
		goto out;

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (props != NULL && !drc.drc_newfs) {
		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
		    SPA_VERSION_RECVD_PROPS &&
		    !dsl_prop_get_hasrecvd(tofs))
			first_recvd_props = B_TRUE;

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties, so stash
		 * away the existing ones.
		 */
		if (dsl_prop_get_received(tofs, &origprops) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(props, origprops);
			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
				(void) nvlist_merge(errors, errlist, 0);
			nvlist_free(errlist);

			if (clear_received_props(tofs, origprops,
			    first_recvd_props ? NULL : props) != 0)
				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		} else {
			/* Could not stash the existing received properties. */
			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		}
	}

	if (props != NULL) {
		props_error = dsl_prop_set_hasrecvd(tofs);

		if (props_error == 0) {
			/*
			 * Pull the delayable properties out of props; they are
			 * applied only after the receive has completed.
			 */
			delayprops = extract_delay_props(props);
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    props, errors);
		}
	}

	/*
	 * off starts at the file's current offset and is passed by reference;
	 * its distance from f_offset is reported below as the bytes read.
	 */
	off = fp->f_offset;
	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
	    &zc->zc_action_handle);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			dsl_dataset_t *ds;
			int end_err;

			ds = dmu_objset_ds(zfsvfs->z_os);
			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc, zfsvfs);
			if (error == 0)
				error = zfs_resume_fs(zfsvfs, ds);
			/* A suspend/resume error takes precedence. */
			error = error ? error : end_err;
			VFS_RELE(zfsvfs->z_vfs);
		} else {
			error = dmu_recv_end(&drc, NULL);
		}

		/* Set delayed properties now, after we're done receiving. */
		if (delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    delayprops, errors);
		}
	}

	if (delayprops != NULL) {
		/*
		 * Merge delayed props back in with initial props, in case
		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
		 * we have to make sure clear_received_props() includes
		 * the delayed properties).
		 *
		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
		 * using ASSERT() will be just like a VERIFY.
		 */
		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
		nvlist_free(delayprops);
	}

	/*
	 * Now that all props, initial and delayed, are set, report the prop
	 * errors to the caller.
	 */
	if (zc->zc_nvlist_dst_size != 0 &&
	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
	    put_nvlist(zc, errors) != 0)) {
		/*
		 * Caller made zc->zc_nvlist_dst less than the minimum expected
		 * size or supplied an invalid address.
		 */
		props_error = SET_ERROR(EINVAL);
	}

	/* Report the bytes consumed, then try to move the file offset. */
	zc->zc_cookie = off - fp->f_offset;
	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
		fp->f_offset = off;

#ifdef	DEBUG
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif
	/*
	 * On error, restore the original props.
	 */
	if (error != 0 && props != NULL && !drc.drc_newfs) {
		if (clear_received_props(tofs, props, NULL) != 0) {
			/*
			 * We failed to clear the received properties.
			 * Since we may have left a $recvd value on the
			 * system, we can't clear the $hasrecvd flag.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		} else if (first_recvd_props) {
			dsl_prop_unset_hasrecvd(tofs);
		}

		if (origprops == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origprops != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origprops, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}
	}
out:
	/* Common cleanup; props and origprops may still be NULL here. */
	nvlist_free(props);
	nvlist_free(origprops);
	nvlist_free(errors);
	releasef(fd);

	/* A receive error wins; otherwise surface any property error. */
	if (error == 0)
		error = props_error;

	return (error);
}
4606 
4607 /*
4608  * inputs:
4609  * zc_name	name of snapshot to send
4610  * zc_cookie	file descriptor to send stream to
4611  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4612  * zc_sendobj	objsetid of snapshot to send
4613  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4614  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4615  *		output size in zc_objset_type.
4616  * zc_flags	lzc_send_flags
4617  *
4618  * outputs:
4619  * zc_objset_type	estimated size, if zc_guid is set
4620  */
4621 static int
4622 zfs_ioc_send(zfs_cmd_t *zc)
4623 {
4624 	int error;
4625 	offset_t off;
4626 	boolean_t estimate = (zc->zc_guid != 0);
4627 	boolean_t embedok = (zc->zc_flags & 0x1);
4628 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4629 	boolean_t compressok = (zc->zc_flags & 0x4);
4630 
4631 	if (zc->zc_obj != 0) {
4632 		dsl_pool_t *dp;
4633 		dsl_dataset_t *tosnap;
4634 
4635 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4636 		if (error != 0)
4637 			return (error);
4638 
4639 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4640 		if (error != 0) {
4641 			dsl_pool_rele(dp, FTAG);
4642 			return (error);
4643 		}
4644 
4645 		if (dsl_dir_is_clone(tosnap->ds_dir))
4646 			zc->zc_fromobj =
4647 			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4648 		dsl_dataset_rele(tosnap, FTAG);
4649 		dsl_pool_rele(dp, FTAG);
4650 	}
4651 
4652 	if (estimate) {
4653 		dsl_pool_t *dp;
4654 		dsl_dataset_t *tosnap;
4655 		dsl_dataset_t *fromsnap = NULL;
4656 
4657 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4658 		if (error != 0)
4659 			return (error);
4660 
4661 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4662 		if (error != 0) {
4663 			dsl_pool_rele(dp, FTAG);
4664 			return (error);
4665 		}
4666 
4667 		if (zc->zc_fromobj != 0) {
4668 			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4669 			    FTAG, &fromsnap);
4670 			if (error != 0) {
4671 				dsl_dataset_rele(tosnap, FTAG);
4672 				dsl_pool_rele(dp, FTAG);
4673 				return (error);
4674 			}
4675 		}
4676 
4677 		error = dmu_send_estimate(tosnap, fromsnap, compressok,
4678 		    &zc->zc_objset_type);
4679 
4680 		if (fromsnap != NULL)
4681 			dsl_dataset_rele(fromsnap, FTAG);
4682 		dsl_dataset_rele(tosnap, FTAG);
4683 		dsl_pool_rele(dp, FTAG);
4684 	} else {
4685 		file_t *fp = getf(zc->zc_cookie);
4686 		if (fp == NULL)
4687 			return (SET_ERROR(EBADF));
4688 
4689 		off = fp->f_offset;
4690 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4691 		    zc->zc_fromobj, embedok, large_block_ok, compressok,
4692 		    zc->zc_cookie, fp->f_vnode, &off);
4693 
4694 		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4695 			fp->f_offset = off;
4696 		releasef(zc->zc_cookie);
4697 	}
4698 	return (error);
4699 }
4700 
4701 /*
4702  * inputs:
4703  * zc_name	name of snapshot on which to report progress
4704  * zc_cookie	file descriptor of send stream
4705  *
4706  * outputs:
4707  * zc_cookie	number of bytes written in send stream thus far
4708  */
4709 static int
4710 zfs_ioc_send_progress(zfs_cmd_t *zc)
4711 {
4712 	dsl_pool_t *dp;
4713 	dsl_dataset_t *ds;
4714 	dmu_sendarg_t *dsp = NULL;
4715 	int error;
4716 
4717 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4718 	if (error != 0)
4719 		return (error);
4720 
4721 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4722 	if (error != 0) {
4723 		dsl_pool_rele(dp, FTAG);
4724 		return (error);
4725 	}
4726 
4727 	mutex_enter(&ds->ds_sendstream_lock);
4728 
4729 	/*
4730 	 * Iterate over all the send streams currently active on this dataset.
4731 	 * If there's one which matches the specified file descriptor _and_ the
4732 	 * stream was started by the current process, return the progress of
4733 	 * that stream.
4734 	 */
4735 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4736 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4737 		if (dsp->dsa_outfd == zc->zc_cookie &&
4738 		    dsp->dsa_proc == curproc)
4739 			break;
4740 	}
4741 
4742 	if (dsp != NULL)
4743 		zc->zc_cookie = *(dsp->dsa_off);
4744 	else
4745 		error = SET_ERROR(ENOENT);
4746 
4747 	mutex_exit(&ds->ds_sendstream_lock);
4748 	dsl_dataset_rele(ds, FTAG);
4749 	dsl_pool_rele(dp, FTAG);
4750 	return (error);
4751 }
4752 
4753 static int
4754 zfs_ioc_inject_fault(zfs_cmd_t *zc)
4755 {
4756 	int id, error;
4757 
4758 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4759 	    &zc->zc_inject_record);
4760 
4761 	if (error == 0)
4762 		zc->zc_guid = (uint64_t)id;
4763 
4764 	return (error);
4765 }
4766 
4767 static int
4768 zfs_ioc_clear_fault(zfs_cmd_t *zc)
4769 {
4770 	return (zio_clear_fault((int)zc->zc_guid));
4771 }
4772 
4773 static int
4774 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4775 {
4776 	int id = (int)zc->zc_guid;
4777 	int error;
4778 
4779 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4780 	    &zc->zc_inject_record);
4781 
4782 	zc->zc_guid = id;
4783 
4784 	return (error);
4785 }
4786 
4787 static int
4788 zfs_ioc_error_log(zfs_cmd_t *zc)
4789 {
4790 	spa_t *spa;
4791 	int error;
4792 	size_t count = (size_t)zc->zc_nvlist_dst_size;
4793 
4794 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4795 		return (error);
4796 
4797 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4798 	    &count);
4799 	if (error == 0)
4800 		zc->zc_nvlist_dst_size = count;
4801 	else
4802 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4803 
4804 	spa_close(spa, FTAG);
4805 
4806 	return (error);
4807 }
4808 
4809 static int
4810 zfs_ioc_clear(zfs_cmd_t *zc)
4811 {
4812 	spa_t *spa;
4813 	vdev_t *vd;
4814 	int error;
4815 
4816 	/*
4817 	 * On zpool clear we also fix up missing slogs
4818 	 */
4819 	mutex_enter(&spa_namespace_lock);
4820 	spa = spa_lookup(zc->zc_name);
4821 	if (spa == NULL) {
4822 		mutex_exit(&spa_namespace_lock);
4823 		return (SET_ERROR(EIO));
4824 	}
4825 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4826 		/* we need to let spa_open/spa_load clear the chains */
4827 		spa_set_log_state(spa, SPA_LOG_CLEAR);
4828 	}
4829 	spa->spa_last_open_failed = 0;
4830 	mutex_exit(&spa_namespace_lock);
4831 
4832 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4833 		error = spa_open(zc->zc_name, &spa, FTAG);
4834 	} else {
4835 		nvlist_t *policy;
4836 		nvlist_t *config = NULL;
4837 
4838 		if (zc->zc_nvlist_src == 0)
4839 			return (SET_ERROR(EINVAL));
4840 
4841 		if ((error = get_nvlist(zc->zc_nvlist_src,
4842 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4843 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4844 			    policy, &config);
4845 			if (config != NULL) {
4846 				int err;
4847 
4848 				if ((err = put_nvlist(zc, config)) != 0)
4849 					error = err;
4850 				nvlist_free(config);
4851 			}
4852 			nvlist_free(policy);
4853 		}
4854 	}
4855 
4856 	if (error != 0)
4857 		return (error);
4858 
4859 	/*
4860 	 * If multihost is enabled, resuming I/O is unsafe as another
4861 	 * host may have imported the pool.
4862 	 */
4863 	if (spa_multihost(spa) && spa_suspended(spa))
4864 		return (SET_ERROR(EINVAL));
4865 
4866 	spa_vdev_state_enter(spa, SCL_NONE);
4867 
4868 	if (zc->zc_guid == 0) {
4869 		vd = NULL;
4870 	} else {
4871 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4872 		if (vd == NULL) {
4873 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4874 			spa_close(spa, FTAG);
4875 			return (SET_ERROR(ENODEV));
4876 		}
4877 	}
4878 
4879 	vdev_clear(spa, vd);
4880 
4881 	(void) spa_vdev_state_exit(spa, NULL, 0);
4882 
4883 	/*
4884 	 * Resume any suspended I/Os.
4885 	 */
4886 	if (zio_resume(spa) != 0)
4887 		error = SET_ERROR(EIO);
4888 
4889 	spa_close(spa, FTAG);
4890 
4891 	return (error);
4892 }
4893 
4894 static int
4895 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4896 {
4897 	spa_t *spa;
4898 	int error;
4899 
4900 	error = spa_open(zc->zc_name, &spa, FTAG);
4901 	if (error != 0)
4902 		return (error);
4903 
4904 	spa_vdev_state_enter(spa, SCL_NONE);
4905 
4906 	/*
4907 	 * If a resilver is already in progress then set the
4908 	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4909 	 * the scan as a side effect of the reopen. Otherwise, let
4910 	 * vdev_open() decided if a resilver is required.
4911 	 */
4912 	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4913 	vdev_reopen(spa->spa_root_vdev);
4914 	spa->spa_scrub_reopen = B_FALSE;
4915 
4916 	(void) spa_vdev_state_exit(spa, NULL, 0);
4917 	spa_close(spa, FTAG);
4918 	return (0);
4919 }
4920 /*
4921  * inputs:
4922  * zc_name	name of filesystem
4923  *
4924  * outputs:
4925  * zc_string	name of conflicting snapshot, if there is one
4926  */
4927 static int
4928 zfs_ioc_promote(zfs_cmd_t *zc)
4929 {
4930 	dsl_pool_t *dp;
4931 	dsl_dataset_t *ds, *ods;
4932 	char origin[ZFS_MAX_DATASET_NAME_LEN];
4933 	char *cp;
4934 	int error;
4935 
4936 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4937 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4938 	    strchr(zc->zc_name, '%'))
4939 		return (SET_ERROR(EINVAL));
4940 
4941 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4942 	if (error != 0)
4943 		return (error);
4944 
4945 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4946 	if (error != 0) {
4947 		dsl_pool_rele(dp, FTAG);
4948 		return (error);
4949 	}
4950 
4951 	if (!dsl_dir_is_clone(ds->ds_dir)) {
4952 		dsl_dataset_rele(ds, FTAG);
4953 		dsl_pool_rele(dp, FTAG);
4954 		return (SET_ERROR(EINVAL));
4955 	}
4956 
4957 	error = dsl_dataset_hold_obj(dp,
4958 	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
4959 	if (error != 0) {
4960 		dsl_dataset_rele(ds, FTAG);
4961 		dsl_pool_rele(dp, FTAG);
4962 		return (error);
4963 	}
4964 
4965 	dsl_dataset_name(ods, origin);
4966 	dsl_dataset_rele(ods, FTAG);
4967 	dsl_dataset_rele(ds, FTAG);
4968 	dsl_pool_rele(dp, FTAG);
4969 
4970 	/*
4971 	 * We don't need to unmount *all* the origin fs's snapshots, but
4972 	 * it's easier.
4973 	 */
4974 	cp = strchr(origin, '@');
4975 	if (cp)
4976 		*cp = '\0';
4977 	(void) dmu_objset_find(origin,
4978 	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4979 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4980 }
4981 
4982 /*
4983  * Retrieve a single {user|group}{used|quota}@... property.
4984  *
4985  * inputs:
4986  * zc_name	name of filesystem
4987  * zc_objset_type zfs_userquota_prop_t
4988  * zc_value	domain name (eg. "S-1-234-567-89")
4989  * zc_guid	RID/UID/GID
4990  *
4991  * outputs:
4992  * zc_cookie	property value
4993  */
4994 static int
4995 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4996 {
4997 	zfsvfs_t *zfsvfs;
4998 	int error;
4999 
5000 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
5001 		return (SET_ERROR(EINVAL));
5002 
5003 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5004 	if (error != 0)
5005 		return (error);
5006 
5007 	error = zfs_userspace_one(zfsvfs,
5008 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
5009 	zfsvfs_rele(zfsvfs, FTAG);
5010 
5011 	return (error);
5012 }
5013 
5014 /*
5015  * inputs:
5016  * zc_name		name of filesystem
5017  * zc_cookie		zap cursor
5018  * zc_objset_type	zfs_userquota_prop_t
5019  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5020  *
5021  * outputs:
5022  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5023  * zc_cookie	zap cursor
5024  */
5025 static int
5026 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5027 {
5028 	zfsvfs_t *zfsvfs;
5029 	int bufsize = zc->zc_nvlist_dst_size;
5030 
5031 	if (bufsize <= 0)
5032 		return (SET_ERROR(ENOMEM));
5033 
5034 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5035 	if (error != 0)
5036 		return (error);
5037 
5038 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
5039 
5040 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5041 	    buf, &zc->zc_nvlist_dst_size);
5042 
5043 	if (error == 0) {
5044 		error = xcopyout(buf,
5045 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5046 		    zc->zc_nvlist_dst_size);
5047 	}
5048 	kmem_free(buf, bufsize);
5049 	zfsvfs_rele(zfsvfs, FTAG);
5050 
5051 	return (error);
5052 }
5053 
5054 /*
5055  * inputs:
5056  * zc_name		name of filesystem
5057  *
5058  * outputs:
5059  * none
5060  */
5061 static int
5062 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5063 {
5064 	objset_t *os;
5065 	int error = 0;
5066 	zfsvfs_t *zfsvfs;
5067 
5068 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5069 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5070 			/*
5071 			 * If userused is not enabled, it may be because the
5072 			 * objset needs to be closed & reopened (to grow the
5073 			 * objset_phys_t).  Suspend/resume the fs will do that.
5074 			 */
5075 			dsl_dataset_t *ds, *newds;
5076 
5077 			ds = dmu_objset_ds(zfsvfs->z_os);
5078 			error = zfs_suspend_fs(zfsvfs);
5079 			if (error == 0) {
5080 				dmu_objset_refresh_ownership(ds, &newds,
5081 				    zfsvfs);
5082 				error = zfs_resume_fs(zfsvfs, newds);
5083 			}
5084 		}
5085 		if (error == 0)
5086 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5087 		VFS_RELE(zfsvfs->z_vfs);
5088 	} else {
5089 		/* XXX kind of reading contents without owning */
5090 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5091 		if (error != 0)
5092 			return (error);
5093 
5094 		error = dmu_objset_userspace_upgrade(os);
5095 		dmu_objset_rele(os, FTAG);
5096 	}
5097 
5098 	return (error);
5099 }
5100 
/*
 * We don't want to have a hard dependency on the special symbols we need
 * from sharefs, nfs, and smbsrv.  They are looked up with ddi_modopen() and
 * ddi_modsym() when the first file system is shared.
 * None of sharefs, nfs, or smbsrv is an unloadable module.
 */
/* "nfs_export" entry point, looked up in the fs/nfs module. */
int (*znfsexport_fs)(void *arg);
/* "sharefs_impl" entry point, looked up in the fs/sharefs module. */
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
/* "smb_server_share" entry point, looked up in the drv/smbsrv module. */
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the NFS / SMB lookups above have all succeeded. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Handles returned by ddi_modopen() for the three modules. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the module handles and entry points above are being set up. */
kmutex_t zfs_share_lock;
5119 
5120 static int
5121 zfs_init_sharefs()
5122 {
5123 	int error;
5124 
5125 	ASSERT(MUTEX_HELD(&zfs_share_lock));
5126 	/* Both NFS and SMB shares also require sharetab support. */
5127 	if (sharefs_mod == NULL && ((sharefs_mod =
5128 	    ddi_modopen("fs/sharefs",
5129 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
5130 		return (SET_ERROR(ENOSYS));
5131 	}
5132 	if (zshare_fs == NULL && ((zshare_fs =
5133 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
5134 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
5135 		return (SET_ERROR(ENOSYS));
5136 	}
5137 	return (0);
5138 }
5139 
5140 static int
5141 zfs_ioc_share(zfs_cmd_t *zc)
5142 {
5143 	int error;
5144 	int opcode;
5145 
5146 	switch (zc->zc_share.z_sharetype) {
5147 	case ZFS_SHARE_NFS:
5148 	case ZFS_UNSHARE_NFS:
5149 		if (zfs_nfsshare_inited == 0) {
5150 			mutex_enter(&zfs_share_lock);
5151 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5152 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5153 				mutex_exit(&zfs_share_lock);
5154 				return (SET_ERROR(ENOSYS));
5155 			}
5156 			if (znfsexport_fs == NULL &&
5157 			    ((znfsexport_fs = (int (*)(void *))
5158 			    ddi_modsym(nfs_mod,
5159 			    "nfs_export", &error)) == NULL)) {
5160 				mutex_exit(&zfs_share_lock);
5161 				return (SET_ERROR(ENOSYS));
5162 			}
5163 			error = zfs_init_sharefs();
5164 			if (error != 0) {
5165 				mutex_exit(&zfs_share_lock);
5166 				return (SET_ERROR(ENOSYS));
5167 			}
5168 			zfs_nfsshare_inited = 1;
5169 			mutex_exit(&zfs_share_lock);
5170 		}
5171 		break;
5172 	case ZFS_SHARE_SMB:
5173 	case ZFS_UNSHARE_SMB:
5174 		if (zfs_smbshare_inited == 0) {
5175 			mutex_enter(&zfs_share_lock);
5176 			if (smbsrv_mod == NULL && ((smbsrv_mod =
5177 			    ddi_modopen("drv/smbsrv",
5178 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5179 				mutex_exit(&zfs_share_lock);
5180 				return (SET_ERROR(ENOSYS));
5181 			}
5182 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5183 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5184 			    "smb_server_share", &error)) == NULL)) {
5185 				mutex_exit(&zfs_share_lock);
5186 				return (SET_ERROR(ENOSYS));
5187 			}
5188 			error = zfs_init_sharefs();
5189 			if (error != 0) {
5190 				mutex_exit(&zfs_share_lock);
5191 				return (SET_ERROR(ENOSYS));
5192 			}
5193 			zfs_smbshare_inited = 1;
5194 			mutex_exit(&zfs_share_lock);
5195 		}
5196 		break;
5197 	default:
5198 		return (SET_ERROR(EINVAL));
5199 	}
5200 
5201 	switch (zc->zc_share.z_sharetype) {
5202 	case ZFS_SHARE_NFS:
5203 	case ZFS_UNSHARE_NFS:
5204 		if (error =
5205 		    znfsexport_fs((void *)
5206 		    (uintptr_t)zc->zc_share.z_exportdata))
5207 			return (error);
5208 		break;
5209 	case ZFS_SHARE_SMB:
5210 	case ZFS_UNSHARE_SMB:
5211 		if (error = zsmbexport_fs((void *)
5212 		    (uintptr_t)zc->zc_share.z_exportdata,
5213 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5214 		    B_TRUE: B_FALSE)) {
5215 			return (error);
5216 		}
5217 		break;
5218 	}
5219 
5220 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5221 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5222 	    SHAREFS_ADD : SHAREFS_REMOVE;
5223 
5224 	/*
5225 	 * Add or remove share from sharetab
5226 	 */
5227 	error = zshare_fs(opcode,
5228 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
5229 	    zc->zc_share.z_sharemax);
5230 
5231 	return (error);
5232 
5233 }
5234 
/*
 * One-entry ACL naming ACE_EVERYONE with ACE_ALL_PERMS.  zfs_ioc_smb_acl()
 * passes it as the ACL of the files it creates for ZFS_SMB_ACL_ADD.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
5238 
5239 /*
5240  * inputs:
5241  * zc_name		name of containing filesystem
5242  * zc_obj		object # beyond which we want next in-use object #
5243  *
5244  * outputs:
5245  * zc_obj		next in-use object #
5246  */
5247 static int
5248 zfs_ioc_next_obj(zfs_cmd_t *zc)
5249 {
5250 	objset_t *os = NULL;
5251 	int error;
5252 
5253 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5254 	if (error != 0)
5255 		return (error);
5256 
5257 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5258 	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5259 
5260 	dmu_objset_rele(os, FTAG);
5261 	return (error);
5262 }
5263 
5264 /*
5265  * inputs:
5266  * zc_name		name of filesystem
5267  * zc_value		prefix name for snapshot
5268  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5269  *
5270  * outputs:
5271  * zc_value		short name of new snapshot
5272  */
5273 static int
5274 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5275 {
5276 	char *snap_name;
5277 	char *hold_name;
5278 	int error;
5279 	minor_t minor;
5280 
5281 	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5282 	if (error != 0)
5283 		return (error);
5284 
5285 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5286 	    (u_longlong_t)ddi_get_lbolt64());
5287 	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5288 
5289 	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5290 	    hold_name);
5291 	if (error == 0)
5292 		(void) strcpy(zc->zc_value, snap_name);
5293 	strfree(snap_name);
5294 	strfree(hold_name);
5295 	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5296 	return (error);
5297 }
5298 
5299 /*
5300  * inputs:
5301  * zc_name		name of "to" snapshot
5302  * zc_value		name of "from" snapshot
5303  * zc_cookie		file descriptor to write diff data on
5304  *
5305  * outputs:
5306  * dmu_diff_record_t's to the file descriptor
5307  */
5308 static int
5309 zfs_ioc_diff(zfs_cmd_t *zc)
5310 {
5311 	file_t *fp;
5312 	offset_t off;
5313 	int error;
5314 
5315 	fp = getf(zc->zc_cookie);
5316 	if (fp == NULL)
5317 		return (SET_ERROR(EBADF));
5318 
5319 	off = fp->f_offset;
5320 
5321 	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5322 
5323 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5324 		fp->f_offset = off;
5325 	releasef(zc->zc_cookie);
5326 
5327 	return (error);
5328 }
5329 
5330 /*
5331  * Remove all ACL files in shares dir
5332  */
5333 static int
5334 zfs_smb_acl_purge(znode_t *dzp)
5335 {
5336 	zap_cursor_t	zc;
5337 	zap_attribute_t	zap;
5338 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5339 	int error;
5340 
5341 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5342 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5343 	    zap_cursor_advance(&zc)) {
5344 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5345 		    NULL, 0)) != 0)
5346 			break;
5347 	}
5348 	zap_cursor_fini(&zc);
5349 	return (error);
5350 }
5351 
5352 static int
5353 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5354 {
5355 	vnode_t *vp;
5356 	znode_t *dzp;
5357 	vnode_t *resourcevp = NULL;
5358 	znode_t *sharedir;
5359 	zfsvfs_t *zfsvfs;
5360 	nvlist_t *nvlist;
5361 	char *src, *target;
5362 	vattr_t vattr;
5363 	vsecattr_t vsec;
5364 	int error = 0;
5365 
5366 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5367 	    NO_FOLLOW, NULL, &vp)) != 0)
5368 		return (error);
5369 
5370 	/* Now make sure mntpnt and dataset are ZFS */
5371 
5372 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5373 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5374 	    zc->zc_name) != 0)) {
5375 		VN_RELE(vp);
5376 		return (SET_ERROR(EINVAL));
5377 	}
5378 
5379 	dzp = VTOZ(vp);
5380 	zfsvfs = dzp->z_zfsvfs;
5381 	ZFS_ENTER(zfsvfs);
5382 
5383 	/*
5384 	 * Create share dir if its missing.
5385 	 */
5386 	mutex_enter(&zfsvfs->z_lock);
5387 	if (zfsvfs->z_shares_dir == 0) {
5388 		dmu_tx_t *tx;
5389 
5390 		tx = dmu_tx_create(zfsvfs->z_os);
5391 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5392 		    ZFS_SHARES_DIR);
5393 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5394 		error = dmu_tx_assign(tx, TXG_WAIT);
5395 		if (error != 0) {
5396 			dmu_tx_abort(tx);
5397 		} else {
5398 			error = zfs_create_share_dir(zfsvfs, tx);
5399 			dmu_tx_commit(tx);
5400 		}
5401 		if (error != 0) {
5402 			mutex_exit(&zfsvfs->z_lock);
5403 			VN_RELE(vp);
5404 			ZFS_EXIT(zfsvfs);
5405 			return (error);
5406 		}
5407 	}
5408 	mutex_exit(&zfsvfs->z_lock);
5409 
5410 	ASSERT(zfsvfs->z_shares_dir);
5411 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5412 		VN_RELE(vp);
5413 		ZFS_EXIT(zfsvfs);
5414 		return (error);
5415 	}
5416 
5417 	switch (zc->zc_cookie) {
5418 	case ZFS_SMB_ACL_ADD:
5419 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5420 		vattr.va_type = VREG;
5421 		vattr.va_mode = S_IFREG|0777;
5422 		vattr.va_uid = 0;
5423 		vattr.va_gid = 0;
5424 
5425 		vsec.vsa_mask = VSA_ACE;
5426 		vsec.vsa_aclentp = &full_access;
5427 		vsec.vsa_aclentsz = sizeof (full_access);
5428 		vsec.vsa_aclcnt = 1;
5429 
5430 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5431 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5432 		if (resourcevp)
5433 			VN_RELE(resourcevp);
5434 		break;
5435 
5436 	case ZFS_SMB_ACL_REMOVE:
5437 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5438 		    NULL, 0);
5439 		break;
5440 
5441 	case ZFS_SMB_ACL_RENAME:
5442 		if ((error = get_nvlist(zc->zc_nvlist_src,
5443 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5444 			VN_RELE(vp);
5445 			VN_RELE(ZTOV(sharedir));
5446 			ZFS_EXIT(zfsvfs);
5447 			return (error);
5448 		}
5449 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5450 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5451 		    &target)) {
5452 			VN_RELE(vp);
5453 			VN_RELE(ZTOV(sharedir));
5454 			ZFS_EXIT(zfsvfs);
5455 			nvlist_free(nvlist);
5456 			return (error);
5457 		}
5458 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5459 		    kcred, NULL, 0);
5460 		nvlist_free(nvlist);
5461 		break;
5462 
5463 	case ZFS_SMB_ACL_PURGE:
5464 		error = zfs_smb_acl_purge(sharedir);
5465 		break;
5466 
5467 	default:
5468 		error = SET_ERROR(EINVAL);
5469 		break;
5470 	}
5471 
5472 	VN_RELE(vp);
5473 	VN_RELE(ZTOV(sharedir));
5474 
5475 	ZFS_EXIT(zfsvfs);
5476 
5477 	return (error);
5478 }
5479 
5480 /*
5481  * innvl: {
5482  *     "holds" -> { snapname -> holdname (string), ... }
5483  *     (optional) "cleanup_fd" -> fd (int32)
5484  * }
5485  *
5486  * outnvl: {
5487  *     snapname -> error value (int32)
5488  *     ...
5489  * }
5490  */
5491 /* ARGSUSED */
5492 static int
5493 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5494 {
5495 	nvpair_t *pair;
5496 	nvlist_t *holds;
5497 	int cleanup_fd = -1;
5498 	int error;
5499 	minor_t minor = 0;
5500 
5501 	error = nvlist_lookup_nvlist(args, "holds", &holds);
5502 	if (error != 0)
5503 		return (SET_ERROR(EINVAL));
5504 
5505 	/* make sure the user didn't pass us any invalid (empty) tags */
5506 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5507 	    pair = nvlist_next_nvpair(holds, pair)) {
5508 		char *htag;
5509 
5510 		error = nvpair_value_string(pair, &htag);
5511 		if (error != 0)
5512 			return (SET_ERROR(error));
5513 
5514 		if (strlen(htag) == 0)
5515 			return (SET_ERROR(EINVAL));
5516 	}
5517 
5518 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5519 		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5520 		if (error != 0)
5521 			return (error);
5522 	}
5523 
5524 	error = dsl_dataset_user_hold(holds, minor, errlist);
5525 	if (minor != 0)
5526 		zfs_onexit_fd_rele(cleanup_fd);
5527 	return (error);
5528 }
5529 
5530 /*
5531  * innvl is not used.
5532  *
5533  * outnvl: {
5534  *    holdname -> time added (uint64 seconds since epoch)
5535  *    ...
5536  * }
5537  */
5538 /* ARGSUSED */
5539 static int
5540 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5541 {
5542 	ASSERT3P(args, ==, NULL);
5543 	return (dsl_dataset_get_holds(snapname, outnvl));
5544 }
5545 
5546 /*
5547  * innvl: {
5548  *     snapname -> { holdname, ... }
5549  *     ...
5550  * }
5551  *
5552  * outnvl: {
5553  *     snapname -> error value (int32)
5554  *     ...
5555  * }
5556  */
5557 /* ARGSUSED */
5558 static int
5559 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5560 {
5561 	return (dsl_dataset_user_release(holds, errlist));
5562 }
5563 
5564 /*
5565  * inputs:
5566  * zc_name		name of new filesystem or snapshot
5567  * zc_value		full name of old snapshot
5568  *
5569  * outputs:
5570  * zc_cookie		space in bytes
5571  * zc_objset_type	compressed space in bytes
5572  * zc_perm_action	uncompressed space in bytes
5573  */
5574 static int
5575 zfs_ioc_space_written(zfs_cmd_t *zc)
5576 {
5577 	int error;
5578 	dsl_pool_t *dp;
5579 	dsl_dataset_t *new, *old;
5580 
5581 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5582 	if (error != 0)
5583 		return (error);
5584 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5585 	if (error != 0) {
5586 		dsl_pool_rele(dp, FTAG);
5587 		return (error);
5588 	}
5589 	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5590 	if (error != 0) {
5591 		dsl_dataset_rele(new, FTAG);
5592 		dsl_pool_rele(dp, FTAG);
5593 		return (error);
5594 	}
5595 
5596 	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5597 	    &zc->zc_objset_type, &zc->zc_perm_action);
5598 	dsl_dataset_rele(old, FTAG);
5599 	dsl_dataset_rele(new, FTAG);
5600 	dsl_pool_rele(dp, FTAG);
5601 	return (error);
5602 }
5603 
5604 /*
5605  * innvl: {
5606  *     "firstsnap" -> snapshot name
5607  * }
5608  *
5609  * outnvl: {
5610  *     "used" -> space in bytes
5611  *     "compressed" -> compressed space in bytes
5612  *     "uncompressed" -> uncompressed space in bytes
5613  * }
5614  */
5615 static int
5616 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5617 {
5618 	int error;
5619 	dsl_pool_t *dp;
5620 	dsl_dataset_t *new, *old;
5621 	char *firstsnap;
5622 	uint64_t used, comp, uncomp;
5623 
5624 	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5625 		return (SET_ERROR(EINVAL));
5626 
5627 	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5628 	if (error != 0)
5629 		return (error);
5630 
5631 	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5632 	if (error == 0 && !new->ds_is_snapshot) {
5633 		dsl_dataset_rele(new, FTAG);
5634 		error = SET_ERROR(EINVAL);
5635 	}
5636 	if (error != 0) {
5637 		dsl_pool_rele(dp, FTAG);
5638 		return (error);
5639 	}
5640 	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5641 	if (error == 0 && !old->ds_is_snapshot) {
5642 		dsl_dataset_rele(old, FTAG);
5643 		error = SET_ERROR(EINVAL);
5644 	}
5645 	if (error != 0) {
5646 		dsl_dataset_rele(new, FTAG);
5647 		dsl_pool_rele(dp, FTAG);
5648 		return (error);
5649 	}
5650 
5651 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5652 	dsl_dataset_rele(old, FTAG);
5653 	dsl_dataset_rele(new, FTAG);
5654 	dsl_pool_rele(dp, FTAG);
5655 	fnvlist_add_uint64(outnvl, "used", used);
5656 	fnvlist_add_uint64(outnvl, "compressed", comp);
5657 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5658 	return (error);
5659 }
5660 
5661 /*
5662  * innvl: {
5663  *     "fd" -> file descriptor to write stream to (int32)
5664  *     (optional) "fromsnap" -> full snap name to send an incremental from
5665  *     (optional) "largeblockok" -> (value ignored)
5666  *         indicates that blocks > 128KB are permitted
5667  *     (optional) "embedok" -> (value ignored)
5668  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5669  *     (optional) "compressok" -> (value ignored)
5670  *         presence indicates compressed DRR_WRITE records are permitted
5671  *     (optional) "resume_object" and "resume_offset" -> (uint64)
5672  *         if present, resume send stream from specified object and offset.
5673  * }
5674  *
5675  * outnvl is unused
5676  */
5677 /* ARGSUSED */
5678 static int
5679 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5680 {
5681 	int error;
5682 	offset_t off;
5683 	char *fromname = NULL;
5684 	int fd;
5685 	boolean_t largeblockok;
5686 	boolean_t embedok;
5687 	boolean_t compressok;
5688 	uint64_t resumeobj = 0;
5689 	uint64_t resumeoff = 0;
5690 
5691 	error = nvlist_lookup_int32(innvl, "fd", &fd);
5692 	if (error != 0)
5693 		return (SET_ERROR(EINVAL));
5694 
5695 	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5696 
5697 	largeblockok = nvlist_exists(innvl, "largeblockok");
5698 	embedok = nvlist_exists(innvl, "embedok");
5699 	compressok = nvlist_exists(innvl, "compressok");
5700 
5701 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5702 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5703 
5704 	file_t *fp = getf(fd);
5705 	if (fp == NULL)
5706 		return (SET_ERROR(EBADF));
5707 
5708 	off = fp->f_offset;
5709 	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
5710 	    fd, resumeobj, resumeoff, fp->f_vnode, &off);
5711 
5712 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5713 		fp->f_offset = off;
5714 	releasef(fd);
5715 	return (error);
5716 }
5717 
5718 /*
5719  * Determine approximately how large a zfs send stream will be -- the number
5720  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5721  *
5722  * innvl: {
5723  *     (optional) "from" -> full snap or bookmark name to send an incremental
5724  *                          from
5725  *     (optional) "largeblockok" -> (value ignored)
5726  *         indicates that blocks > 128KB are permitted
5727  *     (optional) "embedok" -> (value ignored)
5728  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5729  *     (optional) "compressok" -> (value ignored)
5730  *         presence indicates compressed DRR_WRITE records are permitted
5731  * }
5732  *
5733  * outnvl: {
5734  *     "space" -> bytes of space (uint64)
5735  * }
5736  */
5737 static int
5738 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5739 {
5740 	dsl_pool_t *dp;
5741 	dsl_dataset_t *tosnap;
5742 	int error;
5743 	char *fromname;
5744 	boolean_t compressok;
5745 	uint64_t space;
5746 
5747 	error = dsl_pool_hold(snapname, FTAG, &dp);
5748 	if (error != 0)
5749 		return (error);
5750 
5751 	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5752 	if (error != 0) {
5753 		dsl_pool_rele(dp, FTAG);
5754 		return (error);
5755 	}
5756 
5757 	compressok = nvlist_exists(innvl, "compressok");
5758 
5759 	error = nvlist_lookup_string(innvl, "from", &fromname);
5760 	if (error == 0) {
5761 		if (strchr(fromname, '@') != NULL) {
5762 			/*
5763 			 * If from is a snapshot, hold it and use the more
5764 			 * efficient dmu_send_estimate to estimate send space
5765 			 * size using deadlists.
5766 			 */
5767 			dsl_dataset_t *fromsnap;
5768 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5769 			if (error != 0)
5770 				goto out;
5771 			error = dmu_send_estimate(tosnap, fromsnap, compressok,
5772 			    &space);
5773 			dsl_dataset_rele(fromsnap, FTAG);
5774 		} else if (strchr(fromname, '#') != NULL) {
5775 			/*
5776 			 * If from is a bookmark, fetch the creation TXG of the
5777 			 * snapshot it was created from and use that to find
5778 			 * blocks that were born after it.
5779 			 */
5780 			zfs_bookmark_phys_t frombm;
5781 
5782 			error = dsl_bookmark_lookup(dp, fromname, tosnap,
5783 			    &frombm);
5784 			if (error != 0)
5785 				goto out;
5786 			error = dmu_send_estimate_from_txg(tosnap,
5787 			    frombm.zbm_creation_txg, compressok, &space);
5788 		} else {
5789 			/*
5790 			 * from is not properly formatted as a snapshot or
5791 			 * bookmark
5792 			 */
5793 			error = SET_ERROR(EINVAL);
5794 			goto out;
5795 		}
5796 	} else {
5797 		/*
5798 		 * If estimating the size of a full send, use dmu_send_estimate.
5799 		 */
5800 		error = dmu_send_estimate(tosnap, NULL, compressok, &space);
5801 	}
5802 
5803 	fnvlist_add_uint64(outnvl, "space", space);
5804 
5805 out:
5806 	dsl_dataset_rele(tosnap, FTAG);
5807 	dsl_pool_rele(dp, FTAG);
5808 	return (error);
5809 }
5810 
5811 /*
5812  * Sync the currently open TXG to disk for the specified pool.
5813  * This is somewhat similar to 'zfs_sync()'.
5814  * For cases that do not result in error this ioctl will wait for
5815  * the currently open TXG to commit before returning back to the caller.
5816  *
5817  * innvl: {
5818  *  "force" -> when true, force uberblock update even if there is no dirty data.
5819  *             In addition this will cause the vdev configuration to be written
5820  *             out including updating the zpool cache file. (boolean_t)
5821  * }
5822  *
5823  * onvl is unused
5824  */
5825 /* ARGSUSED */
5826 static int
5827 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
5828 {
5829 	int err;
5830 	boolean_t force;
5831 	spa_t *spa;
5832 
5833 	if ((err = spa_open(pool, &spa, FTAG)) != 0)
5834 		return (err);
5835 
5836 	force = fnvlist_lookup_boolean_value(innvl, "force");
5837 	if (force) {
5838 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
5839 		vdev_config_dirty(spa->spa_root_vdev);
5840 		spa_config_exit(spa, SCL_CONFIG, FTAG);
5841 	}
5842 	txg_wait_synced(spa_get_dsl(spa), 0);
5843 
5844 	spa_close(spa, FTAG);
5845 
5846 	return (err);
5847 }
5848 
/*
 * Table of ioctl handlers, indexed by (ioc - ZFS_IOC_FIRST).  Slots are
 * filled in by zfs_ioctl_register() and zfs_ioctl_register_legacy().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5850 
5851 static void
5852 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5853     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5854     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5855 {
5856 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5857 
5858 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5859 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5860 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5861 	ASSERT3P(vec->zvec_func, ==, NULL);
5862 
5863 	vec->zvec_legacy_func = func;
5864 	vec->zvec_secpolicy = secpolicy;
5865 	vec->zvec_namecheck = namecheck;
5866 	vec->zvec_allow_log = log_history;
5867 	vec->zvec_pool_check = pool_check;
5868 }
5869 
5870 /*
5871  * See the block comment at the beginning of this file for details on
5872  * each argument to this function.
5873  */
5874 static void
5875 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5876     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5877     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5878     boolean_t allow_log)
5879 {
5880 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5881 
5882 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5883 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5884 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5885 	ASSERT3P(vec->zvec_func, ==, NULL);
5886 
5887 	/* if we are logging, the name must be valid */
5888 	ASSERT(!allow_log || namecheck != NO_NAME);
5889 
5890 	vec->zvec_name = name;
5891 	vec->zvec_func = func;
5892 	vec->zvec_secpolicy = secpolicy;
5893 	vec->zvec_namecheck = namecheck;
5894 	vec->zvec_pool_check = pool_check;
5895 	vec->zvec_smush_outnvlist = smush_outnvlist;
5896 	vec->zvec_allow_log = allow_log;
5897 }
5898 
5899 static void
5900 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5901     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5902     zfs_ioc_poolcheck_t pool_check)
5903 {
5904 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5905 	    POOL_NAME, log_history, pool_check);
5906 }
5907 
5908 static void
5909 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5910     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5911 {
5912 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5913 	    DATASET_NAME, B_FALSE, pool_check);
5914 }
5915 
5916 static void
5917 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5918 {
5919 	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5920 	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5921 }
5922 
5923 static void
5924 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5925     zfs_secpolicy_func_t *secpolicy)
5926 {
5927 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5928 	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
5929 }
5930 
5931 static void
5932 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5933     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5934 {
5935 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5936 	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5937 }
5938 
5939 static void
5940 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5941 {
5942 	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5943 	    zfs_secpolicy_read);
5944 }
5945 
5946 static void
5947 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5948     zfs_secpolicy_func_t *secpolicy)
5949 {
5950 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5951 	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5952 }
5953 
/*
 * Populate zfs_ioc_vec with every supported ioctl.  Each registration
 * fills one slot; the register functions assert that a slot is not
 * registered twice.
 */
static void
zfs_ioctl_init(void)
{
	/* IOCTLS that use the nvlist-based function signature */

	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
	    zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
	    zfs_ioc_channel_program, zfs_secpolicy_config,
	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
	    B_TRUE);

	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("zpool_discard_checkpoint",
	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
	    zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/* no name check, no logging, no pool status check */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

	/* dataset ioctls registered with zfs_secpolicy_read */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* dataset ioctls registered with their own security policy */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/* dataset ioctls registered with logging and the read-only check */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* dataset ioctls registered without history logging */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
6176 
6177 int
6178 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6179     zfs_ioc_poolcheck_t check)
6180 {
6181 	spa_t *spa;
6182 	int error;
6183 
6184 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
6185 
6186 	if (check & POOL_CHECK_NONE)
6187 		return (0);
6188 
6189 	error = spa_open(name, &spa, FTAG);
6190 	if (error == 0) {
6191 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6192 			error = SET_ERROR(EAGAIN);
6193 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6194 			error = SET_ERROR(EROFS);
6195 		spa_close(spa, FTAG);
6196 	}
6197 	return (error);
6198 }
6199 
6200 /*
6201  * Find a free minor number.
6202  */
6203 minor_t
6204 zfsdev_minor_alloc(void)
6205 {
6206 	static minor_t last_minor;
6207 	minor_t m;
6208 
6209 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6210 
6211 	for (m = last_minor + 1; m != last_minor; m++) {
6212 		if (m > ZFSDEV_MAX_MINOR)
6213 			m = 1;
6214 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6215 			last_minor = m;
6216 			return (m);
6217 		}
6218 	}
6219 
6220 	return (0);
6221 }
6222 
6223 static int
6224 zfs_ctldev_init(dev_t *devp)
6225 {
6226 	minor_t minor;
6227 	zfs_soft_state_t *zs;
6228 
6229 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6230 	ASSERT(getminor(*devp) == 0);
6231 
6232 	minor = zfsdev_minor_alloc();
6233 	if (minor == 0)
6234 		return (SET_ERROR(ENXIO));
6235 
6236 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6237 		return (SET_ERROR(EAGAIN));
6238 
6239 	*devp = makedevice(getemajor(*devp), minor);
6240 
6241 	zs = ddi_get_soft_state(zfsdev_state, minor);
6242 	zs->zss_type = ZSST_CTLDEV;
6243 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6244 
6245 	return (0);
6246 }
6247 
6248 static void
6249 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
6250 {
6251 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6252 
6253 	zfs_onexit_destroy(zo);
6254 	ddi_soft_state_free(zfsdev_state, minor);
6255 }
6256 
6257 void *
6258 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6259 {
6260 	zfs_soft_state_t *zp;
6261 
6262 	zp = ddi_get_soft_state(zfsdev_state, minor);
6263 	if (zp == NULL || zp->zss_type != which)
6264 		return (NULL);
6265 
6266 	return (zp->zss_data);
6267 }
6268 
6269 static int
6270 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
6271 {
6272 	int error = 0;
6273 
6274 	if (getminor(*devp) != 0)
6275 		return (zvol_open(devp, flag, otyp, cr));
6276 
6277 	/* This is the control device. Allocate a new minor if requested. */
6278 	if (flag & FEXCL) {
6279 		mutex_enter(&zfsdev_state_lock);
6280 		error = zfs_ctldev_init(devp);
6281 		mutex_exit(&zfsdev_state_lock);
6282 	}
6283 
6284 	return (error);
6285 }
6286 
6287 static int
6288 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
6289 {
6290 	zfs_onexit_t *zo;
6291 	minor_t minor = getminor(dev);
6292 
6293 	if (minor == 0)
6294 		return (0);
6295 
6296 	mutex_enter(&zfsdev_state_lock);
6297 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6298 	if (zo == NULL) {
6299 		mutex_exit(&zfsdev_state_lock);
6300 		return (zvol_close(dev, flag, otyp, cr));
6301 	}
6302 	zfs_ctldev_destroy(zo, minor);
6303 	mutex_exit(&zfsdev_state_lock);
6304 
6305 	return (0);
6306 }
6307 
/*
 * ioctl(9E) entry point.
 *
 * Requests on a minor that is neither 0 nor a ZSST_CTLDEV minor are passed
 * to zvol_ioctl().  Otherwise the zfs_cmd_t is copied in, the optional
 * input nvlist is unpacked, zc_name is validated according to the handler's
 * name check, the handler's security policy is consulted, and the request
 * is dispatched to either the nvlist-based or the legacy handler.  The
 * zfs_cmd_t is copied back out before returning, on success or failure.
 *
 * Returns EINVAL for an out-of-range cmd or an invalid name, EFAULT if the
 * copyin fails (or the copyout fails after an otherwise successful call),
 * or the error from the pool check, security policy, or handler.
 */
static int
zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
{
	zfs_cmd_t *zc;
	uint_t vecnum;
	int error, rc, len;
	minor_t minor = getminor(dev);
	const zfs_ioc_vec_t *vec;
	char *saved_poolname = NULL;
	nvlist_t *innvl = NULL;

	if (minor != 0 &&
	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));

	/*
	 * vecnum is unsigned, so a cmd below ZFS_IOC_FIRST wraps to a large
	 * value and is rejected by the bounds check below.
	 */
	vecnum = cmd - ZFS_IOC_FIRST;
	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));

	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
		return (SET_ERROR(EINVAL));
	vec = &zfs_ioc_vec[vecnum];

	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);

	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
	if (error != 0) {
		error = SET_ERROR(EFAULT);
		goto out;
	}

	zc->zc_iflags = flag & FKIOCTL;
	if (zc->zc_nvlist_src_size != 0) {
		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
		    zc->zc_iflags, &innvl);
		if (error != 0)
			goto out;
	}

	/*
	 * Ensure that all pool/dataset names are valid before we pass down to
	 * the lower layers.
	 */
	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
	switch (vec->zvec_namecheck) {
	case POOL_NAME:
		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case DATASET_NAME:
		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
			error = SET_ERROR(EINVAL);
		else
			error = pool_status_check(zc->zc_name,
			    vec->zvec_namecheck, vec->zvec_pool_check);
		break;

	case NO_NAME:
		break;
	}


	/*
	 * NOTE(review): zvec_secpolicy is called without a NULL check; this
	 * presumably relies on every slot in zfs_ioc_vec having been
	 * registered -- confirm against zfs_ioctl_init().
	 */
	if (error == 0)
		error = vec->zvec_secpolicy(zc, innvl, cr);

	if (error != 0)
		goto out;

	/*
	 * Save the pool component of zc_name (everything before the first
	 * '/', '@' or '#') now, since:
	 */
	/* legacy ioctls can modify zc_name */
	len = strcspn(zc->zc_name, "/@#") + 1;
	saved_poolname = kmem_alloc(len, KM_SLEEP);
	(void) strlcpy(saved_poolname, zc->zc_name, len);

	if (vec->zvec_func != NULL) {
		nvlist_t *outnvl;
		int puterror = 0;
		spa_t *spa;
		nvlist_t *lognv = NULL;

		ASSERT(vec->zvec_legacy_func == NULL);

		/*
		 * Add the innvl to the lognv before calling the func,
		 * in case the func changes the innvl.
		 */
		if (vec->zvec_allow_log) {
			lognv = fnvlist_alloc();
			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
			    vec->zvec_name);
			if (!nvlist_empty(innvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
				    innvl);
			}
		}

		outnvl = fnvlist_alloc();
		error = vec->zvec_func(zc->zc_name, innvl, outnvl);

		/*
		 * Some commands can partially execute, modify state, and still
		 * return an error.  In these cases, attempt to record what
		 * was modified.
		 */
		if ((error == 0 ||
		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
		    vec->zvec_allow_log &&
		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
			if (!nvlist_empty(outnvl)) {
				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
				    outnvl);
			}
			if (error != 0) {
				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
				    error);
			}
			(void) spa_history_log_nvl(spa, lognv);
			spa_close(spa, FTAG);
		}
		fnvlist_free(lognv);

		/*
		 * Return the output nvlist to the caller.  If smushing is
		 * enabled for this ioctl and it fails, the nvlist is not
		 * copied out and the failure is not reported.
		 */
		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
			int smusherror = 0;
			if (vec->zvec_smush_outnvlist) {
				smusherror = nvlist_smush(outnvl,
				    zc->zc_nvlist_dst_size);
			}
			if (smusherror == 0)
				puterror = put_nvlist(zc, outnvl);
		}

		/* a failure to copy out the result overrides the func's error */
		if (puterror != 0)
			error = puterror;

		nvlist_free(outnvl);
	} else {
		error = vec->zvec_legacy_func(zc);
	}

out:
	nvlist_free(innvl);
	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
	if (error == 0 && rc != 0)
		error = SET_ERROR(EFAULT);
	/*
	 * On success of a loggable ioctl, stash the saved pool name in
	 * thread-specific data under zfs_allow_log_key (freeing any string
	 * already stored there); otherwise free it here.
	 */
	if (error == 0 && vec->zvec_allow_log) {
		char *s = tsd_get(zfs_allow_log_key);
		if (s != NULL)
			strfree(s);
		(void) tsd_set(zfs_allow_log_key, saved_poolname);
	} else {
		if (saved_poolname != NULL)
			strfree(saved_poolname);
	}

	kmem_free(zc, sizeof (zfs_cmd_t));
	return (error);
}
6467 
6468 static int
6469 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6470 {
6471 	if (cmd != DDI_ATTACH)
6472 		return (DDI_FAILURE);
6473 
6474 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6475 	    DDI_PSEUDO, 0) == DDI_FAILURE)
6476 		return (DDI_FAILURE);
6477 
6478 	zfs_dip = dip;
6479 
6480 	ddi_report_dev(dip);
6481 
6482 	return (DDI_SUCCESS);
6483 }
6484 
6485 static int
6486 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6487 {
6488 	if (spa_busy() || zfs_busy() || zvol_busy())
6489 		return (DDI_FAILURE);
6490 
6491 	if (cmd != DDI_DETACH)
6492 		return (DDI_FAILURE);
6493 
6494 	zfs_dip = NULL;
6495 
6496 	ddi_prop_remove_all(dip);
6497 	ddi_remove_minor_node(dip, NULL);
6498 
6499 	return (DDI_SUCCESS);
6500 }
6501 
6502 /*ARGSUSED*/
6503 static int
6504 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6505 {
6506 	switch (infocmd) {
6507 	case DDI_INFO_DEVT2DEVINFO:
6508 		*result = zfs_dip;
6509 		return (DDI_SUCCESS);
6510 
6511 	case DDI_INFO_DEVT2INSTANCE:
6512 		*result = (void *)0;
6513 		return (DDI_SUCCESS);
6514 	}
6515 
6516 	return (DDI_FAILURE);
6517 }
6518 
6519 /*
6520  * OK, so this is a little weird.
6521  *
6522  * /dev/zfs is the control node, i.e. minor 0.
6523  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
6524  *
6525  * /dev/zfs has basically nothing to do except serve up ioctls,
6526  * so most of the standard driver entry points are in zvol.c.
6527  */
6528 static struct cb_ops zfs_cb_ops = {
6529 	zfsdev_open,	/* open */
6530 	zfsdev_close,	/* close */
6531 	zvol_strategy,	/* strategy */
6532 	nodev,		/* print */
6533 	zvol_dump,	/* dump */
6534 	zvol_read,	/* read */
6535 	zvol_write,	/* write */
6536 	zfsdev_ioctl,	/* ioctl */
6537 	nodev,		/* devmap */
6538 	nodev,		/* mmap */
6539 	nodev,		/* segmap */
6540 	nochpoll,	/* poll */
6541 	ddi_prop_op,	/* prop_op */
6542 	NULL,		/* streamtab */
6543 	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
6544 	CB_REV,		/* version */
6545 	nodev,		/* async read */
6546 	nodev,		/* async write */
6547 };
6548 
6549 static struct dev_ops zfs_dev_ops = {
6550 	DEVO_REV,	/* version */
6551 	0,		/* refcnt */
6552 	zfs_info,	/* info */
6553 	nulldev,	/* identify */
6554 	nulldev,	/* probe */
6555 	zfs_attach,	/* attach */
6556 	zfs_detach,	/* detach */
6557 	nodev,		/* reset */
6558 	&zfs_cb_ops,	/* driver operations */
6559 	NULL,		/* no bus operations */
6560 	NULL,		/* power */
6561 	ddi_quiesce_not_needed,	/* quiesce */
6562 };
6563 
6564 static struct modldrv zfs_modldrv = {
6565 	&mod_driverops,
6566 	"ZFS storage pool",
6567 	&zfs_dev_ops
6568 };
6569 
6570 static struct modlinkage modlinkage = {
6571 	MODREV_1,
6572 	(void *)&zfs_modlfs,
6573 	(void *)&zfs_modldrv,
6574 	NULL
6575 };
6576 
/*
 * TSD destructor registered for zfs_allow_log_key.  The per-thread value
 * is a pool name string, which is released with strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
	strfree((char *)arg);
}
6583 
6584 int
6585 _init(void)
6586 {
6587 	int error;
6588 
6589 	spa_init(FREAD | FWRITE);
6590 	zfs_init();
6591 	zvol_init();
6592 	zfs_ioctl_init();
6593 
6594 	if ((error = mod_install(&modlinkage)) != 0) {
6595 		zvol_fini();
6596 		zfs_fini();
6597 		spa_fini();
6598 		return (error);
6599 	}
6600 
6601 	tsd_create(&zfs_fsyncer_key, NULL);
6602 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6603 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6604 
6605 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
6606 	ASSERT(error == 0);
6607 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6608 
6609 	return (0);
6610 }
6611 
6612 int
6613 _fini(void)
6614 {
6615 	int error;
6616 
6617 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
6618 		return (SET_ERROR(EBUSY));
6619 
6620 	if ((error = mod_remove(&modlinkage)) != 0)
6621 		return (error);
6622 
6623 	zvol_fini();
6624 	zfs_fini();
6625 	spa_fini();
6626 	if (zfs_nfsshare_inited)
6627 		(void) ddi_modclose(nfs_mod);
6628 	if (zfs_smbshare_inited)
6629 		(void) ddi_modclose(smbsrv_mod);
6630 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
6631 		(void) ddi_modclose(sharefs_mod);
6632 
6633 	tsd_destroy(&zfs_fsyncer_key);
6634 	ldi_ident_release(zfs_li);
6635 	zfs_li = NULL;
6636 	mutex_destroy(&zfs_share_lock);
6637 
6638 	return (error);
6639 }
6640 
6641 int
6642 _info(struct modinfo *modinfop)
6643 {
6644 	return (mod_info(&modlinkage, modinfop));
6645 }
6646