xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision 619a0f6c7269dc1950adc2e401a36d843dd9fa02)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25  * Portions Copyright 2011 Martin Matuska
26  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
27  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
28  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
29  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
30  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
31  * Copyright (c) 2013 Steven Hartland. All rights reserved.
32  * Copyright (c) 2014 Integros [integros.com]
33  * Copyright 2016 Toomas Soome <tsoome@me.com>
34  * Copyright 2017 RackTop Systems.
35  * Copyright (c) 2017 Datto Inc.
36  */
37 
38 /*
39  * ZFS ioctls.
40  *
41  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
42  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
43  *
44  * There are two ways that we handle ioctls: the legacy way where almost
45  * all of the logic is in the ioctl callback, and the new way where most
46  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
47  *
48  * Non-legacy ioctls should be registered by calling
49  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
50  * from userland by lzc_ioctl().
51  *
52  * The registration arguments are as follows:
53  *
54  * const char *name
55  *   The name of the ioctl.  This is used for history logging.  If the
56  *   ioctl returns successfully (the callback returns 0), and allow_log
57  *   is true, then a history log entry will be recorded with the input &
58  *   output nvlists.  The log entry can be printed with "zpool history -i".
59  *
60  * zfs_ioc_t ioc
61  *   The ioctl request number, which userland will pass to ioctl(2).
62  *   The ioctl numbers can change from release to release, because
63  *   the caller (libzfs) must be matched to the kernel.
64  *
65  * zfs_secpolicy_func_t *secpolicy
66  *   This function will be called before the zfs_ioc_func_t, to
67  *   determine if this operation is permitted.  It should return EPERM
68  *   on failure, and 0 on success.  Checks include determining if the
69  *   dataset is visible in this zone, and if the user has either all
70  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
71  *   to do this operation on this dataset with "zfs allow".
72  *
73  * zfs_ioc_namecheck_t namecheck
74  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
75  *   name, a dataset name, or nothing.  If the name is not well-formed,
76  *   the ioctl will fail and the callback will not be called.
77  *   Therefore, the callback can assume that the name is well-formed
78  *   (e.g. is null-terminated, doesn't have more than one '@' character,
79  *   doesn't have invalid characters).
80  *
81  * zfs_ioc_poolcheck_t pool_check
82  *   This specifies requirements on the pool state.  If the pool does
83  *   not meet them (is suspended or is readonly), the ioctl will fail
84  *   and the callback will not be called.  If any checks are specified
85  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
86  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
87  *   POOL_CHECK_READONLY).
88  *
89  * boolean_t smush_outnvlist
90  *   If smush_outnvlist is true, then the output is presumed to be a
91  *   list of errors, and it will be "smushed" down to fit into the
92  *   caller's buffer, by removing some entries and replacing them with a
93  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
94  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
95  *   outnvlist does not fit into the userland-provided buffer, then the
96  *   ioctl will fail with ENOMEM.
97  *
98  * zfs_ioc_func_t *func
99  *   The callback function that will perform the operation.
100  *
101  *   The callback should return 0 on success, or an error number on
102  *   failure.  If the function fails, the userland ioctl will return -1,
103  *   and errno will be set to the callback's return value.  The callback
104  *   will be called with the following arguments:
105  *
106  *   const char *name
107  *     The name of the pool or dataset to operate on, from
108  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
109  *     expected type (pool, dataset, or none).
110  *
111  *   nvlist_t *innvl
112  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
113  *     NULL if no input nvlist was provided.  Changes to this nvlist are
114  *     ignored.  If the input nvlist could not be deserialized, the
115  *     ioctl will fail and the callback will not be called.
116  *
117  *   nvlist_t *outnvl
118  *     The output nvlist, initially empty.  The callback can fill it in,
119  *     and it will be returned to userland by serializing it into
120  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
121  *     fails (e.g. because the caller didn't supply a large enough
 *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
124  *
125  *     There are two typical uses of the output nvlist:
126  *       - To return state, e.g. property values.  In this case,
127  *         smush_outnvlist should be false.  If the buffer was not large
128  *         enough, the caller will reallocate a larger buffer and try
129  *         the ioctl again.
130  *
131  *       - To return multiple errors from an ioctl which makes on-disk
132  *         changes.  In this case, smush_outnvlist should be true.
133  *         Ioctls which make on-disk modifications should generally not
134  *         use the outnvl if they succeed, because the caller can not
135  *         distinguish between the operation failing, and
136  *         deserialization failing.
137  */
138 
139 #include <sys/types.h>
140 #include <sys/param.h>
141 #include <sys/errno.h>
142 #include <sys/uio.h>
143 #include <sys/buf.h>
144 #include <sys/modctl.h>
145 #include <sys/open.h>
146 #include <sys/file.h>
147 #include <sys/kmem.h>
148 #include <sys/conf.h>
149 #include <sys/cmn_err.h>
150 #include <sys/stat.h>
151 #include <sys/zfs_ioctl.h>
152 #include <sys/zfs_vfsops.h>
153 #include <sys/zfs_znode.h>
154 #include <sys/zap.h>
155 #include <sys/spa.h>
156 #include <sys/spa_impl.h>
157 #include <sys/vdev.h>
158 #include <sys/priv_impl.h>
159 #include <sys/dmu.h>
160 #include <sys/dsl_dir.h>
161 #include <sys/dsl_dataset.h>
162 #include <sys/dsl_prop.h>
163 #include <sys/dsl_deleg.h>
164 #include <sys/dmu_objset.h>
165 #include <sys/dmu_impl.h>
166 #include <sys/dmu_tx.h>
167 #include <sys/ddi.h>
168 #include <sys/sunddi.h>
169 #include <sys/sunldi.h>
170 #include <sys/policy.h>
171 #include <sys/zone.h>
172 #include <sys/nvpair.h>
173 #include <sys/pathname.h>
174 #include <sys/mount.h>
175 #include <sys/sdt.h>
176 #include <sys/fs/zfs.h>
177 #include <sys/zfs_ctldir.h>
178 #include <sys/zfs_dir.h>
179 #include <sys/zfs_onexit.h>
180 #include <sys/zvol.h>
181 #include <sys/dsl_scan.h>
182 #include <sharefs/share.h>
183 #include <sys/dmu_objset.h>
184 #include <sys/dmu_recv.h>
185 #include <sys/dmu_send.h>
186 #include <sys/dsl_destroy.h>
187 #include <sys/dsl_bookmark.h>
188 #include <sys/dsl_userhold.h>
189 #include <sys/zfeature.h>
190 #include <sys/zcp.h>
191 #include <sys/zio_checksum.h>
192 #include <sys/vdev_removal.h>
193 #include <sys/vdev_impl.h>
194 #include <sys/vdev_initialize.h>
195 
196 #include "zfs_namecheck.h"
197 #include "zfs_prop.h"
198 #include "zfs_deleg.h"
199 #include "zfs_comutil.h"
200 
201 #include "lua.h"
202 #include "lauxlib.h"
203 
/* Defined outside this file. */
extern struct modlfs zfs_modlfs;

/* Defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;

uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
/* Thread-specific-data key read by zfs_secpolicy_log_history(). */
static uint_t zfs_allow_log_key;
215 
/*
 * Callback signatures.  A legacy ioctl callback receives the raw zfs_cmd_t;
 * a non-legacy callback receives a name plus the input and output nvlists
 * (see the block comment at the top of this file).  A security policy
 * callback is called before the ioctl callback to determine whether the
 * operation is permitted.
 */
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);

/* What to expect in zfs_cmd_t:zc_name. */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;

/* Requirements on the pool state; the values can be or-ed together. */
typedef enum {
	POOL_CHECK_NONE		= 1 << 0,
	POOL_CHECK_SUSPENDED	= 1 << 1,
	POOL_CHECK_READONLY	= 1 << 2,
} zfs_ioc_poolcheck_t;

/* Per-ioctl registration record. */
typedef struct zfs_ioc_vec {
	zfs_ioc_legacy_func_t	*zvec_legacy_func;	/* legacy callback */
	zfs_ioc_func_t		*zvec_func;		/* nvlist callback */
	zfs_secpolicy_func_t	*zvec_secpolicy;	/* permission check */
	zfs_ioc_namecheck_t	zvec_namecheck;		/* zc_name contents */
	boolean_t		zvec_allow_log;		/* history logging */
	zfs_ioc_poolcheck_t	zvec_pool_check;	/* pool state checks */
	boolean_t		zvec_smush_outnvlist;	/* shrink output */
	const char		*zvec_name;		/* name of the ioctl */
} zfs_ioc_vec_t;
242 
/*
 * This array is indexed by zfs_userquota_prop_t.  Each entry is the
 * ZFS_DELEG_PERM_* name for the corresponding property.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
250 
/*
 * Forward declarations.  The definitions are not in this part of the file;
 * presumably they appear further down.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
262 
263 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
264 void
265 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
266 {
267 	const char *newfile;
268 	char buf[512];
269 	va_list adx;
270 
271 	/*
272 	 * Get rid of annoying "../common/" prefix to filename.
273 	 */
274 	newfile = strrchr(file, '/');
275 	if (newfile != NULL) {
276 		newfile = newfile + 1; /* Get rid of leading / */
277 	} else {
278 		newfile = file;
279 	}
280 
281 	va_start(adx, fmt);
282 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
283 	va_end(adx);
284 
285 	/*
286 	 * To get this data, use the zfs-dprintf probe as so:
287 	 * dtrace -q -n 'zfs-dprintf \
288 	 *	/stringof(arg0) == "dbuf.c"/ \
289 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
290 	 * arg0 = file name
291 	 * arg1 = function name
292 	 * arg2 = line number
293 	 * arg3 = message
294 	 */
295 	DTRACE_PROBE4(zfs__dprintf,
296 	    char *, newfile, char *, func, int, line, char *, buf);
297 }
298 
/*
 * Free a history buffer.  The buffer is assumed to be HIS_MAX_RECORD_LEN
 * bytes long, which is the size history_str_get() allocates.
 */
static void
history_str_free(char *buf)
{
	kmem_free(buf, HIS_MAX_RECORD_LEN);
}
304 
305 static char *
306 history_str_get(zfs_cmd_t *zc)
307 {
308 	char *buf;
309 
310 	if (zc->zc_history == NULL)
311 		return (NULL);
312 
313 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
314 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
315 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
316 		history_str_free(buf);
317 		return (NULL);
318 	}
319 
320 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
321 
322 	return (buf);
323 }
324 
325 /*
326  * Check to see if the named dataset is currently defined as bootable
327  */
328 static boolean_t
329 zfs_is_bootfs(const char *name)
330 {
331 	objset_t *os;
332 
333 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
334 		boolean_t ret;
335 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
336 		dmu_objset_rele(os, FTAG);
337 		return (ret);
338 	}
339 	return (B_FALSE);
340 }
341 
342 /*
343  * Return non-zero if the spa version is less than requested version.
344  */
345 static int
346 zfs_earlier_version(const char *name, int version)
347 {
348 	spa_t *spa;
349 
350 	if (spa_open(name, &spa, FTAG) == 0) {
351 		if (spa_version(spa) < version) {
352 			spa_close(spa, FTAG);
353 			return (1);
354 		}
355 		spa_close(spa, FTAG);
356 	}
357 	return (0);
358 }
359 
360 /*
361  * Return TRUE if the ZPL version is less than requested version.
362  */
363 static boolean_t
364 zpl_earlier_version(const char *name, int version)
365 {
366 	objset_t *os;
367 	boolean_t rc = B_TRUE;
368 
369 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
370 		uint64_t zplversion;
371 
372 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
373 			dmu_objset_rele(os, FTAG);
374 			return (B_TRUE);
375 		}
376 		/* XXX reading from non-owned objset */
377 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
378 			rc = zplversion < version;
379 		dmu_objset_rele(os, FTAG);
380 	}
381 	return (rc);
382 }
383 
384 static void
385 zfs_log_history(zfs_cmd_t *zc)
386 {
387 	spa_t *spa;
388 	char *buf;
389 
390 	if ((buf = history_str_get(zc)) == NULL)
391 		return;
392 
393 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
394 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
395 			(void) spa_history_log(spa, buf);
396 		spa_close(spa, FTAG);
397 	}
398 	history_str_free(buf);
399 }
400 
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * All arguments are ignored; the operation is always permitted (returns 0).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
	return (0);
}
411 
412 /*
413  * Policy for dataset read operations (list children, get statistics).  Requires
414  * no privileges, but must be visible in the local zone.
415  */
416 /* ARGSUSED */
417 static int
418 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
419 {
420 	if (INGLOBALZONE(curproc) ||
421 	    zone_dataset_visible(zc->zc_name, NULL))
422 		return (0);
423 
424 	return (SET_ERROR(ENOENT));
425 }
426 
427 static int
428 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
429 {
430 	int writable = 1;
431 
432 	/*
433 	 * The dataset must be visible by this zone -- check this first
434 	 * so they don't see EPERM on something they shouldn't know about.
435 	 */
436 	if (!INGLOBALZONE(curproc) &&
437 	    !zone_dataset_visible(dataset, &writable))
438 		return (SET_ERROR(ENOENT));
439 
440 	if (INGLOBALZONE(curproc)) {
441 		/*
442 		 * If the fs is zoned, only root can access it from the
443 		 * global zone.
444 		 */
445 		if (secpolicy_zfs(cr) && zoned)
446 			return (SET_ERROR(EPERM));
447 	} else {
448 		/*
449 		 * If we are in a local zone, the 'zoned' property must be set.
450 		 */
451 		if (!zoned)
452 			return (SET_ERROR(EPERM));
453 
454 		/* must be writable by this zone */
455 		if (!writable)
456 			return (SET_ERROR(EPERM));
457 	}
458 	return (0);
459 }
460 
461 static int
462 zfs_dozonecheck(const char *dataset, cred_t *cr)
463 {
464 	uint64_t zoned;
465 
466 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
467 		return (SET_ERROR(ENOENT));
468 
469 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
470 }
471 
472 static int
473 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
474 {
475 	uint64_t zoned;
476 
477 	if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
478 		return (SET_ERROR(ENOENT));
479 
480 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
481 }
482 
483 static int
484 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
485     const char *perm, cred_t *cr)
486 {
487 	int error;
488 
489 	error = zfs_dozonecheck_ds(name, ds, cr);
490 	if (error == 0) {
491 		error = secpolicy_zfs(cr);
492 		if (error != 0)
493 			error = dsl_deleg_access_impl(ds, perm, cr);
494 	}
495 	return (error);
496 }
497 
498 static int
499 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
500 {
501 	int error;
502 	dsl_dataset_t *ds;
503 	dsl_pool_t *dp;
504 
505 	/*
506 	 * First do a quick check for root in the global zone, which
507 	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
508 	 * will get to handle nonexistent datasets.
509 	 */
510 	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
511 		return (0);
512 
513 	error = dsl_pool_hold(name, FTAG, &dp);
514 	if (error != 0)
515 		return (error);
516 
517 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
518 	if (error != 0) {
519 		dsl_pool_rele(dp, FTAG);
520 		return (error);
521 	}
522 
523 	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
524 
525 	dsl_dataset_rele(ds, FTAG);
526 	dsl_pool_rele(dp, FTAG);
527 	return (error);
528 }
529 
530 /*
531  * Policy for setting the security label property.
532  *
533  * Returns 0 for success, non-zero for access and other errors.
534  */
535 static int
536 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
537 {
538 	char		ds_hexsl[MAXNAMELEN];
539 	bslabel_t	ds_sl, new_sl;
540 	boolean_t	new_default = FALSE;
541 	uint64_t	zoned;
542 	int		needed_priv = -1;
543 	int		error;
544 
545 	/* First get the existing dataset label. */
546 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
547 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
548 	if (error != 0)
549 		return (SET_ERROR(EPERM));
550 
551 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
552 		new_default = TRUE;
553 
554 	/* The label must be translatable */
555 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
556 		return (SET_ERROR(EINVAL));
557 
558 	/*
559 	 * In a non-global zone, disallow attempts to set a label that
560 	 * doesn't match that of the zone; otherwise no other checks
561 	 * are needed.
562 	 */
563 	if (!INGLOBALZONE(curproc)) {
564 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
565 			return (SET_ERROR(EPERM));
566 		return (0);
567 	}
568 
569 	/*
570 	 * For global-zone datasets (i.e., those whose zoned property is
571 	 * "off", verify that the specified new label is valid for the
572 	 * global zone.
573 	 */
574 	if (dsl_prop_get_integer(name,
575 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
576 		return (SET_ERROR(EPERM));
577 	if (!zoned) {
578 		if (zfs_check_global_label(name, strval) != 0)
579 			return (SET_ERROR(EPERM));
580 	}
581 
582 	/*
583 	 * If the existing dataset label is nondefault, check if the
584 	 * dataset is mounted (label cannot be changed while mounted).
585 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
586 	 * mounted (or isn't a dataset, doesn't exist, ...).
587 	 */
588 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
589 		objset_t *os;
590 		static char *setsl_tag = "setsl_tag";
591 
592 		/*
593 		 * Try to own the dataset; abort if there is any error,
594 		 * (e.g., already mounted, in use, or other error).
595 		 */
596 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
597 		    setsl_tag, &os);
598 		if (error != 0)
599 			return (SET_ERROR(EPERM));
600 
601 		dmu_objset_disown(os, setsl_tag);
602 
603 		if (new_default) {
604 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
605 			goto out_check;
606 		}
607 
608 		if (hexstr_to_label(strval, &new_sl) != 0)
609 			return (SET_ERROR(EPERM));
610 
611 		if (blstrictdom(&ds_sl, &new_sl))
612 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
613 		else if (blstrictdom(&new_sl, &ds_sl))
614 			needed_priv = PRIV_FILE_UPGRADE_SL;
615 	} else {
616 		/* dataset currently has a default label */
617 		if (!new_default)
618 			needed_priv = PRIV_FILE_UPGRADE_SL;
619 	}
620 
621 out_check:
622 	if (needed_priv != -1)
623 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
624 	return (0);
625 }
626 
627 static int
628 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
629     cred_t *cr)
630 {
631 	char *strval;
632 
633 	/*
634 	 * Check permissions for special properties.
635 	 */
636 	switch (prop) {
637 	case ZFS_PROP_ZONED:
638 		/*
639 		 * Disallow setting of 'zoned' from within a local zone.
640 		 */
641 		if (!INGLOBALZONE(curproc))
642 			return (SET_ERROR(EPERM));
643 		break;
644 
645 	case ZFS_PROP_QUOTA:
646 	case ZFS_PROP_FILESYSTEM_LIMIT:
647 	case ZFS_PROP_SNAPSHOT_LIMIT:
648 		if (!INGLOBALZONE(curproc)) {
649 			uint64_t zoned;
650 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
651 			/*
652 			 * Unprivileged users are allowed to modify the
653 			 * limit on things *under* (ie. contained by)
654 			 * the thing they own.
655 			 */
656 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
657 			    setpoint))
658 				return (SET_ERROR(EPERM));
659 			if (!zoned || strlen(dsname) <= strlen(setpoint))
660 				return (SET_ERROR(EPERM));
661 		}
662 		break;
663 
664 	case ZFS_PROP_MLSLABEL:
665 		if (!is_system_labeled())
666 			return (SET_ERROR(EPERM));
667 
668 		if (nvpair_value_string(propval, &strval) == 0) {
669 			int err;
670 
671 			err = zfs_set_slabel_policy(dsname, strval, CRED());
672 			if (err != 0)
673 				return (err);
674 		}
675 		break;
676 	}
677 
678 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
679 }
680 
681 /* ARGSUSED */
682 static int
683 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
684 {
685 	int error;
686 
687 	error = zfs_dozonecheck(zc->zc_name, cr);
688 	if (error != 0)
689 		return (error);
690 
691 	/*
692 	 * permission to set permissions will be evaluated later in
693 	 * dsl_deleg_can_allow()
694 	 */
695 	return (0);
696 }
697 
698 /* ARGSUSED */
699 static int
700 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
701 {
702 	return (zfs_secpolicy_write_perms(zc->zc_name,
703 	    ZFS_DELEG_PERM_ROLLBACK, cr));
704 }
705 
706 /* ARGSUSED */
707 static int
708 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
709 {
710 	dsl_pool_t *dp;
711 	dsl_dataset_t *ds;
712 	char *cp;
713 	int error;
714 
715 	/*
716 	 * Generate the current snapshot name from the given objsetid, then
717 	 * use that name for the secpolicy/zone checks.
718 	 */
719 	cp = strchr(zc->zc_name, '@');
720 	if (cp == NULL)
721 		return (SET_ERROR(EINVAL));
722 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
723 	if (error != 0)
724 		return (error);
725 
726 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
727 	if (error != 0) {
728 		dsl_pool_rele(dp, FTAG);
729 		return (error);
730 	}
731 
732 	dsl_dataset_name(ds, zc->zc_name);
733 
734 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
735 	    ZFS_DELEG_PERM_SEND, cr);
736 	dsl_dataset_rele(ds, FTAG);
737 	dsl_pool_rele(dp, FTAG);
738 
739 	return (error);
740 }
741 
742 /* ARGSUSED */
743 static int
744 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
745 {
746 	return (zfs_secpolicy_write_perms(zc->zc_name,
747 	    ZFS_DELEG_PERM_SEND, cr));
748 }
749 
750 /* ARGSUSED */
751 static int
752 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
753 {
754 	vnode_t *vp;
755 	int error;
756 
757 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
758 	    NO_FOLLOW, NULL, &vp)) != 0)
759 		return (error);
760 
761 	/* Now make sure mntpnt and dataset are ZFS */
762 
763 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
764 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
765 	    zc->zc_name) != 0)) {
766 		VN_RELE(vp);
767 		return (SET_ERROR(EPERM));
768 	}
769 
770 	VN_RELE(vp);
771 	return (dsl_deleg_access(zc->zc_name,
772 	    ZFS_DELEG_PERM_SHARE, cr));
773 }
774 
775 int
776 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
777 {
778 	if (!INGLOBALZONE(curproc))
779 		return (SET_ERROR(EPERM));
780 
781 	if (secpolicy_nfs(cr) == 0) {
782 		return (0);
783 	} else {
784 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
785 	}
786 }
787 
788 int
789 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
790 {
791 	if (!INGLOBALZONE(curproc))
792 		return (SET_ERROR(EPERM));
793 
794 	if (secpolicy_smb(cr) == 0) {
795 		return (0);
796 	} else {
797 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
798 	}
799 }
800 
/*
 * Derive the name of the parent of 'datasetname' by removing the trailing
 * "@bla" or "/bla" component.
 *
 * datasetname - name of a snapshot or of a non-root dataset
 * parent      - output buffer that receives the parent's name
 * parentsize  - size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT if the name contains neither '@' nor '/',
 * ENAMETOOLONG if the name does not fit in 'parent' (including its
 * terminating NUL), and EINVAL if 'parentsize' is not positive.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (SET_ERROR(EINVAL));

	/*
	 * strncpy() zero-fills the rest of 'parent' when the source fits,
	 * and writes no terminator at all when it does not.  A non-NUL last
	 * byte therefore means the name was truncated; searching such a
	 * buffer with strrchr() would run off its end.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	if (parent[parentsize - 1] != '\0') {
		parent[parentsize - 1] = '\0';
		return (SET_ERROR(ENAMETOOLONG));
	}

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (SET_ERROR(ENOENT));
	cp[0] = '\0';

	return (0);
}
822 
823 int
824 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
825 {
826 	int error;
827 
828 	if ((error = zfs_secpolicy_write_perms(name,
829 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
830 		return (error);
831 
832 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
833 }
834 
835 /* ARGSUSED */
836 static int
837 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
838 {
839 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
840 }
841 
842 /*
843  * Destroying snapshots with delegated permissions requires
844  * descendant mount and destroy permissions.
845  */
846 /* ARGSUSED */
847 static int
848 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
849 {
850 	nvlist_t *snaps;
851 	nvpair_t *pair, *nextpair;
852 	int error = 0;
853 
854 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
855 		return (SET_ERROR(EINVAL));
856 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
857 	    pair = nextpair) {
858 		nextpair = nvlist_next_nvpair(snaps, pair);
859 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
860 		if (error == ENOENT) {
861 			/*
862 			 * Ignore any snapshots that don't exist (we consider
863 			 * them "already destroyed").  Remove the name from the
864 			 * nvl here in case the snapshot is created between
865 			 * now and when we try to destroy it (in which case
866 			 * we don't want to destroy it since we haven't
867 			 * checked for permission).
868 			 */
869 			fnvlist_remove_nvpair(snaps, pair);
870 			error = 0;
871 		}
872 		if (error != 0)
873 			break;
874 	}
875 
876 	return (error);
877 }
878 
879 int
880 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
881 {
882 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
883 	int	error;
884 
885 	if ((error = zfs_secpolicy_write_perms(from,
886 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
887 		return (error);
888 
889 	if ((error = zfs_secpolicy_write_perms(from,
890 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
891 		return (error);
892 
893 	if ((error = zfs_get_parent(to, parentname,
894 	    sizeof (parentname))) != 0)
895 		return (error);
896 
897 	if ((error = zfs_secpolicy_write_perms(parentname,
898 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
899 		return (error);
900 
901 	if ((error = zfs_secpolicy_write_perms(parentname,
902 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
903 		return (error);
904 
905 	return (error);
906 }
907 
908 /* ARGSUSED */
909 static int
910 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
911 {
912 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
913 }
914 
915 /* ARGSUSED */
916 static int
917 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
918 {
919 	dsl_pool_t *dp;
920 	dsl_dataset_t *clone;
921 	int error;
922 
923 	error = zfs_secpolicy_write_perms(zc->zc_name,
924 	    ZFS_DELEG_PERM_PROMOTE, cr);
925 	if (error != 0)
926 		return (error);
927 
928 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
929 	if (error != 0)
930 		return (error);
931 
932 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
933 
934 	if (error == 0) {
935 		char parentname[ZFS_MAX_DATASET_NAME_LEN];
936 		dsl_dataset_t *origin = NULL;
937 		dsl_dir_t *dd;
938 		dd = clone->ds_dir;
939 
940 		error = dsl_dataset_hold_obj(dd->dd_pool,
941 		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
942 		if (error != 0) {
943 			dsl_dataset_rele(clone, FTAG);
944 			dsl_pool_rele(dp, FTAG);
945 			return (error);
946 		}
947 
948 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
949 		    ZFS_DELEG_PERM_MOUNT, cr);
950 
951 		dsl_dataset_name(origin, parentname);
952 		if (error == 0) {
953 			error = zfs_secpolicy_write_perms_ds(parentname, origin,
954 			    ZFS_DELEG_PERM_PROMOTE, cr);
955 		}
956 		dsl_dataset_rele(clone, FTAG);
957 		dsl_dataset_rele(origin, FTAG);
958 	}
959 	dsl_pool_rele(dp, FTAG);
960 	return (error);
961 }
962 
963 /* ARGSUSED */
964 static int
965 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
966 {
967 	int error;
968 
969 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
970 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
971 		return (error);
972 
973 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
974 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
975 		return (error);
976 
977 	return (zfs_secpolicy_write_perms(zc->zc_name,
978 	    ZFS_DELEG_PERM_CREATE, cr));
979 }
980 
981 int
982 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
983 {
984 	return (zfs_secpolicy_write_perms(name,
985 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
986 }
987 
988 /*
989  * Check for permission to create each snapshot in the nvlist.
990  */
991 /* ARGSUSED */
992 static int
993 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
994 {
995 	nvlist_t *snaps;
996 	int error = 0;
997 	nvpair_t *pair;
998 
999 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
1000 		return (SET_ERROR(EINVAL));
1001 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1002 	    pair = nvlist_next_nvpair(snaps, pair)) {
1003 		char *name = nvpair_name(pair);
1004 		char *atp = strchr(name, '@');
1005 
1006 		if (atp == NULL) {
1007 			error = SET_ERROR(EINVAL);
1008 			break;
1009 		}
1010 		*atp = '\0';
1011 		error = zfs_secpolicy_snapshot_perms(name, cr);
1012 		*atp = '@';
1013 		if (error != 0)
1014 			break;
1015 	}
1016 	return (error);
1017 }
1018 
1019 /*
1020  * Check for permission to create each snapshot in the nvlist.
1021  */
1022 /* ARGSUSED */
1023 static int
1024 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1025 {
1026 	int error = 0;
1027 
1028 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1029 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1030 		char *name = nvpair_name(pair);
1031 		char *hashp = strchr(name, '#');
1032 
1033 		if (hashp == NULL) {
1034 			error = SET_ERROR(EINVAL);
1035 			break;
1036 		}
1037 		*hashp = '\0';
1038 		error = zfs_secpolicy_write_perms(name,
1039 		    ZFS_DELEG_PERM_BOOKMARK, cr);
1040 		*hashp = '#';
1041 		if (error != 0)
1042 			break;
1043 	}
1044 	return (error);
1045 }
1046 
1047 /* ARGSUSED */
1048 static int
1049 zfs_secpolicy_remap(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1050 {
1051 	return (zfs_secpolicy_write_perms(zc->zc_name,
1052 	    ZFS_DELEG_PERM_REMAP, cr));
1053 }
1054 
1055 /* ARGSUSED */
1056 static int
1057 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1058 {
1059 	nvpair_t *pair, *nextpair;
1060 	int error = 0;
1061 
1062 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1063 	    pair = nextpair) {
1064 		char *name = nvpair_name(pair);
1065 		char *hashp = strchr(name, '#');
1066 		nextpair = nvlist_next_nvpair(innvl, pair);
1067 
1068 		if (hashp == NULL) {
1069 			error = SET_ERROR(EINVAL);
1070 			break;
1071 		}
1072 
1073 		*hashp = '\0';
1074 		error = zfs_secpolicy_write_perms(name,
1075 		    ZFS_DELEG_PERM_DESTROY, cr);
1076 		*hashp = '#';
1077 		if (error == ENOENT) {
1078 			/*
1079 			 * Ignore any filesystems that don't exist (we consider
1080 			 * their bookmarks "already destroyed").  Remove
1081 			 * the name from the nvl here in case the filesystem
1082 			 * is created between now and when we try to destroy
1083 			 * the bookmark (in which case we don't want to
1084 			 * destroy it since we haven't checked for permission).
1085 			 */
1086 			fnvlist_remove_nvpair(innvl, pair);
1087 			error = 0;
1088 		}
1089 		if (error != 0)
1090 			break;
1091 	}
1092 
1093 	return (error);
1094 }
1095 
1096 /* ARGSUSED */
1097 static int
1098 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1099 {
1100 	/*
1101 	 * Even root must have a proper TSD so that we know what pool
1102 	 * to log to.
1103 	 */
1104 	if (tsd_get(zfs_allow_log_key) == NULL)
1105 		return (SET_ERROR(EPERM));
1106 	return (0);
1107 }
1108 
1109 static int
1110 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1111 {
1112 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1113 	int	error;
1114 	char	*origin;
1115 
1116 	if ((error = zfs_get_parent(zc->zc_name, parentname,
1117 	    sizeof (parentname))) != 0)
1118 		return (error);
1119 
1120 	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1121 	    (error = zfs_secpolicy_write_perms(origin,
1122 	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1123 		return (error);
1124 
1125 	if ((error = zfs_secpolicy_write_perms(parentname,
1126 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1127 		return (error);
1128 
1129 	return (zfs_secpolicy_write_perms(parentname,
1130 	    ZFS_DELEG_PERM_MOUNT, cr));
1131 }
1132 
1133 /*
1134  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1135  * SYS_CONFIG privilege, which is not available in a local zone.
1136  */
1137 /* ARGSUSED */
1138 static int
1139 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1140 {
1141 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1142 		return (SET_ERROR(EPERM));
1143 
1144 	return (0);
1145 }
1146 
1147 /*
1148  * Policy for object to name lookups.
1149  */
1150 /* ARGSUSED */
1151 static int
1152 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1153 {
1154 	int error;
1155 
1156 	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1157 		return (0);
1158 
1159 	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1160 	return (error);
1161 }
1162 
1163 /*
1164  * Policy for fault injection.  Requires all privileges.
1165  */
1166 /* ARGSUSED */
1167 static int
1168 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1169 {
1170 	return (secpolicy_zinject(cr));
1171 }
1172 
1173 /* ARGSUSED */
1174 static int
1175 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1176 {
1177 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1178 
1179 	if (prop == ZPROP_INVAL) {
1180 		if (!zfs_prop_user(zc->zc_value))
1181 			return (SET_ERROR(EINVAL));
1182 		return (zfs_secpolicy_write_perms(zc->zc_name,
1183 		    ZFS_DELEG_PERM_USERPROP, cr));
1184 	} else {
1185 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1186 		    NULL, cr));
1187 	}
1188 }
1189 
1190 static int
1191 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1192 {
1193 	int err = zfs_secpolicy_read(zc, innvl, cr);
1194 	if (err)
1195 		return (err);
1196 
1197 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1198 		return (SET_ERROR(EINVAL));
1199 
1200 	if (zc->zc_value[0] == 0) {
1201 		/*
1202 		 * They are asking about a posix uid/gid.  If it's
1203 		 * themself, allow it.
1204 		 */
1205 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1206 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1207 			if (zc->zc_guid == crgetuid(cr))
1208 				return (0);
1209 		} else {
1210 			if (groupmember(zc->zc_guid, cr))
1211 				return (0);
1212 		}
1213 	}
1214 
1215 	return (zfs_secpolicy_write_perms(zc->zc_name,
1216 	    userquota_perms[zc->zc_objset_type], cr));
1217 }
1218 
1219 static int
1220 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1221 {
1222 	int err = zfs_secpolicy_read(zc, innvl, cr);
1223 	if (err)
1224 		return (err);
1225 
1226 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1227 		return (SET_ERROR(EINVAL));
1228 
1229 	return (zfs_secpolicy_write_perms(zc->zc_name,
1230 	    userquota_perms[zc->zc_objset_type], cr));
1231 }
1232 
1233 /* ARGSUSED */
1234 static int
1235 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1236 {
1237 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1238 	    NULL, cr));
1239 }
1240 
1241 /* ARGSUSED */
1242 static int
1243 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1244 {
1245 	nvpair_t *pair;
1246 	nvlist_t *holds;
1247 	int error;
1248 
1249 	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1250 	if (error != 0)
1251 		return (SET_ERROR(EINVAL));
1252 
1253 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1254 	    pair = nvlist_next_nvpair(holds, pair)) {
1255 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1256 		error = dmu_fsname(nvpair_name(pair), fsname);
1257 		if (error != 0)
1258 			return (error);
1259 		error = zfs_secpolicy_write_perms(fsname,
1260 		    ZFS_DELEG_PERM_HOLD, cr);
1261 		if (error != 0)
1262 			return (error);
1263 	}
1264 	return (0);
1265 }
1266 
1267 /* ARGSUSED */
1268 static int
1269 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1270 {
1271 	nvpair_t *pair;
1272 	int error;
1273 
1274 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1275 	    pair = nvlist_next_nvpair(innvl, pair)) {
1276 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1277 		error = dmu_fsname(nvpair_name(pair), fsname);
1278 		if (error != 0)
1279 			return (error);
1280 		error = zfs_secpolicy_write_perms(fsname,
1281 		    ZFS_DELEG_PERM_RELEASE, cr);
1282 		if (error != 0)
1283 			return (error);
1284 	}
1285 	return (0);
1286 }
1287 
1288 /*
1289  * Policy for allowing temporary snapshots to be taken or released
1290  */
1291 static int
1292 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1293 {
1294 	/*
1295 	 * A temporary snapshot is the same as a snapshot,
1296 	 * hold, destroy and release all rolled into one.
1297 	 * Delegated diff alone is sufficient that we allow this.
1298 	 */
1299 	int error;
1300 
1301 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1302 	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1303 		return (0);
1304 
1305 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1306 	if (error == 0)
1307 		error = zfs_secpolicy_hold(zc, innvl, cr);
1308 	if (error == 0)
1309 		error = zfs_secpolicy_release(zc, innvl, cr);
1310 	if (error == 0)
1311 		error = zfs_secpolicy_destroy(zc, innvl, cr);
1312 	return (error);
1313 }
1314 
1315 /*
1316  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1317  */
1318 static int
1319 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1320 {
1321 	char *packed;
1322 	int error;
1323 	nvlist_t *list = NULL;
1324 
1325 	/*
1326 	 * Read in and unpack the user-supplied nvlist.
1327 	 */
1328 	if (size == 0)
1329 		return (SET_ERROR(EINVAL));
1330 
1331 	packed = kmem_alloc(size, KM_SLEEP);
1332 
1333 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1334 	    iflag)) != 0) {
1335 		kmem_free(packed, size);
1336 		return (SET_ERROR(EFAULT));
1337 	}
1338 
1339 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1340 		kmem_free(packed, size);
1341 		return (error);
1342 	}
1343 
1344 	kmem_free(packed, size);
1345 
1346 	*nvp = list;
1347 	return (0);
1348 }
1349 
1350 /*
1351  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1352  * Entries will be removed from the end of the nvlist, and one int32 entry
1353  * named "N_MORE_ERRORS" will be added indicating how many entries were
1354  * removed.
1355  */
1356 static int
1357 nvlist_smush(nvlist_t *errors, size_t max)
1358 {
1359 	size_t size;
1360 
1361 	size = fnvlist_size(errors);
1362 
1363 	if (size > max) {
1364 		nvpair_t *more_errors;
1365 		int n = 0;
1366 
1367 		if (max < 1024)
1368 			return (SET_ERROR(ENOMEM));
1369 
1370 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1371 		more_errors = nvlist_prev_nvpair(errors, NULL);
1372 
1373 		do {
1374 			nvpair_t *pair = nvlist_prev_nvpair(errors,
1375 			    more_errors);
1376 			fnvlist_remove_nvpair(errors, pair);
1377 			n++;
1378 			size = fnvlist_size(errors);
1379 		} while (size > max);
1380 
1381 		fnvlist_remove_nvpair(errors, more_errors);
1382 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1383 		ASSERT3U(fnvlist_size(errors), <=, max);
1384 	}
1385 
1386 	return (0);
1387 }
1388 
1389 static int
1390 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1391 {
1392 	char *packed = NULL;
1393 	int error = 0;
1394 	size_t size;
1395 
1396 	size = fnvlist_size(nvl);
1397 
1398 	if (size > zc->zc_nvlist_dst_size) {
1399 		error = SET_ERROR(ENOMEM);
1400 	} else {
1401 		packed = fnvlist_pack(nvl, &size);
1402 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1403 		    size, zc->zc_iflags) != 0)
1404 			error = SET_ERROR(EFAULT);
1405 		fnvlist_pack_free(packed, size);
1406 	}
1407 
1408 	zc->zc_nvlist_dst_size = size;
1409 	zc->zc_nvlist_dst_filled = B_TRUE;
1410 	return (error);
1411 }
1412 
1413 int
1414 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1415 {
1416 	int error = 0;
1417 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1418 		return (SET_ERROR(EINVAL));
1419 	}
1420 
1421 	mutex_enter(&os->os_user_ptr_lock);
1422 	*zfvp = dmu_objset_get_user(os);
1423 	if (*zfvp) {
1424 		VFS_HOLD((*zfvp)->z_vfs);
1425 	} else {
1426 		error = SET_ERROR(ESRCH);
1427 	}
1428 	mutex_exit(&os->os_user_ptr_lock);
1429 	return (error);
1430 }
1431 
1432 int
1433 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1434 {
1435 	objset_t *os;
1436 	int error;
1437 
1438 	error = dmu_objset_hold(dsname, FTAG, &os);
1439 	if (error != 0)
1440 		return (error);
1441 
1442 	error = getzfsvfs_impl(os, zfvp);
1443 	dmu_objset_rele(os, FTAG);
1444 	return (error);
1445 }
1446 
1447 /*
1448  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1449  * case its z_vfs will be NULL, and it will be opened as the owner.
1450  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1451  * which prevents all vnode ops from running.
1452  */
1453 static int
1454 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1455 {
1456 	int error = 0;
1457 
1458 	if (getzfsvfs(name, zfvp) != 0)
1459 		error = zfsvfs_create(name, zfvp);
1460 	if (error == 0) {
1461 		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1462 		    RW_READER, tag);
1463 		if ((*zfvp)->z_unmounted) {
1464 			/*
1465 			 * XXX we could probably try again, since the unmounting
1466 			 * thread should be just about to disassociate the
1467 			 * objset from the zfsvfs.
1468 			 */
1469 			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1470 			return (SET_ERROR(EBUSY));
1471 		}
1472 	}
1473 	return (error);
1474 }
1475 
1476 static void
1477 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1478 {
1479 	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1480 
1481 	if (zfsvfs->z_vfs) {
1482 		VFS_RELE(zfsvfs->z_vfs);
1483 	} else {
1484 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1485 		zfsvfs_free(zfsvfs);
1486 	}
1487 }
1488 
1489 static int
1490 zfs_ioc_pool_create(zfs_cmd_t *zc)
1491 {
1492 	int error;
1493 	nvlist_t *config, *props = NULL;
1494 	nvlist_t *rootprops = NULL;
1495 	nvlist_t *zplprops = NULL;
1496 	char *spa_name = zc->zc_name;
1497 
1498 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1499 	    zc->zc_iflags, &config))
1500 		return (error);
1501 
1502 	if (zc->zc_nvlist_src_size != 0 && (error =
1503 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1504 	    zc->zc_iflags, &props))) {
1505 		nvlist_free(config);
1506 		return (error);
1507 	}
1508 
1509 	if (props) {
1510 		nvlist_t *nvl = NULL;
1511 		uint64_t version = SPA_VERSION;
1512 		char *tname;
1513 
1514 		(void) nvlist_lookup_uint64(props,
1515 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1516 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1517 			error = SET_ERROR(EINVAL);
1518 			goto pool_props_bad;
1519 		}
1520 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1521 		if (nvl) {
1522 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1523 			if (error != 0) {
1524 				nvlist_free(config);
1525 				nvlist_free(props);
1526 				return (error);
1527 			}
1528 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1529 		}
1530 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1531 		error = zfs_fill_zplprops_root(version, rootprops,
1532 		    zplprops, NULL);
1533 		if (error != 0)
1534 			goto pool_props_bad;
1535 
1536 		if (nvlist_lookup_string(props,
1537 		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1538 			spa_name = tname;
1539 	}
1540 
1541 	error = spa_create(zc->zc_name, config, props, zplprops);
1542 
1543 	/*
1544 	 * Set the remaining root properties
1545 	 */
1546 	if (!error && (error = zfs_set_prop_nvlist(spa_name,
1547 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1548 		(void) spa_destroy(spa_name);
1549 
1550 pool_props_bad:
1551 	nvlist_free(rootprops);
1552 	nvlist_free(zplprops);
1553 	nvlist_free(config);
1554 	nvlist_free(props);
1555 
1556 	return (error);
1557 }
1558 
1559 static int
1560 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1561 {
1562 	int error;
1563 	zfs_log_history(zc);
1564 	error = spa_destroy(zc->zc_name);
1565 	if (error == 0)
1566 		zvol_remove_minors(zc->zc_name);
1567 	return (error);
1568 }
1569 
1570 static int
1571 zfs_ioc_pool_import(zfs_cmd_t *zc)
1572 {
1573 	nvlist_t *config, *props = NULL;
1574 	uint64_t guid;
1575 	int error;
1576 
1577 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1578 	    zc->zc_iflags, &config)) != 0)
1579 		return (error);
1580 
1581 	if (zc->zc_nvlist_src_size != 0 && (error =
1582 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1583 	    zc->zc_iflags, &props))) {
1584 		nvlist_free(config);
1585 		return (error);
1586 	}
1587 
1588 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1589 	    guid != zc->zc_guid)
1590 		error = SET_ERROR(EINVAL);
1591 	else
1592 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1593 
1594 	if (zc->zc_nvlist_dst != 0) {
1595 		int err;
1596 
1597 		if ((err = put_nvlist(zc, config)) != 0)
1598 			error = err;
1599 	}
1600 
1601 	nvlist_free(config);
1602 
1603 	nvlist_free(props);
1604 
1605 	return (error);
1606 }
1607 
1608 static int
1609 zfs_ioc_pool_export(zfs_cmd_t *zc)
1610 {
1611 	int error;
1612 	boolean_t force = (boolean_t)zc->zc_cookie;
1613 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1614 
1615 	zfs_log_history(zc);
1616 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1617 	if (error == 0)
1618 		zvol_remove_minors(zc->zc_name);
1619 	return (error);
1620 }
1621 
1622 static int
1623 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1624 {
1625 	nvlist_t *configs;
1626 	int error;
1627 
1628 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1629 		return (SET_ERROR(EEXIST));
1630 
1631 	error = put_nvlist(zc, configs);
1632 
1633 	nvlist_free(configs);
1634 
1635 	return (error);
1636 }
1637 
1638 /*
1639  * inputs:
1640  * zc_name		name of the pool
1641  *
1642  * outputs:
1643  * zc_cookie		real errno
1644  * zc_nvlist_dst	config nvlist
1645  * zc_nvlist_dst_size	size of config nvlist
1646  */
1647 static int
1648 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1649 {
1650 	nvlist_t *config;
1651 	int error;
1652 	int ret = 0;
1653 
1654 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1655 	    sizeof (zc->zc_value));
1656 
1657 	if (config != NULL) {
1658 		ret = put_nvlist(zc, config);
1659 		nvlist_free(config);
1660 
1661 		/*
1662 		 * The config may be present even if 'error' is non-zero.
1663 		 * In this case we return success, and preserve the real errno
1664 		 * in 'zc_cookie'.
1665 		 */
1666 		zc->zc_cookie = error;
1667 	} else {
1668 		ret = error;
1669 	}
1670 
1671 	return (ret);
1672 }
1673 
1674 /*
1675  * Try to import the given pool, returning pool stats as appropriate so that
1676  * user land knows which devices are available and overall pool health.
1677  */
1678 static int
1679 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1680 {
1681 	nvlist_t *tryconfig, *config;
1682 	int error;
1683 
1684 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1685 	    zc->zc_iflags, &tryconfig)) != 0)
1686 		return (error);
1687 
1688 	config = spa_tryimport(tryconfig);
1689 
1690 	nvlist_free(tryconfig);
1691 
1692 	if (config == NULL)
1693 		return (SET_ERROR(EINVAL));
1694 
1695 	error = put_nvlist(zc, config);
1696 	nvlist_free(config);
1697 
1698 	return (error);
1699 }
1700 
1701 /*
1702  * inputs:
1703  * zc_name              name of the pool
1704  * zc_cookie            scan func (pool_scan_func_t)
1705  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1706  */
1707 static int
1708 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1709 {
1710 	spa_t *spa;
1711 	int error;
1712 
1713 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1714 		return (error);
1715 
1716 	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1717 		return (SET_ERROR(EINVAL));
1718 
1719 	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1720 		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1721 	else if (zc->zc_cookie == POOL_SCAN_NONE)
1722 		error = spa_scan_stop(spa);
1723 	else
1724 		error = spa_scan(spa, zc->zc_cookie);
1725 
1726 	spa_close(spa, FTAG);
1727 
1728 	return (error);
1729 }
1730 
1731 static int
1732 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1733 {
1734 	spa_t *spa;
1735 	int error;
1736 
1737 	error = spa_open(zc->zc_name, &spa, FTAG);
1738 	if (error == 0) {
1739 		spa_freeze(spa);
1740 		spa_close(spa, FTAG);
1741 	}
1742 	return (error);
1743 }
1744 
1745 static int
1746 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1747 {
1748 	spa_t *spa;
1749 	int error;
1750 
1751 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1752 		return (error);
1753 
1754 	if (zc->zc_cookie < spa_version(spa) ||
1755 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1756 		spa_close(spa, FTAG);
1757 		return (SET_ERROR(EINVAL));
1758 	}
1759 
1760 	spa_upgrade(spa, zc->zc_cookie);
1761 	spa_close(spa, FTAG);
1762 
1763 	return (error);
1764 }
1765 
1766 static int
1767 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1768 {
1769 	spa_t *spa;
1770 	char *hist_buf;
1771 	uint64_t size;
1772 	int error;
1773 
1774 	if ((size = zc->zc_history_len) == 0)
1775 		return (SET_ERROR(EINVAL));
1776 
1777 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1778 		return (error);
1779 
1780 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1781 		spa_close(spa, FTAG);
1782 		return (SET_ERROR(ENOTSUP));
1783 	}
1784 
1785 	hist_buf = kmem_alloc(size, KM_SLEEP);
1786 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1787 	    &zc->zc_history_len, hist_buf)) == 0) {
1788 		error = ddi_copyout(hist_buf,
1789 		    (void *)(uintptr_t)zc->zc_history,
1790 		    zc->zc_history_len, zc->zc_iflags);
1791 	}
1792 
1793 	spa_close(spa, FTAG);
1794 	kmem_free(hist_buf, size);
1795 	return (error);
1796 }
1797 
1798 static int
1799 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1800 {
1801 	spa_t *spa;
1802 	int error;
1803 
1804 	error = spa_open(zc->zc_name, &spa, FTAG);
1805 	if (error == 0) {
1806 		error = spa_change_guid(spa);
1807 		spa_close(spa, FTAG);
1808 	}
1809 	return (error);
1810 }
1811 
1812 static int
1813 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1814 {
1815 	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1816 }
1817 
1818 /*
1819  * inputs:
1820  * zc_name		name of filesystem
1821  * zc_obj		object to find
1822  *
1823  * outputs:
1824  * zc_value		name of object
1825  */
1826 static int
1827 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1828 {
1829 	objset_t *os;
1830 	int error;
1831 
1832 	/* XXX reading from objset not owned */
1833 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1834 		return (error);
1835 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1836 		dmu_objset_rele(os, FTAG);
1837 		return (SET_ERROR(EINVAL));
1838 	}
1839 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1840 	    sizeof (zc->zc_value));
1841 	dmu_objset_rele(os, FTAG);
1842 
1843 	return (error);
1844 }
1845 
1846 /*
1847  * inputs:
1848  * zc_name		name of filesystem
1849  * zc_obj		object to find
1850  *
1851  * outputs:
1852  * zc_stat		stats on object
1853  * zc_value		path to object
1854  */
1855 static int
1856 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1857 {
1858 	objset_t *os;
1859 	int error;
1860 
1861 	/* XXX reading from objset not owned */
1862 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1863 		return (error);
1864 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1865 		dmu_objset_rele(os, FTAG);
1866 		return (SET_ERROR(EINVAL));
1867 	}
1868 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1869 	    sizeof (zc->zc_value));
1870 	dmu_objset_rele(os, FTAG);
1871 
1872 	return (error);
1873 }
1874 
1875 static int
1876 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1877 {
1878 	spa_t *spa;
1879 	int error;
1880 	nvlist_t *config, **l2cache, **spares;
1881 	uint_t nl2cache = 0, nspares = 0;
1882 
1883 	error = spa_open(zc->zc_name, &spa, FTAG);
1884 	if (error != 0)
1885 		return (error);
1886 
1887 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1888 	    zc->zc_iflags, &config);
1889 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1890 	    &l2cache, &nl2cache);
1891 
1892 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1893 	    &spares, &nspares);
1894 
1895 	/*
1896 	 * A root pool with concatenated devices is not supported.
1897 	 * Thus, can not add a device to a root pool.
1898 	 *
1899 	 * Intent log device can not be added to a rootpool because
1900 	 * during mountroot, zil is replayed, a seperated log device
1901 	 * can not be accessed during the mountroot time.
1902 	 *
1903 	 * l2cache and spare devices are ok to be added to a rootpool.
1904 	 */
1905 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1906 		nvlist_free(config);
1907 		spa_close(spa, FTAG);
1908 		return (SET_ERROR(EDOM));
1909 	}
1910 
1911 	if (error == 0) {
1912 		error = spa_vdev_add(spa, config);
1913 		nvlist_free(config);
1914 	}
1915 	spa_close(spa, FTAG);
1916 	return (error);
1917 }
1918 
1919 /*
1920  * inputs:
1921  * zc_name		name of the pool
1922  * zc_guid		guid of vdev to remove
1923  * zc_cookie		cancel removal
1924  */
1925 static int
1926 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1927 {
1928 	spa_t *spa;
1929 	int error;
1930 
1931 	error = spa_open(zc->zc_name, &spa, FTAG);
1932 	if (error != 0)
1933 		return (error);
1934 	if (zc->zc_cookie != 0) {
1935 		error = spa_vdev_remove_cancel(spa);
1936 	} else {
1937 		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1938 	}
1939 	spa_close(spa, FTAG);
1940 	return (error);
1941 }
1942 
1943 static int
1944 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1945 {
1946 	spa_t *spa;
1947 	int error;
1948 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1949 
1950 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1951 		return (error);
1952 	switch (zc->zc_cookie) {
1953 	case VDEV_STATE_ONLINE:
1954 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1955 		break;
1956 
1957 	case VDEV_STATE_OFFLINE:
1958 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1959 		break;
1960 
1961 	case VDEV_STATE_FAULTED:
1962 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1963 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1964 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1965 
1966 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1967 		break;
1968 
1969 	case VDEV_STATE_DEGRADED:
1970 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1971 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1972 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1973 
1974 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1975 		break;
1976 
1977 	default:
1978 		error = SET_ERROR(EINVAL);
1979 	}
1980 	zc->zc_cookie = newstate;
1981 	spa_close(spa, FTAG);
1982 	return (error);
1983 }
1984 
1985 static int
1986 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1987 {
1988 	spa_t *spa;
1989 	int replacing = zc->zc_cookie;
1990 	nvlist_t *config;
1991 	int error;
1992 
1993 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1994 		return (error);
1995 
1996 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1997 	    zc->zc_iflags, &config)) == 0) {
1998 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1999 		nvlist_free(config);
2000 	}
2001 
2002 	spa_close(spa, FTAG);
2003 	return (error);
2004 }
2005 
2006 static int
2007 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2008 {
2009 	spa_t *spa;
2010 	int error;
2011 
2012 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2013 		return (error);
2014 
2015 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2016 
2017 	spa_close(spa, FTAG);
2018 	return (error);
2019 }
2020 
2021 static int
2022 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2023 {
2024 	spa_t *spa;
2025 	nvlist_t *config, *props = NULL;
2026 	int error;
2027 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2028 
2029 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2030 		return (error);
2031 
2032 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2033 	    zc->zc_iflags, &config)) {
2034 		spa_close(spa, FTAG);
2035 		return (error);
2036 	}
2037 
2038 	if (zc->zc_nvlist_src_size != 0 && (error =
2039 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2040 	    zc->zc_iflags, &props))) {
2041 		spa_close(spa, FTAG);
2042 		nvlist_free(config);
2043 		return (error);
2044 	}
2045 
2046 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2047 
2048 	spa_close(spa, FTAG);
2049 
2050 	nvlist_free(config);
2051 	nvlist_free(props);
2052 
2053 	return (error);
2054 }
2055 
2056 static int
2057 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2058 {
2059 	spa_t *spa;
2060 	char *path = zc->zc_value;
2061 	uint64_t guid = zc->zc_guid;
2062 	int error;
2063 
2064 	error = spa_open(zc->zc_name, &spa, FTAG);
2065 	if (error != 0)
2066 		return (error);
2067 
2068 	error = spa_vdev_setpath(spa, guid, path);
2069 	spa_close(spa, FTAG);
2070 	return (error);
2071 }
2072 
2073 static int
2074 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2075 {
2076 	spa_t *spa;
2077 	char *fru = zc->zc_value;
2078 	uint64_t guid = zc->zc_guid;
2079 	int error;
2080 
2081 	error = spa_open(zc->zc_name, &spa, FTAG);
2082 	if (error != 0)
2083 		return (error);
2084 
2085 	error = spa_vdev_setfru(spa, guid, fru);
2086 	spa_close(spa, FTAG);
2087 	return (error);
2088 }
2089 
2090 static int
2091 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2092 {
2093 	int error = 0;
2094 	nvlist_t *nv;
2095 
2096 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2097 
2098 	if (zc->zc_nvlist_dst != 0 &&
2099 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2100 		dmu_objset_stats(os, nv);
2101 		/*
2102 		 * NB: zvol_get_stats() will read the objset contents,
2103 		 * which we aren't supposed to do with a
2104 		 * DS_MODE_USER hold, because it could be
2105 		 * inconsistent.  So this is a bit of a workaround...
2106 		 * XXX reading with out owning
2107 		 */
2108 		if (!zc->zc_objset_stats.dds_inconsistent &&
2109 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2110 			error = zvol_get_stats(os, nv);
2111 			if (error == EIO)
2112 				return (error);
2113 			VERIFY0(error);
2114 		}
2115 		error = put_nvlist(zc, nv);
2116 		nvlist_free(nv);
2117 	}
2118 
2119 	return (error);
2120 }
2121 
2122 /*
2123  * inputs:
2124  * zc_name		name of filesystem
2125  * zc_nvlist_dst_size	size of buffer for property nvlist
2126  *
2127  * outputs:
2128  * zc_objset_stats	stats
2129  * zc_nvlist_dst	property nvlist
2130  * zc_nvlist_dst_size	size of property nvlist
2131  */
2132 static int
2133 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2134 {
2135 	objset_t *os;
2136 	int error;
2137 
2138 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2139 	if (error == 0) {
2140 		error = zfs_ioc_objset_stats_impl(zc, os);
2141 		dmu_objset_rele(os, FTAG);
2142 	}
2143 
2144 	return (error);
2145 }
2146 
2147 /*
2148  * inputs:
2149  * zc_name		name of filesystem
2150  * zc_nvlist_dst_size	size of buffer for property nvlist
2151  *
2152  * outputs:
2153  * zc_nvlist_dst	received property nvlist
2154  * zc_nvlist_dst_size	size of received property nvlist
2155  *
2156  * Gets received properties (distinct from local properties on or after
2157  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2158  * local property values.
2159  */
2160 static int
2161 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2162 {
2163 	int error = 0;
2164 	nvlist_t *nv;
2165 
2166 	/*
2167 	 * Without this check, we would return local property values if the
2168 	 * caller has not already received properties on or after
2169 	 * SPA_VERSION_RECVD_PROPS.
2170 	 */
2171 	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2172 		return (SET_ERROR(ENOTSUP));
2173 
2174 	if (zc->zc_nvlist_dst != 0 &&
2175 	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2176 		error = put_nvlist(zc, nv);
2177 		nvlist_free(nv);
2178 	}
2179 
2180 	return (error);
2181 }
2182 
2183 static int
2184 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2185 {
2186 	uint64_t value;
2187 	int error;
2188 
2189 	/*
2190 	 * zfs_get_zplprop() will either find a value or give us
2191 	 * the default value (if there is one).
2192 	 */
2193 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2194 		return (error);
2195 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2196 	return (0);
2197 }
2198 
2199 /*
2200  * inputs:
2201  * zc_name		name of filesystem
2202  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2203  *
2204  * outputs:
2205  * zc_nvlist_dst	zpl property nvlist
2206  * zc_nvlist_dst_size	size of zpl property nvlist
2207  */
2208 static int
2209 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2210 {
2211 	objset_t *os;
2212 	int err;
2213 
2214 	/* XXX reading without owning */
2215 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2216 		return (err);
2217 
2218 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2219 
2220 	/*
2221 	 * NB: nvl_add_zplprop() will read the objset contents,
2222 	 * which we aren't supposed to do with a DS_MODE_USER
2223 	 * hold, because it could be inconsistent.
2224 	 */
2225 	if (zc->zc_nvlist_dst != NULL &&
2226 	    !zc->zc_objset_stats.dds_inconsistent &&
2227 	    dmu_objset_type(os) == DMU_OST_ZFS) {
2228 		nvlist_t *nv;
2229 
2230 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2231 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2232 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2233 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2234 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2235 			err = put_nvlist(zc, nv);
2236 		nvlist_free(nv);
2237 	} else {
2238 		err = SET_ERROR(ENOENT);
2239 	}
2240 	dmu_objset_rele(os, FTAG);
2241 	return (err);
2242 }
2243 
2244 static boolean_t
2245 dataset_name_hidden(const char *name)
2246 {
2247 	/*
2248 	 * Skip over datasets that are not visible in this zone,
2249 	 * internal datasets (which have a $ in their name), and
2250 	 * temporary datasets (which have a % in their name).
2251 	 */
2252 	if (strchr(name, '$') != NULL)
2253 		return (B_TRUE);
2254 	if (strchr(name, '%') != NULL)
2255 		return (B_TRUE);
2256 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2257 		return (B_TRUE);
2258 	return (B_FALSE);
2259 }
2260 
2261 /*
2262  * inputs:
2263  * zc_name		name of filesystem
2264  * zc_cookie		zap cursor
2265  * zc_nvlist_dst_size	size of buffer for property nvlist
2266  *
2267  * outputs:
2268  * zc_name		name of next filesystem
2269  * zc_cookie		zap cursor
2270  * zc_objset_stats	stats
2271  * zc_nvlist_dst	property nvlist
2272  * zc_nvlist_dst_size	size of property nvlist
2273  */
2274 static int
2275 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2276 {
2277 	objset_t *os;
2278 	int error;
2279 	char *p;
2280 	size_t orig_len = strlen(zc->zc_name);
2281 
2282 top:
2283 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2284 		if (error == ENOENT)
2285 			error = SET_ERROR(ESRCH);
2286 		return (error);
2287 	}
2288 
2289 	p = strrchr(zc->zc_name, '/');
2290 	if (p == NULL || p[1] != '\0')
2291 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2292 	p = zc->zc_name + strlen(zc->zc_name);
2293 
2294 	do {
2295 		error = dmu_dir_list_next(os,
2296 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2297 		    NULL, &zc->zc_cookie);
2298 		if (error == ENOENT)
2299 			error = SET_ERROR(ESRCH);
2300 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2301 	dmu_objset_rele(os, FTAG);
2302 
2303 	/*
2304 	 * If it's an internal dataset (ie. with a '$' in its name),
2305 	 * don't try to get stats for it, otherwise we'll return ENOENT.
2306 	 */
2307 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2308 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2309 		if (error == ENOENT) {
2310 			/* We lost a race with destroy, get the next one. */
2311 			zc->zc_name[orig_len] = '\0';
2312 			goto top;
2313 		}
2314 	}
2315 	return (error);
2316 }
2317 
2318 /*
2319  * inputs:
2320  * zc_name		name of filesystem
2321  * zc_cookie		zap cursor
2322  * zc_nvlist_dst_size	size of buffer for property nvlist
2323  * zc_simple		when set, only name is requested
2324  *
2325  * outputs:
2326  * zc_name		name of next snapshot
2327  * zc_objset_stats	stats
2328  * zc_nvlist_dst	property nvlist
2329  * zc_nvlist_dst_size	size of property nvlist
2330  */
2331 static int
2332 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2333 {
2334 	objset_t *os;
2335 	int error;
2336 
2337 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2338 	if (error != 0) {
2339 		return (error == ENOENT ? ESRCH : error);
2340 	}
2341 
2342 	/*
2343 	 * A dataset name of maximum length cannot have any snapshots,
2344 	 * so exit immediately.
2345 	 */
2346 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2347 	    ZFS_MAX_DATASET_NAME_LEN) {
2348 		dmu_objset_rele(os, FTAG);
2349 		return (SET_ERROR(ESRCH));
2350 	}
2351 
2352 	error = dmu_snapshot_list_next(os,
2353 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2354 	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2355 	    NULL);
2356 
2357 	if (error == 0 && !zc->zc_simple) {
2358 		dsl_dataset_t *ds;
2359 		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2360 
2361 		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2362 		if (error == 0) {
2363 			objset_t *ossnap;
2364 
2365 			error = dmu_objset_from_ds(ds, &ossnap);
2366 			if (error == 0)
2367 				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2368 			dsl_dataset_rele(ds, FTAG);
2369 		}
2370 	} else if (error == ENOENT) {
2371 		error = SET_ERROR(ESRCH);
2372 	}
2373 
2374 	dmu_objset_rele(os, FTAG);
2375 	/* if we failed, undo the @ that we tacked on to zc_name */
2376 	if (error != 0)
2377 		*strchr(zc->zc_name, '@') = '\0';
2378 	return (error);
2379 }
2380 
2381 static int
2382 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2383 {
2384 	const char *propname = nvpair_name(pair);
2385 	uint64_t *valary;
2386 	unsigned int vallen;
2387 	const char *domain;
2388 	char *dash;
2389 	zfs_userquota_prop_t type;
2390 	uint64_t rid;
2391 	uint64_t quota;
2392 	zfsvfs_t *zfsvfs;
2393 	int err;
2394 
2395 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2396 		nvlist_t *attrs;
2397 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2398 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2399 		    &pair) != 0)
2400 			return (SET_ERROR(EINVAL));
2401 	}
2402 
2403 	/*
2404 	 * A correctly constructed propname is encoded as
2405 	 * userquota@<rid>-<domain>.
2406 	 */
2407 	if ((dash = strchr(propname, '-')) == NULL ||
2408 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2409 	    vallen != 3)
2410 		return (SET_ERROR(EINVAL));
2411 
2412 	domain = dash + 1;
2413 	type = valary[0];
2414 	rid = valary[1];
2415 	quota = valary[2];
2416 
2417 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2418 	if (err == 0) {
2419 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2420 		zfsvfs_rele(zfsvfs, FTAG);
2421 	}
2422 
2423 	return (err);
2424 }
2425 
2426 /*
2427  * If the named property is one that has a special function to set its value,
2428  * return 0 on success and a positive error code on failure; otherwise if it is
2429  * not one of the special properties handled by this function, return -1.
2430  *
2431  * XXX: It would be better for callers of the property interface if we handled
2432  * these special cases in dsl_prop.c (in the dsl layer).
2433  */
2434 static int
2435 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2436     nvpair_t *pair)
2437 {
2438 	const char *propname = nvpair_name(pair);
2439 	zfs_prop_t prop = zfs_name_to_prop(propname);
2440 	uint64_t intval;
2441 	int err = -1;
2442 
2443 	if (prop == ZPROP_INVAL) {
2444 		if (zfs_prop_userquota(propname))
2445 			return (zfs_prop_set_userquota(dsname, pair));
2446 		return (-1);
2447 	}
2448 
2449 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2450 		nvlist_t *attrs;
2451 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2452 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2453 		    &pair) == 0);
2454 	}
2455 
2456 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2457 		return (-1);
2458 
2459 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2460 
2461 	switch (prop) {
2462 	case ZFS_PROP_QUOTA:
2463 		err = dsl_dir_set_quota(dsname, source, intval);
2464 		break;
2465 	case ZFS_PROP_REFQUOTA:
2466 		err = dsl_dataset_set_refquota(dsname, source, intval);
2467 		break;
2468 	case ZFS_PROP_FILESYSTEM_LIMIT:
2469 	case ZFS_PROP_SNAPSHOT_LIMIT:
2470 		if (intval == UINT64_MAX) {
2471 			/* clearing the limit, just do it */
2472 			err = 0;
2473 		} else {
2474 			err = dsl_dir_activate_fs_ss_limit(dsname);
2475 		}
2476 		/*
2477 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2478 		 * default path to set the value in the nvlist.
2479 		 */
2480 		if (err == 0)
2481 			err = -1;
2482 		break;
2483 	case ZFS_PROP_RESERVATION:
2484 		err = dsl_dir_set_reservation(dsname, source, intval);
2485 		break;
2486 	case ZFS_PROP_REFRESERVATION:
2487 		err = dsl_dataset_set_refreservation(dsname, source, intval);
2488 		break;
2489 	case ZFS_PROP_VOLSIZE:
2490 		err = zvol_set_volsize(dsname, intval);
2491 		break;
2492 	case ZFS_PROP_VERSION:
2493 	{
2494 		zfsvfs_t *zfsvfs;
2495 
2496 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2497 			break;
2498 
2499 		err = zfs_set_version(zfsvfs, intval);
2500 		zfsvfs_rele(zfsvfs, FTAG);
2501 
2502 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2503 			zfs_cmd_t *zc;
2504 
2505 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2506 			(void) strcpy(zc->zc_name, dsname);
2507 			(void) zfs_ioc_userspace_upgrade(zc);
2508 			kmem_free(zc, sizeof (zfs_cmd_t));
2509 		}
2510 		break;
2511 	}
2512 	default:
2513 		err = -1;
2514 	}
2515 
2516 	return (err);
2517 }
2518 
2519 /*
2520  * This function is best effort. If it fails to set any of the given properties,
2521  * it continues to set as many as it can and returns the last error
2522  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2523  * with the list of names of all the properties that failed along with the
2524  * corresponding error numbers.
2525  *
2526  * If every property is set successfully, zero is returned and errlist is not
2527  * modified.
2528  */
2529 int
2530 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2531     nvlist_t *errlist)
2532 {
2533 	nvpair_t *pair;
2534 	nvpair_t *propval;
2535 	int rv = 0;
2536 	uint64_t intval;
2537 	char *strval;
2538 	nvlist_t *genericnvl = fnvlist_alloc();
2539 	nvlist_t *retrynvl = fnvlist_alloc();
2540 
2541 retry:
2542 	pair = NULL;
2543 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2544 		const char *propname = nvpair_name(pair);
2545 		zfs_prop_t prop = zfs_name_to_prop(propname);
2546 		int err = 0;
2547 
2548 		/* decode the property value */
2549 		propval = pair;
2550 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2551 			nvlist_t *attrs;
2552 			attrs = fnvpair_value_nvlist(pair);
2553 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2554 			    &propval) != 0)
2555 				err = SET_ERROR(EINVAL);
2556 		}
2557 
2558 		/* Validate value type */
2559 		if (err == 0 && prop == ZPROP_INVAL) {
2560 			if (zfs_prop_user(propname)) {
2561 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2562 					err = SET_ERROR(EINVAL);
2563 			} else if (zfs_prop_userquota(propname)) {
2564 				if (nvpair_type(propval) !=
2565 				    DATA_TYPE_UINT64_ARRAY)
2566 					err = SET_ERROR(EINVAL);
2567 			} else {
2568 				err = SET_ERROR(EINVAL);
2569 			}
2570 		} else if (err == 0) {
2571 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2572 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2573 					err = SET_ERROR(EINVAL);
2574 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2575 				const char *unused;
2576 
2577 				intval = fnvpair_value_uint64(propval);
2578 
2579 				switch (zfs_prop_get_type(prop)) {
2580 				case PROP_TYPE_NUMBER:
2581 					break;
2582 				case PROP_TYPE_STRING:
2583 					err = SET_ERROR(EINVAL);
2584 					break;
2585 				case PROP_TYPE_INDEX:
2586 					if (zfs_prop_index_to_string(prop,
2587 					    intval, &unused) != 0)
2588 						err = SET_ERROR(EINVAL);
2589 					break;
2590 				default:
2591 					cmn_err(CE_PANIC,
2592 					    "unknown property type");
2593 				}
2594 			} else {
2595 				err = SET_ERROR(EINVAL);
2596 			}
2597 		}
2598 
2599 		/* Validate permissions */
2600 		if (err == 0)
2601 			err = zfs_check_settable(dsname, pair, CRED());
2602 
2603 		if (err == 0) {
2604 			err = zfs_prop_set_special(dsname, source, pair);
2605 			if (err == -1) {
2606 				/*
2607 				 * For better performance we build up a list of
2608 				 * properties to set in a single transaction.
2609 				 */
2610 				err = nvlist_add_nvpair(genericnvl, pair);
2611 			} else if (err != 0 && nvl != retrynvl) {
2612 				/*
2613 				 * This may be a spurious error caused by
2614 				 * receiving quota and reservation out of order.
2615 				 * Try again in a second pass.
2616 				 */
2617 				err = nvlist_add_nvpair(retrynvl, pair);
2618 			}
2619 		}
2620 
2621 		if (err != 0) {
2622 			if (errlist != NULL)
2623 				fnvlist_add_int32(errlist, propname, err);
2624 			rv = err;
2625 		}
2626 	}
2627 
2628 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2629 		nvl = retrynvl;
2630 		goto retry;
2631 	}
2632 
2633 	if (!nvlist_empty(genericnvl) &&
2634 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2635 		/*
2636 		 * If this fails, we still want to set as many properties as we
2637 		 * can, so try setting them individually.
2638 		 */
2639 		pair = NULL;
2640 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2641 			const char *propname = nvpair_name(pair);
2642 			int err = 0;
2643 
2644 			propval = pair;
2645 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2646 				nvlist_t *attrs;
2647 				attrs = fnvpair_value_nvlist(pair);
2648 				propval = fnvlist_lookup_nvpair(attrs,
2649 				    ZPROP_VALUE);
2650 			}
2651 
2652 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2653 				strval = fnvpair_value_string(propval);
2654 				err = dsl_prop_set_string(dsname, propname,
2655 				    source, strval);
2656 			} else {
2657 				intval = fnvpair_value_uint64(propval);
2658 				err = dsl_prop_set_int(dsname, propname, source,
2659 				    intval);
2660 			}
2661 
2662 			if (err != 0) {
2663 				if (errlist != NULL) {
2664 					fnvlist_add_int32(errlist, propname,
2665 					    err);
2666 				}
2667 				rv = err;
2668 			}
2669 		}
2670 	}
2671 	nvlist_free(genericnvl);
2672 	nvlist_free(retrynvl);
2673 
2674 	return (rv);
2675 }
2676 
2677 /*
2678  * Check that all the properties are valid user properties.
2679  */
2680 static int
2681 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2682 {
2683 	nvpair_t *pair = NULL;
2684 	int error = 0;
2685 
2686 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2687 		const char *propname = nvpair_name(pair);
2688 
2689 		if (!zfs_prop_user(propname) ||
2690 		    nvpair_type(pair) != DATA_TYPE_STRING)
2691 			return (SET_ERROR(EINVAL));
2692 
2693 		if (error = zfs_secpolicy_write_perms(fsname,
2694 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2695 			return (error);
2696 
2697 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2698 			return (SET_ERROR(ENAMETOOLONG));
2699 
2700 		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2701 			return (E2BIG);
2702 	}
2703 	return (0);
2704 }
2705 
2706 static void
2707 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2708 {
2709 	nvpair_t *pair;
2710 
2711 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2712 
2713 	pair = NULL;
2714 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2715 		if (nvlist_exists(skipped, nvpair_name(pair)))
2716 			continue;
2717 
2718 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2719 	}
2720 }
2721 
2722 static int
2723 clear_received_props(const char *dsname, nvlist_t *props,
2724     nvlist_t *skipped)
2725 {
2726 	int err = 0;
2727 	nvlist_t *cleared_props = NULL;
2728 	props_skip(props, skipped, &cleared_props);
2729 	if (!nvlist_empty(cleared_props)) {
2730 		/*
2731 		 * Acts on local properties until the dataset has received
2732 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2733 		 */
2734 		zprop_source_t flags = (ZPROP_SRC_NONE |
2735 		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2736 		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2737 	}
2738 	nvlist_free(cleared_props);
2739 	return (err);
2740 }
2741 
2742 /*
2743  * inputs:
2744  * zc_name		name of filesystem
2745  * zc_value		name of property to set
2746  * zc_nvlist_src{_size}	nvlist of properties to apply
2747  * zc_cookie		received properties flag
2748  *
2749  * outputs:
2750  * zc_nvlist_dst{_size} error for each unapplied received property
2751  */
2752 static int
2753 zfs_ioc_set_prop(zfs_cmd_t *zc)
2754 {
2755 	nvlist_t *nvl;
2756 	boolean_t received = zc->zc_cookie;
2757 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2758 	    ZPROP_SRC_LOCAL);
2759 	nvlist_t *errors;
2760 	int error;
2761 
2762 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2763 	    zc->zc_iflags, &nvl)) != 0)
2764 		return (error);
2765 
2766 	if (received) {
2767 		nvlist_t *origprops;
2768 
2769 		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2770 			(void) clear_received_props(zc->zc_name,
2771 			    origprops, nvl);
2772 			nvlist_free(origprops);
2773 		}
2774 
2775 		error = dsl_prop_set_hasrecvd(zc->zc_name);
2776 	}
2777 
2778 	errors = fnvlist_alloc();
2779 	if (error == 0)
2780 		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2781 
2782 	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2783 		(void) put_nvlist(zc, errors);
2784 	}
2785 
2786 	nvlist_free(errors);
2787 	nvlist_free(nvl);
2788 	return (error);
2789 }
2790 
2791 /*
2792  * inputs:
2793  * zc_name		name of filesystem
2794  * zc_value		name of property to inherit
2795  * zc_cookie		revert to received value if TRUE
2796  *
2797  * outputs:		none
2798  */
2799 static int
2800 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2801 {
2802 	const char *propname = zc->zc_value;
2803 	zfs_prop_t prop = zfs_name_to_prop(propname);
2804 	boolean_t received = zc->zc_cookie;
2805 	zprop_source_t source = (received
2806 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2807 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2808 
2809 	if (received) {
2810 		nvlist_t *dummy;
2811 		nvpair_t *pair;
2812 		zprop_type_t type;
2813 		int err;
2814 
2815 		/*
2816 		 * zfs_prop_set_special() expects properties in the form of an
2817 		 * nvpair with type info.
2818 		 */
2819 		if (prop == ZPROP_INVAL) {
2820 			if (!zfs_prop_user(propname))
2821 				return (SET_ERROR(EINVAL));
2822 
2823 			type = PROP_TYPE_STRING;
2824 		} else if (prop == ZFS_PROP_VOLSIZE ||
2825 		    prop == ZFS_PROP_VERSION) {
2826 			return (SET_ERROR(EINVAL));
2827 		} else {
2828 			type = zfs_prop_get_type(prop);
2829 		}
2830 
2831 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2832 
2833 		switch (type) {
2834 		case PROP_TYPE_STRING:
2835 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2836 			break;
2837 		case PROP_TYPE_NUMBER:
2838 		case PROP_TYPE_INDEX:
2839 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2840 			break;
2841 		default:
2842 			nvlist_free(dummy);
2843 			return (SET_ERROR(EINVAL));
2844 		}
2845 
2846 		pair = nvlist_next_nvpair(dummy, NULL);
2847 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2848 		nvlist_free(dummy);
2849 		if (err != -1)
2850 			return (err); /* special property already handled */
2851 	} else {
2852 		/*
2853 		 * Only check this in the non-received case. We want to allow
2854 		 * 'inherit -S' to revert non-inheritable properties like quota
2855 		 * and reservation to the received or default values even though
2856 		 * they are not considered inheritable.
2857 		 */
2858 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2859 			return (SET_ERROR(EINVAL));
2860 	}
2861 
2862 	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2863 	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2864 }
2865 
2866 static int
2867 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2868 {
2869 	nvlist_t *props;
2870 	spa_t *spa;
2871 	int error;
2872 	nvpair_t *pair;
2873 
2874 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2875 	    zc->zc_iflags, &props))
2876 		return (error);
2877 
2878 	/*
2879 	 * If the only property is the configfile, then just do a spa_lookup()
2880 	 * to handle the faulted case.
2881 	 */
2882 	pair = nvlist_next_nvpair(props, NULL);
2883 	if (pair != NULL && strcmp(nvpair_name(pair),
2884 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2885 	    nvlist_next_nvpair(props, pair) == NULL) {
2886 		mutex_enter(&spa_namespace_lock);
2887 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2888 			spa_configfile_set(spa, props, B_FALSE);
2889 			spa_write_cachefile(spa, B_FALSE, B_TRUE);
2890 		}
2891 		mutex_exit(&spa_namespace_lock);
2892 		if (spa != NULL) {
2893 			nvlist_free(props);
2894 			return (0);
2895 		}
2896 	}
2897 
2898 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2899 		nvlist_free(props);
2900 		return (error);
2901 	}
2902 
2903 	error = spa_prop_set(spa, props);
2904 
2905 	nvlist_free(props);
2906 	spa_close(spa, FTAG);
2907 
2908 	return (error);
2909 }
2910 
2911 static int
2912 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2913 {
2914 	spa_t *spa;
2915 	int error;
2916 	nvlist_t *nvp = NULL;
2917 
2918 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2919 		/*
2920 		 * If the pool is faulted, there may be properties we can still
2921 		 * get (such as altroot and cachefile), so attempt to get them
2922 		 * anyway.
2923 		 */
2924 		mutex_enter(&spa_namespace_lock);
2925 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2926 			error = spa_prop_get(spa, &nvp);
2927 		mutex_exit(&spa_namespace_lock);
2928 	} else {
2929 		error = spa_prop_get(spa, &nvp);
2930 		spa_close(spa, FTAG);
2931 	}
2932 
2933 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2934 		error = put_nvlist(zc, nvp);
2935 	else
2936 		error = SET_ERROR(EFAULT);
2937 
2938 	nvlist_free(nvp);
2939 	return (error);
2940 }
2941 
2942 /*
2943  * inputs:
2944  * zc_name		name of filesystem
2945  * zc_nvlist_src{_size}	nvlist of delegated permissions
2946  * zc_perm_action	allow/unallow flag
2947  *
2948  * outputs:		none
2949  */
2950 static int
2951 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2952 {
2953 	int error;
2954 	nvlist_t *fsaclnv = NULL;
2955 
2956 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2957 	    zc->zc_iflags, &fsaclnv)) != 0)
2958 		return (error);
2959 
2960 	/*
2961 	 * Verify nvlist is constructed correctly
2962 	 */
2963 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2964 		nvlist_free(fsaclnv);
2965 		return (SET_ERROR(EINVAL));
2966 	}
2967 
2968 	/*
2969 	 * If we don't have PRIV_SYS_MOUNT, then validate
2970 	 * that user is allowed to hand out each permission in
2971 	 * the nvlist(s)
2972 	 */
2973 
2974 	error = secpolicy_zfs(CRED());
2975 	if (error != 0) {
2976 		if (zc->zc_perm_action == B_FALSE) {
2977 			error = dsl_deleg_can_allow(zc->zc_name,
2978 			    fsaclnv, CRED());
2979 		} else {
2980 			error = dsl_deleg_can_unallow(zc->zc_name,
2981 			    fsaclnv, CRED());
2982 		}
2983 	}
2984 
2985 	if (error == 0)
2986 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2987 
2988 	nvlist_free(fsaclnv);
2989 	return (error);
2990 }
2991 
2992 /*
2993  * inputs:
2994  * zc_name		name of filesystem
2995  *
2996  * outputs:
2997  * zc_nvlist_src{_size}	nvlist of delegated permissions
2998  */
2999 static int
3000 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3001 {
3002 	nvlist_t *nvp;
3003 	int error;
3004 
3005 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3006 		error = put_nvlist(zc, nvp);
3007 		nvlist_free(nvp);
3008 	}
3009 
3010 	return (error);
3011 }
3012 
3013 /* ARGSUSED */
3014 static void
3015 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3016 {
3017 	zfs_creat_t *zct = arg;
3018 
3019 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3020 }
3021 
3022 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3023 
3024 /*
3025  * inputs:
3026  * os			parent objset pointer (NULL if root fs)
3027  * fuids_ok		fuids allowed in this version of the spa?
3028  * sa_ok		SAs allowed in this version of the spa?
3029  * createprops		list of properties requested by creator
3030  *
3031  * outputs:
3032  * zplprops	values for the zplprops we attach to the master node object
3033  * is_ci	true if requested file system will be purely case-insensitive
3034  *
3035  * Determine the settings for utf8only, normalization and
3036  * casesensitivity.  Specific values may have been requested by the
3037  * creator and/or we can inherit values from the parent dataset.  If
3038  * the file system is of too early a vintage, a creator can not
3039  * request settings for these properties, even if the requested
3040  * setting is the default value.  We don't actually want to create dsl
3041  * properties for these, so remove them from the source nvlist after
3042  * processing.
3043  */
3044 static int
3045 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3046     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3047     nvlist_t *zplprops, boolean_t *is_ci)
3048 {
3049 	uint64_t sense = ZFS_PROP_UNDEFINED;
3050 	uint64_t norm = ZFS_PROP_UNDEFINED;
3051 	uint64_t u8 = ZFS_PROP_UNDEFINED;
3052 
3053 	ASSERT(zplprops != NULL);
3054 
3055 	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3056 		return (SET_ERROR(EINVAL));
3057 
3058 	/*
3059 	 * Pull out creator prop choices, if any.
3060 	 */
3061 	if (createprops) {
3062 		(void) nvlist_lookup_uint64(createprops,
3063 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3064 		(void) nvlist_lookup_uint64(createprops,
3065 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3066 		(void) nvlist_remove_all(createprops,
3067 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3068 		(void) nvlist_lookup_uint64(createprops,
3069 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3070 		(void) nvlist_remove_all(createprops,
3071 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3072 		(void) nvlist_lookup_uint64(createprops,
3073 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3074 		(void) nvlist_remove_all(createprops,
3075 		    zfs_prop_to_name(ZFS_PROP_CASE));
3076 	}
3077 
3078 	/*
3079 	 * If the zpl version requested is whacky or the file system
3080 	 * or pool is version is too "young" to support normalization
3081 	 * and the creator tried to set a value for one of the props,
3082 	 * error out.
3083 	 */
3084 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3085 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3086 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3087 	    (zplver < ZPL_VERSION_NORMALIZATION &&
3088 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3089 	    sense != ZFS_PROP_UNDEFINED)))
3090 		return (SET_ERROR(ENOTSUP));
3091 
3092 	/*
3093 	 * Put the version in the zplprops
3094 	 */
3095 	VERIFY(nvlist_add_uint64(zplprops,
3096 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3097 
3098 	if (norm == ZFS_PROP_UNDEFINED)
3099 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3100 	VERIFY(nvlist_add_uint64(zplprops,
3101 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3102 
3103 	/*
3104 	 * If we're normalizing, names must always be valid UTF-8 strings.
3105 	 */
3106 	if (norm)
3107 		u8 = 1;
3108 	if (u8 == ZFS_PROP_UNDEFINED)
3109 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3110 	VERIFY(nvlist_add_uint64(zplprops,
3111 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3112 
3113 	if (sense == ZFS_PROP_UNDEFINED)
3114 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3115 	VERIFY(nvlist_add_uint64(zplprops,
3116 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3117 
3118 	if (is_ci)
3119 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3120 
3121 	return (0);
3122 }
3123 
3124 static int
3125 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3126     nvlist_t *zplprops, boolean_t *is_ci)
3127 {
3128 	boolean_t fuids_ok, sa_ok;
3129 	uint64_t zplver = ZPL_VERSION;
3130 	objset_t *os = NULL;
3131 	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3132 	char *cp;
3133 	spa_t *spa;
3134 	uint64_t spa_vers;
3135 	int error;
3136 
3137 	(void) strlcpy(parentname, dataset, sizeof (parentname));
3138 	cp = strrchr(parentname, '/');
3139 	ASSERT(cp != NULL);
3140 	cp[0] = '\0';
3141 
3142 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3143 		return (error);
3144 
3145 	spa_vers = spa_version(spa);
3146 	spa_close(spa, FTAG);
3147 
3148 	zplver = zfs_zpl_version_map(spa_vers);
3149 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3150 	sa_ok = (zplver >= ZPL_VERSION_SA);
3151 
3152 	/*
3153 	 * Open parent object set so we can inherit zplprop values.
3154 	 */
3155 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3156 		return (error);
3157 
3158 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3159 	    zplprops, is_ci);
3160 	dmu_objset_rele(os, FTAG);
3161 	return (error);
3162 }
3163 
3164 static int
3165 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3166     nvlist_t *zplprops, boolean_t *is_ci)
3167 {
3168 	boolean_t fuids_ok;
3169 	boolean_t sa_ok;
3170 	uint64_t zplver = ZPL_VERSION;
3171 	int error;
3172 
3173 	zplver = zfs_zpl_version_map(spa_vers);
3174 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3175 	sa_ok = (zplver >= ZPL_VERSION_SA);
3176 
3177 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3178 	    createprops, zplprops, is_ci);
3179 	return (error);
3180 }
3181 
3182 /*
3183  * innvl: {
3184  *     "type" -> dmu_objset_type_t (int32)
3185  *     (optional) "props" -> { prop -> value }
3186  * }
3187  *
3188  * outnvl: propname -> error code (int32)
3189  */
3190 static int
3191 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3192 {
3193 	int error = 0;
3194 	zfs_creat_t zct = { 0 };
3195 	nvlist_t *nvprops = NULL;
3196 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3197 	int32_t type32;
3198 	dmu_objset_type_t type;
3199 	boolean_t is_insensitive = B_FALSE;
3200 
3201 	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3202 		return (SET_ERROR(EINVAL));
3203 	type = type32;
3204 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3205 
3206 	switch (type) {
3207 	case DMU_OST_ZFS:
3208 		cbfunc = zfs_create_cb;
3209 		break;
3210 
3211 	case DMU_OST_ZVOL:
3212 		cbfunc = zvol_create_cb;
3213 		break;
3214 
3215 	default:
3216 		cbfunc = NULL;
3217 		break;
3218 	}
3219 	if (strchr(fsname, '@') ||
3220 	    strchr(fsname, '%'))
3221 		return (SET_ERROR(EINVAL));
3222 
3223 	zct.zct_props = nvprops;
3224 
3225 	if (cbfunc == NULL)
3226 		return (SET_ERROR(EINVAL));
3227 
3228 	if (type == DMU_OST_ZVOL) {
3229 		uint64_t volsize, volblocksize;
3230 
3231 		if (nvprops == NULL)
3232 			return (SET_ERROR(EINVAL));
3233 		if (nvlist_lookup_uint64(nvprops,
3234 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3235 			return (SET_ERROR(EINVAL));
3236 
3237 		if ((error = nvlist_lookup_uint64(nvprops,
3238 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3239 		    &volblocksize)) != 0 && error != ENOENT)
3240 			return (SET_ERROR(EINVAL));
3241 
3242 		if (error != 0)
3243 			volblocksize = zfs_prop_default_numeric(
3244 			    ZFS_PROP_VOLBLOCKSIZE);
3245 
3246 		if ((error = zvol_check_volblocksize(
3247 		    volblocksize)) != 0 ||
3248 		    (error = zvol_check_volsize(volsize,
3249 		    volblocksize)) != 0)
3250 			return (error);
3251 	} else if (type == DMU_OST_ZFS) {
3252 		int error;
3253 
3254 		/*
3255 		 * We have to have normalization and
3256 		 * case-folding flags correct when we do the
3257 		 * file system creation, so go figure them out
3258 		 * now.
3259 		 */
3260 		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3261 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3262 		error = zfs_fill_zplprops(fsname, nvprops,
3263 		    zct.zct_zplprops, &is_insensitive);
3264 		if (error != 0) {
3265 			nvlist_free(zct.zct_zplprops);
3266 			return (error);
3267 		}
3268 	}
3269 
3270 	error = dmu_objset_create(fsname, type,
3271 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3272 	nvlist_free(zct.zct_zplprops);
3273 
3274 	/*
3275 	 * It would be nice to do this atomically.
3276 	 */
3277 	if (error == 0) {
3278 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3279 		    nvprops, outnvl);
3280 		if (error != 0)
3281 			(void) dsl_destroy_head(fsname);
3282 	}
3283 	return (error);
3284 }
3285 
3286 /*
3287  * innvl: {
3288  *     "origin" -> name of origin snapshot
3289  *     (optional) "props" -> { prop -> value }
3290  * }
3291  *
3292  * outnvl: propname -> error code (int32)
3293  */
3294 static int
3295 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3296 {
3297 	int error = 0;
3298 	nvlist_t *nvprops = NULL;
3299 	char *origin_name;
3300 
3301 	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3302 		return (SET_ERROR(EINVAL));
3303 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3304 
3305 	if (strchr(fsname, '@') ||
3306 	    strchr(fsname, '%'))
3307 		return (SET_ERROR(EINVAL));
3308 
3309 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3310 		return (SET_ERROR(EINVAL));
3311 	error = dmu_objset_clone(fsname, origin_name);
3312 	if (error != 0)
3313 		return (error);
3314 
3315 	/*
3316 	 * It would be nice to do this atomically.
3317 	 */
3318 	if (error == 0) {
3319 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3320 		    nvprops, outnvl);
3321 		if (error != 0)
3322 			(void) dsl_destroy_head(fsname);
3323 	}
3324 	return (error);
3325 }
3326 
3327 /* ARGSUSED */
3328 static int
3329 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3330 {
3331 	if (strchr(fsname, '@') ||
3332 	    strchr(fsname, '%'))
3333 		return (SET_ERROR(EINVAL));
3334 
3335 	return (dmu_objset_remap_indirects(fsname));
3336 }
3337 
3338 /*
3339  * innvl: {
3340  *     "snaps" -> { snapshot1, snapshot2 }
3341  *     (optional) "props" -> { prop -> value (string) }
3342  * }
3343  *
3344  * outnvl: snapshot -> error code (int32)
3345  */
3346 static int
3347 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3348 {
3349 	nvlist_t *snaps;
3350 	nvlist_t *props = NULL;
3351 	int error, poollen;
3352 	nvpair_t *pair;
3353 
3354 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3355 	if ((error = zfs_check_userprops(poolname, props)) != 0)
3356 		return (error);
3357 
3358 	if (!nvlist_empty(props) &&
3359 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3360 		return (SET_ERROR(ENOTSUP));
3361 
3362 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3363 		return (SET_ERROR(EINVAL));
3364 	poollen = strlen(poolname);
3365 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3366 	    pair = nvlist_next_nvpair(snaps, pair)) {
3367 		const char *name = nvpair_name(pair);
3368 		const char *cp = strchr(name, '@');
3369 
3370 		/*
3371 		 * The snap name must contain an @, and the part after it must
3372 		 * contain only valid characters.
3373 		 */
3374 		if (cp == NULL ||
3375 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3376 			return (SET_ERROR(EINVAL));
3377 
3378 		/*
3379 		 * The snap must be in the specified pool.
3380 		 */
3381 		if (strncmp(name, poolname, poollen) != 0 ||
3382 		    (name[poollen] != '/' && name[poollen] != '@'))
3383 			return (SET_ERROR(EXDEV));
3384 
3385 		/* This must be the only snap of this fs. */
3386 		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3387 		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3388 			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3389 			    == 0) {
3390 				return (SET_ERROR(EXDEV));
3391 			}
3392 		}
3393 	}
3394 
3395 	error = dsl_dataset_snapshot(snaps, props, outnvl);
3396 	return (error);
3397 }
3398 
3399 /*
3400  * innvl: "message" -> string
3401  */
3402 /* ARGSUSED */
3403 static int
3404 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3405 {
3406 	char *message;
3407 	spa_t *spa;
3408 	int error;
3409 	char *poolname;
3410 
3411 	/*
3412 	 * The poolname in the ioctl is not set, we get it from the TSD,
3413 	 * which was set at the end of the last successful ioctl that allows
3414 	 * logging.  The secpolicy func already checked that it is set.
3415 	 * Only one log ioctl is allowed after each successful ioctl, so
3416 	 * we clear the TSD here.
3417 	 */
3418 	poolname = tsd_get(zfs_allow_log_key);
3419 	(void) tsd_set(zfs_allow_log_key, NULL);
3420 	error = spa_open(poolname, &spa, FTAG);
3421 	strfree(poolname);
3422 	if (error != 0)
3423 		return (error);
3424 
3425 	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3426 		spa_close(spa, FTAG);
3427 		return (SET_ERROR(EINVAL));
3428 	}
3429 
3430 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3431 		spa_close(spa, FTAG);
3432 		return (SET_ERROR(ENOTSUP));
3433 	}
3434 
3435 	error = spa_history_log(spa, message);
3436 	spa_close(spa, FTAG);
3437 	return (error);
3438 }
3439 
3440 /*
3441  * The dp_config_rwlock must not be held when calling this, because the
3442  * unmount may need to write out data.
3443  *
3444  * This function is best-effort.  Callers must deal gracefully if it
3445  * remains mounted (or is remounted after this call).
3446  *
 * This function returns nothing.  It does no work if the argument is not a
 * snapshot or is not currently a filesystem; otherwise it attempts a forced
 * unmount and ignores any failure.
3449  */
3450 void
3451 zfs_unmount_snap(const char *snapname)
3452 {
3453 	vfs_t *vfsp = NULL;
3454 	zfsvfs_t *zfsvfs = NULL;
3455 
3456 	if (strchr(snapname, '@') == NULL)
3457 		return;
3458 
3459 	int err = getzfsvfs(snapname, &zfsvfs);
3460 	if (err != 0) {
3461 		ASSERT3P(zfsvfs, ==, NULL);
3462 		return;
3463 	}
3464 	vfsp = zfsvfs->z_vfs;
3465 
3466 	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3467 
3468 	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3469 	VFS_RELE(vfsp);
3470 	if (err != 0)
3471 		return;
3472 
3473 	/*
3474 	 * Always force the unmount for snapshots.
3475 	 */
3476 	(void) dounmount(vfsp, MS_FORCE, kcred);
3477 }
3478 
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	/*
	 * Callback-shaped wrapper around zfs_unmount_snap(): arg is ignored
	 * and 0 is always returned, whatever the outcome of the unmount.
	 */
	zfs_unmount_snap(snapname);

	return (0);
}
3486 
3487 /*
3488  * When a clone is destroyed, its origin may also need to be destroyed,
3489  * in which case it must be unmounted.  This routine will do that unmount
3490  * if necessary.
3491  */
3492 void
3493 zfs_destroy_unmount_origin(const char *fsname)
3494 {
3495 	int error;
3496 	objset_t *os;
3497 	dsl_dataset_t *ds;
3498 
3499 	error = dmu_objset_hold(fsname, FTAG, &os);
3500 	if (error != 0)
3501 		return;
3502 	ds = dmu_objset_ds(os);
3503 	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3504 		char originname[ZFS_MAX_DATASET_NAME_LEN];
3505 		dsl_dataset_name(ds->ds_prev, originname);
3506 		dmu_objset_rele(os, FTAG);
3507 		zfs_unmount_snap(originname);
3508 	} else {
3509 		dmu_objset_rele(os, FTAG);
3510 	}
3511 }
3512 
3513 /*
3514  * innvl: {
3515  *     "snaps" -> { snapshot1, snapshot2 }
3516  *     (optional boolean) "defer"
3517  * }
3518  *
3519  * outnvl: snapshot -> error code (int32)
3520  *
3521  */
3522 /* ARGSUSED */
3523 static int
3524 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3525 {
3526 	nvlist_t *snaps;
3527 	nvpair_t *pair;
3528 	boolean_t defer;
3529 
3530 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3531 		return (SET_ERROR(EINVAL));
3532 	defer = nvlist_exists(innvl, "defer");
3533 
3534 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3535 	    pair = nvlist_next_nvpair(snaps, pair)) {
3536 		zfs_unmount_snap(nvpair_name(pair));
3537 	}
3538 
3539 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3540 }
3541 
3542 /*
3543  * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3544  * All bookmarks must be in the same pool.
3545  *
3546  * innvl: {
3547  *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3548  * }
3549  *
3550  * outnvl: bookmark -> error code (int32)
3551  *
3552  */
3553 /* ARGSUSED */
3554 static int
3555 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3556 {
3557 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3558 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3559 		char *snap_name;
3560 
3561 		/*
3562 		 * Verify the snapshot argument.
3563 		 */
3564 		if (nvpair_value_string(pair, &snap_name) != 0)
3565 			return (SET_ERROR(EINVAL));
3566 
3567 
3568 		/* Verify that the keys (bookmarks) are unique */
3569 		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3570 		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3571 			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3572 				return (SET_ERROR(EINVAL));
3573 		}
3574 	}
3575 
3576 	return (dsl_bookmark_create(innvl, outnvl));
3577 }
3578 
3579 /*
3580  * innvl: {
3581  *     property 1, property 2, ...
3582  * }
3583  *
3584  * outnvl: {
3585  *     bookmark name 1 -> { property 1, property 2, ... },
3586  *     bookmark name 2 -> { property 1, property 2, ... }
3587  * }
3588  *
3589  */
3590 static int
3591 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3592 {
3593 	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3594 }
3595 
3596 /*
3597  * innvl: {
3598  *     bookmark name 1, bookmark name 2
3599  * }
3600  *
3601  * outnvl: bookmark -> error code (int32)
3602  *
3603  */
3604 static int
3605 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3606     nvlist_t *outnvl)
3607 {
3608 	int error, poollen;
3609 
3610 	poollen = strlen(poolname);
3611 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3612 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3613 		const char *name = nvpair_name(pair);
3614 		const char *cp = strchr(name, '#');
3615 
3616 		/*
3617 		 * The bookmark name must contain an #, and the part after it
3618 		 * must contain only valid characters.
3619 		 */
3620 		if (cp == NULL ||
3621 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3622 			return (SET_ERROR(EINVAL));
3623 
3624 		/*
3625 		 * The bookmark must be in the specified pool.
3626 		 */
3627 		if (strncmp(name, poolname, poollen) != 0 ||
3628 		    (name[poollen] != '/' && name[poollen] != '#'))
3629 			return (SET_ERROR(EXDEV));
3630 	}
3631 
3632 	error = dsl_bookmark_destroy(innvl, outnvl);
3633 	return (error);
3634 }
3635 
3636 static int
3637 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
3638     nvlist_t *outnvl)
3639 {
3640 	char *program;
3641 	uint64_t instrlimit, memlimit;
3642 	boolean_t sync_flag;
3643 	nvpair_t *nvarg = NULL;
3644 
3645 	if (0 != nvlist_lookup_string(innvl, ZCP_ARG_PROGRAM, &program)) {
3646 		return (EINVAL);
3647 	}
3648 	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
3649 		sync_flag = B_TRUE;
3650 	}
3651 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
3652 		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
3653 	}
3654 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
3655 		memlimit = ZCP_DEFAULT_MEMLIMIT;
3656 	}
3657 	if (0 != nvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST, &nvarg)) {
3658 		return (EINVAL);
3659 	}
3660 
3661 	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
3662 		return (EINVAL);
3663 	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
3664 		return (EINVAL);
3665 
3666 	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
3667 	    nvarg, outnvl));
3668 }
3669 
3670 /*
3671  * innvl: unused
3672  * outnvl: empty
3673  */
3674 /* ARGSUSED */
3675 static int
3676 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3677 {
3678 	return (spa_checkpoint(poolname));
3679 }
3680 
3681 /*
3682  * innvl: unused
3683  * outnvl: empty
3684  */
3685 /* ARGSUSED */
3686 static int
3687 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
3688     nvlist_t *outnvl)
3689 {
3690 	return (spa_checkpoint_discard(poolname));
3691 }
3692 
3693 /*
3694  * inputs:
3695  * zc_name		name of dataset to destroy
3696  * zc_defer_destroy	mark for deferred destroy
3697  *
3698  * outputs:		none
3699  */
3700 static int
3701 zfs_ioc_destroy(zfs_cmd_t *zc)
3702 {
3703 	objset_t *os;
3704 	dmu_objset_type_t ost;
3705 	int err;
3706 
3707 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3708 	if (err != 0)
3709 		return (err);
3710 	ost = dmu_objset_type(os);
3711 	dmu_objset_rele(os, FTAG);
3712 
3713 	if (ost == DMU_OST_ZFS)
3714 		zfs_unmount_snap(zc->zc_name);
3715 
3716 	if (strchr(zc->zc_name, '@'))
3717 		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3718 	else
3719 		err = dsl_destroy_head(zc->zc_name);
3720 	if (ost == DMU_OST_ZVOL && err == 0)
3721 		(void) zvol_remove_minor(zc->zc_name);
3722 	return (err);
3723 }
3724 
3725 /*
3726  * innvl: {
3727  *     vdevs: {
3728  *         guid 1, guid 2, ...
3729  *     },
3730  *     func: POOL_INITIALIZE_{CANCEL|DO|SUSPEND}
3731  * }
3732  *
3733  * outnvl: {
3734  *     [func: EINVAL (if provided command type didn't make sense)],
3735  *     [vdevs: {
3736  *         guid1: errno, (see function body for possible errnos)
3737  *         ...
3738  *     }]
3739  * }
3740  *
3741  */
3742 static int
3743 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3744 {
3745 	spa_t *spa;
3746 	int error;
3747 
3748 	error = spa_open(poolname, &spa, FTAG);
3749 	if (error != 0)
3750 		return (error);
3751 
3752 	uint64_t cmd_type;
3753 	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
3754 	    &cmd_type) != 0) {
3755 		spa_close(spa, FTAG);
3756 		return (SET_ERROR(EINVAL));
3757 	}
3758 	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
3759 	    cmd_type == POOL_INITIALIZE_DO ||
3760 	    cmd_type == POOL_INITIALIZE_SUSPEND)) {
3761 		spa_close(spa, FTAG);
3762 		return (SET_ERROR(EINVAL));
3763 	}
3764 
3765 	nvlist_t *vdev_guids;
3766 	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
3767 	    &vdev_guids) != 0) {
3768 		spa_close(spa, FTAG);
3769 		return (SET_ERROR(EINVAL));
3770 	}
3771 
3772 	nvlist_t *vdev_errlist = fnvlist_alloc();
3773 	int total_errors = 0;
3774 
3775 	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
3776 	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
3777 		uint64_t vdev_guid = fnvpair_value_uint64(pair);
3778 
3779 		error = spa_vdev_initialize(spa, vdev_guid, cmd_type);
3780 		if (error != 0) {
3781 			char guid_as_str[MAXNAMELEN];
3782 
3783 			(void) snprintf(guid_as_str, sizeof (guid_as_str),
3784 			    "%llu", (unsigned long long)vdev_guid);
3785 			fnvlist_add_int64(vdev_errlist, guid_as_str, error);
3786 			total_errors++;
3787 		}
3788 	}
3789 	if (fnvlist_size(vdev_errlist) > 0) {
3790 		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
3791 		    vdev_errlist);
3792 	}
3793 	fnvlist_free(vdev_errlist);
3794 
3795 	spa_close(spa, FTAG);
3796 	return (total_errors > 0 ? EINVAL : 0);
3797 }
3798 
3799 /*
 * fsname is the name of the dataset to roll back (to its most recent
 * snapshot).
 *
 * innvl: {
 *     (optional) "target" -> name of expected target snapshot
 * }
 *
 * outnvl: {
 *     "target" -> name of most recent snapshot
 * }
3806  */
3807 /* ARGSUSED */
3808 static int
3809 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3810 {
3811 	zfsvfs_t *zfsvfs;
3812 	char *target = NULL;
3813 	int error;
3814 
3815 	(void) nvlist_lookup_string(innvl, "target", &target);
3816 	if (target != NULL) {
3817 		const char *cp = strchr(target, '@');
3818 
3819 		/*
3820 		 * The snap name must contain an @, and the part after it must
3821 		 * contain only valid characters.
3822 		 */
3823 		if (cp == NULL ||
3824 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3825 			return (SET_ERROR(EINVAL));
3826 	}
3827 
3828 	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3829 		dsl_dataset_t *ds;
3830 
3831 		ds = dmu_objset_ds(zfsvfs->z_os);
3832 		error = zfs_suspend_fs(zfsvfs);
3833 		if (error == 0) {
3834 			int resume_err;
3835 
3836 			error = dsl_dataset_rollback(fsname, target, zfsvfs,
3837 			    outnvl);
3838 			resume_err = zfs_resume_fs(zfsvfs, ds);
3839 			error = error ? error : resume_err;
3840 		}
3841 		VFS_RELE(zfsvfs->z_vfs);
3842 	} else {
3843 		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
3844 	}
3845 	return (error);
3846 }
3847 
3848 static int
3849 recursive_unmount(const char *fsname, void *arg)
3850 {
3851 	const char *snapname = arg;
3852 	char fullname[ZFS_MAX_DATASET_NAME_LEN];
3853 
3854 	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3855 	zfs_unmount_snap(fullname);
3856 
3857 	return (0);
3858 }
3859 
3860 /*
3861  * inputs:
3862  * zc_name	old name of dataset
3863  * zc_value	new name of dataset
3864  * zc_cookie	recursive flag (only valid for snapshots)
3865  *
3866  * outputs:	none
3867  */
3868 static int
3869 zfs_ioc_rename(zfs_cmd_t *zc)
3870 {
3871 	objset_t *os;
3872 	dmu_objset_type_t ost;
3873 	boolean_t recursive = zc->zc_cookie & 1;
3874 	char *at;
3875 	int err;
3876 
3877 	/* "zfs rename" from and to ...%recv datasets should both fail */
3878 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
3879 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3880 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
3881 	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3882 	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
3883 		return (SET_ERROR(EINVAL));
3884 
3885 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
3886 	if (err != 0)
3887 		return (err);
3888 	ost = dmu_objset_type(os);
3889 	dmu_objset_rele(os, FTAG);
3890 
3891 	at = strchr(zc->zc_name, '@');
3892 	if (at != NULL) {
3893 		/* snaps must be in same fs */
3894 		int error;
3895 
3896 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3897 			return (SET_ERROR(EXDEV));
3898 		*at = '\0';
3899 		if (ost == DMU_OST_ZFS) {
3900 			error = dmu_objset_find(zc->zc_name,
3901 			    recursive_unmount, at + 1,
3902 			    recursive ? DS_FIND_CHILDREN : 0);
3903 			if (error != 0) {
3904 				*at = '@';
3905 				return (error);
3906 			}
3907 		}
3908 		error = dsl_dataset_rename_snapshot(zc->zc_name,
3909 		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3910 		*at = '@';
3911 
3912 		return (error);
3913 	} else {
3914 		if (ost == DMU_OST_ZVOL)
3915 			(void) zvol_remove_minor(zc->zc_name);
3916 		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3917 	}
3918 }
3919 
3920 static int
3921 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3922 {
3923 	const char *propname = nvpair_name(pair);
3924 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3925 	zfs_prop_t prop = zfs_name_to_prop(propname);
3926 	uint64_t intval;
3927 	int err;
3928 
3929 	if (prop == ZPROP_INVAL) {
3930 		if (zfs_prop_user(propname)) {
3931 			if (err = zfs_secpolicy_write_perms(dsname,
3932 			    ZFS_DELEG_PERM_USERPROP, cr))
3933 				return (err);
3934 			return (0);
3935 		}
3936 
3937 		if (!issnap && zfs_prop_userquota(propname)) {
3938 			const char *perm = NULL;
3939 			const char *uq_prefix =
3940 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3941 			const char *gq_prefix =
3942 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3943 
3944 			if (strncmp(propname, uq_prefix,
3945 			    strlen(uq_prefix)) == 0) {
3946 				perm = ZFS_DELEG_PERM_USERQUOTA;
3947 			} else if (strncmp(propname, gq_prefix,
3948 			    strlen(gq_prefix)) == 0) {
3949 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3950 			} else {
3951 				/* USERUSED and GROUPUSED are read-only */
3952 				return (SET_ERROR(EINVAL));
3953 			}
3954 
3955 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3956 				return (err);
3957 			return (0);
3958 		}
3959 
3960 		return (SET_ERROR(EINVAL));
3961 	}
3962 
3963 	if (issnap)
3964 		return (SET_ERROR(EINVAL));
3965 
3966 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3967 		/*
3968 		 * dsl_prop_get_all_impl() returns properties in this
3969 		 * format.
3970 		 */
3971 		nvlist_t *attrs;
3972 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3973 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3974 		    &pair) == 0);
3975 	}
3976 
3977 	/*
3978 	 * Check that this value is valid for this pool version
3979 	 */
3980 	switch (prop) {
3981 	case ZFS_PROP_COMPRESSION:
3982 		/*
3983 		 * If the user specified gzip compression, make sure
3984 		 * the SPA supports it. We ignore any errors here since
3985 		 * we'll catch them later.
3986 		 */
3987 		if (nvpair_value_uint64(pair, &intval) == 0) {
3988 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3989 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3990 			    zfs_earlier_version(dsname,
3991 			    SPA_VERSION_GZIP_COMPRESSION)) {
3992 				return (SET_ERROR(ENOTSUP));
3993 			}
3994 
3995 			if (intval == ZIO_COMPRESS_ZLE &&
3996 			    zfs_earlier_version(dsname,
3997 			    SPA_VERSION_ZLE_COMPRESSION))
3998 				return (SET_ERROR(ENOTSUP));
3999 
4000 			if (intval == ZIO_COMPRESS_LZ4) {
4001 				spa_t *spa;
4002 
4003 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4004 					return (err);
4005 
4006 				if (!spa_feature_is_enabled(spa,
4007 				    SPA_FEATURE_LZ4_COMPRESS)) {
4008 					spa_close(spa, FTAG);
4009 					return (SET_ERROR(ENOTSUP));
4010 				}
4011 				spa_close(spa, FTAG);
4012 			}
4013 
4014 			/*
4015 			 * If this is a bootable dataset then
4016 			 * verify that the compression algorithm
4017 			 * is supported for booting. We must return
4018 			 * something other than ENOTSUP since it
4019 			 * implies a downrev pool version.
4020 			 */
4021 			if (zfs_is_bootfs(dsname) &&
4022 			    !BOOTFS_COMPRESS_VALID(intval)) {
4023 				return (SET_ERROR(ERANGE));
4024 			}
4025 		}
4026 		break;
4027 
4028 	case ZFS_PROP_COPIES:
4029 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
4030 			return (SET_ERROR(ENOTSUP));
4031 		break;
4032 
4033 	case ZFS_PROP_RECORDSIZE:
4034 		/* Record sizes above 128k need the feature to be enabled */
4035 		if (nvpair_value_uint64(pair, &intval) == 0 &&
4036 		    intval > SPA_OLD_MAXBLOCKSIZE) {
4037 			spa_t *spa;
4038 
4039 			/*
4040 			 * We don't allow setting the property above 1MB,
4041 			 * unless the tunable has been changed.
4042 			 */
4043 			if (intval > zfs_max_recordsize ||
4044 			    intval > SPA_MAXBLOCKSIZE)
4045 				return (SET_ERROR(ERANGE));
4046 
4047 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4048 				return (err);
4049 
4050 			if (!spa_feature_is_enabled(spa,
4051 			    SPA_FEATURE_LARGE_BLOCKS)) {
4052 				spa_close(spa, FTAG);
4053 				return (SET_ERROR(ENOTSUP));
4054 			}
4055 			spa_close(spa, FTAG);
4056 		}
4057 		break;
4058 
4059 	case ZFS_PROP_DNODESIZE:
4060 		/* Dnode sizes above 512 need the feature to be enabled */
4061 		if (nvpair_value_uint64(pair, &intval) == 0 &&
4062 		    intval != ZFS_DNSIZE_LEGACY) {
4063 			spa_t *spa;
4064 
4065 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4066 				return (err);
4067 
4068 			if (!spa_feature_is_enabled(spa,
4069 			    SPA_FEATURE_LARGE_DNODE)) {
4070 				spa_close(spa, FTAG);
4071 				return (SET_ERROR(ENOTSUP));
4072 			}
4073 			spa_close(spa, FTAG);
4074 		}
4075 		break;
4076 
4077 	case ZFS_PROP_SHARESMB:
4078 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
4079 			return (SET_ERROR(ENOTSUP));
4080 		break;
4081 
4082 	case ZFS_PROP_ACLINHERIT:
4083 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
4084 		    nvpair_value_uint64(pair, &intval) == 0) {
4085 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
4086 			    zfs_earlier_version(dsname,
4087 			    SPA_VERSION_PASSTHROUGH_X))
4088 				return (SET_ERROR(ENOTSUP));
4089 		}
4090 		break;
4091 
4092 	case ZFS_PROP_CHECKSUM:
4093 	case ZFS_PROP_DEDUP:
4094 	{
4095 		spa_feature_t feature;
4096 		spa_t *spa;
4097 
4098 		/* dedup feature version checks */
4099 		if (prop == ZFS_PROP_DEDUP &&
4100 		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
4101 			return (SET_ERROR(ENOTSUP));
4102 
4103 		if (nvpair_value_uint64(pair, &intval) != 0)
4104 			return (SET_ERROR(EINVAL));
4105 
4106 		/* check prop value is enabled in features */
4107 		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
4108 		if (feature == SPA_FEATURE_NONE)
4109 			break;
4110 
4111 		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
4112 			return (err);
4113 
4114 		if (!spa_feature_is_enabled(spa, feature)) {
4115 			spa_close(spa, FTAG);
4116 			return (SET_ERROR(ENOTSUP));
4117 		}
4118 		spa_close(spa, FTAG);
4119 		break;
4120 	}
4121 	}
4122 
4123 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
4124 }
4125 
4126 /*
4127  * Checks for a race condition to make sure we don't increment a feature flag
4128  * multiple times.
4129  */
4130 static int
4131 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
4132 {
4133 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4134 	spa_feature_t *featurep = arg;
4135 
4136 	if (!spa_feature_is_active(spa, *featurep))
4137 		return (0);
4138 	else
4139 		return (SET_ERROR(EBUSY));
4140 }
4141 
4142 /*
4143  * The callback invoked on feature activation in the sync task caused by
4144  * zfs_prop_activate_feature.
4145  */
4146 static void
4147 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
4148 {
4149 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
4150 	spa_feature_t *featurep = arg;
4151 
4152 	spa_feature_incr(spa, *featurep, tx);
4153 }
4154 
4155 /*
4156  * Activates a feature on a pool in response to a property setting. This
4157  * creates a new sync task which modifies the pool to reflect the feature
4158  * as being active.
4159  */
4160 static int
4161 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
4162 {
4163 	int err;
4164 
4165 	/* EBUSY here indicates that the feature is already active */
4166 	err = dsl_sync_task(spa_name(spa),
4167 	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
4168 	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
4169 
4170 	if (err != 0 && err != EBUSY)
4171 		return (err);
4172 	else
4173 		return (0);
4174 }
4175 
4176 /*
4177  * Removes properties from the given props list that fail permission checks
4178  * needed to clear them and to restore them in case of a receive error. For each
4179  * property, make sure we have both set and inherit permissions.
4180  *
4181  * Returns the first error encountered if any permission checks fail. If the
4182  * caller provides a non-NULL errlist, it also gives the complete list of names
4183  * of all the properties that failed a permission check along with the
4184  * corresponding error numbers. The caller is responsible for freeing the
4185  * returned errlist.
4186  *
4187  * If every property checks out successfully, zero is returned and the list
4188  * pointed at by errlist is NULL.
4189  */
4190 static int
4191 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
4192 {
4193 	zfs_cmd_t *zc;
4194 	nvpair_t *pair, *next_pair;
4195 	nvlist_t *errors;
4196 	int err, rv = 0;
4197 
4198 	if (props == NULL)
4199 		return (0);
4200 
4201 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4202 
4203 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4204 	(void) strcpy(zc->zc_name, dataset);
4205 	pair = nvlist_next_nvpair(props, NULL);
4206 	while (pair != NULL) {
4207 		next_pair = nvlist_next_nvpair(props, pair);
4208 
4209 		(void) strcpy(zc->zc_value, nvpair_name(pair));
4210 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4211 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4212 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4213 			VERIFY(nvlist_add_int32(errors,
4214 			    zc->zc_value, err) == 0);
4215 		}
4216 		pair = next_pair;
4217 	}
4218 	kmem_free(zc, sizeof (zfs_cmd_t));
4219 
4220 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4221 		nvlist_free(errors);
4222 		errors = NULL;
4223 	} else {
4224 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4225 	}
4226 
4227 	if (errlist == NULL)
4228 		nvlist_free(errors);
4229 	else
4230 		*errlist = errors;
4231 
4232 	return (rv);
4233 }
4234 
4235 static boolean_t
4236 propval_equals(nvpair_t *p1, nvpair_t *p2)
4237 {
4238 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4239 		/* dsl_prop_get_all_impl() format */
4240 		nvlist_t *attrs;
4241 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4242 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4243 		    &p1) == 0);
4244 	}
4245 
4246 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4247 		nvlist_t *attrs;
4248 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4249 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4250 		    &p2) == 0);
4251 	}
4252 
4253 	if (nvpair_type(p1) != nvpair_type(p2))
4254 		return (B_FALSE);
4255 
4256 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4257 		char *valstr1, *valstr2;
4258 
4259 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4260 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4261 		return (strcmp(valstr1, valstr2) == 0);
4262 	} else {
4263 		uint64_t intval1, intval2;
4264 
4265 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4266 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4267 		return (intval1 == intval2);
4268 	}
4269 }
4270 
4271 /*
4272  * Remove properties from props if they are not going to change (as determined
4273  * by comparison with origprops). Remove them from origprops as well, since we
4274  * do not need to clear or restore properties that won't change.
4275  */
4276 static void
4277 props_reduce(nvlist_t *props, nvlist_t *origprops)
4278 {
4279 	nvpair_t *pair, *next_pair;
4280 
4281 	if (origprops == NULL)
4282 		return; /* all props need to be received */
4283 
4284 	pair = nvlist_next_nvpair(props, NULL);
4285 	while (pair != NULL) {
4286 		const char *propname = nvpair_name(pair);
4287 		nvpair_t *match;
4288 
4289 		next_pair = nvlist_next_nvpair(props, pair);
4290 
4291 		if ((nvlist_lookup_nvpair(origprops, propname,
4292 		    &match) != 0) || !propval_equals(pair, match))
4293 			goto next; /* need to set received value */
4294 
4295 		/* don't clear the existing received value */
4296 		(void) nvlist_remove_nvpair(origprops, match);
4297 		/* don't bother receiving the property */
4298 		(void) nvlist_remove_nvpair(props, pair);
4299 next:
4300 		pair = next_pair;
4301 	}
4302 }
4303 
4304 /*
4305  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4306  * For example, refquota cannot be set until after the receipt of a dataset,
4307  * because in replication streams, an older/earlier snapshot may exceed the
4308  * refquota.  We want to receive the older/earlier snapshot, but setting
4309  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4310  * the older/earlier snapshot from being received (with EDQUOT).
4311  *
4312  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4313  *
4314  * libzfs will need to be judicious handling errors encountered by props
4315  * extracted by this function.
4316  */
4317 static nvlist_t *
4318 extract_delay_props(nvlist_t *props)
4319 {
4320 	nvlist_t *delayprops;
4321 	nvpair_t *nvp, *tmp;
4322 	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4323 	int i;
4324 
4325 	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4326 
4327 	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4328 	    nvp = nvlist_next_nvpair(props, nvp)) {
4329 		/*
4330 		 * strcmp() is safe because zfs_prop_to_name() always returns
4331 		 * a bounded string.
4332 		 */
4333 		for (i = 0; delayable[i] != 0; i++) {
4334 			if (strcmp(zfs_prop_to_name(delayable[i]),
4335 			    nvpair_name(nvp)) == 0) {
4336 				break;
4337 			}
4338 		}
4339 		if (delayable[i] != 0) {
4340 			tmp = nvlist_prev_nvpair(props, nvp);
4341 			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4342 			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4343 			nvp = tmp;
4344 		}
4345 	}
4346 
4347 	if (nvlist_empty(delayprops)) {
4348 		nvlist_free(delayprops);
4349 		delayprops = NULL;
4350 	}
4351 	return (delayprops);
4352 }
4353 
4354 #ifdef	DEBUG
4355 static boolean_t zfs_ioc_recv_inject_err;
4356 #endif
4357 
4358 /*
4359  * inputs:
4360  * zc_name		name of containing filesystem
4361  * zc_nvlist_src{_size}	nvlist of properties to apply
4362  * zc_value		name of snapshot to create
4363  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
4364  * zc_cookie		file descriptor to recv from
4365  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
4366  * zc_guid		force flag
4367  * zc_cleanup_fd	cleanup-on-exit file descriptor
4368  * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
4369  * zc_resumable		if data is incomplete assume sender will resume
4370  *
4371  * outputs:
4372  * zc_cookie		number of bytes read
4373  * zc_nvlist_dst{_size} error for each unapplied received property
4374  * zc_obj		zprop_errflags_t
4375  * zc_action_handle	handle for this guid/ds mapping
4376  */
4377 static int
4378 zfs_ioc_recv(zfs_cmd_t *zc)
4379 {
4380 	file_t *fp;
4381 	dmu_recv_cookie_t drc;
4382 	boolean_t force = (boolean_t)zc->zc_guid;
4383 	int fd;
4384 	int error = 0;
4385 	int props_error = 0;
4386 	nvlist_t *errors;
4387 	offset_t off;
4388 	nvlist_t *props = NULL; /* sent properties */
4389 	nvlist_t *origprops = NULL; /* existing properties */
4390 	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4391 	char *origin = NULL;
4392 	char *tosnap;
4393 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
4394 	boolean_t first_recvd_props = B_FALSE;
4395 
4396 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4397 	    strchr(zc->zc_value, '@') == NULL ||
4398 	    strchr(zc->zc_value, '%'))
4399 		return (SET_ERROR(EINVAL));
4400 
4401 	(void) strcpy(tofs, zc->zc_value);
4402 	tosnap = strchr(tofs, '@');
4403 	*tosnap++ = '\0';
4404 
4405 	if (zc->zc_nvlist_src != NULL &&
4406 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4407 	    zc->zc_iflags, &props)) != 0)
4408 		return (error);
4409 
4410 	fd = zc->zc_cookie;
4411 	fp = getf(fd);
4412 	if (fp == NULL) {
4413 		nvlist_free(props);
4414 		return (SET_ERROR(EBADF));
4415 	}
4416 
4417 	errors = fnvlist_alloc();
4418 
4419 	if (zc->zc_string[0])
4420 		origin = zc->zc_string;
4421 
4422 	error = dmu_recv_begin(tofs, tosnap,
4423 	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
4424 	if (error != 0)
4425 		goto out;
4426 
4427 	/*
4428 	 * Set properties before we receive the stream so that they are applied
4429 	 * to the new data. Note that we must call dmu_recv_stream() if
4430 	 * dmu_recv_begin() succeeds.
4431 	 */
4432 	if (props != NULL && !drc.drc_newfs) {
4433 		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4434 		    SPA_VERSION_RECVD_PROPS &&
4435 		    !dsl_prop_get_hasrecvd(tofs))
4436 			first_recvd_props = B_TRUE;
4437 
4438 		/*
4439 		 * If new received properties are supplied, they are to
4440 		 * completely replace the existing received properties, so stash
4441 		 * away the existing ones.
4442 		 */
4443 		if (dsl_prop_get_received(tofs, &origprops) == 0) {
4444 			nvlist_t *errlist = NULL;
4445 			/*
4446 			 * Don't bother writing a property if its value won't
4447 			 * change (and avoid the unnecessary security checks).
4448 			 *
4449 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
4450 			 * special case where we blow away all local properties
4451 			 * regardless.
4452 			 */
4453 			if (!first_recvd_props)
4454 				props_reduce(props, origprops);
4455 			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4456 				(void) nvlist_merge(errors, errlist, 0);
4457 			nvlist_free(errlist);
4458 
4459 			if (clear_received_props(tofs, origprops,
4460 			    first_recvd_props ? NULL : props) != 0)
4461 				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4462 		} else {
4463 			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4464 		}
4465 	}
4466 
4467 	if (props != NULL) {
4468 		props_error = dsl_prop_set_hasrecvd(tofs);
4469 
4470 		if (props_error == 0) {
4471 			delayprops = extract_delay_props(props);
4472 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4473 			    props, errors);
4474 		}
4475 	}
4476 
4477 	off = fp->f_offset;
4478 	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
4479 	    &zc->zc_action_handle);
4480 
4481 	if (error == 0) {
4482 		zfsvfs_t *zfsvfs = NULL;
4483 
4484 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
4485 			/* online recv */
4486 			dsl_dataset_t *ds;
4487 			int end_err;
4488 
4489 			ds = dmu_objset_ds(zfsvfs->z_os);
4490 			error = zfs_suspend_fs(zfsvfs);
4491 			/*
4492 			 * If the suspend fails, then the recv_end will
4493 			 * likely also fail, and clean up after itself.
4494 			 */
4495 			end_err = dmu_recv_end(&drc, zfsvfs);
4496 			if (error == 0)
4497 				error = zfs_resume_fs(zfsvfs, ds);
4498 			error = error ? error : end_err;
4499 			VFS_RELE(zfsvfs->z_vfs);
4500 		} else {
4501 			error = dmu_recv_end(&drc, NULL);
4502 		}
4503 
4504 		/* Set delayed properties now, after we're done receiving. */
4505 		if (delayprops != NULL && error == 0) {
4506 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4507 			    delayprops, errors);
4508 		}
4509 	}
4510 
4511 	if (delayprops != NULL) {
4512 		/*
4513 		 * Merge delayed props back in with initial props, in case
4514 		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4515 		 * we have to make sure clear_received_props() includes
4516 		 * the delayed properties).
4517 		 *
4518 		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4519 		 * using ASSERT() will be just like a VERIFY.
4520 		 */
4521 		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4522 		nvlist_free(delayprops);
4523 	}
4524 
4525 	/*
4526 	 * Now that all props, initial and delayed, are set, report the prop
4527 	 * errors to the caller.
4528 	 */
4529 	if (zc->zc_nvlist_dst_size != 0 &&
4530 	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4531 	    put_nvlist(zc, errors) != 0)) {
4532 		/*
4533 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
4534 		 * size or supplied an invalid address.
4535 		 */
4536 		props_error = SET_ERROR(EINVAL);
4537 	}
4538 
4539 	zc->zc_cookie = off - fp->f_offset;
4540 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4541 		fp->f_offset = off;
4542 
4543 #ifdef	DEBUG
4544 	if (zfs_ioc_recv_inject_err) {
4545 		zfs_ioc_recv_inject_err = B_FALSE;
4546 		error = 1;
4547 	}
4548 #endif
4549 	/*
4550 	 * On error, restore the original props.
4551 	 */
4552 	if (error != 0 && props != NULL && !drc.drc_newfs) {
4553 		if (clear_received_props(tofs, props, NULL) != 0) {
4554 			/*
4555 			 * We failed to clear the received properties.
4556 			 * Since we may have left a $recvd value on the
4557 			 * system, we can't clear the $hasrecvd flag.
4558 			 */
4559 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4560 		} else if (first_recvd_props) {
4561 			dsl_prop_unset_hasrecvd(tofs);
4562 		}
4563 
4564 		if (origprops == NULL && !drc.drc_newfs) {
4565 			/* We failed to stash the original properties. */
4566 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4567 		}
4568 
4569 		/*
4570 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
4571 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4572 		 * explictly if we're restoring local properties cleared in the
4573 		 * first new-style receive.
4574 		 */
4575 		if (origprops != NULL &&
4576 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4577 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4578 		    origprops, NULL) != 0) {
4579 			/*
4580 			 * We stashed the original properties but failed to
4581 			 * restore them.
4582 			 */
4583 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4584 		}
4585 	}
4586 out:
4587 	nvlist_free(props);
4588 	nvlist_free(origprops);
4589 	nvlist_free(errors);
4590 	releasef(fd);
4591 
4592 	if (error == 0)
4593 		error = props_error;
4594 
4595 	return (error);
4596 }
4597 
4598 /*
4599  * inputs:
4600  * zc_name	name of snapshot to send
4601  * zc_cookie	file descriptor to send stream to
4602  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4603  * zc_sendobj	objsetid of snapshot to send
4604  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4605  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4606  *		output size in zc_objset_type.
4607  * zc_flags	lzc_send_flags
4608  *
4609  * outputs:
4610  * zc_objset_type	estimated size, if zc_guid is set
4611  */
4612 static int
4613 zfs_ioc_send(zfs_cmd_t *zc)
4614 {
4615 	int error;
4616 	offset_t off;
4617 	boolean_t estimate = (zc->zc_guid != 0);
4618 	boolean_t embedok = (zc->zc_flags & 0x1);
4619 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4620 	boolean_t compressok = (zc->zc_flags & 0x4);
4621 
4622 	if (zc->zc_obj != 0) {
4623 		dsl_pool_t *dp;
4624 		dsl_dataset_t *tosnap;
4625 
4626 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4627 		if (error != 0)
4628 			return (error);
4629 
4630 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4631 		if (error != 0) {
4632 			dsl_pool_rele(dp, FTAG);
4633 			return (error);
4634 		}
4635 
4636 		if (dsl_dir_is_clone(tosnap->ds_dir))
4637 			zc->zc_fromobj =
4638 			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4639 		dsl_dataset_rele(tosnap, FTAG);
4640 		dsl_pool_rele(dp, FTAG);
4641 	}
4642 
4643 	if (estimate) {
4644 		dsl_pool_t *dp;
4645 		dsl_dataset_t *tosnap;
4646 		dsl_dataset_t *fromsnap = NULL;
4647 
4648 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4649 		if (error != 0)
4650 			return (error);
4651 
4652 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4653 		if (error != 0) {
4654 			dsl_pool_rele(dp, FTAG);
4655 			return (error);
4656 		}
4657 
4658 		if (zc->zc_fromobj != 0) {
4659 			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4660 			    FTAG, &fromsnap);
4661 			if (error != 0) {
4662 				dsl_dataset_rele(tosnap, FTAG);
4663 				dsl_pool_rele(dp, FTAG);
4664 				return (error);
4665 			}
4666 		}
4667 
4668 		error = dmu_send_estimate(tosnap, fromsnap, compressok,
4669 		    &zc->zc_objset_type);
4670 
4671 		if (fromsnap != NULL)
4672 			dsl_dataset_rele(fromsnap, FTAG);
4673 		dsl_dataset_rele(tosnap, FTAG);
4674 		dsl_pool_rele(dp, FTAG);
4675 	} else {
4676 		file_t *fp = getf(zc->zc_cookie);
4677 		if (fp == NULL)
4678 			return (SET_ERROR(EBADF));
4679 
4680 		off = fp->f_offset;
4681 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4682 		    zc->zc_fromobj, embedok, large_block_ok, compressok,
4683 		    zc->zc_cookie, fp->f_vnode, &off);
4684 
4685 		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4686 			fp->f_offset = off;
4687 		releasef(zc->zc_cookie);
4688 	}
4689 	return (error);
4690 }
4691 
4692 /*
4693  * inputs:
4694  * zc_name	name of snapshot on which to report progress
4695  * zc_cookie	file descriptor of send stream
4696  *
4697  * outputs:
4698  * zc_cookie	number of bytes written in send stream thus far
4699  */
4700 static int
4701 zfs_ioc_send_progress(zfs_cmd_t *zc)
4702 {
4703 	dsl_pool_t *dp;
4704 	dsl_dataset_t *ds;
4705 	dmu_sendarg_t *dsp = NULL;
4706 	int error;
4707 
4708 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4709 	if (error != 0)
4710 		return (error);
4711 
4712 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4713 	if (error != 0) {
4714 		dsl_pool_rele(dp, FTAG);
4715 		return (error);
4716 	}
4717 
4718 	mutex_enter(&ds->ds_sendstream_lock);
4719 
4720 	/*
4721 	 * Iterate over all the send streams currently active on this dataset.
4722 	 * If there's one which matches the specified file descriptor _and_ the
4723 	 * stream was started by the current process, return the progress of
4724 	 * that stream.
4725 	 */
4726 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4727 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4728 		if (dsp->dsa_outfd == zc->zc_cookie &&
4729 		    dsp->dsa_proc == curproc)
4730 			break;
4731 	}
4732 
4733 	if (dsp != NULL)
4734 		zc->zc_cookie = *(dsp->dsa_off);
4735 	else
4736 		error = SET_ERROR(ENOENT);
4737 
4738 	mutex_exit(&ds->ds_sendstream_lock);
4739 	dsl_dataset_rele(ds, FTAG);
4740 	dsl_pool_rele(dp, FTAG);
4741 	return (error);
4742 }
4743 
4744 static int
4745 zfs_ioc_inject_fault(zfs_cmd_t *zc)
4746 {
4747 	int id, error;
4748 
4749 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4750 	    &zc->zc_inject_record);
4751 
4752 	if (error == 0)
4753 		zc->zc_guid = (uint64_t)id;
4754 
4755 	return (error);
4756 }
4757 
4758 static int
4759 zfs_ioc_clear_fault(zfs_cmd_t *zc)
4760 {
4761 	return (zio_clear_fault((int)zc->zc_guid));
4762 }
4763 
4764 static int
4765 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4766 {
4767 	int id = (int)zc->zc_guid;
4768 	int error;
4769 
4770 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4771 	    &zc->zc_inject_record);
4772 
4773 	zc->zc_guid = id;
4774 
4775 	return (error);
4776 }
4777 
4778 static int
4779 zfs_ioc_error_log(zfs_cmd_t *zc)
4780 {
4781 	spa_t *spa;
4782 	int error;
4783 	size_t count = (size_t)zc->zc_nvlist_dst_size;
4784 
4785 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4786 		return (error);
4787 
4788 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4789 	    &count);
4790 	if (error == 0)
4791 		zc->zc_nvlist_dst_size = count;
4792 	else
4793 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4794 
4795 	spa_close(spa, FTAG);
4796 
4797 	return (error);
4798 }
4799 
4800 static int
4801 zfs_ioc_clear(zfs_cmd_t *zc)
4802 {
4803 	spa_t *spa;
4804 	vdev_t *vd;
4805 	int error;
4806 
4807 	/*
4808 	 * On zpool clear we also fix up missing slogs
4809 	 */
4810 	mutex_enter(&spa_namespace_lock);
4811 	spa = spa_lookup(zc->zc_name);
4812 	if (spa == NULL) {
4813 		mutex_exit(&spa_namespace_lock);
4814 		return (SET_ERROR(EIO));
4815 	}
4816 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4817 		/* we need to let spa_open/spa_load clear the chains */
4818 		spa_set_log_state(spa, SPA_LOG_CLEAR);
4819 	}
4820 	spa->spa_last_open_failed = 0;
4821 	mutex_exit(&spa_namespace_lock);
4822 
4823 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4824 		error = spa_open(zc->zc_name, &spa, FTAG);
4825 	} else {
4826 		nvlist_t *policy;
4827 		nvlist_t *config = NULL;
4828 
4829 		if (zc->zc_nvlist_src == NULL)
4830 			return (SET_ERROR(EINVAL));
4831 
4832 		if ((error = get_nvlist(zc->zc_nvlist_src,
4833 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4834 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4835 			    policy, &config);
4836 			if (config != NULL) {
4837 				int err;
4838 
4839 				if ((err = put_nvlist(zc, config)) != 0)
4840 					error = err;
4841 				nvlist_free(config);
4842 			}
4843 			nvlist_free(policy);
4844 		}
4845 	}
4846 
4847 	if (error != 0)
4848 		return (error);
4849 
4850 	spa_vdev_state_enter(spa, SCL_NONE);
4851 
4852 	if (zc->zc_guid == 0) {
4853 		vd = NULL;
4854 	} else {
4855 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4856 		if (vd == NULL) {
4857 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4858 			spa_close(spa, FTAG);
4859 			return (SET_ERROR(ENODEV));
4860 		}
4861 	}
4862 
4863 	vdev_clear(spa, vd);
4864 
4865 	(void) spa_vdev_state_exit(spa, NULL, 0);
4866 
4867 	/*
4868 	 * Resume any suspended I/Os.
4869 	 */
4870 	if (zio_resume(spa) != 0)
4871 		error = SET_ERROR(EIO);
4872 
4873 	spa_close(spa, FTAG);
4874 
4875 	return (error);
4876 }
4877 
4878 static int
4879 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4880 {
4881 	spa_t *spa;
4882 	int error;
4883 
4884 	error = spa_open(zc->zc_name, &spa, FTAG);
4885 	if (error != 0)
4886 		return (error);
4887 
4888 	spa_vdev_state_enter(spa, SCL_NONE);
4889 
4890 	/*
4891 	 * If a resilver is already in progress then set the
4892 	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4893 	 * the scan as a side effect of the reopen. Otherwise, let
4894 	 * vdev_open() decided if a resilver is required.
4895 	 */
4896 	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4897 	vdev_reopen(spa->spa_root_vdev);
4898 	spa->spa_scrub_reopen = B_FALSE;
4899 
4900 	(void) spa_vdev_state_exit(spa, NULL, 0);
4901 	spa_close(spa, FTAG);
4902 	return (0);
4903 }
4904 /*
4905  * inputs:
4906  * zc_name	name of filesystem
4907  *
4908  * outputs:
4909  * zc_string	name of conflicting snapshot, if there is one
4910  */
4911 static int
4912 zfs_ioc_promote(zfs_cmd_t *zc)
4913 {
4914 	dsl_pool_t *dp;
4915 	dsl_dataset_t *ds, *ods;
4916 	char origin[ZFS_MAX_DATASET_NAME_LEN];
4917 	char *cp;
4918 	int error;
4919 
4920 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4921 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
4922 	    strchr(zc->zc_name, '%'))
4923 		return (SET_ERROR(EINVAL));
4924 
4925 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4926 	if (error != 0)
4927 		return (error);
4928 
4929 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4930 	if (error != 0) {
4931 		dsl_pool_rele(dp, FTAG);
4932 		return (error);
4933 	}
4934 
4935 	if (!dsl_dir_is_clone(ds->ds_dir)) {
4936 		dsl_dataset_rele(ds, FTAG);
4937 		dsl_pool_rele(dp, FTAG);
4938 		return (SET_ERROR(EINVAL));
4939 	}
4940 
4941 	error = dsl_dataset_hold_obj(dp,
4942 	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
4943 	if (error != 0) {
4944 		dsl_dataset_rele(ds, FTAG);
4945 		dsl_pool_rele(dp, FTAG);
4946 		return (error);
4947 	}
4948 
4949 	dsl_dataset_name(ods, origin);
4950 	dsl_dataset_rele(ods, FTAG);
4951 	dsl_dataset_rele(ds, FTAG);
4952 	dsl_pool_rele(dp, FTAG);
4953 
4954 	/*
4955 	 * We don't need to unmount *all* the origin fs's snapshots, but
4956 	 * it's easier.
4957 	 */
4958 	cp = strchr(origin, '@');
4959 	if (cp)
4960 		*cp = '\0';
4961 	(void) dmu_objset_find(origin,
4962 	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4963 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4964 }
4965 
4966 /*
4967  * Retrieve a single {user|group}{used|quota}@... property.
4968  *
4969  * inputs:
4970  * zc_name	name of filesystem
4971  * zc_objset_type zfs_userquota_prop_t
4972  * zc_value	domain name (eg. "S-1-234-567-89")
4973  * zc_guid	RID/UID/GID
4974  *
4975  * outputs:
4976  * zc_cookie	property value
4977  */
4978 static int
4979 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4980 {
4981 	zfsvfs_t *zfsvfs;
4982 	int error;
4983 
4984 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4985 		return (SET_ERROR(EINVAL));
4986 
4987 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4988 	if (error != 0)
4989 		return (error);
4990 
4991 	error = zfs_userspace_one(zfsvfs,
4992 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4993 	zfsvfs_rele(zfsvfs, FTAG);
4994 
4995 	return (error);
4996 }
4997 
4998 /*
4999  * inputs:
5000  * zc_name		name of filesystem
5001  * zc_cookie		zap cursor
5002  * zc_objset_type	zfs_userquota_prop_t
5003  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
5004  *
5005  * outputs:
5006  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
5007  * zc_cookie	zap cursor
5008  */
5009 static int
5010 zfs_ioc_userspace_many(zfs_cmd_t *zc)
5011 {
5012 	zfsvfs_t *zfsvfs;
5013 	int bufsize = zc->zc_nvlist_dst_size;
5014 
5015 	if (bufsize <= 0)
5016 		return (SET_ERROR(ENOMEM));
5017 
5018 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
5019 	if (error != 0)
5020 		return (error);
5021 
5022 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
5023 
5024 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
5025 	    buf, &zc->zc_nvlist_dst_size);
5026 
5027 	if (error == 0) {
5028 		error = xcopyout(buf,
5029 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
5030 		    zc->zc_nvlist_dst_size);
5031 	}
5032 	kmem_free(buf, bufsize);
5033 	zfsvfs_rele(zfsvfs, FTAG);
5034 
5035 	return (error);
5036 }
5037 
5038 /*
5039  * inputs:
5040  * zc_name		name of filesystem
5041  *
5042  * outputs:
5043  * none
5044  */
5045 static int
5046 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
5047 {
5048 	objset_t *os;
5049 	int error = 0;
5050 	zfsvfs_t *zfsvfs;
5051 
5052 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
5053 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
5054 			/*
5055 			 * If userused is not enabled, it may be because the
5056 			 * objset needs to be closed & reopened (to grow the
5057 			 * objset_phys_t).  Suspend/resume the fs will do that.
5058 			 */
5059 			dsl_dataset_t *ds, *newds;
5060 
5061 			ds = dmu_objset_ds(zfsvfs->z_os);
5062 			error = zfs_suspend_fs(zfsvfs);
5063 			if (error == 0) {
5064 				dmu_objset_refresh_ownership(ds, &newds,
5065 				    zfsvfs);
5066 				error = zfs_resume_fs(zfsvfs, newds);
5067 			}
5068 		}
5069 		if (error == 0)
5070 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
5071 		VFS_RELE(zfsvfs->z_vfs);
5072 	} else {
5073 		/* XXX kind of reading contents without owning */
5074 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5075 		if (error != 0)
5076 			return (error);
5077 
5078 		error = dmu_objset_userspace_upgrade(os);
5079 		dmu_objset_rele(os, FTAG);
5080 	}
5081 
5082 	return (error);
5083 }
5084 
5085 /*
5086  * We don't want to have a hard dependency
5087  * against some special symbols in sharefs
5088  * nfs, and smbsrv.  Determine them if needed when
5089  * the first file system is shared.
5090  * Neither sharefs, nfs or smbsrv are unloadable modules.
5091  */
5092 int (*znfsexport_fs)(void *arg);
5093 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
5094 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
5095 
5096 int zfs_nfsshare_inited;
5097 int zfs_smbshare_inited;
5098 
5099 ddi_modhandle_t nfs_mod;
5100 ddi_modhandle_t sharefs_mod;
5101 ddi_modhandle_t smbsrv_mod;
5102 kmutex_t zfs_share_lock;
5103 
5104 static int
5105 zfs_init_sharefs()
5106 {
5107 	int error;
5108 
5109 	ASSERT(MUTEX_HELD(&zfs_share_lock));
5110 	/* Both NFS and SMB shares also require sharetab support. */
5111 	if (sharefs_mod == NULL && ((sharefs_mod =
5112 	    ddi_modopen("fs/sharefs",
5113 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
5114 		return (SET_ERROR(ENOSYS));
5115 	}
5116 	if (zshare_fs == NULL && ((zshare_fs =
5117 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
5118 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
5119 		return (SET_ERROR(ENOSYS));
5120 	}
5121 	return (0);
5122 }
5123 
5124 static int
5125 zfs_ioc_share(zfs_cmd_t *zc)
5126 {
5127 	int error;
5128 	int opcode;
5129 
5130 	switch (zc->zc_share.z_sharetype) {
5131 	case ZFS_SHARE_NFS:
5132 	case ZFS_UNSHARE_NFS:
5133 		if (zfs_nfsshare_inited == 0) {
5134 			mutex_enter(&zfs_share_lock);
5135 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
5136 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5137 				mutex_exit(&zfs_share_lock);
5138 				return (SET_ERROR(ENOSYS));
5139 			}
5140 			if (znfsexport_fs == NULL &&
5141 			    ((znfsexport_fs = (int (*)(void *))
5142 			    ddi_modsym(nfs_mod,
5143 			    "nfs_export", &error)) == NULL)) {
5144 				mutex_exit(&zfs_share_lock);
5145 				return (SET_ERROR(ENOSYS));
5146 			}
5147 			error = zfs_init_sharefs();
5148 			if (error != 0) {
5149 				mutex_exit(&zfs_share_lock);
5150 				return (SET_ERROR(ENOSYS));
5151 			}
5152 			zfs_nfsshare_inited = 1;
5153 			mutex_exit(&zfs_share_lock);
5154 		}
5155 		break;
5156 	case ZFS_SHARE_SMB:
5157 	case ZFS_UNSHARE_SMB:
5158 		if (zfs_smbshare_inited == 0) {
5159 			mutex_enter(&zfs_share_lock);
5160 			if (smbsrv_mod == NULL && ((smbsrv_mod =
5161 			    ddi_modopen("drv/smbsrv",
5162 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
5163 				mutex_exit(&zfs_share_lock);
5164 				return (SET_ERROR(ENOSYS));
5165 			}
5166 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
5167 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
5168 			    "smb_server_share", &error)) == NULL)) {
5169 				mutex_exit(&zfs_share_lock);
5170 				return (SET_ERROR(ENOSYS));
5171 			}
5172 			error = zfs_init_sharefs();
5173 			if (error != 0) {
5174 				mutex_exit(&zfs_share_lock);
5175 				return (SET_ERROR(ENOSYS));
5176 			}
5177 			zfs_smbshare_inited = 1;
5178 			mutex_exit(&zfs_share_lock);
5179 		}
5180 		break;
5181 	default:
5182 		return (SET_ERROR(EINVAL));
5183 	}
5184 
5185 	switch (zc->zc_share.z_sharetype) {
5186 	case ZFS_SHARE_NFS:
5187 	case ZFS_UNSHARE_NFS:
5188 		if (error =
5189 		    znfsexport_fs((void *)
5190 		    (uintptr_t)zc->zc_share.z_exportdata))
5191 			return (error);
5192 		break;
5193 	case ZFS_SHARE_SMB:
5194 	case ZFS_UNSHARE_SMB:
5195 		if (error = zsmbexport_fs((void *)
5196 		    (uintptr_t)zc->zc_share.z_exportdata,
5197 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
5198 		    B_TRUE: B_FALSE)) {
5199 			return (error);
5200 		}
5201 		break;
5202 	}
5203 
5204 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
5205 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
5206 	    SHAREFS_ADD : SHAREFS_REMOVE;
5207 
5208 	/*
5209 	 * Add or remove share from sharetab
5210 	 */
5211 	error = zshare_fs(opcode,
5212 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
5213 	    zc->zc_share.z_sharemax);
5214 
5215 	return (error);
5216 
5217 }
5218 
5219 ace_t full_access[] = {
5220 	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
5221 };
5222 
5223 /*
5224  * inputs:
5225  * zc_name		name of containing filesystem
5226  * zc_obj		object # beyond which we want next in-use object #
5227  *
5228  * outputs:
5229  * zc_obj		next in-use object #
5230  */
5231 static int
5232 zfs_ioc_next_obj(zfs_cmd_t *zc)
5233 {
5234 	objset_t *os = NULL;
5235 	int error;
5236 
5237 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
5238 	if (error != 0)
5239 		return (error);
5240 
5241 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5242 	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5243 
5244 	dmu_objset_rele(os, FTAG);
5245 	return (error);
5246 }
5247 
5248 /*
5249  * inputs:
5250  * zc_name		name of filesystem
5251  * zc_value		prefix name for snapshot
5252  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5253  *
5254  * outputs:
5255  * zc_value		short name of new snapshot
5256  */
5257 static int
5258 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5259 {
5260 	char *snap_name;
5261 	char *hold_name;
5262 	int error;
5263 	minor_t minor;
5264 
5265 	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5266 	if (error != 0)
5267 		return (error);
5268 
5269 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5270 	    (u_longlong_t)ddi_get_lbolt64());
5271 	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5272 
5273 	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5274 	    hold_name);
5275 	if (error == 0)
5276 		(void) strcpy(zc->zc_value, snap_name);
5277 	strfree(snap_name);
5278 	strfree(hold_name);
5279 	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5280 	return (error);
5281 }
5282 
5283 /*
5284  * inputs:
5285  * zc_name		name of "to" snapshot
5286  * zc_value		name of "from" snapshot
5287  * zc_cookie		file descriptor to write diff data on
5288  *
5289  * outputs:
5290  * dmu_diff_record_t's to the file descriptor
5291  */
5292 static int
5293 zfs_ioc_diff(zfs_cmd_t *zc)
5294 {
5295 	file_t *fp;
5296 	offset_t off;
5297 	int error;
5298 
5299 	fp = getf(zc->zc_cookie);
5300 	if (fp == NULL)
5301 		return (SET_ERROR(EBADF));
5302 
5303 	off = fp->f_offset;
5304 
5305 	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5306 
5307 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5308 		fp->f_offset = off;
5309 	releasef(zc->zc_cookie);
5310 
5311 	return (error);
5312 }
5313 
5314 /*
5315  * Remove all ACL files in shares dir
5316  */
5317 static int
5318 zfs_smb_acl_purge(znode_t *dzp)
5319 {
5320 	zap_cursor_t	zc;
5321 	zap_attribute_t	zap;
5322 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5323 	int error;
5324 
5325 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5326 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5327 	    zap_cursor_advance(&zc)) {
5328 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5329 		    NULL, 0)) != 0)
5330 			break;
5331 	}
5332 	zap_cursor_fini(&zc);
5333 	return (error);
5334 }
5335 
5336 static int
5337 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5338 {
5339 	vnode_t *vp;
5340 	znode_t *dzp;
5341 	vnode_t *resourcevp = NULL;
5342 	znode_t *sharedir;
5343 	zfsvfs_t *zfsvfs;
5344 	nvlist_t *nvlist;
5345 	char *src, *target;
5346 	vattr_t vattr;
5347 	vsecattr_t vsec;
5348 	int error = 0;
5349 
5350 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5351 	    NO_FOLLOW, NULL, &vp)) != 0)
5352 		return (error);
5353 
5354 	/* Now make sure mntpnt and dataset are ZFS */
5355 
5356 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5357 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5358 	    zc->zc_name) != 0)) {
5359 		VN_RELE(vp);
5360 		return (SET_ERROR(EINVAL));
5361 	}
5362 
5363 	dzp = VTOZ(vp);
5364 	zfsvfs = dzp->z_zfsvfs;
5365 	ZFS_ENTER(zfsvfs);
5366 
5367 	/*
5368 	 * Create share dir if its missing.
5369 	 */
5370 	mutex_enter(&zfsvfs->z_lock);
5371 	if (zfsvfs->z_shares_dir == 0) {
5372 		dmu_tx_t *tx;
5373 
5374 		tx = dmu_tx_create(zfsvfs->z_os);
5375 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5376 		    ZFS_SHARES_DIR);
5377 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5378 		error = dmu_tx_assign(tx, TXG_WAIT);
5379 		if (error != 0) {
5380 			dmu_tx_abort(tx);
5381 		} else {
5382 			error = zfs_create_share_dir(zfsvfs, tx);
5383 			dmu_tx_commit(tx);
5384 		}
5385 		if (error != 0) {
5386 			mutex_exit(&zfsvfs->z_lock);
5387 			VN_RELE(vp);
5388 			ZFS_EXIT(zfsvfs);
5389 			return (error);
5390 		}
5391 	}
5392 	mutex_exit(&zfsvfs->z_lock);
5393 
5394 	ASSERT(zfsvfs->z_shares_dir);
5395 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5396 		VN_RELE(vp);
5397 		ZFS_EXIT(zfsvfs);
5398 		return (error);
5399 	}
5400 
5401 	switch (zc->zc_cookie) {
5402 	case ZFS_SMB_ACL_ADD:
5403 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5404 		vattr.va_type = VREG;
5405 		vattr.va_mode = S_IFREG|0777;
5406 		vattr.va_uid = 0;
5407 		vattr.va_gid = 0;
5408 
5409 		vsec.vsa_mask = VSA_ACE;
5410 		vsec.vsa_aclentp = &full_access;
5411 		vsec.vsa_aclentsz = sizeof (full_access);
5412 		vsec.vsa_aclcnt = 1;
5413 
5414 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5415 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5416 		if (resourcevp)
5417 			VN_RELE(resourcevp);
5418 		break;
5419 
5420 	case ZFS_SMB_ACL_REMOVE:
5421 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5422 		    NULL, 0);
5423 		break;
5424 
5425 	case ZFS_SMB_ACL_RENAME:
5426 		if ((error = get_nvlist(zc->zc_nvlist_src,
5427 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5428 			VN_RELE(vp);
5429 			VN_RELE(ZTOV(sharedir));
5430 			ZFS_EXIT(zfsvfs);
5431 			return (error);
5432 		}
5433 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5434 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5435 		    &target)) {
5436 			VN_RELE(vp);
5437 			VN_RELE(ZTOV(sharedir));
5438 			ZFS_EXIT(zfsvfs);
5439 			nvlist_free(nvlist);
5440 			return (error);
5441 		}
5442 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5443 		    kcred, NULL, 0);
5444 		nvlist_free(nvlist);
5445 		break;
5446 
5447 	case ZFS_SMB_ACL_PURGE:
5448 		error = zfs_smb_acl_purge(sharedir);
5449 		break;
5450 
5451 	default:
5452 		error = SET_ERROR(EINVAL);
5453 		break;
5454 	}
5455 
5456 	VN_RELE(vp);
5457 	VN_RELE(ZTOV(sharedir));
5458 
5459 	ZFS_EXIT(zfsvfs);
5460 
5461 	return (error);
5462 }
5463 
5464 /*
5465  * innvl: {
5466  *     "holds" -> { snapname -> holdname (string), ... }
5467  *     (optional) "cleanup_fd" -> fd (int32)
5468  * }
5469  *
5470  * outnvl: {
5471  *     snapname -> error value (int32)
5472  *     ...
5473  * }
5474  */
5475 /* ARGSUSED */
5476 static int
5477 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5478 {
5479 	nvpair_t *pair;
5480 	nvlist_t *holds;
5481 	int cleanup_fd = -1;
5482 	int error;
5483 	minor_t minor = 0;
5484 
5485 	error = nvlist_lookup_nvlist(args, "holds", &holds);
5486 	if (error != 0)
5487 		return (SET_ERROR(EINVAL));
5488 
5489 	/* make sure the user didn't pass us any invalid (empty) tags */
5490 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5491 	    pair = nvlist_next_nvpair(holds, pair)) {
5492 		char *htag;
5493 
5494 		error = nvpair_value_string(pair, &htag);
5495 		if (error != 0)
5496 			return (SET_ERROR(error));
5497 
5498 		if (strlen(htag) == 0)
5499 			return (SET_ERROR(EINVAL));
5500 	}
5501 
5502 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5503 		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5504 		if (error != 0)
5505 			return (error);
5506 	}
5507 
5508 	error = dsl_dataset_user_hold(holds, minor, errlist);
5509 	if (minor != 0)
5510 		zfs_onexit_fd_rele(cleanup_fd);
5511 	return (error);
5512 }
5513 
5514 /*
5515  * innvl is not used.
5516  *
5517  * outnvl: {
5518  *    holdname -> time added (uint64 seconds since epoch)
5519  *    ...
5520  * }
5521  */
5522 /* ARGSUSED */
5523 static int
5524 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5525 {
5526 	ASSERT3P(args, ==, NULL);
5527 	return (dsl_dataset_get_holds(snapname, outnvl));
5528 }
5529 
5530 /*
5531  * innvl: {
5532  *     snapname -> { holdname, ... }
5533  *     ...
5534  * }
5535  *
5536  * outnvl: {
5537  *     snapname -> error value (int32)
5538  *     ...
5539  * }
5540  */
5541 /* ARGSUSED */
5542 static int
5543 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5544 {
5545 	return (dsl_dataset_user_release(holds, errlist));
5546 }
5547 
5548 /*
5549  * inputs:
5550  * zc_name		name of new filesystem or snapshot
5551  * zc_value		full name of old snapshot
5552  *
5553  * outputs:
5554  * zc_cookie		space in bytes
5555  * zc_objset_type	compressed space in bytes
5556  * zc_perm_action	uncompressed space in bytes
5557  */
5558 static int
5559 zfs_ioc_space_written(zfs_cmd_t *zc)
5560 {
5561 	int error;
5562 	dsl_pool_t *dp;
5563 	dsl_dataset_t *new, *old;
5564 
5565 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5566 	if (error != 0)
5567 		return (error);
5568 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5569 	if (error != 0) {
5570 		dsl_pool_rele(dp, FTAG);
5571 		return (error);
5572 	}
5573 	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5574 	if (error != 0) {
5575 		dsl_dataset_rele(new, FTAG);
5576 		dsl_pool_rele(dp, FTAG);
5577 		return (error);
5578 	}
5579 
5580 	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5581 	    &zc->zc_objset_type, &zc->zc_perm_action);
5582 	dsl_dataset_rele(old, FTAG);
5583 	dsl_dataset_rele(new, FTAG);
5584 	dsl_pool_rele(dp, FTAG);
5585 	return (error);
5586 }
5587 
5588 /*
5589  * innvl: {
5590  *     "firstsnap" -> snapshot name
5591  * }
5592  *
5593  * outnvl: {
5594  *     "used" -> space in bytes
5595  *     "compressed" -> compressed space in bytes
5596  *     "uncompressed" -> uncompressed space in bytes
5597  * }
5598  */
5599 static int
5600 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5601 {
5602 	int error;
5603 	dsl_pool_t *dp;
5604 	dsl_dataset_t *new, *old;
5605 	char *firstsnap;
5606 	uint64_t used, comp, uncomp;
5607 
5608 	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5609 		return (SET_ERROR(EINVAL));
5610 
5611 	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5612 	if (error != 0)
5613 		return (error);
5614 
5615 	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5616 	if (error == 0 && !new->ds_is_snapshot) {
5617 		dsl_dataset_rele(new, FTAG);
5618 		error = SET_ERROR(EINVAL);
5619 	}
5620 	if (error != 0) {
5621 		dsl_pool_rele(dp, FTAG);
5622 		return (error);
5623 	}
5624 	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5625 	if (error == 0 && !old->ds_is_snapshot) {
5626 		dsl_dataset_rele(old, FTAG);
5627 		error = SET_ERROR(EINVAL);
5628 	}
5629 	if (error != 0) {
5630 		dsl_dataset_rele(new, FTAG);
5631 		dsl_pool_rele(dp, FTAG);
5632 		return (error);
5633 	}
5634 
5635 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5636 	dsl_dataset_rele(old, FTAG);
5637 	dsl_dataset_rele(new, FTAG);
5638 	dsl_pool_rele(dp, FTAG);
5639 	fnvlist_add_uint64(outnvl, "used", used);
5640 	fnvlist_add_uint64(outnvl, "compressed", comp);
5641 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5642 	return (error);
5643 }
5644 
5645 /*
5646  * innvl: {
5647  *     "fd" -> file descriptor to write stream to (int32)
5648  *     (optional) "fromsnap" -> full snap name to send an incremental from
5649  *     (optional) "largeblockok" -> (value ignored)
5650  *         indicates that blocks > 128KB are permitted
5651  *     (optional) "embedok" -> (value ignored)
5652  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5653  *     (optional) "compressok" -> (value ignored)
5654  *         presence indicates compressed DRR_WRITE records are permitted
5655  *     (optional) "resume_object" and "resume_offset" -> (uint64)
5656  *         if present, resume send stream from specified object and offset.
5657  * }
5658  *
5659  * outnvl is unused
5660  */
5661 /* ARGSUSED */
5662 static int
5663 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5664 {
5665 	int error;
5666 	offset_t off;
5667 	char *fromname = NULL;
5668 	int fd;
5669 	boolean_t largeblockok;
5670 	boolean_t embedok;
5671 	boolean_t compressok;
5672 	uint64_t resumeobj = 0;
5673 	uint64_t resumeoff = 0;
5674 
5675 	error = nvlist_lookup_int32(innvl, "fd", &fd);
5676 	if (error != 0)
5677 		return (SET_ERROR(EINVAL));
5678 
5679 	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5680 
5681 	largeblockok = nvlist_exists(innvl, "largeblockok");
5682 	embedok = nvlist_exists(innvl, "embedok");
5683 	compressok = nvlist_exists(innvl, "compressok");
5684 
5685 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5686 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5687 
5688 	file_t *fp = getf(fd);
5689 	if (fp == NULL)
5690 		return (SET_ERROR(EBADF));
5691 
5692 	off = fp->f_offset;
5693 	error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
5694 	    fd, resumeobj, resumeoff, fp->f_vnode, &off);
5695 
5696 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5697 		fp->f_offset = off;
5698 	releasef(fd);
5699 	return (error);
5700 }
5701 
5702 /*
5703  * Determine approximately how large a zfs send stream will be -- the number
5704  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5705  *
5706  * innvl: {
5707  *     (optional) "from" -> full snap or bookmark name to send an incremental
5708  *                          from
5709  *     (optional) "largeblockok" -> (value ignored)
5710  *         indicates that blocks > 128KB are permitted
5711  *     (optional) "embedok" -> (value ignored)
5712  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5713  *     (optional) "compressok" -> (value ignored)
5714  *         presence indicates compressed DRR_WRITE records are permitted
5715  * }
5716  *
5717  * outnvl: {
5718  *     "space" -> bytes of space (uint64)
5719  * }
5720  */
5721 static int
5722 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5723 {
5724 	dsl_pool_t *dp;
5725 	dsl_dataset_t *tosnap;
5726 	int error;
5727 	char *fromname;
5728 	boolean_t compressok;
5729 	uint64_t space;
5730 
5731 	error = dsl_pool_hold(snapname, FTAG, &dp);
5732 	if (error != 0)
5733 		return (error);
5734 
5735 	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5736 	if (error != 0) {
5737 		dsl_pool_rele(dp, FTAG);
5738 		return (error);
5739 	}
5740 
5741 	compressok = nvlist_exists(innvl, "compressok");
5742 
5743 	error = nvlist_lookup_string(innvl, "from", &fromname);
5744 	if (error == 0) {
5745 		if (strchr(fromname, '@') != NULL) {
5746 			/*
5747 			 * If from is a snapshot, hold it and use the more
5748 			 * efficient dmu_send_estimate to estimate send space
5749 			 * size using deadlists.
5750 			 */
5751 			dsl_dataset_t *fromsnap;
5752 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5753 			if (error != 0)
5754 				goto out;
5755 			error = dmu_send_estimate(tosnap, fromsnap, compressok,
5756 			    &space);
5757 			dsl_dataset_rele(fromsnap, FTAG);
5758 		} else if (strchr(fromname, '#') != NULL) {
5759 			/*
5760 			 * If from is a bookmark, fetch the creation TXG of the
5761 			 * snapshot it was created from and use that to find
5762 			 * blocks that were born after it.
5763 			 */
5764 			zfs_bookmark_phys_t frombm;
5765 
5766 			error = dsl_bookmark_lookup(dp, fromname, tosnap,
5767 			    &frombm);
5768 			if (error != 0)
5769 				goto out;
5770 			error = dmu_send_estimate_from_txg(tosnap,
5771 			    frombm.zbm_creation_txg, compressok, &space);
5772 		} else {
5773 			/*
5774 			 * from is not properly formatted as a snapshot or
5775 			 * bookmark
5776 			 */
5777 			error = SET_ERROR(EINVAL);
5778 			goto out;
5779 		}
5780 	} else {
5781 		/*
5782 		 * If estimating the size of a full send, use dmu_send_estimate.
5783 		 */
5784 		error = dmu_send_estimate(tosnap, NULL, compressok, &space);
5785 	}
5786 
5787 	fnvlist_add_uint64(outnvl, "space", space);
5788 
5789 out:
5790 	dsl_dataset_rele(tosnap, FTAG);
5791 	dsl_pool_rele(dp, FTAG);
5792 	return (error);
5793 }
5794 
5795 /*
5796  * Sync the currently open TXG to disk for the specified pool.
5797  * This is somewhat similar to 'zfs_sync()'.
5798  * For cases that do not result in error this ioctl will wait for
5799  * the currently open TXG to commit before returning back to the caller.
5800  *
5801  * innvl: {
5802  *  "force" -> when true, force uberblock update even if there is no dirty data.
5803  *             In addition this will cause the vdev configuration to be written
5804  *             out including updating the zpool cache file. (boolean_t)
5805  * }
5806  *
5807  * onvl is unused
5808  */
5809 /* ARGSUSED */
5810 static int
5811 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
5812 {
5813 	int err;
5814 	boolean_t force;
5815 	spa_t *spa;
5816 
5817 	if ((err = spa_open(pool, &spa, FTAG)) != 0)
5818 		return (err);
5819 
5820 	force = fnvlist_lookup_boolean_value(innvl, "force");
5821 	if (force) {
5822 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
5823 		vdev_config_dirty(spa->spa_root_vdev);
5824 		spa_config_exit(spa, SCL_CONFIG, FTAG);
5825 	}
5826 	txg_wait_synced(spa_get_dsl(spa), 0);
5827 
5828 	spa_close(spa, FTAG);
5829 
5830 	return (err);
5831 }
5832 
5833 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5834 
5835 static void
5836 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5837     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5838     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5839 {
5840 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5841 
5842 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5843 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5844 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5845 	ASSERT3P(vec->zvec_func, ==, NULL);
5846 
5847 	vec->zvec_legacy_func = func;
5848 	vec->zvec_secpolicy = secpolicy;
5849 	vec->zvec_namecheck = namecheck;
5850 	vec->zvec_allow_log = log_history;
5851 	vec->zvec_pool_check = pool_check;
5852 }
5853 
5854 /*
5855  * See the block comment at the beginning of this file for details on
5856  * each argument to this function.
5857  */
5858 static void
5859 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5860     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5861     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5862     boolean_t allow_log)
5863 {
5864 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5865 
5866 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5867 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5868 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5869 	ASSERT3P(vec->zvec_func, ==, NULL);
5870 
5871 	/* if we are logging, the name must be valid */
5872 	ASSERT(!allow_log || namecheck != NO_NAME);
5873 
5874 	vec->zvec_name = name;
5875 	vec->zvec_func = func;
5876 	vec->zvec_secpolicy = secpolicy;
5877 	vec->zvec_namecheck = namecheck;
5878 	vec->zvec_pool_check = pool_check;
5879 	vec->zvec_smush_outnvlist = smush_outnvlist;
5880 	vec->zvec_allow_log = allow_log;
5881 }
5882 
5883 static void
5884 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5885     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5886     zfs_ioc_poolcheck_t pool_check)
5887 {
5888 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5889 	    POOL_NAME, log_history, pool_check);
5890 }
5891 
5892 static void
5893 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5894     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5895 {
5896 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5897 	    DATASET_NAME, B_FALSE, pool_check);
5898 }
5899 
5900 static void
5901 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5902 {
5903 	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5904 	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5905 }
5906 
5907 static void
5908 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5909     zfs_secpolicy_func_t *secpolicy)
5910 {
5911 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5912 	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
5913 }
5914 
5915 static void
5916 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5917     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5918 {
5919 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5920 	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5921 }
5922 
5923 static void
5924 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5925 {
5926 	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5927 	    zfs_secpolicy_read);
5928 }
5929 
5930 static void
5931 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5932     zfs_secpolicy_func_t *secpolicy)
5933 {
5934 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
5935 	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5936 }
5937 
5938 static void
5939 zfs_ioctl_init(void)
5940 {
5941 	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
5942 	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
5943 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5944 
5945 	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
5946 	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
5947 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
5948 
5949 	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
5950 	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
5951 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5952 
5953 	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
5954 	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
5955 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5956 
5957 	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
5958 	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
5959 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5960 
5961 	zfs_ioctl_register("create", ZFS_IOC_CREATE,
5962 	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
5963 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5964 
5965 	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
5966 	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
5967 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5968 
5969 	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
5970 	    zfs_ioc_remap, zfs_secpolicy_remap, DATASET_NAME,
5971 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
5972 
5973 	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
5974 	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
5975 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5976 
5977 	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
5978 	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
5979 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5980 	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
5981 	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
5982 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5983 
5984 	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
5985 	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
5986 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5987 
5988 	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
5989 	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
5990 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);
5991 
5992 	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
5993 	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
5994 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
5995 
5996 	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
5997 	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
5998 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);
5999 
6000 	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
6001 	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
6002 	    POOL_NAME,
6003 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6004 
6005 	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
6006 	    zfs_ioc_channel_program, zfs_secpolicy_config,
6007 	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
6008 	    B_TRUE);
6009 
6010 	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
6011 	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
6012 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6013 
6014 	zfs_ioctl_register("zpool_discard_checkpoint",
6015 	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
6016 	    zfs_secpolicy_config, POOL_NAME,
6017 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6018 
6019 	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
6020 	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
6021 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
6022 
6023 	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
6024 	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
6025 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
6026 
6027 	/* IOCTLS that use the legacy function signature */
6028 
6029 	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
6030 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
6031 
6032 	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
6033 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6034 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
6035 	    zfs_ioc_pool_scan);
6036 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
6037 	    zfs_ioc_pool_upgrade);
6038 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
6039 	    zfs_ioc_vdev_add);
6040 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
6041 	    zfs_ioc_vdev_remove);
6042 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
6043 	    zfs_ioc_vdev_set_state);
6044 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
6045 	    zfs_ioc_vdev_attach);
6046 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
6047 	    zfs_ioc_vdev_detach);
6048 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
6049 	    zfs_ioc_vdev_setpath);
6050 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
6051 	    zfs_ioc_vdev_setfru);
6052 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
6053 	    zfs_ioc_pool_set_props);
6054 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
6055 	    zfs_ioc_vdev_split);
6056 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
6057 	    zfs_ioc_pool_reguid);
6058 
6059 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
6060 	    zfs_ioc_pool_configs, zfs_secpolicy_none);
6061 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
6062 	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
6063 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
6064 	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
6065 	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
6066 	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
6067 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
6068 	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
6069 
6070 	/*
6071 	 * pool destroy, and export don't log the history as part of
6072 	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
6073 	 * does the logging of those commands.
6074 	 */
6075 	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
6076 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6077 	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
6078 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
6079 
6080 	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
6081 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6082 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
6083 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
6084 
6085 	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
6086 	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
6087 	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
6088 	    zfs_ioc_dsobj_to_dsname,
6089 	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
6090 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
6091 	    zfs_ioc_pool_get_history,
6092 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
6093 
6094 	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
6095 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
6096 
6097 	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
6098 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
6099 	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
6100 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
6101 
6102 	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
6103 	    zfs_ioc_space_written);
6104 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
6105 	    zfs_ioc_objset_recvd_props);
6106 	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
6107 	    zfs_ioc_next_obj);
6108 	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
6109 	    zfs_ioc_get_fsacl);
6110 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
6111 	    zfs_ioc_objset_stats);
6112 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
6113 	    zfs_ioc_objset_zplprops);
6114 	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
6115 	    zfs_ioc_dataset_list_next);
6116 	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
6117 	    zfs_ioc_snapshot_list_next);
6118 	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
6119 	    zfs_ioc_send_progress);
6120 
6121 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
6122 	    zfs_ioc_diff, zfs_secpolicy_diff);
6123 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
6124 	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
6125 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
6126 	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
6127 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
6128 	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
6129 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
6130 	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
6131 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
6132 	    zfs_ioc_send, zfs_secpolicy_send);
6133 
6134 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
6135 	    zfs_secpolicy_none);
6136 	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
6137 	    zfs_secpolicy_destroy);
6138 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
6139 	    zfs_secpolicy_rename);
6140 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
6141 	    zfs_secpolicy_recv);
6142 	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
6143 	    zfs_secpolicy_promote);
6144 	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
6145 	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
6146 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
6147 	    zfs_secpolicy_set_fsacl);
6148 
6149 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
6150 	    zfs_secpolicy_share, POOL_CHECK_NONE);
6151 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
6152 	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
6153 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
6154 	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
6155 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6156 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
6157 	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
6158 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
6159 }
6160 
6161 int
6162 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
6163     zfs_ioc_poolcheck_t check)
6164 {
6165 	spa_t *spa;
6166 	int error;
6167 
6168 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
6169 
6170 	if (check & POOL_CHECK_NONE)
6171 		return (0);
6172 
6173 	error = spa_open(name, &spa, FTAG);
6174 	if (error == 0) {
6175 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
6176 			error = SET_ERROR(EAGAIN);
6177 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
6178 			error = SET_ERROR(EROFS);
6179 		spa_close(spa, FTAG);
6180 	}
6181 	return (error);
6182 }
6183 
6184 /*
6185  * Find a free minor number.
6186  */
6187 minor_t
6188 zfsdev_minor_alloc(void)
6189 {
6190 	static minor_t last_minor;
6191 	minor_t m;
6192 
6193 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6194 
6195 	for (m = last_minor + 1; m != last_minor; m++) {
6196 		if (m > ZFSDEV_MAX_MINOR)
6197 			m = 1;
6198 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
6199 			last_minor = m;
6200 			return (m);
6201 		}
6202 	}
6203 
6204 	return (0);
6205 }
6206 
6207 static int
6208 zfs_ctldev_init(dev_t *devp)
6209 {
6210 	minor_t minor;
6211 	zfs_soft_state_t *zs;
6212 
6213 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6214 	ASSERT(getminor(*devp) == 0);
6215 
6216 	minor = zfsdev_minor_alloc();
6217 	if (minor == 0)
6218 		return (SET_ERROR(ENXIO));
6219 
6220 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
6221 		return (SET_ERROR(EAGAIN));
6222 
6223 	*devp = makedevice(getemajor(*devp), minor);
6224 
6225 	zs = ddi_get_soft_state(zfsdev_state, minor);
6226 	zs->zss_type = ZSST_CTLDEV;
6227 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
6228 
6229 	return (0);
6230 }
6231 
6232 static void
6233 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
6234 {
6235 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
6236 
6237 	zfs_onexit_destroy(zo);
6238 	ddi_soft_state_free(zfsdev_state, minor);
6239 }
6240 
6241 void *
6242 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
6243 {
6244 	zfs_soft_state_t *zp;
6245 
6246 	zp = ddi_get_soft_state(zfsdev_state, minor);
6247 	if (zp == NULL || zp->zss_type != which)
6248 		return (NULL);
6249 
6250 	return (zp->zss_data);
6251 }
6252 
6253 static int
6254 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
6255 {
6256 	int error = 0;
6257 
6258 	if (getminor(*devp) != 0)
6259 		return (zvol_open(devp, flag, otyp, cr));
6260 
6261 	/* This is the control device. Allocate a new minor if requested. */
6262 	if (flag & FEXCL) {
6263 		mutex_enter(&zfsdev_state_lock);
6264 		error = zfs_ctldev_init(devp);
6265 		mutex_exit(&zfsdev_state_lock);
6266 	}
6267 
6268 	return (error);
6269 }
6270 
6271 static int
6272 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
6273 {
6274 	zfs_onexit_t *zo;
6275 	minor_t minor = getminor(dev);
6276 
6277 	if (minor == 0)
6278 		return (0);
6279 
6280 	mutex_enter(&zfsdev_state_lock);
6281 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
6282 	if (zo == NULL) {
6283 		mutex_exit(&zfsdev_state_lock);
6284 		return (zvol_close(dev, flag, otyp, cr));
6285 	}
6286 	zfs_ctldev_destroy(zo, minor);
6287 	mutex_exit(&zfsdev_state_lock);
6288 
6289 	return (0);
6290 }
6291 
/*
 * ioctl entry point.
 *
 * A non-zero minor that has no control-device soft state is handed to
 * zvol_ioctl().  Otherwise 'cmd' selects an entry of zfs_ioc_vec (EINVAL
 * if it is outside the table) and the request is processed as follows:
 *
 *  1. The zfs_cmd_t is copied in from 'arg' (EFAULT on failure) and, if
 *     zc_nvlist_src_size is non-zero, the input nvlist is unpacked.
 *  2. zc_name is validated according to the vector's namecheck setting
 *     and the pool status check is applied.
 *  3. The vector's security policy is consulted.
 *  4. The handler is called: zvec_func with the input nvlist and a fresh
 *     output nvlist, or else zvec_legacy_func with the zfs_cmd_t.
 *  5. The zfs_cmd_t is copied back out to 'arg'.
 *
 * Returns 0 or an errno value.
 */
6292 static int
6293 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
6294 {
6295 	zfs_cmd_t *zc;
6296 	uint_t vecnum;
6297 	int error, rc, len;
6298 	minor_t minor = getminor(dev);
6299 	const zfs_ioc_vec_t *vec;
6300 	char *saved_poolname = NULL;
6301 	nvlist_t *innvl = NULL;
6302
	/* Minors without control-device soft state go to zvol_ioctl(). */
6303 	if (minor != 0 &&
6304 	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
6305 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
6306
6307 	vecnum = cmd - ZFS_IOC_FIRST;
6308 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
6309
6310 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
6311 		return (SET_ERROR(EINVAL));
6312 	vec = &zfs_ioc_vec[vecnum];
6313
6314 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
6315
6316 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
6317 	if (error != 0) {
6318 		error = SET_ERROR(EFAULT);
6319 		goto out;
6320 	}
6321
6322 	zc->zc_iflags = flag & FKIOCTL;
6323 	if (zc->zc_nvlist_src_size != 0) {
6324 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6325 		    zc->zc_iflags, &innvl);
6326 		if (error != 0)
6327 			goto out;
6328 	}
6329
6330 	/*
6331 	 * Ensure that all pool/dataset names are valid before we pass down to
6332 	 * the lower layers.
6333 	 */
6334 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6335 	switch (vec->zvec_namecheck) {
6336 	case POOL_NAME:
6337 		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6338 			error = SET_ERROR(EINVAL);
6339 		else
6340 			error = pool_status_check(zc->zc_name,
6341 			    vec->zvec_namecheck, vec->zvec_pool_check);
6342 		break;
6343
6344 	case DATASET_NAME:
6345 		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6346 			error = SET_ERROR(EINVAL);
6347 		else
6348 			error = pool_status_check(zc->zc_name,
6349 			    vec->zvec_namecheck, vec->zvec_pool_check);
6350 		break;
6351
6352 	case NO_NAME:
6353 		break;
6354 	}
6355
6356
6357 	if (error == 0)
6358 		error = vec->zvec_secpolicy(zc, innvl, cr);
6359
6360 	if (error != 0)
6361 		goto out;
6362
	/*
	 * Keep a private copy of the part of zc_name that precedes the first
	 * '/', '@' or '#'.  It is either stored with tsd_set() or freed at
	 * the end of this function.
	 */
6363 	/* legacy ioctls can modify zc_name */
6364 	len = strcspn(zc->zc_name, "/@#") + 1;
6365 	saved_poolname = kmem_alloc(len, KM_SLEEP);
6366 	(void) strlcpy(saved_poolname, zc->zc_name, len);
6367
6368 	if (vec->zvec_func != NULL) {
6369 		nvlist_t *outnvl;
6370 		int puterror = 0;
6371 		spa_t *spa;
6372 		nvlist_t *lognv = NULL;
6373
6374 		ASSERT(vec->zvec_legacy_func == NULL);
6375
6376 		/*
6377 		 * Add the innvl to the lognv before calling the func,
6378 		 * in case the func changes the innvl.
6379 		 */
6380 		if (vec->zvec_allow_log) {
6381 			lognv = fnvlist_alloc();
6382 			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6383 			    vec->zvec_name);
6384 			if (!nvlist_empty(innvl)) {
6385 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6386 				    innvl);
6387 			}
6388 		}
6389
6390 		outnvl = fnvlist_alloc();
6391 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6392
6393 		/*
6394 		 * Some commands can partially execute, modify state, and still
6395 		 * return an error.  In these cases, attempt to record what
6396 		 * was modified.
6397 		 */
6398 		if ((error == 0 ||
6399 		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
6400 		    vec->zvec_allow_log &&
6401 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
6402 			if (!nvlist_empty(outnvl)) {
6403 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6404 				    outnvl);
6405 			}
6406 			if (error != 0) {
6407 				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
6408 				    error);
6409 			}
6410 			(void) spa_history_log_nvl(spa, lognv);
6411 			spa_close(spa, FTAG);
6412 		}
6413 		fnvlist_free(lognv);
6414
		/*
		 * The output nvlist is handed to put_nvlist() only if the
		 * smush step was either not requested or succeeded.  A
		 * put_nvlist() failure replaces the handler's own result.
		 */
6415 		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6416 			int smusherror = 0;
6417 			if (vec->zvec_smush_outnvlist) {
6418 				smusherror = nvlist_smush(outnvl,
6419 				    zc->zc_nvlist_dst_size);
6420 			}
6421 			if (smusherror == 0)
6422 				puterror = put_nvlist(zc, outnvl);
6423 		}
6424
6425 		if (puterror != 0)
6426 			error = puterror;
6427
6428 		nvlist_free(outnvl);
6429 	} else {
6430 		error = vec->zvec_legacy_func(zc);
6431 	}
6432
	/*
	 * Every path from the copyin onwards ends up here, so zc is copied
	 * back out on failures as well.  A failed copyout is reported only
	 * when no earlier error has been recorded.
	 */
6433 out:
6434 	nvlist_free(innvl);
6435 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
6436 	if (error == 0 && rc != 0)
6437 		error = SET_ERROR(EFAULT);
	/*
	 * On success of a loggable ioctl the saved name replaces whatever
	 * string was previously stored under zfs_allow_log_key; in all other
	 * cases it is freed here.
	 */
6438 	if (error == 0 && vec->zvec_allow_log) {
6439 		char *s = tsd_get(zfs_allow_log_key);
6440 		if (s != NULL)
6441 			strfree(s);
6442 		(void) tsd_set(zfs_allow_log_key, saved_poolname);
6443 	} else {
6444 		if (saved_poolname != NULL)
6445 			strfree(saved_poolname);
6446 	}
6447
6448 	kmem_free(zc, sizeof (zfs_cmd_t));
6449 	return (error);
6450 }
6451 
6452 static int
6453 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6454 {
6455 	if (cmd != DDI_ATTACH)
6456 		return (DDI_FAILURE);
6457 
6458 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6459 	    DDI_PSEUDO, 0) == DDI_FAILURE)
6460 		return (DDI_FAILURE);
6461 
6462 	zfs_dip = dip;
6463 
6464 	ddi_report_dev(dip);
6465 
6466 	return (DDI_SUCCESS);
6467 }
6468 
6469 static int
6470 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6471 {
6472 	if (spa_busy() || zfs_busy() || zvol_busy())
6473 		return (DDI_FAILURE);
6474 
6475 	if (cmd != DDI_DETACH)
6476 		return (DDI_FAILURE);
6477 
6478 	zfs_dip = NULL;
6479 
6480 	ddi_prop_remove_all(dip);
6481 	ddi_remove_minor_node(dip, NULL);
6482 
6483 	return (DDI_SUCCESS);
6484 }
6485 
6486 /*ARGSUSED*/
6487 static int
6488 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6489 {
6490 	switch (infocmd) {
6491 	case DDI_INFO_DEVT2DEVINFO:
6492 		*result = zfs_dip;
6493 		return (DDI_SUCCESS);
6494 
6495 	case DDI_INFO_DEVT2INSTANCE:
6496 		*result = (void *)0;
6497 		return (DDI_SUCCESS);
6498 	}
6499 
6500 	return (DDI_FAILURE);
6501 }
6502 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * Accordingly, open, close, and ioctl are routed to the zfsdev_* handlers
 * in this file, while strategy, dump, read, and write point directly at
 * the zvol_* routines.  Entry points the driver does not provide are
 * filled with nodev (or nochpoll for poll).
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
6532 
/*
 * Device operations vector for the zfs driver.
 *
 * Supplies zfs_info, zfs_attach, and zfs_detach as the info/attach/detach
 * handlers and points at zfs_cb_ops for the remaining driver entry points.
 * Identify and probe are nulldev, reset is nodev, and there are no bus or
 * power operations.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
6547 
/*
 * Driver linkage element: ties zfs_dev_ops to mod_driverops under the
 * description string "ZFS storage pool".  Referenced from modlinkage.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
6553 
/*
 * Module linkage passed to mod_install(), mod_remove(), and mod_info() by
 * _init(), _fini(), and _info() respectively.  It lists two linkage
 * elements, zfs_modlfs and zfs_modldrv, followed by a terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
6560 
/*
 * Destructor registered with tsd_create() for zfs_allow_log_key.
 *
 * The value stored under that key is a pool-name string; free it with
 * strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
	strfree((char *)arg);
}
6567 
6568 int
6569 _init(void)
6570 {
6571 	int error;
6572 
6573 	spa_init(FREAD | FWRITE);
6574 	zfs_init();
6575 	zvol_init();
6576 	zfs_ioctl_init();
6577 
6578 	if ((error = mod_install(&modlinkage)) != 0) {
6579 		zvol_fini();
6580 		zfs_fini();
6581 		spa_fini();
6582 		return (error);
6583 	}
6584 
6585 	tsd_create(&zfs_fsyncer_key, NULL);
6586 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6587 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6588 
6589 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
6590 	ASSERT(error == 0);
6591 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6592 
6593 	return (0);
6594 }
6595 
6596 int
6597 _fini(void)
6598 {
6599 	int error;
6600 
6601 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
6602 		return (SET_ERROR(EBUSY));
6603 
6604 	if ((error = mod_remove(&modlinkage)) != 0)
6605 		return (error);
6606 
6607 	zvol_fini();
6608 	zfs_fini();
6609 	spa_fini();
6610 	if (zfs_nfsshare_inited)
6611 		(void) ddi_modclose(nfs_mod);
6612 	if (zfs_smbshare_inited)
6613 		(void) ddi_modclose(smbsrv_mod);
6614 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
6615 		(void) ddi_modclose(sharefs_mod);
6616 
6617 	tsd_destroy(&zfs_fsyncer_key);
6618 	ldi_ident_release(zfs_li);
6619 	zfs_li = NULL;
6620 	mutex_destroy(&zfs_share_lock);
6621 
6622 	return (error);
6623 }
6624 
6625 int
6626 _info(struct modinfo *modinfop)
6627 {
6628 	return (mod_info(&modlinkage, modinfop));
6629 }
6630