xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision 447b1e1fca22e4de5e04623965fbb1460857930c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011-2012 Pawel Jakub Dawidek. All rights reserved.
25  * Portions Copyright 2011 Martin Matuska
26  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
27  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
28  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
29  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
30  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
31  * Copyright (c) 2013 Steven Hartland. All rights reserved.
32  * Copyright (c) 2014 Integros [integros.com]
33  */
34 
35 /*
36  * ZFS ioctls.
37  *
38  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
39  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
40  *
41  * There are two ways that we handle ioctls: the legacy way where almost
42  * all of the logic is in the ioctl callback, and the new way where most
43  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
44  *
45  * Non-legacy ioctls should be registered by calling
46  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
47  * from userland by lzc_ioctl().
48  *
49  * The registration arguments are as follows:
50  *
51  * const char *name
52  *   The name of the ioctl.  This is used for history logging.  If the
53  *   ioctl returns successfully (the callback returns 0), and allow_log
54  *   is true, then a history log entry will be recorded with the input &
55  *   output nvlists.  The log entry can be printed with "zpool history -i".
56  *
57  * zfs_ioc_t ioc
58  *   The ioctl request number, which userland will pass to ioctl(2).
59  *   The ioctl numbers can change from release to release, because
60  *   the caller (libzfs) must be matched to the kernel.
61  *
62  * zfs_secpolicy_func_t *secpolicy
63  *   This function will be called before the zfs_ioc_func_t, to
64  *   determine if this operation is permitted.  It should return EPERM
65  *   on failure, and 0 on success.  Checks include determining if the
66  *   dataset is visible in this zone, and if the user has either all
67  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
68  *   to do this operation on this dataset with "zfs allow".
69  *
70  * zfs_ioc_namecheck_t namecheck
71  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
72  *   name, a dataset name, or nothing.  If the name is not well-formed,
73  *   the ioctl will fail and the callback will not be called.
74  *   Therefore, the callback can assume that the name is well-formed
75  *   (e.g. is null-terminated, doesn't have more than one '@' character,
76  *   doesn't have invalid characters).
77  *
78  * zfs_ioc_poolcheck_t pool_check
79  *   This specifies requirements on the pool state.  If the pool does
80  *   not meet them (is suspended or is readonly), the ioctl will fail
81  *   and the callback will not be called.  If any checks are specified
82  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
83  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
84  *   POOL_CHECK_READONLY).
85  *
86  * boolean_t smush_outnvlist
87  *   If smush_outnvlist is true, then the output is presumed to be a
88  *   list of errors, and it will be "smushed" down to fit into the
89  *   caller's buffer, by removing some entries and replacing them with a
90  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
91  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
92  *   outnvlist does not fit into the userland-provided buffer, then the
93  *   ioctl will fail with ENOMEM.
94  *
95  * zfs_ioc_func_t *func
96  *   The callback function that will perform the operation.
97  *
98  *   The callback should return 0 on success, or an error number on
99  *   failure.  If the function fails, the userland ioctl will return -1,
100  *   and errno will be set to the callback's return value.  The callback
101  *   will be called with the following arguments:
102  *
103  *   const char *name
104  *     The name of the pool or dataset to operate on, from
105  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
106  *     expected type (pool, dataset, or none).
107  *
108  *   nvlist_t *innvl
109  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
110  *     NULL if no input nvlist was provided.  Changes to this nvlist are
111  *     ignored.  If the input nvlist could not be deserialized, the
112  *     ioctl will fail and the callback will not be called.
113  *
114  *   nvlist_t *outnvl
115  *     The output nvlist, initially empty.  The callback can fill it in,
116  *     and it will be returned to userland by serializing it into
117  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
118  *     fails (e.g. because the caller didn't supply a large enough
119  *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
121  *
122  *     There are two typical uses of the output nvlist:
123  *       - To return state, e.g. property values.  In this case,
124  *         smush_outnvlist should be false.  If the buffer was not large
125  *         enough, the caller will reallocate a larger buffer and try
126  *         the ioctl again.
127  *
128  *       - To return multiple errors from an ioctl which makes on-disk
129  *         changes.  In this case, smush_outnvlist should be true.
130  *         Ioctls which make on-disk modifications should generally not
131  *         use the outnvl if they succeed, because the caller can not
132  *         distinguish between the operation failing, and
133  *         deserialization failing.
134  */
135 
136 #include <sys/types.h>
137 #include <sys/param.h>
138 #include <sys/errno.h>
139 #include <sys/uio.h>
140 #include <sys/buf.h>
141 #include <sys/modctl.h>
142 #include <sys/open.h>
143 #include <sys/file.h>
144 #include <sys/kmem.h>
145 #include <sys/conf.h>
146 #include <sys/cmn_err.h>
147 #include <sys/stat.h>
148 #include <sys/zfs_ioctl.h>
149 #include <sys/zfs_vfsops.h>
150 #include <sys/zfs_znode.h>
151 #include <sys/zap.h>
152 #include <sys/spa.h>
153 #include <sys/spa_impl.h>
154 #include <sys/vdev.h>
155 #include <sys/priv_impl.h>
156 #include <sys/dmu.h>
157 #include <sys/dsl_dir.h>
158 #include <sys/dsl_dataset.h>
159 #include <sys/dsl_prop.h>
160 #include <sys/dsl_deleg.h>
161 #include <sys/dmu_objset.h>
162 #include <sys/dmu_impl.h>
163 #include <sys/dmu_tx.h>
164 #include <sys/ddi.h>
165 #include <sys/sunddi.h>
166 #include <sys/sunldi.h>
167 #include <sys/policy.h>
168 #include <sys/zone.h>
169 #include <sys/nvpair.h>
170 #include <sys/pathname.h>
171 #include <sys/mount.h>
172 #include <sys/sdt.h>
173 #include <sys/fs/zfs.h>
174 #include <sys/zfs_ctldir.h>
175 #include <sys/zfs_dir.h>
176 #include <sys/zfs_onexit.h>
177 #include <sys/zvol.h>
178 #include <sys/dsl_scan.h>
179 #include <sharefs/share.h>
180 #include <sys/dmu_objset.h>
181 #include <sys/dmu_send.h>
182 #include <sys/dsl_destroy.h>
183 #include <sys/dsl_bookmark.h>
184 #include <sys/dsl_userhold.h>
185 #include <sys/zfeature.h>
186 #include <sys/zio_checksum.h>
187 
188 #include "zfs_namecheck.h"
189 #include "zfs_prop.h"
190 #include "zfs_deleg.h"
191 #include "zfs_comutil.h"
192 
/* Filesystem module linkage structure; declared here, defined elsewhere. */
extern struct modlfs zfs_modlfs;

/* ZFS-wide init/fini entry points; declared here, defined elsewhere. */
extern void zfs_init(void);
extern void zfs_fini(void);

/* LDI identifier; starts out NULL (assignment is not visible in this section) */
ldi_ident_t zfs_li = NULL;
/* NOTE(review): presumably the devinfo node for /dev/zfs -- confirm at attach */
dev_info_t *zfs_dip;

/* NOTE(review): the *_key names suggest thread-specific-data keys -- confirm */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
/* TSD key read via tsd_get() in zfs_secpolicy_log_history() */
static uint_t zfs_allow_log_key;
204 
/*
 * Callback signatures used by the ioctl vector:
 *  - legacy handler: receives only the zfs_cmd_t
 *  - new-style handler: (name, innvl, outnvl), as described in the
 *    file header comment
 *  - security policy: (zc, innvl, cr); returns 0 if the operation is
 *    permitted, otherwise an errno
 */
typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
208 
/*
 * What an ioctl expects to find in zfs_cmd_t:zc_name: nothing, a pool
 * name, or a dataset name (see the 'namecheck' registration argument
 * in the file header comment).
 */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;
214 
/*
 * Requirements on the pool state for an ioctl (see the 'pool_check'
 * registration argument in the file header comment).  The values are
 * distinct bits so that checks can be or-ed together.  Note that
 * POOL_CHECK_NONE is itself a non-zero bit, not 0.
 */
typedef enum {
	POOL_CHECK_NONE		= 1 << 0,
	POOL_CHECK_SUSPENDED	= 1 << 1,
	POOL_CHECK_READONLY	= 1 << 2,
} zfs_ioc_poolcheck_t;
220 
/*
 * One entry of the ioctl dispatch vector.  The fields mirror the
 * registration arguments documented in the file header comment.
 * NOTE(review): presumably only one of zvec_legacy_func / zvec_func is
 * set per entry -- confirm against zfsdev_ioctl().
 */
typedef struct zfs_ioc_vec {
	/* legacy-style callback (takes only the zfs_cmd_t) */
	zfs_ioc_legacy_func_t	*zvec_legacy_func;
	/* new-style callback (name, innvl, outnvl) */
	zfs_ioc_func_t		*zvec_func;
	/* permission check run before the callback */
	zfs_secpolicy_func_t	*zvec_secpolicy;
	/* what zc_name is expected to contain */
	zfs_ioc_namecheck_t	zvec_namecheck;
	/* whether a successful call is recorded in the pool history */
	boolean_t		zvec_allow_log;
	/* pool-state requirements (POOL_CHECK_* bits) */
	zfs_ioc_poolcheck_t	zvec_pool_check;
	/* shrink an oversized error nvlist instead of failing with ENOMEM */
	boolean_t		zvec_smush_outnvlist;
	/* ioctl name, used for history logging */
	const char		*zvec_name;
} zfs_ioc_vec_t;
231 
/*
 * Delegated-permission names for the user/group space properties.
 * This array is indexed by zfs_userquota_prop_t, so the entry order
 * must match the order of that enum.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
239 
/*
 * Forward declarations for functions whose definitions are not in
 * this part of the file.  Note that zfs_set_prop_nvlist() is the only
 * one with external linkage.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static int zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature);
251 
252 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
253 void
254 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
255 {
256 	const char *newfile;
257 	char buf[512];
258 	va_list adx;
259 
260 	/*
261 	 * Get rid of annoying "../common/" prefix to filename.
262 	 */
263 	newfile = strrchr(file, '/');
264 	if (newfile != NULL) {
265 		newfile = newfile + 1; /* Get rid of leading / */
266 	} else {
267 		newfile = file;
268 	}
269 
270 	va_start(adx, fmt);
271 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
272 	va_end(adx);
273 
274 	/*
275 	 * To get this data, use the zfs-dprintf probe as so:
276 	 * dtrace -q -n 'zfs-dprintf \
277 	 *	/stringof(arg0) == "dbuf.c"/ \
278 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
279 	 * arg0 = file name
280 	 * arg1 = function name
281 	 * arg2 = line number
282 	 * arg3 = message
283 	 */
284 	DTRACE_PROBE4(zfs__dprintf,
285 	    char *, newfile, char *, func, int, line, char *, buf);
286 }
287 
288 static void
289 history_str_free(char *buf)
290 {
291 	kmem_free(buf, HIS_MAX_RECORD_LEN);
292 }
293 
294 static char *
295 history_str_get(zfs_cmd_t *zc)
296 {
297 	char *buf;
298 
299 	if (zc->zc_history == NULL)
300 		return (NULL);
301 
302 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
303 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
304 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
305 		history_str_free(buf);
306 		return (NULL);
307 	}
308 
309 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
310 
311 	return (buf);
312 }
313 
314 /*
315  * Check to see if the named dataset is currently defined as bootable
316  */
317 static boolean_t
318 zfs_is_bootfs(const char *name)
319 {
320 	objset_t *os;
321 
322 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
323 		boolean_t ret;
324 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
325 		dmu_objset_rele(os, FTAG);
326 		return (ret);
327 	}
328 	return (B_FALSE);
329 }
330 
331 /*
332  * Return non-zero if the spa version is less than requested version.
333  */
334 static int
335 zfs_earlier_version(const char *name, int version)
336 {
337 	spa_t *spa;
338 
339 	if (spa_open(name, &spa, FTAG) == 0) {
340 		if (spa_version(spa) < version) {
341 			spa_close(spa, FTAG);
342 			return (1);
343 		}
344 		spa_close(spa, FTAG);
345 	}
346 	return (0);
347 }
348 
349 /*
350  * Return TRUE if the ZPL version is less than requested version.
351  */
352 static boolean_t
353 zpl_earlier_version(const char *name, int version)
354 {
355 	objset_t *os;
356 	boolean_t rc = B_TRUE;
357 
358 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
359 		uint64_t zplversion;
360 
361 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
362 			dmu_objset_rele(os, FTAG);
363 			return (B_TRUE);
364 		}
365 		/* XXX reading from non-owned objset */
366 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
367 			rc = zplversion < version;
368 		dmu_objset_rele(os, FTAG);
369 	}
370 	return (rc);
371 }
372 
373 static void
374 zfs_log_history(zfs_cmd_t *zc)
375 {
376 	spa_t *spa;
377 	char *buf;
378 
379 	if ((buf = history_str_get(zc)) == NULL)
380 		return;
381 
382 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
383 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
384 			(void) spa_history_log(spa, buf);
385 		spa_close(spa, FTAG);
386 	}
387 	history_str_free(buf);
388 }
389 
390 /*
391  * Policy for top-level read operations (list pools).  Requires no privileges,
392  * and can be used in the local zone, as there is no associated dataset.
393  */
394 /* ARGSUSED */
395 static int
396 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
397 {
398 	return (0);
399 }
400 
401 /*
402  * Policy for dataset read operations (list children, get statistics).  Requires
403  * no privileges, but must be visible in the local zone.
404  */
405 /* ARGSUSED */
406 static int
407 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
408 {
409 	if (INGLOBALZONE(curproc) ||
410 	    zone_dataset_visible(zc->zc_name, NULL))
411 		return (0);
412 
413 	return (SET_ERROR(ENOENT));
414 }
415 
416 static int
417 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
418 {
419 	int writable = 1;
420 
421 	/*
422 	 * The dataset must be visible by this zone -- check this first
423 	 * so they don't see EPERM on something they shouldn't know about.
424 	 */
425 	if (!INGLOBALZONE(curproc) &&
426 	    !zone_dataset_visible(dataset, &writable))
427 		return (SET_ERROR(ENOENT));
428 
429 	if (INGLOBALZONE(curproc)) {
430 		/*
431 		 * If the fs is zoned, only root can access it from the
432 		 * global zone.
433 		 */
434 		if (secpolicy_zfs(cr) && zoned)
435 			return (SET_ERROR(EPERM));
436 	} else {
437 		/*
438 		 * If we are in a local zone, the 'zoned' property must be set.
439 		 */
440 		if (!zoned)
441 			return (SET_ERROR(EPERM));
442 
443 		/* must be writable by this zone */
444 		if (!writable)
445 			return (SET_ERROR(EPERM));
446 	}
447 	return (0);
448 }
449 
450 static int
451 zfs_dozonecheck(const char *dataset, cred_t *cr)
452 {
453 	uint64_t zoned;
454 
455 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
456 		return (SET_ERROR(ENOENT));
457 
458 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
459 }
460 
461 static int
462 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
463 {
464 	uint64_t zoned;
465 
466 	if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
467 		return (SET_ERROR(ENOENT));
468 
469 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
470 }
471 
472 static int
473 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
474     const char *perm, cred_t *cr)
475 {
476 	int error;
477 
478 	error = zfs_dozonecheck_ds(name, ds, cr);
479 	if (error == 0) {
480 		error = secpolicy_zfs(cr);
481 		if (error != 0)
482 			error = dsl_deleg_access_impl(ds, perm, cr);
483 	}
484 	return (error);
485 }
486 
487 static int
488 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
489 {
490 	int error;
491 	dsl_dataset_t *ds;
492 	dsl_pool_t *dp;
493 
494 	/*
495 	 * First do a quick check for root in the global zone, which
496 	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
497 	 * will get to handle nonexistent datasets.
498 	 */
499 	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
500 		return (0);
501 
502 	error = dsl_pool_hold(name, FTAG, &dp);
503 	if (error != 0)
504 		return (error);
505 
506 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
507 	if (error != 0) {
508 		dsl_pool_rele(dp, FTAG);
509 		return (error);
510 	}
511 
512 	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
513 
514 	dsl_dataset_rele(ds, FTAG);
515 	dsl_pool_rele(dp, FTAG);
516 	return (error);
517 }
518 
519 /*
520  * Policy for setting the security label property.
521  *
522  * Returns 0 for success, non-zero for access and other errors.
523  */
524 static int
525 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
526 {
527 	char		ds_hexsl[MAXNAMELEN];
528 	bslabel_t	ds_sl, new_sl;
529 	boolean_t	new_default = FALSE;
530 	uint64_t	zoned;
531 	int		needed_priv = -1;
532 	int		error;
533 
534 	/* First get the existing dataset label. */
535 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
536 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
537 	if (error != 0)
538 		return (SET_ERROR(EPERM));
539 
540 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
541 		new_default = TRUE;
542 
543 	/* The label must be translatable */
544 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
545 		return (SET_ERROR(EINVAL));
546 
547 	/*
548 	 * In a non-global zone, disallow attempts to set a label that
549 	 * doesn't match that of the zone; otherwise no other checks
550 	 * are needed.
551 	 */
552 	if (!INGLOBALZONE(curproc)) {
553 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
554 			return (SET_ERROR(EPERM));
555 		return (0);
556 	}
557 
558 	/*
559 	 * For global-zone datasets (i.e., those whose zoned property is
560 	 * "off", verify that the specified new label is valid for the
561 	 * global zone.
562 	 */
563 	if (dsl_prop_get_integer(name,
564 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
565 		return (SET_ERROR(EPERM));
566 	if (!zoned) {
567 		if (zfs_check_global_label(name, strval) != 0)
568 			return (SET_ERROR(EPERM));
569 	}
570 
571 	/*
572 	 * If the existing dataset label is nondefault, check if the
573 	 * dataset is mounted (label cannot be changed while mounted).
574 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
575 	 * mounted (or isn't a dataset, doesn't exist, ...).
576 	 */
577 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
578 		objset_t *os;
579 		static char *setsl_tag = "setsl_tag";
580 
581 		/*
582 		 * Try to own the dataset; abort if there is any error,
583 		 * (e.g., already mounted, in use, or other error).
584 		 */
585 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
586 		    setsl_tag, &os);
587 		if (error != 0)
588 			return (SET_ERROR(EPERM));
589 
590 		dmu_objset_disown(os, setsl_tag);
591 
592 		if (new_default) {
593 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
594 			goto out_check;
595 		}
596 
597 		if (hexstr_to_label(strval, &new_sl) != 0)
598 			return (SET_ERROR(EPERM));
599 
600 		if (blstrictdom(&ds_sl, &new_sl))
601 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
602 		else if (blstrictdom(&new_sl, &ds_sl))
603 			needed_priv = PRIV_FILE_UPGRADE_SL;
604 	} else {
605 		/* dataset currently has a default label */
606 		if (!new_default)
607 			needed_priv = PRIV_FILE_UPGRADE_SL;
608 	}
609 
610 out_check:
611 	if (needed_priv != -1)
612 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
613 	return (0);
614 }
615 
616 static int
617 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
618     cred_t *cr)
619 {
620 	char *strval;
621 
622 	/*
623 	 * Check permissions for special properties.
624 	 */
625 	switch (prop) {
626 	case ZFS_PROP_ZONED:
627 		/*
628 		 * Disallow setting of 'zoned' from within a local zone.
629 		 */
630 		if (!INGLOBALZONE(curproc))
631 			return (SET_ERROR(EPERM));
632 		break;
633 
634 	case ZFS_PROP_QUOTA:
635 	case ZFS_PROP_FILESYSTEM_LIMIT:
636 	case ZFS_PROP_SNAPSHOT_LIMIT:
637 		if (!INGLOBALZONE(curproc)) {
638 			uint64_t zoned;
639 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
640 			/*
641 			 * Unprivileged users are allowed to modify the
642 			 * limit on things *under* (ie. contained by)
643 			 * the thing they own.
644 			 */
645 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
646 			    setpoint))
647 				return (SET_ERROR(EPERM));
648 			if (!zoned || strlen(dsname) <= strlen(setpoint))
649 				return (SET_ERROR(EPERM));
650 		}
651 		break;
652 
653 	case ZFS_PROP_MLSLABEL:
654 		if (!is_system_labeled())
655 			return (SET_ERROR(EPERM));
656 
657 		if (nvpair_value_string(propval, &strval) == 0) {
658 			int err;
659 
660 			err = zfs_set_slabel_policy(dsname, strval, CRED());
661 			if (err != 0)
662 				return (err);
663 		}
664 		break;
665 	}
666 
667 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
668 }
669 
670 /* ARGSUSED */
671 static int
672 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
673 {
674 	int error;
675 
676 	error = zfs_dozonecheck(zc->zc_name, cr);
677 	if (error != 0)
678 		return (error);
679 
680 	/*
681 	 * permission to set permissions will be evaluated later in
682 	 * dsl_deleg_can_allow()
683 	 */
684 	return (0);
685 }
686 
687 /* ARGSUSED */
688 static int
689 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
690 {
691 	return (zfs_secpolicy_write_perms(zc->zc_name,
692 	    ZFS_DELEG_PERM_ROLLBACK, cr));
693 }
694 
695 /* ARGSUSED */
696 static int
697 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
698 {
699 	dsl_pool_t *dp;
700 	dsl_dataset_t *ds;
701 	char *cp;
702 	int error;
703 
704 	/*
705 	 * Generate the current snapshot name from the given objsetid, then
706 	 * use that name for the secpolicy/zone checks.
707 	 */
708 	cp = strchr(zc->zc_name, '@');
709 	if (cp == NULL)
710 		return (SET_ERROR(EINVAL));
711 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
712 	if (error != 0)
713 		return (error);
714 
715 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
716 	if (error != 0) {
717 		dsl_pool_rele(dp, FTAG);
718 		return (error);
719 	}
720 
721 	dsl_dataset_name(ds, zc->zc_name);
722 
723 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
724 	    ZFS_DELEG_PERM_SEND, cr);
725 	dsl_dataset_rele(ds, FTAG);
726 	dsl_pool_rele(dp, FTAG);
727 
728 	return (error);
729 }
730 
731 /* ARGSUSED */
732 static int
733 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
734 {
735 	return (zfs_secpolicy_write_perms(zc->zc_name,
736 	    ZFS_DELEG_PERM_SEND, cr));
737 }
738 
739 /* ARGSUSED */
740 static int
741 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
742 {
743 	vnode_t *vp;
744 	int error;
745 
746 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
747 	    NO_FOLLOW, NULL, &vp)) != 0)
748 		return (error);
749 
750 	/* Now make sure mntpnt and dataset are ZFS */
751 
752 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
753 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
754 	    zc->zc_name) != 0)) {
755 		VN_RELE(vp);
756 		return (SET_ERROR(EPERM));
757 	}
758 
759 	VN_RELE(vp);
760 	return (dsl_deleg_access(zc->zc_name,
761 	    ZFS_DELEG_PERM_SHARE, cr));
762 }
763 
764 int
765 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
766 {
767 	if (!INGLOBALZONE(curproc))
768 		return (SET_ERROR(EPERM));
769 
770 	if (secpolicy_nfs(cr) == 0) {
771 		return (0);
772 	} else {
773 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
774 	}
775 }
776 
777 int
778 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
779 {
780 	if (!INGLOBALZONE(curproc))
781 		return (SET_ERROR(EPERM));
782 
783 	if (secpolicy_smb(cr) == 0) {
784 		return (0);
785 	} else {
786 		return (zfs_secpolicy_deleg_share(zc, innvl, cr));
787 	}
788 }
789 
/*
 * Compute the parent of a dataset name.
 *
 * datasetname - the full name, e.g. "pool/fs@snap" or "pool/fs/child"
 * parent      - output buffer for the parent name
 * parentsize  - size of 'parent' in bytes
 *
 * The parent is everything before the last '@' if there is one,
 * otherwise everything before the last '/'.  Returns 0 on success, or
 * ENOENT if the name contains neither separator (e.g. a bare pool
 * name); in that case 'parent' holds a copy of the name.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	/*
	 * strlcpy() always NUL-terminates, so the searches below cannot
	 * run off the end of 'parent' when datasetname is at least
	 * parentsize bytes long (strncpy() gave no such guarantee).
	 */
	(void) strlcpy(parent, datasetname, parentsize);

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (SET_ERROR(ENOENT));

	cp[0] = '\0';
	return (0);
}
811 
812 int
813 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
814 {
815 	int error;
816 
817 	if ((error = zfs_secpolicy_write_perms(name,
818 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
819 		return (error);
820 
821 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
822 }
823 
824 /* ARGSUSED */
825 static int
826 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
827 {
828 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
829 }
830 
831 /*
832  * Destroying snapshots with delegated permissions requires
833  * descendant mount and destroy permissions.
834  */
835 /* ARGSUSED */
836 static int
837 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
838 {
839 	nvlist_t *snaps;
840 	nvpair_t *pair, *nextpair;
841 	int error = 0;
842 
843 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
844 		return (SET_ERROR(EINVAL));
845 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
846 	    pair = nextpair) {
847 		nextpair = nvlist_next_nvpair(snaps, pair);
848 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
849 		if (error == ENOENT) {
850 			/*
851 			 * Ignore any snapshots that don't exist (we consider
852 			 * them "already destroyed").  Remove the name from the
853 			 * nvl here in case the snapshot is created between
854 			 * now and when we try to destroy it (in which case
855 			 * we don't want to destroy it since we haven't
856 			 * checked for permission).
857 			 */
858 			fnvlist_remove_nvpair(snaps, pair);
859 			error = 0;
860 		}
861 		if (error != 0)
862 			break;
863 	}
864 
865 	return (error);
866 }
867 
868 int
869 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
870 {
871 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
872 	int	error;
873 
874 	if ((error = zfs_secpolicy_write_perms(from,
875 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
876 		return (error);
877 
878 	if ((error = zfs_secpolicy_write_perms(from,
879 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
880 		return (error);
881 
882 	if ((error = zfs_get_parent(to, parentname,
883 	    sizeof (parentname))) != 0)
884 		return (error);
885 
886 	if ((error = zfs_secpolicy_write_perms(parentname,
887 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
888 		return (error);
889 
890 	if ((error = zfs_secpolicy_write_perms(parentname,
891 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
892 		return (error);
893 
894 	return (error);
895 }
896 
897 /* ARGSUSED */
898 static int
899 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
900 {
901 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
902 }
903 
904 /* ARGSUSED */
905 static int
906 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
907 {
908 	dsl_pool_t *dp;
909 	dsl_dataset_t *clone;
910 	int error;
911 
912 	error = zfs_secpolicy_write_perms(zc->zc_name,
913 	    ZFS_DELEG_PERM_PROMOTE, cr);
914 	if (error != 0)
915 		return (error);
916 
917 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
918 	if (error != 0)
919 		return (error);
920 
921 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
922 
923 	if (error == 0) {
924 		char parentname[ZFS_MAX_DATASET_NAME_LEN];
925 		dsl_dataset_t *origin = NULL;
926 		dsl_dir_t *dd;
927 		dd = clone->ds_dir;
928 
929 		error = dsl_dataset_hold_obj(dd->dd_pool,
930 		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
931 		if (error != 0) {
932 			dsl_dataset_rele(clone, FTAG);
933 			dsl_pool_rele(dp, FTAG);
934 			return (error);
935 		}
936 
937 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
938 		    ZFS_DELEG_PERM_MOUNT, cr);
939 
940 		dsl_dataset_name(origin, parentname);
941 		if (error == 0) {
942 			error = zfs_secpolicy_write_perms_ds(parentname, origin,
943 			    ZFS_DELEG_PERM_PROMOTE, cr);
944 		}
945 		dsl_dataset_rele(clone, FTAG);
946 		dsl_dataset_rele(origin, FTAG);
947 	}
948 	dsl_pool_rele(dp, FTAG);
949 	return (error);
950 }
951 
952 /* ARGSUSED */
953 static int
954 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
955 {
956 	int error;
957 
958 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
959 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
960 		return (error);
961 
962 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
963 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
964 		return (error);
965 
966 	return (zfs_secpolicy_write_perms(zc->zc_name,
967 	    ZFS_DELEG_PERM_CREATE, cr));
968 }
969 
970 int
971 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
972 {
973 	return (zfs_secpolicy_write_perms(name,
974 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
975 }
976 
977 /*
978  * Check for permission to create each snapshot in the nvlist.
979  */
980 /* ARGSUSED */
981 static int
982 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
983 {
984 	nvlist_t *snaps;
985 	int error = 0;
986 	nvpair_t *pair;
987 
988 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
989 		return (SET_ERROR(EINVAL));
990 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
991 	    pair = nvlist_next_nvpair(snaps, pair)) {
992 		char *name = nvpair_name(pair);
993 		char *atp = strchr(name, '@');
994 
995 		if (atp == NULL) {
996 			error = SET_ERROR(EINVAL);
997 			break;
998 		}
999 		*atp = '\0';
1000 		error = zfs_secpolicy_snapshot_perms(name, cr);
1001 		*atp = '@';
1002 		if (error != 0)
1003 			break;
1004 	}
1005 	return (error);
1006 }
1007 
1008 /*
1009  * Check for permission to create each snapshot in the nvlist.
1010  */
1011 /* ARGSUSED */
1012 static int
1013 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1014 {
1015 	int error = 0;
1016 
1017 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1018 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1019 		char *name = nvpair_name(pair);
1020 		char *hashp = strchr(name, '#');
1021 
1022 		if (hashp == NULL) {
1023 			error = SET_ERROR(EINVAL);
1024 			break;
1025 		}
1026 		*hashp = '\0';
1027 		error = zfs_secpolicy_write_perms(name,
1028 		    ZFS_DELEG_PERM_BOOKMARK, cr);
1029 		*hashp = '#';
1030 		if (error != 0)
1031 			break;
1032 	}
1033 	return (error);
1034 }
1035 
1036 /* ARGSUSED */
1037 static int
1038 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1039 {
1040 	nvpair_t *pair, *nextpair;
1041 	int error = 0;
1042 
1043 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1044 	    pair = nextpair) {
1045 		char *name = nvpair_name(pair);
1046 		char *hashp = strchr(name, '#');
1047 		nextpair = nvlist_next_nvpair(innvl, pair);
1048 
1049 		if (hashp == NULL) {
1050 			error = SET_ERROR(EINVAL);
1051 			break;
1052 		}
1053 
1054 		*hashp = '\0';
1055 		error = zfs_secpolicy_write_perms(name,
1056 		    ZFS_DELEG_PERM_DESTROY, cr);
1057 		*hashp = '#';
1058 		if (error == ENOENT) {
1059 			/*
1060 			 * Ignore any filesystems that don't exist (we consider
1061 			 * their bookmarks "already destroyed").  Remove
1062 			 * the name from the nvl here in case the filesystem
1063 			 * is created between now and when we try to destroy
1064 			 * the bookmark (in which case we don't want to
1065 			 * destroy it since we haven't checked for permission).
1066 			 */
1067 			fnvlist_remove_nvpair(innvl, pair);
1068 			error = 0;
1069 		}
1070 		if (error != 0)
1071 			break;
1072 	}
1073 
1074 	return (error);
1075 }
1076 
1077 /* ARGSUSED */
1078 static int
1079 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1080 {
1081 	/*
1082 	 * Even root must have a proper TSD so that we know what pool
1083 	 * to log to.
1084 	 */
1085 	if (tsd_get(zfs_allow_log_key) == NULL)
1086 		return (SET_ERROR(EPERM));
1087 	return (0);
1088 }
1089 
1090 static int
1091 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1092 {
1093 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
1094 	int	error;
1095 	char	*origin;
1096 
1097 	if ((error = zfs_get_parent(zc->zc_name, parentname,
1098 	    sizeof (parentname))) != 0)
1099 		return (error);
1100 
1101 	if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1102 	    (error = zfs_secpolicy_write_perms(origin,
1103 	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1104 		return (error);
1105 
1106 	if ((error = zfs_secpolicy_write_perms(parentname,
1107 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1108 		return (error);
1109 
1110 	return (zfs_secpolicy_write_perms(parentname,
1111 	    ZFS_DELEG_PERM_MOUNT, cr));
1112 }
1113 
1114 /*
1115  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1116  * SYS_CONFIG privilege, which is not available in a local zone.
1117  */
1118 /* ARGSUSED */
1119 static int
1120 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1121 {
1122 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1123 		return (SET_ERROR(EPERM));
1124 
1125 	return (0);
1126 }
1127 
1128 /*
1129  * Policy for object to name lookups.
1130  */
1131 /* ARGSUSED */
1132 static int
1133 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1134 {
1135 	int error;
1136 
1137 	if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1138 		return (0);
1139 
1140 	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1141 	return (error);
1142 }
1143 
1144 /*
1145  * Policy for fault injection.  Requires all privileges.
1146  */
1147 /* ARGSUSED */
1148 static int
1149 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1150 {
1151 	return (secpolicy_zinject(cr));
1152 }
1153 
1154 /* ARGSUSED */
1155 static int
1156 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1157 {
1158 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1159 
1160 	if (prop == ZPROP_INVAL) {
1161 		if (!zfs_prop_user(zc->zc_value))
1162 			return (SET_ERROR(EINVAL));
1163 		return (zfs_secpolicy_write_perms(zc->zc_name,
1164 		    ZFS_DELEG_PERM_USERPROP, cr));
1165 	} else {
1166 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1167 		    NULL, cr));
1168 	}
1169 }
1170 
1171 static int
1172 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1173 {
1174 	int err = zfs_secpolicy_read(zc, innvl, cr);
1175 	if (err)
1176 		return (err);
1177 
1178 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1179 		return (SET_ERROR(EINVAL));
1180 
1181 	if (zc->zc_value[0] == 0) {
1182 		/*
1183 		 * They are asking about a posix uid/gid.  If it's
1184 		 * themself, allow it.
1185 		 */
1186 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1187 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1188 			if (zc->zc_guid == crgetuid(cr))
1189 				return (0);
1190 		} else {
1191 			if (groupmember(zc->zc_guid, cr))
1192 				return (0);
1193 		}
1194 	}
1195 
1196 	return (zfs_secpolicy_write_perms(zc->zc_name,
1197 	    userquota_perms[zc->zc_objset_type], cr));
1198 }
1199 
1200 static int
1201 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1202 {
1203 	int err = zfs_secpolicy_read(zc, innvl, cr);
1204 	if (err)
1205 		return (err);
1206 
1207 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1208 		return (SET_ERROR(EINVAL));
1209 
1210 	return (zfs_secpolicy_write_perms(zc->zc_name,
1211 	    userquota_perms[zc->zc_objset_type], cr));
1212 }
1213 
1214 /* ARGSUSED */
1215 static int
1216 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1217 {
1218 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1219 	    NULL, cr));
1220 }
1221 
1222 /* ARGSUSED */
1223 static int
1224 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1225 {
1226 	nvpair_t *pair;
1227 	nvlist_t *holds;
1228 	int error;
1229 
1230 	error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1231 	if (error != 0)
1232 		return (SET_ERROR(EINVAL));
1233 
1234 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1235 	    pair = nvlist_next_nvpair(holds, pair)) {
1236 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1237 		error = dmu_fsname(nvpair_name(pair), fsname);
1238 		if (error != 0)
1239 			return (error);
1240 		error = zfs_secpolicy_write_perms(fsname,
1241 		    ZFS_DELEG_PERM_HOLD, cr);
1242 		if (error != 0)
1243 			return (error);
1244 	}
1245 	return (0);
1246 }
1247 
1248 /* ARGSUSED */
1249 static int
1250 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1251 {
1252 	nvpair_t *pair;
1253 	int error;
1254 
1255 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1256 	    pair = nvlist_next_nvpair(innvl, pair)) {
1257 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1258 		error = dmu_fsname(nvpair_name(pair), fsname);
1259 		if (error != 0)
1260 			return (error);
1261 		error = zfs_secpolicy_write_perms(fsname,
1262 		    ZFS_DELEG_PERM_RELEASE, cr);
1263 		if (error != 0)
1264 			return (error);
1265 	}
1266 	return (0);
1267 }
1268 
1269 /*
1270  * Policy for allowing temporary snapshots to be taken or released
1271  */
1272 static int
1273 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1274 {
1275 	/*
1276 	 * A temporary snapshot is the same as a snapshot,
1277 	 * hold, destroy and release all rolled into one.
1278 	 * Delegated diff alone is sufficient that we allow this.
1279 	 */
1280 	int error;
1281 
1282 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1283 	    ZFS_DELEG_PERM_DIFF, cr)) == 0)
1284 		return (0);
1285 
1286 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1287 	if (error == 0)
1288 		error = zfs_secpolicy_hold(zc, innvl, cr);
1289 	if (error == 0)
1290 		error = zfs_secpolicy_release(zc, innvl, cr);
1291 	if (error == 0)
1292 		error = zfs_secpolicy_destroy(zc, innvl, cr);
1293 	return (error);
1294 }
1295 
1296 /*
1297  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1298  */
1299 static int
1300 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1301 {
1302 	char *packed;
1303 	int error;
1304 	nvlist_t *list = NULL;
1305 
1306 	/*
1307 	 * Read in and unpack the user-supplied nvlist.
1308 	 */
1309 	if (size == 0)
1310 		return (SET_ERROR(EINVAL));
1311 
1312 	packed = kmem_alloc(size, KM_SLEEP);
1313 
1314 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1315 	    iflag)) != 0) {
1316 		kmem_free(packed, size);
1317 		return (SET_ERROR(EFAULT));
1318 	}
1319 
1320 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1321 		kmem_free(packed, size);
1322 		return (error);
1323 	}
1324 
1325 	kmem_free(packed, size);
1326 
1327 	*nvp = list;
1328 	return (0);
1329 }
1330 
1331 /*
1332  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1333  * Entries will be removed from the end of the nvlist, and one int32 entry
1334  * named "N_MORE_ERRORS" will be added indicating how many entries were
1335  * removed.
1336  */
1337 static int
1338 nvlist_smush(nvlist_t *errors, size_t max)
1339 {
1340 	size_t size;
1341 
1342 	size = fnvlist_size(errors);
1343 
1344 	if (size > max) {
1345 		nvpair_t *more_errors;
1346 		int n = 0;
1347 
1348 		if (max < 1024)
1349 			return (SET_ERROR(ENOMEM));
1350 
1351 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1352 		more_errors = nvlist_prev_nvpair(errors, NULL);
1353 
1354 		do {
1355 			nvpair_t *pair = nvlist_prev_nvpair(errors,
1356 			    more_errors);
1357 			fnvlist_remove_nvpair(errors, pair);
1358 			n++;
1359 			size = fnvlist_size(errors);
1360 		} while (size > max);
1361 
1362 		fnvlist_remove_nvpair(errors, more_errors);
1363 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1364 		ASSERT3U(fnvlist_size(errors), <=, max);
1365 	}
1366 
1367 	return (0);
1368 }
1369 
1370 static int
1371 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1372 {
1373 	char *packed = NULL;
1374 	int error = 0;
1375 	size_t size;
1376 
1377 	size = fnvlist_size(nvl);
1378 
1379 	if (size > zc->zc_nvlist_dst_size) {
1380 		error = SET_ERROR(ENOMEM);
1381 	} else {
1382 		packed = fnvlist_pack(nvl, &size);
1383 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1384 		    size, zc->zc_iflags) != 0)
1385 			error = SET_ERROR(EFAULT);
1386 		fnvlist_pack_free(packed, size);
1387 	}
1388 
1389 	zc->zc_nvlist_dst_size = size;
1390 	zc->zc_nvlist_dst_filled = B_TRUE;
1391 	return (error);
1392 }
1393 
1394 static int
1395 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1396 {
1397 	objset_t *os;
1398 	int error;
1399 
1400 	error = dmu_objset_hold(dsname, FTAG, &os);
1401 	if (error != 0)
1402 		return (error);
1403 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1404 		dmu_objset_rele(os, FTAG);
1405 		return (SET_ERROR(EINVAL));
1406 	}
1407 
1408 	mutex_enter(&os->os_user_ptr_lock);
1409 	*zfvp = dmu_objset_get_user(os);
1410 	if (*zfvp) {
1411 		VFS_HOLD((*zfvp)->z_vfs);
1412 	} else {
1413 		error = SET_ERROR(ESRCH);
1414 	}
1415 	mutex_exit(&os->os_user_ptr_lock);
1416 	dmu_objset_rele(os, FTAG);
1417 	return (error);
1418 }
1419 
1420 /*
1421  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1422  * case its z_vfs will be NULL, and it will be opened as the owner.
1423  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1424  * which prevents all vnode ops from running.
1425  */
1426 static int
1427 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1428 {
1429 	int error = 0;
1430 
1431 	if (getzfsvfs(name, zfvp) != 0)
1432 		error = zfsvfs_create(name, zfvp);
1433 	if (error == 0) {
1434 		rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1435 		    RW_READER, tag);
1436 		if ((*zfvp)->z_unmounted) {
1437 			/*
1438 			 * XXX we could probably try again, since the unmounting
1439 			 * thread should be just about to disassociate the
1440 			 * objset from the zfsvfs.
1441 			 */
1442 			rrm_exit(&(*zfvp)->z_teardown_lock, tag);
1443 			return (SET_ERROR(EBUSY));
1444 		}
1445 	}
1446 	return (error);
1447 }
1448 
1449 static void
1450 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1451 {
1452 	rrm_exit(&zfsvfs->z_teardown_lock, tag);
1453 
1454 	if (zfsvfs->z_vfs) {
1455 		VFS_RELE(zfsvfs->z_vfs);
1456 	} else {
1457 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1458 		zfsvfs_free(zfsvfs);
1459 	}
1460 }
1461 
1462 static int
1463 zfs_ioc_pool_create(zfs_cmd_t *zc)
1464 {
1465 	int error;
1466 	nvlist_t *config, *props = NULL;
1467 	nvlist_t *rootprops = NULL;
1468 	nvlist_t *zplprops = NULL;
1469 
1470 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1471 	    zc->zc_iflags, &config))
1472 		return (error);
1473 
1474 	if (zc->zc_nvlist_src_size != 0 && (error =
1475 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1476 	    zc->zc_iflags, &props))) {
1477 		nvlist_free(config);
1478 		return (error);
1479 	}
1480 
1481 	if (props) {
1482 		nvlist_t *nvl = NULL;
1483 		uint64_t version = SPA_VERSION;
1484 
1485 		(void) nvlist_lookup_uint64(props,
1486 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1487 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1488 			error = SET_ERROR(EINVAL);
1489 			goto pool_props_bad;
1490 		}
1491 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1492 		if (nvl) {
1493 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1494 			if (error != 0) {
1495 				nvlist_free(config);
1496 				nvlist_free(props);
1497 				return (error);
1498 			}
1499 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1500 		}
1501 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1502 		error = zfs_fill_zplprops_root(version, rootprops,
1503 		    zplprops, NULL);
1504 		if (error != 0)
1505 			goto pool_props_bad;
1506 	}
1507 
1508 	error = spa_create(zc->zc_name, config, props, zplprops);
1509 
1510 	/*
1511 	 * Set the remaining root properties
1512 	 */
1513 	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1514 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1515 		(void) spa_destroy(zc->zc_name);
1516 
1517 pool_props_bad:
1518 	nvlist_free(rootprops);
1519 	nvlist_free(zplprops);
1520 	nvlist_free(config);
1521 	nvlist_free(props);
1522 
1523 	return (error);
1524 }
1525 
1526 static int
1527 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1528 {
1529 	int error;
1530 	zfs_log_history(zc);
1531 	error = spa_destroy(zc->zc_name);
1532 	if (error == 0)
1533 		zvol_remove_minors(zc->zc_name);
1534 	return (error);
1535 }
1536 
1537 static int
1538 zfs_ioc_pool_import(zfs_cmd_t *zc)
1539 {
1540 	nvlist_t *config, *props = NULL;
1541 	uint64_t guid;
1542 	int error;
1543 
1544 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1545 	    zc->zc_iflags, &config)) != 0)
1546 		return (error);
1547 
1548 	if (zc->zc_nvlist_src_size != 0 && (error =
1549 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1550 	    zc->zc_iflags, &props))) {
1551 		nvlist_free(config);
1552 		return (error);
1553 	}
1554 
1555 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1556 	    guid != zc->zc_guid)
1557 		error = SET_ERROR(EINVAL);
1558 	else
1559 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1560 
1561 	if (zc->zc_nvlist_dst != 0) {
1562 		int err;
1563 
1564 		if ((err = put_nvlist(zc, config)) != 0)
1565 			error = err;
1566 	}
1567 
1568 	nvlist_free(config);
1569 
1570 	nvlist_free(props);
1571 
1572 	return (error);
1573 }
1574 
1575 static int
1576 zfs_ioc_pool_export(zfs_cmd_t *zc)
1577 {
1578 	int error;
1579 	boolean_t force = (boolean_t)zc->zc_cookie;
1580 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1581 
1582 	zfs_log_history(zc);
1583 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1584 	if (error == 0)
1585 		zvol_remove_minors(zc->zc_name);
1586 	return (error);
1587 }
1588 
1589 static int
1590 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1591 {
1592 	nvlist_t *configs;
1593 	int error;
1594 
1595 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1596 		return (SET_ERROR(EEXIST));
1597 
1598 	error = put_nvlist(zc, configs);
1599 
1600 	nvlist_free(configs);
1601 
1602 	return (error);
1603 }
1604 
1605 /*
1606  * inputs:
1607  * zc_name		name of the pool
1608  *
1609  * outputs:
1610  * zc_cookie		real errno
1611  * zc_nvlist_dst	config nvlist
1612  * zc_nvlist_dst_size	size of config nvlist
1613  */
1614 static int
1615 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1616 {
1617 	nvlist_t *config;
1618 	int error;
1619 	int ret = 0;
1620 
1621 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1622 	    sizeof (zc->zc_value));
1623 
1624 	if (config != NULL) {
1625 		ret = put_nvlist(zc, config);
1626 		nvlist_free(config);
1627 
1628 		/*
1629 		 * The config may be present even if 'error' is non-zero.
1630 		 * In this case we return success, and preserve the real errno
1631 		 * in 'zc_cookie'.
1632 		 */
1633 		zc->zc_cookie = error;
1634 	} else {
1635 		ret = error;
1636 	}
1637 
1638 	return (ret);
1639 }
1640 
1641 /*
1642  * Try to import the given pool, returning pool stats as appropriate so that
1643  * user land knows which devices are available and overall pool health.
1644  */
1645 static int
1646 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1647 {
1648 	nvlist_t *tryconfig, *config;
1649 	int error;
1650 
1651 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1652 	    zc->zc_iflags, &tryconfig)) != 0)
1653 		return (error);
1654 
1655 	config = spa_tryimport(tryconfig);
1656 
1657 	nvlist_free(tryconfig);
1658 
1659 	if (config == NULL)
1660 		return (SET_ERROR(EINVAL));
1661 
1662 	error = put_nvlist(zc, config);
1663 	nvlist_free(config);
1664 
1665 	return (error);
1666 }
1667 
1668 /*
1669  * inputs:
1670  * zc_name              name of the pool
1671  * zc_cookie            scan func (pool_scan_func_t)
1672  */
1673 static int
1674 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1675 {
1676 	spa_t *spa;
1677 	int error;
1678 
1679 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1680 		return (error);
1681 
1682 	if (zc->zc_cookie == POOL_SCAN_NONE)
1683 		error = spa_scan_stop(spa);
1684 	else
1685 		error = spa_scan(spa, zc->zc_cookie);
1686 
1687 	spa_close(spa, FTAG);
1688 
1689 	return (error);
1690 }
1691 
1692 static int
1693 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1694 {
1695 	spa_t *spa;
1696 	int error;
1697 
1698 	error = spa_open(zc->zc_name, &spa, FTAG);
1699 	if (error == 0) {
1700 		spa_freeze(spa);
1701 		spa_close(spa, FTAG);
1702 	}
1703 	return (error);
1704 }
1705 
1706 static int
1707 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1708 {
1709 	spa_t *spa;
1710 	int error;
1711 
1712 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1713 		return (error);
1714 
1715 	if (zc->zc_cookie < spa_version(spa) ||
1716 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1717 		spa_close(spa, FTAG);
1718 		return (SET_ERROR(EINVAL));
1719 	}
1720 
1721 	spa_upgrade(spa, zc->zc_cookie);
1722 	spa_close(spa, FTAG);
1723 
1724 	return (error);
1725 }
1726 
1727 static int
1728 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1729 {
1730 	spa_t *spa;
1731 	char *hist_buf;
1732 	uint64_t size;
1733 	int error;
1734 
1735 	if ((size = zc->zc_history_len) == 0)
1736 		return (SET_ERROR(EINVAL));
1737 
1738 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1739 		return (error);
1740 
1741 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1742 		spa_close(spa, FTAG);
1743 		return (SET_ERROR(ENOTSUP));
1744 	}
1745 
1746 	hist_buf = kmem_alloc(size, KM_SLEEP);
1747 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1748 	    &zc->zc_history_len, hist_buf)) == 0) {
1749 		error = ddi_copyout(hist_buf,
1750 		    (void *)(uintptr_t)zc->zc_history,
1751 		    zc->zc_history_len, zc->zc_iflags);
1752 	}
1753 
1754 	spa_close(spa, FTAG);
1755 	kmem_free(hist_buf, size);
1756 	return (error);
1757 }
1758 
1759 static int
1760 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1761 {
1762 	spa_t *spa;
1763 	int error;
1764 
1765 	error = spa_open(zc->zc_name, &spa, FTAG);
1766 	if (error == 0) {
1767 		error = spa_change_guid(spa);
1768 		spa_close(spa, FTAG);
1769 	}
1770 	return (error);
1771 }
1772 
1773 static int
1774 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1775 {
1776 	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1777 }
1778 
1779 /*
1780  * inputs:
1781  * zc_name		name of filesystem
1782  * zc_obj		object to find
1783  *
1784  * outputs:
1785  * zc_value		name of object
1786  */
1787 static int
1788 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1789 {
1790 	objset_t *os;
1791 	int error;
1792 
1793 	/* XXX reading from objset not owned */
1794 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1795 		return (error);
1796 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1797 		dmu_objset_rele(os, FTAG);
1798 		return (SET_ERROR(EINVAL));
1799 	}
1800 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1801 	    sizeof (zc->zc_value));
1802 	dmu_objset_rele(os, FTAG);
1803 
1804 	return (error);
1805 }
1806 
1807 /*
1808  * inputs:
1809  * zc_name		name of filesystem
1810  * zc_obj		object to find
1811  *
1812  * outputs:
1813  * zc_stat		stats on object
1814  * zc_value		path to object
1815  */
1816 static int
1817 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1818 {
1819 	objset_t *os;
1820 	int error;
1821 
1822 	/* XXX reading from objset not owned */
1823 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1824 		return (error);
1825 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1826 		dmu_objset_rele(os, FTAG);
1827 		return (SET_ERROR(EINVAL));
1828 	}
1829 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1830 	    sizeof (zc->zc_value));
1831 	dmu_objset_rele(os, FTAG);
1832 
1833 	return (error);
1834 }
1835 
1836 static int
1837 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1838 {
1839 	spa_t *spa;
1840 	int error;
1841 	nvlist_t *config, **l2cache, **spares;
1842 	uint_t nl2cache = 0, nspares = 0;
1843 
1844 	error = spa_open(zc->zc_name, &spa, FTAG);
1845 	if (error != 0)
1846 		return (error);
1847 
1848 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1849 	    zc->zc_iflags, &config);
1850 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1851 	    &l2cache, &nl2cache);
1852 
1853 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1854 	    &spares, &nspares);
1855 
1856 	/*
1857 	 * A root pool with concatenated devices is not supported.
1858 	 * Thus, can not add a device to a root pool.
1859 	 *
1860 	 * Intent log device can not be added to a rootpool because
1861 	 * during mountroot, zil is replayed, a seperated log device
1862 	 * can not be accessed during the mountroot time.
1863 	 *
1864 	 * l2cache and spare devices are ok to be added to a rootpool.
1865 	 */
1866 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1867 		nvlist_free(config);
1868 		spa_close(spa, FTAG);
1869 		return (SET_ERROR(EDOM));
1870 	}
1871 
1872 	if (error == 0) {
1873 		error = spa_vdev_add(spa, config);
1874 		nvlist_free(config);
1875 	}
1876 	spa_close(spa, FTAG);
1877 	return (error);
1878 }
1879 
1880 /*
1881  * inputs:
1882  * zc_name		name of the pool
1883  * zc_nvlist_conf	nvlist of devices to remove
1884  * zc_cookie		to stop the remove?
1885  */
1886 static int
1887 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1888 {
1889 	spa_t *spa;
1890 	int error;
1891 
1892 	error = spa_open(zc->zc_name, &spa, FTAG);
1893 	if (error != 0)
1894 		return (error);
1895 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1896 	spa_close(spa, FTAG);
1897 	return (error);
1898 }
1899 
1900 static int
1901 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1902 {
1903 	spa_t *spa;
1904 	int error;
1905 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1906 
1907 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1908 		return (error);
1909 	switch (zc->zc_cookie) {
1910 	case VDEV_STATE_ONLINE:
1911 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1912 		break;
1913 
1914 	case VDEV_STATE_OFFLINE:
1915 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1916 		break;
1917 
1918 	case VDEV_STATE_FAULTED:
1919 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1920 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1921 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1922 
1923 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1924 		break;
1925 
1926 	case VDEV_STATE_DEGRADED:
1927 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1928 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1929 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1930 
1931 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1932 		break;
1933 
1934 	default:
1935 		error = SET_ERROR(EINVAL);
1936 	}
1937 	zc->zc_cookie = newstate;
1938 	spa_close(spa, FTAG);
1939 	return (error);
1940 }
1941 
1942 static int
1943 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1944 {
1945 	spa_t *spa;
1946 	int replacing = zc->zc_cookie;
1947 	nvlist_t *config;
1948 	int error;
1949 
1950 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1951 		return (error);
1952 
1953 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1954 	    zc->zc_iflags, &config)) == 0) {
1955 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1956 		nvlist_free(config);
1957 	}
1958 
1959 	spa_close(spa, FTAG);
1960 	return (error);
1961 }
1962 
1963 static int
1964 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1965 {
1966 	spa_t *spa;
1967 	int error;
1968 
1969 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1970 		return (error);
1971 
1972 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1973 
1974 	spa_close(spa, FTAG);
1975 	return (error);
1976 }
1977 
1978 static int
1979 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1980 {
1981 	spa_t *spa;
1982 	nvlist_t *config, *props = NULL;
1983 	int error;
1984 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1985 
1986 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1987 		return (error);
1988 
1989 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1990 	    zc->zc_iflags, &config)) {
1991 		spa_close(spa, FTAG);
1992 		return (error);
1993 	}
1994 
1995 	if (zc->zc_nvlist_src_size != 0 && (error =
1996 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1997 	    zc->zc_iflags, &props))) {
1998 		spa_close(spa, FTAG);
1999 		nvlist_free(config);
2000 		return (error);
2001 	}
2002 
2003 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2004 
2005 	spa_close(spa, FTAG);
2006 
2007 	nvlist_free(config);
2008 	nvlist_free(props);
2009 
2010 	return (error);
2011 }
2012 
2013 static int
2014 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2015 {
2016 	spa_t *spa;
2017 	char *path = zc->zc_value;
2018 	uint64_t guid = zc->zc_guid;
2019 	int error;
2020 
2021 	error = spa_open(zc->zc_name, &spa, FTAG);
2022 	if (error != 0)
2023 		return (error);
2024 
2025 	error = spa_vdev_setpath(spa, guid, path);
2026 	spa_close(spa, FTAG);
2027 	return (error);
2028 }
2029 
2030 static int
2031 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2032 {
2033 	spa_t *spa;
2034 	char *fru = zc->zc_value;
2035 	uint64_t guid = zc->zc_guid;
2036 	int error;
2037 
2038 	error = spa_open(zc->zc_name, &spa, FTAG);
2039 	if (error != 0)
2040 		return (error);
2041 
2042 	error = spa_vdev_setfru(spa, guid, fru);
2043 	spa_close(spa, FTAG);
2044 	return (error);
2045 }
2046 
2047 static int
2048 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2049 {
2050 	int error = 0;
2051 	nvlist_t *nv;
2052 
2053 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2054 
2055 	if (zc->zc_nvlist_dst != 0 &&
2056 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2057 		dmu_objset_stats(os, nv);
2058 		/*
2059 		 * NB: zvol_get_stats() will read the objset contents,
2060 		 * which we aren't supposed to do with a
2061 		 * DS_MODE_USER hold, because it could be
2062 		 * inconsistent.  So this is a bit of a workaround...
2063 		 * XXX reading with out owning
2064 		 */
2065 		if (!zc->zc_objset_stats.dds_inconsistent &&
2066 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2067 			error = zvol_get_stats(os, nv);
2068 			if (error == EIO)
2069 				return (error);
2070 			VERIFY0(error);
2071 		}
2072 		error = put_nvlist(zc, nv);
2073 		nvlist_free(nv);
2074 	}
2075 
2076 	return (error);
2077 }
2078 
2079 /*
2080  * inputs:
2081  * zc_name		name of filesystem
2082  * zc_nvlist_dst_size	size of buffer for property nvlist
2083  *
2084  * outputs:
2085  * zc_objset_stats	stats
2086  * zc_nvlist_dst	property nvlist
2087  * zc_nvlist_dst_size	size of property nvlist
2088  */
2089 static int
2090 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2091 {
2092 	objset_t *os;
2093 	int error;
2094 
2095 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2096 	if (error == 0) {
2097 		error = zfs_ioc_objset_stats_impl(zc, os);
2098 		dmu_objset_rele(os, FTAG);
2099 	}
2100 
2101 	return (error);
2102 }
2103 
2104 /*
2105  * inputs:
2106  * zc_name		name of filesystem
2107  * zc_nvlist_dst_size	size of buffer for property nvlist
2108  *
2109  * outputs:
2110  * zc_nvlist_dst	received property nvlist
2111  * zc_nvlist_dst_size	size of received property nvlist
2112  *
2113  * Gets received properties (distinct from local properties on or after
2114  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2115  * local property values.
2116  */
2117 static int
2118 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2119 {
2120 	int error = 0;
2121 	nvlist_t *nv;
2122 
2123 	/*
2124 	 * Without this check, we would return local property values if the
2125 	 * caller has not already received properties on or after
2126 	 * SPA_VERSION_RECVD_PROPS.
2127 	 */
2128 	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2129 		return (SET_ERROR(ENOTSUP));
2130 
2131 	if (zc->zc_nvlist_dst != 0 &&
2132 	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2133 		error = put_nvlist(zc, nv);
2134 		nvlist_free(nv);
2135 	}
2136 
2137 	return (error);
2138 }
2139 
2140 static int
2141 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2142 {
2143 	uint64_t value;
2144 	int error;
2145 
2146 	/*
2147 	 * zfs_get_zplprop() will either find a value or give us
2148 	 * the default value (if there is one).
2149 	 */
2150 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2151 		return (error);
2152 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2153 	return (0);
2154 }
2155 
2156 /*
2157  * inputs:
2158  * zc_name		name of filesystem
2159  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2160  *
2161  * outputs:
2162  * zc_nvlist_dst	zpl property nvlist
2163  * zc_nvlist_dst_size	size of zpl property nvlist
2164  */
2165 static int
2166 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2167 {
2168 	objset_t *os;
2169 	int err;
2170 
2171 	/* XXX reading without owning */
2172 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2173 		return (err);
2174 
2175 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2176 
2177 	/*
2178 	 * NB: nvl_add_zplprop() will read the objset contents,
2179 	 * which we aren't supposed to do with a DS_MODE_USER
2180 	 * hold, because it could be inconsistent.
2181 	 */
2182 	if (zc->zc_nvlist_dst != NULL &&
2183 	    !zc->zc_objset_stats.dds_inconsistent &&
2184 	    dmu_objset_type(os) == DMU_OST_ZFS) {
2185 		nvlist_t *nv;
2186 
2187 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2188 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2189 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2190 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2191 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2192 			err = put_nvlist(zc, nv);
2193 		nvlist_free(nv);
2194 	} else {
2195 		err = SET_ERROR(ENOENT);
2196 	}
2197 	dmu_objset_rele(os, FTAG);
2198 	return (err);
2199 }
2200 
2201 static boolean_t
2202 dataset_name_hidden(const char *name)
2203 {
2204 	/*
2205 	 * Skip over datasets that are not visible in this zone,
2206 	 * internal datasets (which have a $ in their name), and
2207 	 * temporary datasets (which have a % in their name).
2208 	 */
2209 	if (strchr(name, '$') != NULL)
2210 		return (B_TRUE);
2211 	if (strchr(name, '%') != NULL)
2212 		return (B_TRUE);
2213 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2214 		return (B_TRUE);
2215 	return (B_FALSE);
2216 }
2217 
2218 /*
2219  * inputs:
2220  * zc_name		name of filesystem
2221  * zc_cookie		zap cursor
2222  * zc_nvlist_dst_size	size of buffer for property nvlist
2223  *
2224  * outputs:
2225  * zc_name		name of next filesystem
2226  * zc_cookie		zap cursor
2227  * zc_objset_stats	stats
2228  * zc_nvlist_dst	property nvlist
2229  * zc_nvlist_dst_size	size of property nvlist
2230  */
2231 static int
2232 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2233 {
2234 	objset_t *os;
2235 	int error;
2236 	char *p;
2237 	size_t orig_len = strlen(zc->zc_name);
2238 
2239 top:
2240 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2241 		if (error == ENOENT)
2242 			error = SET_ERROR(ESRCH);
2243 		return (error);
2244 	}
2245 
2246 	p = strrchr(zc->zc_name, '/');
2247 	if (p == NULL || p[1] != '\0')
2248 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2249 	p = zc->zc_name + strlen(zc->zc_name);
2250 
2251 	do {
2252 		error = dmu_dir_list_next(os,
2253 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2254 		    NULL, &zc->zc_cookie);
2255 		if (error == ENOENT)
2256 			error = SET_ERROR(ESRCH);
2257 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
2258 	dmu_objset_rele(os, FTAG);
2259 
2260 	/*
2261 	 * If it's an internal dataset (ie. with a '$' in its name),
2262 	 * don't try to get stats for it, otherwise we'll return ENOENT.
2263 	 */
2264 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2265 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2266 		if (error == ENOENT) {
2267 			/* We lost a race with destroy, get the next one. */
2268 			zc->zc_name[orig_len] = '\0';
2269 			goto top;
2270 		}
2271 	}
2272 	return (error);
2273 }
2274 
2275 /*
2276  * inputs:
2277  * zc_name		name of filesystem
2278  * zc_cookie		zap cursor
2279  * zc_nvlist_dst_size	size of buffer for property nvlist
2280  * zc_simple		when set, only name is requested
2281  *
2282  * outputs:
2283  * zc_name		name of next snapshot
2284  * zc_objset_stats	stats
2285  * zc_nvlist_dst	property nvlist
2286  * zc_nvlist_dst_size	size of property nvlist
2287  */
2288 static int
2289 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2290 {
2291 	objset_t *os;
2292 	int error;
2293 
2294 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2295 	if (error != 0) {
2296 		return (error == ENOENT ? ESRCH : error);
2297 	}
2298 
2299 	/*
2300 	 * A dataset name of maximum length cannot have any snapshots,
2301 	 * so exit immediately.
2302 	 */
2303 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2304 	    ZFS_MAX_DATASET_NAME_LEN) {
2305 		dmu_objset_rele(os, FTAG);
2306 		return (SET_ERROR(ESRCH));
2307 	}
2308 
2309 	error = dmu_snapshot_list_next(os,
2310 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
2311 	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2312 	    NULL);
2313 
2314 	if (error == 0 && !zc->zc_simple) {
2315 		dsl_dataset_t *ds;
2316 		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2317 
2318 		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2319 		if (error == 0) {
2320 			objset_t *ossnap;
2321 
2322 			error = dmu_objset_from_ds(ds, &ossnap);
2323 			if (error == 0)
2324 				error = zfs_ioc_objset_stats_impl(zc, ossnap);
2325 			dsl_dataset_rele(ds, FTAG);
2326 		}
2327 	} else if (error == ENOENT) {
2328 		error = SET_ERROR(ESRCH);
2329 	}
2330 
2331 	dmu_objset_rele(os, FTAG);
2332 	/* if we failed, undo the @ that we tacked on to zc_name */
2333 	if (error != 0)
2334 		*strchr(zc->zc_name, '@') = '\0';
2335 	return (error);
2336 }
2337 
2338 static int
2339 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2340 {
2341 	const char *propname = nvpair_name(pair);
2342 	uint64_t *valary;
2343 	unsigned int vallen;
2344 	const char *domain;
2345 	char *dash;
2346 	zfs_userquota_prop_t type;
2347 	uint64_t rid;
2348 	uint64_t quota;
2349 	zfsvfs_t *zfsvfs;
2350 	int err;
2351 
2352 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2353 		nvlist_t *attrs;
2354 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2355 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2356 		    &pair) != 0)
2357 			return (SET_ERROR(EINVAL));
2358 	}
2359 
2360 	/*
2361 	 * A correctly constructed propname is encoded as
2362 	 * userquota@<rid>-<domain>.
2363 	 */
2364 	if ((dash = strchr(propname, '-')) == NULL ||
2365 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2366 	    vallen != 3)
2367 		return (SET_ERROR(EINVAL));
2368 
2369 	domain = dash + 1;
2370 	type = valary[0];
2371 	rid = valary[1];
2372 	quota = valary[2];
2373 
2374 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2375 	if (err == 0) {
2376 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2377 		zfsvfs_rele(zfsvfs, FTAG);
2378 	}
2379 
2380 	return (err);
2381 }
2382 
2383 /*
2384  * If the named property is one that has a special function to set its value,
2385  * return 0 on success and a positive error code on failure; otherwise if it is
2386  * not one of the special properties handled by this function, return -1.
2387  *
2388  * XXX: It would be better for callers of the property interface if we handled
2389  * these special cases in dsl_prop.c (in the dsl layer).
2390  */
2391 static int
2392 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2393     nvpair_t *pair)
2394 {
2395 	const char *propname = nvpair_name(pair);
2396 	zfs_prop_t prop = zfs_name_to_prop(propname);
2397 	uint64_t intval;
2398 	int err = -1;
2399 
2400 	if (prop == ZPROP_INVAL) {
2401 		if (zfs_prop_userquota(propname))
2402 			return (zfs_prop_set_userquota(dsname, pair));
2403 		return (-1);
2404 	}
2405 
2406 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2407 		nvlist_t *attrs;
2408 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2409 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2410 		    &pair) == 0);
2411 	}
2412 
2413 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2414 		return (-1);
2415 
2416 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2417 
2418 	switch (prop) {
2419 	case ZFS_PROP_QUOTA:
2420 		err = dsl_dir_set_quota(dsname, source, intval);
2421 		break;
2422 	case ZFS_PROP_REFQUOTA:
2423 		err = dsl_dataset_set_refquota(dsname, source, intval);
2424 		break;
2425 	case ZFS_PROP_FILESYSTEM_LIMIT:
2426 	case ZFS_PROP_SNAPSHOT_LIMIT:
2427 		if (intval == UINT64_MAX) {
2428 			/* clearing the limit, just do it */
2429 			err = 0;
2430 		} else {
2431 			err = dsl_dir_activate_fs_ss_limit(dsname);
2432 		}
2433 		/*
2434 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2435 		 * default path to set the value in the nvlist.
2436 		 */
2437 		if (err == 0)
2438 			err = -1;
2439 		break;
2440 	case ZFS_PROP_RESERVATION:
2441 		err = dsl_dir_set_reservation(dsname, source, intval);
2442 		break;
2443 	case ZFS_PROP_REFRESERVATION:
2444 		err = dsl_dataset_set_refreservation(dsname, source, intval);
2445 		break;
2446 	case ZFS_PROP_VOLSIZE:
2447 		err = zvol_set_volsize(dsname, intval);
2448 		break;
2449 	case ZFS_PROP_VERSION:
2450 	{
2451 		zfsvfs_t *zfsvfs;
2452 
2453 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2454 			break;
2455 
2456 		err = zfs_set_version(zfsvfs, intval);
2457 		zfsvfs_rele(zfsvfs, FTAG);
2458 
2459 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2460 			zfs_cmd_t *zc;
2461 
2462 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2463 			(void) strcpy(zc->zc_name, dsname);
2464 			(void) zfs_ioc_userspace_upgrade(zc);
2465 			kmem_free(zc, sizeof (zfs_cmd_t));
2466 		}
2467 		break;
2468 	}
2469 	default:
2470 		err = -1;
2471 	}
2472 
2473 	return (err);
2474 }
2475 
2476 /*
2477  * This function is best effort. If it fails to set any of the given properties,
2478  * it continues to set as many as it can and returns the last error
2479  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2480  * with the list of names of all the properties that failed along with the
2481  * corresponding error numbers.
2482  *
2483  * If every property is set successfully, zero is returned and errlist is not
2484  * modified.
2485  */
2486 int
2487 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2488     nvlist_t *errlist)
2489 {
2490 	nvpair_t *pair;
2491 	nvpair_t *propval;
2492 	int rv = 0;
2493 	uint64_t intval;
2494 	char *strval;
2495 	nvlist_t *genericnvl = fnvlist_alloc();
2496 	nvlist_t *retrynvl = fnvlist_alloc();
2497 
2498 retry:
2499 	pair = NULL;
2500 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2501 		const char *propname = nvpair_name(pair);
2502 		zfs_prop_t prop = zfs_name_to_prop(propname);
2503 		int err = 0;
2504 
2505 		/* decode the property value */
2506 		propval = pair;
2507 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2508 			nvlist_t *attrs;
2509 			attrs = fnvpair_value_nvlist(pair);
2510 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2511 			    &propval) != 0)
2512 				err = SET_ERROR(EINVAL);
2513 		}
2514 
2515 		/* Validate value type */
2516 		if (err == 0 && prop == ZPROP_INVAL) {
2517 			if (zfs_prop_user(propname)) {
2518 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2519 					err = SET_ERROR(EINVAL);
2520 			} else if (zfs_prop_userquota(propname)) {
2521 				if (nvpair_type(propval) !=
2522 				    DATA_TYPE_UINT64_ARRAY)
2523 					err = SET_ERROR(EINVAL);
2524 			} else {
2525 				err = SET_ERROR(EINVAL);
2526 			}
2527 		} else if (err == 0) {
2528 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2529 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2530 					err = SET_ERROR(EINVAL);
2531 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2532 				const char *unused;
2533 
2534 				intval = fnvpair_value_uint64(propval);
2535 
2536 				switch (zfs_prop_get_type(prop)) {
2537 				case PROP_TYPE_NUMBER:
2538 					break;
2539 				case PROP_TYPE_STRING:
2540 					err = SET_ERROR(EINVAL);
2541 					break;
2542 				case PROP_TYPE_INDEX:
2543 					if (zfs_prop_index_to_string(prop,
2544 					    intval, &unused) != 0)
2545 						err = SET_ERROR(EINVAL);
2546 					break;
2547 				default:
2548 					cmn_err(CE_PANIC,
2549 					    "unknown property type");
2550 				}
2551 			} else {
2552 				err = SET_ERROR(EINVAL);
2553 			}
2554 		}
2555 
2556 		/* Validate permissions */
2557 		if (err == 0)
2558 			err = zfs_check_settable(dsname, pair, CRED());
2559 
2560 		if (err == 0) {
2561 			err = zfs_prop_set_special(dsname, source, pair);
2562 			if (err == -1) {
2563 				/*
2564 				 * For better performance we build up a list of
2565 				 * properties to set in a single transaction.
2566 				 */
2567 				err = nvlist_add_nvpair(genericnvl, pair);
2568 			} else if (err != 0 && nvl != retrynvl) {
2569 				/*
2570 				 * This may be a spurious error caused by
2571 				 * receiving quota and reservation out of order.
2572 				 * Try again in a second pass.
2573 				 */
2574 				err = nvlist_add_nvpair(retrynvl, pair);
2575 			}
2576 		}
2577 
2578 		if (err != 0) {
2579 			if (errlist != NULL)
2580 				fnvlist_add_int32(errlist, propname, err);
2581 			rv = err;
2582 		}
2583 	}
2584 
2585 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2586 		nvl = retrynvl;
2587 		goto retry;
2588 	}
2589 
2590 	if (!nvlist_empty(genericnvl) &&
2591 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2592 		/*
2593 		 * If this fails, we still want to set as many properties as we
2594 		 * can, so try setting them individually.
2595 		 */
2596 		pair = NULL;
2597 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2598 			const char *propname = nvpair_name(pair);
2599 			int err = 0;
2600 
2601 			propval = pair;
2602 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2603 				nvlist_t *attrs;
2604 				attrs = fnvpair_value_nvlist(pair);
2605 				propval = fnvlist_lookup_nvpair(attrs,
2606 				    ZPROP_VALUE);
2607 			}
2608 
2609 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2610 				strval = fnvpair_value_string(propval);
2611 				err = dsl_prop_set_string(dsname, propname,
2612 				    source, strval);
2613 			} else {
2614 				intval = fnvpair_value_uint64(propval);
2615 				err = dsl_prop_set_int(dsname, propname, source,
2616 				    intval);
2617 			}
2618 
2619 			if (err != 0) {
2620 				if (errlist != NULL) {
2621 					fnvlist_add_int32(errlist, propname,
2622 					    err);
2623 				}
2624 				rv = err;
2625 			}
2626 		}
2627 	}
2628 	nvlist_free(genericnvl);
2629 	nvlist_free(retrynvl);
2630 
2631 	return (rv);
2632 }
2633 
2634 /*
2635  * Check that all the properties are valid user properties.
2636  */
2637 static int
2638 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2639 {
2640 	nvpair_t *pair = NULL;
2641 	int error = 0;
2642 
2643 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2644 		const char *propname = nvpair_name(pair);
2645 
2646 		if (!zfs_prop_user(propname) ||
2647 		    nvpair_type(pair) != DATA_TYPE_STRING)
2648 			return (SET_ERROR(EINVAL));
2649 
2650 		if (error = zfs_secpolicy_write_perms(fsname,
2651 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2652 			return (error);
2653 
2654 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2655 			return (SET_ERROR(ENAMETOOLONG));
2656 
2657 		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
2658 			return (E2BIG);
2659 	}
2660 	return (0);
2661 }
2662 
2663 static void
2664 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2665 {
2666 	nvpair_t *pair;
2667 
2668 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2669 
2670 	pair = NULL;
2671 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2672 		if (nvlist_exists(skipped, nvpair_name(pair)))
2673 			continue;
2674 
2675 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2676 	}
2677 }
2678 
2679 static int
2680 clear_received_props(const char *dsname, nvlist_t *props,
2681     nvlist_t *skipped)
2682 {
2683 	int err = 0;
2684 	nvlist_t *cleared_props = NULL;
2685 	props_skip(props, skipped, &cleared_props);
2686 	if (!nvlist_empty(cleared_props)) {
2687 		/*
2688 		 * Acts on local properties until the dataset has received
2689 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2690 		 */
2691 		zprop_source_t flags = (ZPROP_SRC_NONE |
2692 		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2693 		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2694 	}
2695 	nvlist_free(cleared_props);
2696 	return (err);
2697 }
2698 
2699 /*
2700  * inputs:
2701  * zc_name		name of filesystem
2702  * zc_value		name of property to set
2703  * zc_nvlist_src{_size}	nvlist of properties to apply
2704  * zc_cookie		received properties flag
2705  *
2706  * outputs:
2707  * zc_nvlist_dst{_size} error for each unapplied received property
2708  */
2709 static int
2710 zfs_ioc_set_prop(zfs_cmd_t *zc)
2711 {
2712 	nvlist_t *nvl;
2713 	boolean_t received = zc->zc_cookie;
2714 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2715 	    ZPROP_SRC_LOCAL);
2716 	nvlist_t *errors;
2717 	int error;
2718 
2719 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2720 	    zc->zc_iflags, &nvl)) != 0)
2721 		return (error);
2722 
2723 	if (received) {
2724 		nvlist_t *origprops;
2725 
2726 		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2727 			(void) clear_received_props(zc->zc_name,
2728 			    origprops, nvl);
2729 			nvlist_free(origprops);
2730 		}
2731 
2732 		error = dsl_prop_set_hasrecvd(zc->zc_name);
2733 	}
2734 
2735 	errors = fnvlist_alloc();
2736 	if (error == 0)
2737 		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2738 
2739 	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2740 		(void) put_nvlist(zc, errors);
2741 	}
2742 
2743 	nvlist_free(errors);
2744 	nvlist_free(nvl);
2745 	return (error);
2746 }
2747 
2748 /*
2749  * inputs:
2750  * zc_name		name of filesystem
2751  * zc_value		name of property to inherit
2752  * zc_cookie		revert to received value if TRUE
2753  *
2754  * outputs:		none
2755  */
2756 static int
2757 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2758 {
2759 	const char *propname = zc->zc_value;
2760 	zfs_prop_t prop = zfs_name_to_prop(propname);
2761 	boolean_t received = zc->zc_cookie;
2762 	zprop_source_t source = (received
2763 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2764 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2765 
2766 	if (received) {
2767 		nvlist_t *dummy;
2768 		nvpair_t *pair;
2769 		zprop_type_t type;
2770 		int err;
2771 
2772 		/*
2773 		 * zfs_prop_set_special() expects properties in the form of an
2774 		 * nvpair with type info.
2775 		 */
2776 		if (prop == ZPROP_INVAL) {
2777 			if (!zfs_prop_user(propname))
2778 				return (SET_ERROR(EINVAL));
2779 
2780 			type = PROP_TYPE_STRING;
2781 		} else if (prop == ZFS_PROP_VOLSIZE ||
2782 		    prop == ZFS_PROP_VERSION) {
2783 			return (SET_ERROR(EINVAL));
2784 		} else {
2785 			type = zfs_prop_get_type(prop);
2786 		}
2787 
2788 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2789 
2790 		switch (type) {
2791 		case PROP_TYPE_STRING:
2792 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2793 			break;
2794 		case PROP_TYPE_NUMBER:
2795 		case PROP_TYPE_INDEX:
2796 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2797 			break;
2798 		default:
2799 			nvlist_free(dummy);
2800 			return (SET_ERROR(EINVAL));
2801 		}
2802 
2803 		pair = nvlist_next_nvpair(dummy, NULL);
2804 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2805 		nvlist_free(dummy);
2806 		if (err != -1)
2807 			return (err); /* special property already handled */
2808 	} else {
2809 		/*
2810 		 * Only check this in the non-received case. We want to allow
2811 		 * 'inherit -S' to revert non-inheritable properties like quota
2812 		 * and reservation to the received or default values even though
2813 		 * they are not considered inheritable.
2814 		 */
2815 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2816 			return (SET_ERROR(EINVAL));
2817 	}
2818 
2819 	/* property name has been validated by zfs_secpolicy_inherit_prop() */
2820 	return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2821 }
2822 
2823 static int
2824 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2825 {
2826 	nvlist_t *props;
2827 	spa_t *spa;
2828 	int error;
2829 	nvpair_t *pair;
2830 
2831 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2832 	    zc->zc_iflags, &props))
2833 		return (error);
2834 
2835 	/*
2836 	 * If the only property is the configfile, then just do a spa_lookup()
2837 	 * to handle the faulted case.
2838 	 */
2839 	pair = nvlist_next_nvpair(props, NULL);
2840 	if (pair != NULL && strcmp(nvpair_name(pair),
2841 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2842 	    nvlist_next_nvpair(props, pair) == NULL) {
2843 		mutex_enter(&spa_namespace_lock);
2844 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2845 			spa_configfile_set(spa, props, B_FALSE);
2846 			spa_config_sync(spa, B_FALSE, B_TRUE);
2847 		}
2848 		mutex_exit(&spa_namespace_lock);
2849 		if (spa != NULL) {
2850 			nvlist_free(props);
2851 			return (0);
2852 		}
2853 	}
2854 
2855 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2856 		nvlist_free(props);
2857 		return (error);
2858 	}
2859 
2860 	error = spa_prop_set(spa, props);
2861 
2862 	nvlist_free(props);
2863 	spa_close(spa, FTAG);
2864 
2865 	return (error);
2866 }
2867 
2868 static int
2869 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2870 {
2871 	spa_t *spa;
2872 	int error;
2873 	nvlist_t *nvp = NULL;
2874 
2875 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2876 		/*
2877 		 * If the pool is faulted, there may be properties we can still
2878 		 * get (such as altroot and cachefile), so attempt to get them
2879 		 * anyway.
2880 		 */
2881 		mutex_enter(&spa_namespace_lock);
2882 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2883 			error = spa_prop_get(spa, &nvp);
2884 		mutex_exit(&spa_namespace_lock);
2885 	} else {
2886 		error = spa_prop_get(spa, &nvp);
2887 		spa_close(spa, FTAG);
2888 	}
2889 
2890 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2891 		error = put_nvlist(zc, nvp);
2892 	else
2893 		error = SET_ERROR(EFAULT);
2894 
2895 	nvlist_free(nvp);
2896 	return (error);
2897 }
2898 
2899 /*
2900  * inputs:
2901  * zc_name		name of filesystem
2902  * zc_nvlist_src{_size}	nvlist of delegated permissions
2903  * zc_perm_action	allow/unallow flag
2904  *
2905  * outputs:		none
2906  */
2907 static int
2908 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2909 {
2910 	int error;
2911 	nvlist_t *fsaclnv = NULL;
2912 
2913 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2914 	    zc->zc_iflags, &fsaclnv)) != 0)
2915 		return (error);
2916 
2917 	/*
2918 	 * Verify nvlist is constructed correctly
2919 	 */
2920 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2921 		nvlist_free(fsaclnv);
2922 		return (SET_ERROR(EINVAL));
2923 	}
2924 
2925 	/*
2926 	 * If we don't have PRIV_SYS_MOUNT, then validate
2927 	 * that user is allowed to hand out each permission in
2928 	 * the nvlist(s)
2929 	 */
2930 
2931 	error = secpolicy_zfs(CRED());
2932 	if (error != 0) {
2933 		if (zc->zc_perm_action == B_FALSE) {
2934 			error = dsl_deleg_can_allow(zc->zc_name,
2935 			    fsaclnv, CRED());
2936 		} else {
2937 			error = dsl_deleg_can_unallow(zc->zc_name,
2938 			    fsaclnv, CRED());
2939 		}
2940 	}
2941 
2942 	if (error == 0)
2943 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2944 
2945 	nvlist_free(fsaclnv);
2946 	return (error);
2947 }
2948 
2949 /*
2950  * inputs:
2951  * zc_name		name of filesystem
2952  *
2953  * outputs:
2954  * zc_nvlist_src{_size}	nvlist of delegated permissions
2955  */
2956 static int
2957 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2958 {
2959 	nvlist_t *nvp;
2960 	int error;
2961 
2962 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2963 		error = put_nvlist(zc, nvp);
2964 		nvlist_free(nvp);
2965 	}
2966 
2967 	return (error);
2968 }
2969 
2970 /*
2971  * Search the vfs list for a specified resource.  Returns a pointer to it
2972  * or NULL if no suitable entry is found. The caller of this routine
2973  * is responsible for releasing the returned vfs pointer.
2974  */
2975 static vfs_t *
2976 zfs_get_vfs(const char *resource)
2977 {
2978 	struct vfs *vfsp;
2979 	struct vfs *vfs_found = NULL;
2980 
2981 	vfs_list_read_lock();
2982 	vfsp = rootvfs;
2983 	do {
2984 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2985 			VFS_HOLD(vfsp);
2986 			vfs_found = vfsp;
2987 			break;
2988 		}
2989 		vfsp = vfsp->vfs_next;
2990 	} while (vfsp != rootvfs);
2991 	vfs_list_unlock();
2992 	return (vfs_found);
2993 }
2994 
2995 /* ARGSUSED */
2996 static void
2997 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2998 {
2999 	zfs_creat_t *zct = arg;
3000 
3001 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3002 }
3003 
3004 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3005 
3006 /*
3007  * inputs:
3008  * os			parent objset pointer (NULL if root fs)
3009  * fuids_ok		fuids allowed in this version of the spa?
3010  * sa_ok		SAs allowed in this version of the spa?
3011  * createprops		list of properties requested by creator
3012  *
3013  * outputs:
3014  * zplprops	values for the zplprops we attach to the master node object
3015  * is_ci	true if requested file system will be purely case-insensitive
3016  *
3017  * Determine the settings for utf8only, normalization and
3018  * casesensitivity.  Specific values may have been requested by the
3019  * creator and/or we can inherit values from the parent dataset.  If
3020  * the file system is of too early a vintage, a creator can not
3021  * request settings for these properties, even if the requested
3022  * setting is the default value.  We don't actually want to create dsl
3023  * properties for these, so remove them from the source nvlist after
3024  * processing.
3025  */
3026 static int
3027 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3028     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3029     nvlist_t *zplprops, boolean_t *is_ci)
3030 {
3031 	uint64_t sense = ZFS_PROP_UNDEFINED;
3032 	uint64_t norm = ZFS_PROP_UNDEFINED;
3033 	uint64_t u8 = ZFS_PROP_UNDEFINED;
3034 
3035 	ASSERT(zplprops != NULL);
3036 
3037 	/*
3038 	 * Pull out creator prop choices, if any.
3039 	 */
3040 	if (createprops) {
3041 		(void) nvlist_lookup_uint64(createprops,
3042 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3043 		(void) nvlist_lookup_uint64(createprops,
3044 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3045 		(void) nvlist_remove_all(createprops,
3046 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3047 		(void) nvlist_lookup_uint64(createprops,
3048 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3049 		(void) nvlist_remove_all(createprops,
3050 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3051 		(void) nvlist_lookup_uint64(createprops,
3052 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3053 		(void) nvlist_remove_all(createprops,
3054 		    zfs_prop_to_name(ZFS_PROP_CASE));
3055 	}
3056 
3057 	/*
3058 	 * If the zpl version requested is whacky or the file system
3059 	 * or pool is version is too "young" to support normalization
3060 	 * and the creator tried to set a value for one of the props,
3061 	 * error out.
3062 	 */
3063 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3064 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3065 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3066 	    (zplver < ZPL_VERSION_NORMALIZATION &&
3067 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3068 	    sense != ZFS_PROP_UNDEFINED)))
3069 		return (SET_ERROR(ENOTSUP));
3070 
3071 	/*
3072 	 * Put the version in the zplprops
3073 	 */
3074 	VERIFY(nvlist_add_uint64(zplprops,
3075 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3076 
3077 	if (norm == ZFS_PROP_UNDEFINED)
3078 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3079 	VERIFY(nvlist_add_uint64(zplprops,
3080 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3081 
3082 	/*
3083 	 * If we're normalizing, names must always be valid UTF-8 strings.
3084 	 */
3085 	if (norm)
3086 		u8 = 1;
3087 	if (u8 == ZFS_PROP_UNDEFINED)
3088 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3089 	VERIFY(nvlist_add_uint64(zplprops,
3090 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3091 
3092 	if (sense == ZFS_PROP_UNDEFINED)
3093 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3094 	VERIFY(nvlist_add_uint64(zplprops,
3095 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3096 
3097 	if (is_ci)
3098 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3099 
3100 	return (0);
3101 }
3102 
3103 static int
3104 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3105     nvlist_t *zplprops, boolean_t *is_ci)
3106 {
3107 	boolean_t fuids_ok, sa_ok;
3108 	uint64_t zplver = ZPL_VERSION;
3109 	objset_t *os = NULL;
3110 	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3111 	char *cp;
3112 	spa_t *spa;
3113 	uint64_t spa_vers;
3114 	int error;
3115 
3116 	(void) strlcpy(parentname, dataset, sizeof (parentname));
3117 	cp = strrchr(parentname, '/');
3118 	ASSERT(cp != NULL);
3119 	cp[0] = '\0';
3120 
3121 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3122 		return (error);
3123 
3124 	spa_vers = spa_version(spa);
3125 	spa_close(spa, FTAG);
3126 
3127 	zplver = zfs_zpl_version_map(spa_vers);
3128 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3129 	sa_ok = (zplver >= ZPL_VERSION_SA);
3130 
3131 	/*
3132 	 * Open parent object set so we can inherit zplprop values.
3133 	 */
3134 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3135 		return (error);
3136 
3137 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3138 	    zplprops, is_ci);
3139 	dmu_objset_rele(os, FTAG);
3140 	return (error);
3141 }
3142 
3143 static int
3144 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3145     nvlist_t *zplprops, boolean_t *is_ci)
3146 {
3147 	boolean_t fuids_ok;
3148 	boolean_t sa_ok;
3149 	uint64_t zplver = ZPL_VERSION;
3150 	int error;
3151 
3152 	zplver = zfs_zpl_version_map(spa_vers);
3153 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3154 	sa_ok = (zplver >= ZPL_VERSION_SA);
3155 
3156 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3157 	    createprops, zplprops, is_ci);
3158 	return (error);
3159 }
3160 
3161 /*
3162  * innvl: {
3163  *     "type" -> dmu_objset_type_t (int32)
3164  *     (optional) "props" -> { prop -> value }
3165  * }
3166  *
3167  * outnvl: propname -> error code (int32)
3168  */
3169 static int
3170 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3171 {
3172 	int error = 0;
3173 	zfs_creat_t zct = { 0 };
3174 	nvlist_t *nvprops = NULL;
3175 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3176 	int32_t type32;
3177 	dmu_objset_type_t type;
3178 	boolean_t is_insensitive = B_FALSE;
3179 
3180 	if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3181 		return (SET_ERROR(EINVAL));
3182 	type = type32;
3183 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3184 
3185 	switch (type) {
3186 	case DMU_OST_ZFS:
3187 		cbfunc = zfs_create_cb;
3188 		break;
3189 
3190 	case DMU_OST_ZVOL:
3191 		cbfunc = zvol_create_cb;
3192 		break;
3193 
3194 	default:
3195 		cbfunc = NULL;
3196 		break;
3197 	}
3198 	if (strchr(fsname, '@') ||
3199 	    strchr(fsname, '%'))
3200 		return (SET_ERROR(EINVAL));
3201 
3202 	zct.zct_props = nvprops;
3203 
3204 	if (cbfunc == NULL)
3205 		return (SET_ERROR(EINVAL));
3206 
3207 	if (type == DMU_OST_ZVOL) {
3208 		uint64_t volsize, volblocksize;
3209 
3210 		if (nvprops == NULL)
3211 			return (SET_ERROR(EINVAL));
3212 		if (nvlist_lookup_uint64(nvprops,
3213 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3214 			return (SET_ERROR(EINVAL));
3215 
3216 		if ((error = nvlist_lookup_uint64(nvprops,
3217 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3218 		    &volblocksize)) != 0 && error != ENOENT)
3219 			return (SET_ERROR(EINVAL));
3220 
3221 		if (error != 0)
3222 			volblocksize = zfs_prop_default_numeric(
3223 			    ZFS_PROP_VOLBLOCKSIZE);
3224 
3225 		if ((error = zvol_check_volblocksize(
3226 		    volblocksize)) != 0 ||
3227 		    (error = zvol_check_volsize(volsize,
3228 		    volblocksize)) != 0)
3229 			return (error);
3230 	} else if (type == DMU_OST_ZFS) {
3231 		int error;
3232 
3233 		/*
3234 		 * We have to have normalization and
3235 		 * case-folding flags correct when we do the
3236 		 * file system creation, so go figure them out
3237 		 * now.
3238 		 */
3239 		VERIFY(nvlist_alloc(&zct.zct_zplprops,
3240 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
3241 		error = zfs_fill_zplprops(fsname, nvprops,
3242 		    zct.zct_zplprops, &is_insensitive);
3243 		if (error != 0) {
3244 			nvlist_free(zct.zct_zplprops);
3245 			return (error);
3246 		}
3247 	}
3248 
3249 	error = dmu_objset_create(fsname, type,
3250 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3251 	nvlist_free(zct.zct_zplprops);
3252 
3253 	/*
3254 	 * It would be nice to do this atomically.
3255 	 */
3256 	if (error == 0) {
3257 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3258 		    nvprops, outnvl);
3259 		if (error != 0)
3260 			(void) dsl_destroy_head(fsname);
3261 	}
3262 	return (error);
3263 }
3264 
3265 /*
3266  * innvl: {
3267  *     "origin" -> name of origin snapshot
3268  *     (optional) "props" -> { prop -> value }
3269  * }
3270  *
3271  * outnvl: propname -> error code (int32)
3272  */
3273 static int
3274 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3275 {
3276 	int error = 0;
3277 	nvlist_t *nvprops = NULL;
3278 	char *origin_name;
3279 
3280 	if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3281 		return (SET_ERROR(EINVAL));
3282 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3283 
3284 	if (strchr(fsname, '@') ||
3285 	    strchr(fsname, '%'))
3286 		return (SET_ERROR(EINVAL));
3287 
3288 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3289 		return (SET_ERROR(EINVAL));
3290 	error = dmu_objset_clone(fsname, origin_name);
3291 	if (error != 0)
3292 		return (error);
3293 
3294 	/*
3295 	 * It would be nice to do this atomically.
3296 	 */
3297 	if (error == 0) {
3298 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3299 		    nvprops, outnvl);
3300 		if (error != 0)
3301 			(void) dsl_destroy_head(fsname);
3302 	}
3303 	return (error);
3304 }
3305 
3306 /*
3307  * innvl: {
3308  *     "snaps" -> { snapshot1, snapshot2 }
3309  *     (optional) "props" -> { prop -> value (string) }
3310  * }
3311  *
3312  * outnvl: snapshot -> error code (int32)
3313  */
3314 static int
3315 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3316 {
3317 	nvlist_t *snaps;
3318 	nvlist_t *props = NULL;
3319 	int error, poollen;
3320 	nvpair_t *pair;
3321 
3322 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
3323 	if ((error = zfs_check_userprops(poolname, props)) != 0)
3324 		return (error);
3325 
3326 	if (!nvlist_empty(props) &&
3327 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3328 		return (SET_ERROR(ENOTSUP));
3329 
3330 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3331 		return (SET_ERROR(EINVAL));
3332 	poollen = strlen(poolname);
3333 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3334 	    pair = nvlist_next_nvpair(snaps, pair)) {
3335 		const char *name = nvpair_name(pair);
3336 		const char *cp = strchr(name, '@');
3337 
3338 		/*
3339 		 * The snap name must contain an @, and the part after it must
3340 		 * contain only valid characters.
3341 		 */
3342 		if (cp == NULL ||
3343 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3344 			return (SET_ERROR(EINVAL));
3345 
3346 		/*
3347 		 * The snap must be in the specified pool.
3348 		 */
3349 		if (strncmp(name, poolname, poollen) != 0 ||
3350 		    (name[poollen] != '/' && name[poollen] != '@'))
3351 			return (SET_ERROR(EXDEV));
3352 
3353 		/* This must be the only snap of this fs. */
3354 		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3355 		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3356 			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3357 			    == 0) {
3358 				return (SET_ERROR(EXDEV));
3359 			}
3360 		}
3361 	}
3362 
3363 	error = dsl_dataset_snapshot(snaps, props, outnvl);
3364 	return (error);
3365 }
3366 
3367 /*
3368  * innvl: "message" -> string
3369  */
3370 /* ARGSUSED */
3371 static int
3372 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3373 {
3374 	char *message;
3375 	spa_t *spa;
3376 	int error;
3377 	char *poolname;
3378 
3379 	/*
3380 	 * The poolname in the ioctl is not set, we get it from the TSD,
3381 	 * which was set at the end of the last successful ioctl that allows
3382 	 * logging.  The secpolicy func already checked that it is set.
3383 	 * Only one log ioctl is allowed after each successful ioctl, so
3384 	 * we clear the TSD here.
3385 	 */
3386 	poolname = tsd_get(zfs_allow_log_key);
3387 	(void) tsd_set(zfs_allow_log_key, NULL);
3388 	error = spa_open(poolname, &spa, FTAG);
3389 	strfree(poolname);
3390 	if (error != 0)
3391 		return (error);
3392 
3393 	if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3394 		spa_close(spa, FTAG);
3395 		return (SET_ERROR(EINVAL));
3396 	}
3397 
3398 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3399 		spa_close(spa, FTAG);
3400 		return (SET_ERROR(ENOTSUP));
3401 	}
3402 
3403 	error = spa_history_log(spa, message);
3404 	spa_close(spa, FTAG);
3405 	return (error);
3406 }
3407 
3408 /*
3409  * The dp_config_rwlock must not be held when calling this, because the
3410  * unmount may need to write out data.
3411  *
3412  * This function is best-effort.  Callers must deal gracefully if it
3413  * remains mounted (or is remounted after this call).
3414  *
3415  * Returns 0 if the argument is not a snapshot, or it is not currently a
3416  * filesystem, or we were able to unmount it.  Returns error code otherwise.
3417  */
3418 int
3419 zfs_unmount_snap(const char *snapname)
3420 {
3421 	vfs_t *vfsp;
3422 	zfsvfs_t *zfsvfs;
3423 	int err;
3424 
3425 	if (strchr(snapname, '@') == NULL)
3426 		return (0);
3427 
3428 	vfsp = zfs_get_vfs(snapname);
3429 	if (vfsp == NULL)
3430 		return (0);
3431 
3432 	zfsvfs = vfsp->vfs_data;
3433 	ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3434 
3435 	err = vn_vfswlock(vfsp->vfs_vnodecovered);
3436 	VFS_RELE(vfsp);
3437 	if (err != 0)
3438 		return (SET_ERROR(err));
3439 
3440 	/*
3441 	 * Always force the unmount for snapshots.
3442 	 */
3443 	(void) dounmount(vfsp, MS_FORCE, kcred);
3444 	return (0);
3445 }
3446 
/*
 * Adapter that gives zfs_unmount_snap() a (name, arg) callback signature.
 * The second argument is ignored.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	int err = zfs_unmount_snap(snapname);

	return (err);
}
3453 
3454 /*
3455  * When a clone is destroyed, its origin may also need to be destroyed,
3456  * in which case it must be unmounted.  This routine will do that unmount
3457  * if necessary.
3458  */
3459 void
3460 zfs_destroy_unmount_origin(const char *fsname)
3461 {
3462 	int error;
3463 	objset_t *os;
3464 	dsl_dataset_t *ds;
3465 
3466 	error = dmu_objset_hold(fsname, FTAG, &os);
3467 	if (error != 0)
3468 		return;
3469 	ds = dmu_objset_ds(os);
3470 	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3471 		char originname[ZFS_MAX_DATASET_NAME_LEN];
3472 		dsl_dataset_name(ds->ds_prev, originname);
3473 		dmu_objset_rele(os, FTAG);
3474 		(void) zfs_unmount_snap(originname);
3475 	} else {
3476 		dmu_objset_rele(os, FTAG);
3477 	}
3478 }
3479 
3480 /*
3481  * innvl: {
3482  *     "snaps" -> { snapshot1, snapshot2 }
3483  *     (optional boolean) "defer"
3484  * }
3485  *
3486  * outnvl: snapshot -> error code (int32)
3487  *
3488  */
3489 /* ARGSUSED */
3490 static int
3491 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3492 {
3493 	nvlist_t *snaps;
3494 	nvpair_t *pair;
3495 	boolean_t defer;
3496 
3497 	if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3498 		return (SET_ERROR(EINVAL));
3499 	defer = nvlist_exists(innvl, "defer");
3500 
3501 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3502 	    pair = nvlist_next_nvpair(snaps, pair)) {
3503 		(void) zfs_unmount_snap(nvpair_name(pair));
3504 	}
3505 
3506 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3507 }
3508 
3509 /*
3510  * Create bookmarks.  Bookmark names are of the form <fs>#<bmark>.
3511  * All bookmarks must be in the same pool.
3512  *
3513  * innvl: {
3514  *     bookmark1 -> snapshot1, bookmark2 -> snapshot2
3515  * }
3516  *
3517  * outnvl: bookmark -> error code (int32)
3518  *
3519  */
3520 /* ARGSUSED */
3521 static int
3522 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3523 {
3524 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3525 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3526 		char *snap_name;
3527 
3528 		/*
3529 		 * Verify the snapshot argument.
3530 		 */
3531 		if (nvpair_value_string(pair, &snap_name) != 0)
3532 			return (SET_ERROR(EINVAL));
3533 
3534 
3535 		/* Verify that the keys (bookmarks) are unique */
3536 		for (nvpair_t *pair2 = nvlist_next_nvpair(innvl, pair);
3537 		    pair2 != NULL; pair2 = nvlist_next_nvpair(innvl, pair2)) {
3538 			if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
3539 				return (SET_ERROR(EINVAL));
3540 		}
3541 	}
3542 
3543 	return (dsl_bookmark_create(innvl, outnvl));
3544 }
3545 
3546 /*
3547  * innvl: {
3548  *     property 1, property 2, ...
3549  * }
3550  *
3551  * outnvl: {
3552  *     bookmark name 1 -> { property 1, property 2, ... },
3553  *     bookmark name 2 -> { property 1, property 2, ... }
3554  * }
3555  *
3556  */
3557 static int
3558 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3559 {
3560 	return (dsl_get_bookmarks(fsname, innvl, outnvl));
3561 }
3562 
3563 /*
3564  * innvl: {
3565  *     bookmark name 1, bookmark name 2
3566  * }
3567  *
3568  * outnvl: bookmark -> error code (int32)
3569  *
3570  */
3571 static int
3572 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
3573     nvlist_t *outnvl)
3574 {
3575 	int error, poollen;
3576 
3577 	poollen = strlen(poolname);
3578 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
3579 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
3580 		const char *name = nvpair_name(pair);
3581 		const char *cp = strchr(name, '#');
3582 
3583 		/*
3584 		 * The bookmark name must contain an #, and the part after it
3585 		 * must contain only valid characters.
3586 		 */
3587 		if (cp == NULL ||
3588 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
3589 			return (SET_ERROR(EINVAL));
3590 
3591 		/*
3592 		 * The bookmark must be in the specified pool.
3593 		 */
3594 		if (strncmp(name, poolname, poollen) != 0 ||
3595 		    (name[poollen] != '/' && name[poollen] != '#'))
3596 			return (SET_ERROR(EXDEV));
3597 	}
3598 
3599 	error = dsl_bookmark_destroy(innvl, outnvl);
3600 	return (error);
3601 }
3602 
3603 /*
3604  * inputs:
3605  * zc_name		name of dataset to destroy
3606  * zc_objset_type	type of objset
3607  * zc_defer_destroy	mark for deferred destroy
3608  *
3609  * outputs:		none
3610  */
3611 static int
3612 zfs_ioc_destroy(zfs_cmd_t *zc)
3613 {
3614 	int err;
3615 
3616 	if (zc->zc_objset_type == DMU_OST_ZFS) {
3617 		err = zfs_unmount_snap(zc->zc_name);
3618 		if (err != 0)
3619 			return (err);
3620 	}
3621 
3622 	if (strchr(zc->zc_name, '@'))
3623 		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3624 	else
3625 		err = dsl_destroy_head(zc->zc_name);
3626 	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3627 		(void) zvol_remove_minor(zc->zc_name);
3628 	return (err);
3629 }
3630 
3631 /*
3632  * fsname is name of dataset to rollback (to most recent snapshot)
3633  *
3634  * innvl is not used.
3635  *
3636  * outnvl: "target" -> name of most recent snapshot
3637  * }
3638  */
3639 /* ARGSUSED */
3640 static int
3641 zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
3642 {
3643 	zfsvfs_t *zfsvfs;
3644 	int error;
3645 
3646 	if (getzfsvfs(fsname, &zfsvfs) == 0) {
3647 		error = zfs_suspend_fs(zfsvfs);
3648 		if (error == 0) {
3649 			int resume_err;
3650 
3651 			error = dsl_dataset_rollback(fsname, zfsvfs, outnvl);
3652 			resume_err = zfs_resume_fs(zfsvfs, fsname);
3653 			error = error ? error : resume_err;
3654 		}
3655 		VFS_RELE(zfsvfs->z_vfs);
3656 	} else {
3657 		error = dsl_dataset_rollback(fsname, NULL, outnvl);
3658 	}
3659 	return (error);
3660 }
3661 
3662 static int
3663 recursive_unmount(const char *fsname, void *arg)
3664 {
3665 	const char *snapname = arg;
3666 	char fullname[ZFS_MAX_DATASET_NAME_LEN];
3667 
3668 	(void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3669 	return (zfs_unmount_snap(fullname));
3670 }
3671 
3672 /*
3673  * inputs:
3674  * zc_name	old name of dataset
3675  * zc_value	new name of dataset
3676  * zc_cookie	recursive flag (only valid for snapshots)
3677  *
3678  * outputs:	none
3679  */
3680 static int
3681 zfs_ioc_rename(zfs_cmd_t *zc)
3682 {
3683 	boolean_t recursive = zc->zc_cookie & 1;
3684 	char *at;
3685 
3686 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3687 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3688 	    strchr(zc->zc_value, '%'))
3689 		return (SET_ERROR(EINVAL));
3690 
3691 	at = strchr(zc->zc_name, '@');
3692 	if (at != NULL) {
3693 		/* snaps must be in same fs */
3694 		int error;
3695 
3696 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3697 			return (SET_ERROR(EXDEV));
3698 		*at = '\0';
3699 		if (zc->zc_objset_type == DMU_OST_ZFS) {
3700 			error = dmu_objset_find(zc->zc_name,
3701 			    recursive_unmount, at + 1,
3702 			    recursive ? DS_FIND_CHILDREN : 0);
3703 			if (error != 0) {
3704 				*at = '@';
3705 				return (error);
3706 			}
3707 		}
3708 		error = dsl_dataset_rename_snapshot(zc->zc_name,
3709 		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3710 		*at = '@';
3711 
3712 		return (error);
3713 	} else {
3714 		if (zc->zc_objset_type == DMU_OST_ZVOL)
3715 			(void) zvol_remove_minor(zc->zc_name);
3716 		return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3717 	}
3718 }
3719 
3720 static int
3721 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3722 {
3723 	const char *propname = nvpair_name(pair);
3724 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3725 	zfs_prop_t prop = zfs_name_to_prop(propname);
3726 	uint64_t intval;
3727 	int err;
3728 
3729 	if (prop == ZPROP_INVAL) {
3730 		if (zfs_prop_user(propname)) {
3731 			if (err = zfs_secpolicy_write_perms(dsname,
3732 			    ZFS_DELEG_PERM_USERPROP, cr))
3733 				return (err);
3734 			return (0);
3735 		}
3736 
3737 		if (!issnap && zfs_prop_userquota(propname)) {
3738 			const char *perm = NULL;
3739 			const char *uq_prefix =
3740 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3741 			const char *gq_prefix =
3742 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3743 
3744 			if (strncmp(propname, uq_prefix,
3745 			    strlen(uq_prefix)) == 0) {
3746 				perm = ZFS_DELEG_PERM_USERQUOTA;
3747 			} else if (strncmp(propname, gq_prefix,
3748 			    strlen(gq_prefix)) == 0) {
3749 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3750 			} else {
3751 				/* USERUSED and GROUPUSED are read-only */
3752 				return (SET_ERROR(EINVAL));
3753 			}
3754 
3755 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3756 				return (err);
3757 			return (0);
3758 		}
3759 
3760 		return (SET_ERROR(EINVAL));
3761 	}
3762 
3763 	if (issnap)
3764 		return (SET_ERROR(EINVAL));
3765 
3766 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3767 		/*
3768 		 * dsl_prop_get_all_impl() returns properties in this
3769 		 * format.
3770 		 */
3771 		nvlist_t *attrs;
3772 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3773 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3774 		    &pair) == 0);
3775 	}
3776 
3777 	/*
3778 	 * Check that this value is valid for this pool version
3779 	 */
3780 	switch (prop) {
3781 	case ZFS_PROP_COMPRESSION:
3782 		/*
3783 		 * If the user specified gzip compression, make sure
3784 		 * the SPA supports it. We ignore any errors here since
3785 		 * we'll catch them later.
3786 		 */
3787 		if (nvpair_value_uint64(pair, &intval) == 0) {
3788 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3789 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3790 			    zfs_earlier_version(dsname,
3791 			    SPA_VERSION_GZIP_COMPRESSION)) {
3792 				return (SET_ERROR(ENOTSUP));
3793 			}
3794 
3795 			if (intval == ZIO_COMPRESS_ZLE &&
3796 			    zfs_earlier_version(dsname,
3797 			    SPA_VERSION_ZLE_COMPRESSION))
3798 				return (SET_ERROR(ENOTSUP));
3799 
3800 			if (intval == ZIO_COMPRESS_LZ4) {
3801 				spa_t *spa;
3802 
3803 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3804 					return (err);
3805 
3806 				if (!spa_feature_is_enabled(spa,
3807 				    SPA_FEATURE_LZ4_COMPRESS)) {
3808 					spa_close(spa, FTAG);
3809 					return (SET_ERROR(ENOTSUP));
3810 				}
3811 				spa_close(spa, FTAG);
3812 			}
3813 
3814 			/*
3815 			 * If this is a bootable dataset then
3816 			 * verify that the compression algorithm
3817 			 * is supported for booting. We must return
3818 			 * something other than ENOTSUP since it
3819 			 * implies a downrev pool version.
3820 			 */
3821 			if (zfs_is_bootfs(dsname) &&
3822 			    !BOOTFS_COMPRESS_VALID(intval)) {
3823 				return (SET_ERROR(ERANGE));
3824 			}
3825 		}
3826 		break;
3827 
3828 	case ZFS_PROP_COPIES:
3829 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3830 			return (SET_ERROR(ENOTSUP));
3831 		break;
3832 
3833 	case ZFS_PROP_RECORDSIZE:
3834 		/* Record sizes above 128k need the feature to be enabled */
3835 		if (nvpair_value_uint64(pair, &intval) == 0 &&
3836 		    intval > SPA_OLD_MAXBLOCKSIZE) {
3837 			spa_t *spa;
3838 
3839 			/*
3840 			 * If this is a bootable dataset then
3841 			 * the we don't allow large (>128K) blocks,
3842 			 * because GRUB doesn't support them.
3843 			 */
3844 			if (zfs_is_bootfs(dsname) &&
3845 			    intval > SPA_OLD_MAXBLOCKSIZE) {
3846 				return (SET_ERROR(ERANGE));
3847 			}
3848 
3849 			/*
3850 			 * We don't allow setting the property above 1MB,
3851 			 * unless the tunable has been changed.
3852 			 */
3853 			if (intval > zfs_max_recordsize ||
3854 			    intval > SPA_MAXBLOCKSIZE)
3855 				return (SET_ERROR(ERANGE));
3856 
3857 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3858 				return (err);
3859 
3860 			if (!spa_feature_is_enabled(spa,
3861 			    SPA_FEATURE_LARGE_BLOCKS)) {
3862 				spa_close(spa, FTAG);
3863 				return (SET_ERROR(ENOTSUP));
3864 			}
3865 			spa_close(spa, FTAG);
3866 		}
3867 		break;
3868 
3869 	case ZFS_PROP_SHARESMB:
3870 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3871 			return (SET_ERROR(ENOTSUP));
3872 		break;
3873 
3874 	case ZFS_PROP_ACLINHERIT:
3875 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3876 		    nvpair_value_uint64(pair, &intval) == 0) {
3877 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3878 			    zfs_earlier_version(dsname,
3879 			    SPA_VERSION_PASSTHROUGH_X))
3880 				return (SET_ERROR(ENOTSUP));
3881 		}
3882 		break;
3883 
3884 	case ZFS_PROP_CHECKSUM:
3885 	case ZFS_PROP_DEDUP:
3886 	{
3887 		spa_feature_t feature;
3888 		spa_t *spa;
3889 
3890 		/* dedup feature version checks */
3891 		if (prop == ZFS_PROP_DEDUP &&
3892 		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3893 			return (SET_ERROR(ENOTSUP));
3894 
3895 		if (nvpair_value_uint64(pair, &intval) != 0)
3896 			return (SET_ERROR(EINVAL));
3897 
3898 		/* check prop value is enabled in features */
3899 		feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
3900 		if (feature == SPA_FEATURE_NONE)
3901 			break;
3902 
3903 		if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3904 			return (err);
3905 		/*
3906 		 * Salted checksums are not supported on root pools.
3907 		 */
3908 		if (spa_bootfs(spa) != 0 &&
3909 		    intval < ZIO_CHECKSUM_FUNCTIONS &&
3910 		    (zio_checksum_table[intval].ci_flags &
3911 		    ZCHECKSUM_FLAG_SALTED)) {
3912 			spa_close(spa, FTAG);
3913 			return (SET_ERROR(ERANGE));
3914 		}
3915 		if (!spa_feature_is_enabled(spa, feature)) {
3916 			spa_close(spa, FTAG);
3917 			return (SET_ERROR(ENOTSUP));
3918 		}
3919 		spa_close(spa, FTAG);
3920 		break;
3921 	}
3922 	}
3923 
3924 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3925 }
3926 
3927 /*
3928  * Checks for a race condition to make sure we don't increment a feature flag
3929  * multiple times.
3930  */
3931 static int
3932 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
3933 {
3934 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
3935 	spa_feature_t *featurep = arg;
3936 
3937 	if (!spa_feature_is_active(spa, *featurep))
3938 		return (0);
3939 	else
3940 		return (SET_ERROR(EBUSY));
3941 }
3942 
3943 /*
3944  * The callback invoked on feature activation in the sync task caused by
3945  * zfs_prop_activate_feature.
3946  */
3947 static void
3948 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
3949 {
3950 	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
3951 	spa_feature_t *featurep = arg;
3952 
3953 	spa_feature_incr(spa, *featurep, tx);
3954 }
3955 
3956 /*
3957  * Activates a feature on a pool in response to a property setting. This
3958  * creates a new sync task which modifies the pool to reflect the feature
3959  * as being active.
3960  */
3961 static int
3962 zfs_prop_activate_feature(spa_t *spa, spa_feature_t feature)
3963 {
3964 	int err;
3965 
3966 	/* EBUSY here indicates that the feature is already active */
3967 	err = dsl_sync_task(spa_name(spa),
3968 	    zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
3969 	    &feature, 2, ZFS_SPACE_CHECK_RESERVED);
3970 
3971 	if (err != 0 && err != EBUSY)
3972 		return (err);
3973 	else
3974 		return (0);
3975 }
3976 
3977 /*
3978  * Removes properties from the given props list that fail permission checks
3979  * needed to clear them and to restore them in case of a receive error. For each
3980  * property, make sure we have both set and inherit permissions.
3981  *
3982  * Returns the first error encountered if any permission checks fail. If the
3983  * caller provides a non-NULL errlist, it also gives the complete list of names
3984  * of all the properties that failed a permission check along with the
3985  * corresponding error numbers. The caller is responsible for freeing the
3986  * returned errlist.
3987  *
3988  * If every property checks out successfully, zero is returned and the list
3989  * pointed at by errlist is NULL.
3990  */
3991 static int
3992 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3993 {
3994 	zfs_cmd_t *zc;
3995 	nvpair_t *pair, *next_pair;
3996 	nvlist_t *errors;
3997 	int err, rv = 0;
3998 
3999 	if (props == NULL)
4000 		return (0);
4001 
4002 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4003 
4004 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
4005 	(void) strcpy(zc->zc_name, dataset);
4006 	pair = nvlist_next_nvpair(props, NULL);
4007 	while (pair != NULL) {
4008 		next_pair = nvlist_next_nvpair(props, pair);
4009 
4010 		(void) strcpy(zc->zc_value, nvpair_name(pair));
4011 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
4012 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
4013 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
4014 			VERIFY(nvlist_add_int32(errors,
4015 			    zc->zc_value, err) == 0);
4016 		}
4017 		pair = next_pair;
4018 	}
4019 	kmem_free(zc, sizeof (zfs_cmd_t));
4020 
4021 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
4022 		nvlist_free(errors);
4023 		errors = NULL;
4024 	} else {
4025 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
4026 	}
4027 
4028 	if (errlist == NULL)
4029 		nvlist_free(errors);
4030 	else
4031 		*errlist = errors;
4032 
4033 	return (rv);
4034 }
4035 
4036 static boolean_t
4037 propval_equals(nvpair_t *p1, nvpair_t *p2)
4038 {
4039 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
4040 		/* dsl_prop_get_all_impl() format */
4041 		nvlist_t *attrs;
4042 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
4043 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4044 		    &p1) == 0);
4045 	}
4046 
4047 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
4048 		nvlist_t *attrs;
4049 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
4050 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
4051 		    &p2) == 0);
4052 	}
4053 
4054 	if (nvpair_type(p1) != nvpair_type(p2))
4055 		return (B_FALSE);
4056 
4057 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
4058 		char *valstr1, *valstr2;
4059 
4060 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
4061 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
4062 		return (strcmp(valstr1, valstr2) == 0);
4063 	} else {
4064 		uint64_t intval1, intval2;
4065 
4066 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
4067 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
4068 		return (intval1 == intval2);
4069 	}
4070 }
4071 
4072 /*
4073  * Remove properties from props if they are not going to change (as determined
4074  * by comparison with origprops). Remove them from origprops as well, since we
4075  * do not need to clear or restore properties that won't change.
4076  */
4077 static void
4078 props_reduce(nvlist_t *props, nvlist_t *origprops)
4079 {
4080 	nvpair_t *pair, *next_pair;
4081 
4082 	if (origprops == NULL)
4083 		return; /* all props need to be received */
4084 
4085 	pair = nvlist_next_nvpair(props, NULL);
4086 	while (pair != NULL) {
4087 		const char *propname = nvpair_name(pair);
4088 		nvpair_t *match;
4089 
4090 		next_pair = nvlist_next_nvpair(props, pair);
4091 
4092 		if ((nvlist_lookup_nvpair(origprops, propname,
4093 		    &match) != 0) || !propval_equals(pair, match))
4094 			goto next; /* need to set received value */
4095 
4096 		/* don't clear the existing received value */
4097 		(void) nvlist_remove_nvpair(origprops, match);
4098 		/* don't bother receiving the property */
4099 		(void) nvlist_remove_nvpair(props, pair);
4100 next:
4101 		pair = next_pair;
4102 	}
4103 }
4104 
4105 /*
4106  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
4107  * For example, refquota cannot be set until after the receipt of a dataset,
4108  * because in replication streams, an older/earlier snapshot may exceed the
4109  * refquota.  We want to receive the older/earlier snapshot, but setting
4110  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
4111  * the older/earlier snapshot from being received (with EDQUOT).
4112  *
4113  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
4114  *
4115  * libzfs will need to be judicious handling errors encountered by props
4116  * extracted by this function.
4117  */
4118 static nvlist_t *
4119 extract_delay_props(nvlist_t *props)
4120 {
4121 	nvlist_t *delayprops;
4122 	nvpair_t *nvp, *tmp;
4123 	static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
4124 	int i;
4125 
4126 	VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4127 
4128 	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
4129 	    nvp = nvlist_next_nvpair(props, nvp)) {
4130 		/*
4131 		 * strcmp() is safe because zfs_prop_to_name() always returns
4132 		 * a bounded string.
4133 		 */
4134 		for (i = 0; delayable[i] != 0; i++) {
4135 			if (strcmp(zfs_prop_to_name(delayable[i]),
4136 			    nvpair_name(nvp)) == 0) {
4137 				break;
4138 			}
4139 		}
4140 		if (delayable[i] != 0) {
4141 			tmp = nvlist_prev_nvpair(props, nvp);
4142 			VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
4143 			VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
4144 			nvp = tmp;
4145 		}
4146 	}
4147 
4148 	if (nvlist_empty(delayprops)) {
4149 		nvlist_free(delayprops);
4150 		delayprops = NULL;
4151 	}
4152 	return (delayprops);
4153 }
4154 
#ifdef	DEBUG
/*
 * DEBUG-only fault injection switch.  While it is set, zfs_ioc_recv()
 * forces one receive to report an error and then clears the switch again.
 */
static boolean_t zfs_ioc_recv_inject_err = B_FALSE;
#endif
4158 
4159 /*
4160  * inputs:
4161  * zc_name		name of containing filesystem
4162  * zc_nvlist_src{_size}	nvlist of properties to apply
4163  * zc_value		name of snapshot to create
4164  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
4165  * zc_cookie		file descriptor to recv from
4166  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
4167  * zc_guid		force flag
4168  * zc_cleanup_fd	cleanup-on-exit file descriptor
4169  * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
4170  * zc_resumable		if data is incomplete assume sender will resume
4171  *
4172  * outputs:
4173  * zc_cookie		number of bytes read
4174  * zc_nvlist_dst{_size} error for each unapplied received property
4175  * zc_obj		zprop_errflags_t
4176  * zc_action_handle	handle for this guid/ds mapping
4177  */
4178 static int
4179 zfs_ioc_recv(zfs_cmd_t *zc)
4180 {
4181 	file_t *fp;
4182 	dmu_recv_cookie_t drc;
4183 	boolean_t force = (boolean_t)zc->zc_guid;
4184 	int fd;
4185 	int error = 0;
4186 	int props_error = 0;
4187 	nvlist_t *errors;
4188 	offset_t off;
4189 	nvlist_t *props = NULL; /* sent properties */
4190 	nvlist_t *origprops = NULL; /* existing properties */
4191 	nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
4192 	char *origin = NULL;
4193 	char *tosnap;
4194 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
4195 	boolean_t first_recvd_props = B_FALSE;
4196 
4197 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4198 	    strchr(zc->zc_value, '@') == NULL ||
4199 	    strchr(zc->zc_value, '%'))
4200 		return (SET_ERROR(EINVAL));
4201 
4202 	(void) strcpy(tofs, zc->zc_value);
4203 	tosnap = strchr(tofs, '@');
4204 	*tosnap++ = '\0';
4205 
4206 	if (zc->zc_nvlist_src != NULL &&
4207 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4208 	    zc->zc_iflags, &props)) != 0)
4209 		return (error);
4210 
4211 	fd = zc->zc_cookie;
4212 	fp = getf(fd);
4213 	if (fp == NULL) {
4214 		nvlist_free(props);
4215 		return (SET_ERROR(EBADF));
4216 	}
4217 
4218 	errors = fnvlist_alloc();
4219 
4220 	if (zc->zc_string[0])
4221 		origin = zc->zc_string;
4222 
4223 	error = dmu_recv_begin(tofs, tosnap,
4224 	    &zc->zc_begin_record, force, zc->zc_resumable, origin, &drc);
4225 	if (error != 0)
4226 		goto out;
4227 
4228 	/*
4229 	 * Set properties before we receive the stream so that they are applied
4230 	 * to the new data. Note that we must call dmu_recv_stream() if
4231 	 * dmu_recv_begin() succeeds.
4232 	 */
4233 	if (props != NULL && !drc.drc_newfs) {
4234 		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4235 		    SPA_VERSION_RECVD_PROPS &&
4236 		    !dsl_prop_get_hasrecvd(tofs))
4237 			first_recvd_props = B_TRUE;
4238 
4239 		/*
4240 		 * If new received properties are supplied, they are to
4241 		 * completely replace the existing received properties, so stash
4242 		 * away the existing ones.
4243 		 */
4244 		if (dsl_prop_get_received(tofs, &origprops) == 0) {
4245 			nvlist_t *errlist = NULL;
4246 			/*
4247 			 * Don't bother writing a property if its value won't
4248 			 * change (and avoid the unnecessary security checks).
4249 			 *
4250 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
4251 			 * special case where we blow away all local properties
4252 			 * regardless.
4253 			 */
4254 			if (!first_recvd_props)
4255 				props_reduce(props, origprops);
4256 			if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4257 				(void) nvlist_merge(errors, errlist, 0);
4258 			nvlist_free(errlist);
4259 
4260 			if (clear_received_props(tofs, origprops,
4261 			    first_recvd_props ? NULL : props) != 0)
4262 				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4263 		} else {
4264 			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4265 		}
4266 	}
4267 
4268 	if (props != NULL) {
4269 		props_error = dsl_prop_set_hasrecvd(tofs);
4270 
4271 		if (props_error == 0) {
4272 			delayprops = extract_delay_props(props);
4273 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4274 			    props, errors);
4275 		}
4276 	}
4277 
4278 	off = fp->f_offset;
4279 	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
4280 	    &zc->zc_action_handle);
4281 
4282 	if (error == 0) {
4283 		zfsvfs_t *zfsvfs = NULL;
4284 
4285 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
4286 			/* online recv */
4287 			int end_err;
4288 
4289 			error = zfs_suspend_fs(zfsvfs);
4290 			/*
4291 			 * If the suspend fails, then the recv_end will
4292 			 * likely also fail, and clean up after itself.
4293 			 */
4294 			end_err = dmu_recv_end(&drc, zfsvfs);
4295 			if (error == 0)
4296 				error = zfs_resume_fs(zfsvfs, tofs);
4297 			error = error ? error : end_err;
4298 			VFS_RELE(zfsvfs->z_vfs);
4299 		} else {
4300 			error = dmu_recv_end(&drc, NULL);
4301 		}
4302 
4303 		/* Set delayed properties now, after we're done receiving. */
4304 		if (delayprops != NULL && error == 0) {
4305 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4306 			    delayprops, errors);
4307 		}
4308 	}
4309 
4310 	if (delayprops != NULL) {
4311 		/*
4312 		 * Merge delayed props back in with initial props, in case
4313 		 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
4314 		 * we have to make sure clear_received_props() includes
4315 		 * the delayed properties).
4316 		 *
4317 		 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
4318 		 * using ASSERT() will be just like a VERIFY.
4319 		 */
4320 		ASSERT(nvlist_merge(props, delayprops, 0) == 0);
4321 		nvlist_free(delayprops);
4322 	}
4323 
4324 	/*
4325 	 * Now that all props, initial and delayed, are set, report the prop
4326 	 * errors to the caller.
4327 	 */
4328 	if (zc->zc_nvlist_dst_size != 0 &&
4329 	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4330 	    put_nvlist(zc, errors) != 0)) {
4331 		/*
4332 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
4333 		 * size or supplied an invalid address.
4334 		 */
4335 		props_error = SET_ERROR(EINVAL);
4336 	}
4337 
4338 	zc->zc_cookie = off - fp->f_offset;
4339 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4340 		fp->f_offset = off;
4341 
4342 #ifdef	DEBUG
4343 	if (zfs_ioc_recv_inject_err) {
4344 		zfs_ioc_recv_inject_err = B_FALSE;
4345 		error = 1;
4346 	}
4347 #endif
4348 	/*
4349 	 * On error, restore the original props.
4350 	 */
4351 	if (error != 0 && props != NULL && !drc.drc_newfs) {
4352 		if (clear_received_props(tofs, props, NULL) != 0) {
4353 			/*
4354 			 * We failed to clear the received properties.
4355 			 * Since we may have left a $recvd value on the
4356 			 * system, we can't clear the $hasrecvd flag.
4357 			 */
4358 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4359 		} else if (first_recvd_props) {
4360 			dsl_prop_unset_hasrecvd(tofs);
4361 		}
4362 
4363 		if (origprops == NULL && !drc.drc_newfs) {
4364 			/* We failed to stash the original properties. */
4365 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4366 		}
4367 
		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
4374 		if (origprops != NULL &&
4375 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4376 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4377 		    origprops, NULL) != 0) {
4378 			/*
4379 			 * We stashed the original properties but failed to
4380 			 * restore them.
4381 			 */
4382 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
4383 		}
4384 	}
4385 out:
4386 	nvlist_free(props);
4387 	nvlist_free(origprops);
4388 	nvlist_free(errors);
4389 	releasef(fd);
4390 
4391 	if (error == 0)
4392 		error = props_error;
4393 
4394 	return (error);
4395 }
4396 
4397 /*
4398  * inputs:
4399  * zc_name	name of snapshot to send
4400  * zc_cookie	file descriptor to send stream to
4401  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
4402  * zc_sendobj	objsetid of snapshot to send
4403  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
4404  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
4405  *		output size in zc_objset_type.
4406  * zc_flags	lzc_send_flags
4407  *
4408  * outputs:
4409  * zc_objset_type	estimated size, if zc_guid is set
4410  */
4411 static int
4412 zfs_ioc_send(zfs_cmd_t *zc)
4413 {
4414 	int error;
4415 	offset_t off;
4416 	boolean_t estimate = (zc->zc_guid != 0);
4417 	boolean_t embedok = (zc->zc_flags & 0x1);
4418 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
4419 
4420 	if (zc->zc_obj != 0) {
4421 		dsl_pool_t *dp;
4422 		dsl_dataset_t *tosnap;
4423 
4424 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4425 		if (error != 0)
4426 			return (error);
4427 
4428 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4429 		if (error != 0) {
4430 			dsl_pool_rele(dp, FTAG);
4431 			return (error);
4432 		}
4433 
4434 		if (dsl_dir_is_clone(tosnap->ds_dir))
4435 			zc->zc_fromobj =
4436 			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
4437 		dsl_dataset_rele(tosnap, FTAG);
4438 		dsl_pool_rele(dp, FTAG);
4439 	}
4440 
4441 	if (estimate) {
4442 		dsl_pool_t *dp;
4443 		dsl_dataset_t *tosnap;
4444 		dsl_dataset_t *fromsnap = NULL;
4445 
4446 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4447 		if (error != 0)
4448 			return (error);
4449 
4450 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4451 		if (error != 0) {
4452 			dsl_pool_rele(dp, FTAG);
4453 			return (error);
4454 		}
4455 
4456 		if (zc->zc_fromobj != 0) {
4457 			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4458 			    FTAG, &fromsnap);
4459 			if (error != 0) {
4460 				dsl_dataset_rele(tosnap, FTAG);
4461 				dsl_pool_rele(dp, FTAG);
4462 				return (error);
4463 			}
4464 		}
4465 
4466 		error = dmu_send_estimate(tosnap, fromsnap,
4467 		    &zc->zc_objset_type);
4468 
4469 		if (fromsnap != NULL)
4470 			dsl_dataset_rele(fromsnap, FTAG);
4471 		dsl_dataset_rele(tosnap, FTAG);
4472 		dsl_pool_rele(dp, FTAG);
4473 	} else {
4474 		file_t *fp = getf(zc->zc_cookie);
4475 		if (fp == NULL)
4476 			return (SET_ERROR(EBADF));
4477 
4478 		off = fp->f_offset;
4479 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4480 		    zc->zc_fromobj, embedok, large_block_ok,
4481 		    zc->zc_cookie, fp->f_vnode, &off);
4482 
4483 		if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4484 			fp->f_offset = off;
4485 		releasef(zc->zc_cookie);
4486 	}
4487 	return (error);
4488 }
4489 
4490 /*
4491  * inputs:
4492  * zc_name	name of snapshot on which to report progress
4493  * zc_cookie	file descriptor of send stream
4494  *
4495  * outputs:
4496  * zc_cookie	number of bytes written in send stream thus far
4497  */
4498 static int
4499 zfs_ioc_send_progress(zfs_cmd_t *zc)
4500 {
4501 	dsl_pool_t *dp;
4502 	dsl_dataset_t *ds;
4503 	dmu_sendarg_t *dsp = NULL;
4504 	int error;
4505 
4506 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4507 	if (error != 0)
4508 		return (error);
4509 
4510 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4511 	if (error != 0) {
4512 		dsl_pool_rele(dp, FTAG);
4513 		return (error);
4514 	}
4515 
4516 	mutex_enter(&ds->ds_sendstream_lock);
4517 
4518 	/*
4519 	 * Iterate over all the send streams currently active on this dataset.
4520 	 * If there's one which matches the specified file descriptor _and_ the
4521 	 * stream was started by the current process, return the progress of
4522 	 * that stream.
4523 	 */
4524 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4525 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
4526 		if (dsp->dsa_outfd == zc->zc_cookie &&
4527 		    dsp->dsa_proc == curproc)
4528 			break;
4529 	}
4530 
4531 	if (dsp != NULL)
4532 		zc->zc_cookie = *(dsp->dsa_off);
4533 	else
4534 		error = SET_ERROR(ENOENT);
4535 
4536 	mutex_exit(&ds->ds_sendstream_lock);
4537 	dsl_dataset_rele(ds, FTAG);
4538 	dsl_pool_rele(dp, FTAG);
4539 	return (error);
4540 }
4541 
4542 static int
4543 zfs_ioc_inject_fault(zfs_cmd_t *zc)
4544 {
4545 	int id, error;
4546 
4547 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4548 	    &zc->zc_inject_record);
4549 
4550 	if (error == 0)
4551 		zc->zc_guid = (uint64_t)id;
4552 
4553 	return (error);
4554 }
4555 
4556 static int
4557 zfs_ioc_clear_fault(zfs_cmd_t *zc)
4558 {
4559 	return (zio_clear_fault((int)zc->zc_guid));
4560 }
4561 
4562 static int
4563 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4564 {
4565 	int id = (int)zc->zc_guid;
4566 	int error;
4567 
4568 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4569 	    &zc->zc_inject_record);
4570 
4571 	zc->zc_guid = id;
4572 
4573 	return (error);
4574 }
4575 
4576 static int
4577 zfs_ioc_error_log(zfs_cmd_t *zc)
4578 {
4579 	spa_t *spa;
4580 	int error;
4581 	size_t count = (size_t)zc->zc_nvlist_dst_size;
4582 
4583 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4584 		return (error);
4585 
4586 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4587 	    &count);
4588 	if (error == 0)
4589 		zc->zc_nvlist_dst_size = count;
4590 	else
4591 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4592 
4593 	spa_close(spa, FTAG);
4594 
4595 	return (error);
4596 }
4597 
4598 static int
4599 zfs_ioc_clear(zfs_cmd_t *zc)
4600 {
4601 	spa_t *spa;
4602 	vdev_t *vd;
4603 	int error;
4604 
4605 	/*
4606 	 * On zpool clear we also fix up missing slogs
4607 	 */
4608 	mutex_enter(&spa_namespace_lock);
4609 	spa = spa_lookup(zc->zc_name);
4610 	if (spa == NULL) {
4611 		mutex_exit(&spa_namespace_lock);
4612 		return (SET_ERROR(EIO));
4613 	}
4614 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4615 		/* we need to let spa_open/spa_load clear the chains */
4616 		spa_set_log_state(spa, SPA_LOG_CLEAR);
4617 	}
4618 	spa->spa_last_open_failed = 0;
4619 	mutex_exit(&spa_namespace_lock);
4620 
4621 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4622 		error = spa_open(zc->zc_name, &spa, FTAG);
4623 	} else {
4624 		nvlist_t *policy;
4625 		nvlist_t *config = NULL;
4626 
4627 		if (zc->zc_nvlist_src == NULL)
4628 			return (SET_ERROR(EINVAL));
4629 
4630 		if ((error = get_nvlist(zc->zc_nvlist_src,
4631 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4632 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4633 			    policy, &config);
4634 			if (config != NULL) {
4635 				int err;
4636 
4637 				if ((err = put_nvlist(zc, config)) != 0)
4638 					error = err;
4639 				nvlist_free(config);
4640 			}
4641 			nvlist_free(policy);
4642 		}
4643 	}
4644 
4645 	if (error != 0)
4646 		return (error);
4647 
4648 	spa_vdev_state_enter(spa, SCL_NONE);
4649 
4650 	if (zc->zc_guid == 0) {
4651 		vd = NULL;
4652 	} else {
4653 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4654 		if (vd == NULL) {
4655 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
4656 			spa_close(spa, FTAG);
4657 			return (SET_ERROR(ENODEV));
4658 		}
4659 	}
4660 
4661 	vdev_clear(spa, vd);
4662 
4663 	(void) spa_vdev_state_exit(spa, NULL, 0);
4664 
4665 	/*
4666 	 * Resume any suspended I/Os.
4667 	 */
4668 	if (zio_resume(spa) != 0)
4669 		error = SET_ERROR(EIO);
4670 
4671 	spa_close(spa, FTAG);
4672 
4673 	return (error);
4674 }
4675 
4676 static int
4677 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4678 {
4679 	spa_t *spa;
4680 	int error;
4681 
4682 	error = spa_open(zc->zc_name, &spa, FTAG);
4683 	if (error != 0)
4684 		return (error);
4685 
4686 	spa_vdev_state_enter(spa, SCL_NONE);
4687 
4688 	/*
4689 	 * If a resilver is already in progress then set the
4690 	 * spa_scrub_reopen flag to B_TRUE so that we don't restart
4691 	 * the scan as a side effect of the reopen. Otherwise, let
4692 	 * vdev_open() decided if a resilver is required.
4693 	 */
4694 	spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4695 	vdev_reopen(spa->spa_root_vdev);
4696 	spa->spa_scrub_reopen = B_FALSE;
4697 
4698 	(void) spa_vdev_state_exit(spa, NULL, 0);
4699 	spa_close(spa, FTAG);
4700 	return (0);
4701 }
4702 /*
4703  * inputs:
4704  * zc_name	name of filesystem
4705  * zc_value	name of origin snapshot
4706  *
4707  * outputs:
4708  * zc_string	name of conflicting snapshot, if there is one
4709  */
4710 static int
4711 zfs_ioc_promote(zfs_cmd_t *zc)
4712 {
4713 	char *cp;
4714 
4715 	/*
4716 	 * We don't need to unmount *all* the origin fs's snapshots, but
4717 	 * it's easier.
4718 	 */
4719 	cp = strchr(zc->zc_value, '@');
4720 	if (cp)
4721 		*cp = '\0';
4722 	(void) dmu_objset_find(zc->zc_value,
4723 	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4724 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4725 }
4726 
4727 /*
4728  * Retrieve a single {user|group}{used|quota}@... property.
4729  *
4730  * inputs:
4731  * zc_name	name of filesystem
4732  * zc_objset_type zfs_userquota_prop_t
4733  * zc_value	domain name (eg. "S-1-234-567-89")
4734  * zc_guid	RID/UID/GID
4735  *
4736  * outputs:
4737  * zc_cookie	property value
4738  */
4739 static int
4740 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4741 {
4742 	zfsvfs_t *zfsvfs;
4743 	int error;
4744 
4745 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4746 		return (SET_ERROR(EINVAL));
4747 
4748 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4749 	if (error != 0)
4750 		return (error);
4751 
4752 	error = zfs_userspace_one(zfsvfs,
4753 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4754 	zfsvfs_rele(zfsvfs, FTAG);
4755 
4756 	return (error);
4757 }
4758 
4759 /*
4760  * inputs:
4761  * zc_name		name of filesystem
4762  * zc_cookie		zap cursor
4763  * zc_objset_type	zfs_userquota_prop_t
4764  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4765  *
4766  * outputs:
4767  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
4768  * zc_cookie	zap cursor
4769  */
4770 static int
4771 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4772 {
4773 	zfsvfs_t *zfsvfs;
4774 	int bufsize = zc->zc_nvlist_dst_size;
4775 
4776 	if (bufsize <= 0)
4777 		return (SET_ERROR(ENOMEM));
4778 
4779 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4780 	if (error != 0)
4781 		return (error);
4782 
4783 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
4784 
4785 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4786 	    buf, &zc->zc_nvlist_dst_size);
4787 
4788 	if (error == 0) {
4789 		error = xcopyout(buf,
4790 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
4791 		    zc->zc_nvlist_dst_size);
4792 	}
4793 	kmem_free(buf, bufsize);
4794 	zfsvfs_rele(zfsvfs, FTAG);
4795 
4796 	return (error);
4797 }
4798 
4799 /*
4800  * inputs:
4801  * zc_name		name of filesystem
4802  *
4803  * outputs:
4804  * none
4805  */
4806 static int
4807 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4808 {
4809 	objset_t *os;
4810 	int error = 0;
4811 	zfsvfs_t *zfsvfs;
4812 
4813 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4814 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4815 			/*
4816 			 * If userused is not enabled, it may be because the
4817 			 * objset needs to be closed & reopened (to grow the
4818 			 * objset_phys_t).  Suspend/resume the fs will do that.
4819 			 */
4820 			error = zfs_suspend_fs(zfsvfs);
4821 			if (error == 0) {
4822 				dmu_objset_refresh_ownership(zfsvfs->z_os,
4823 				    zfsvfs);
4824 				error = zfs_resume_fs(zfsvfs, zc->zc_name);
4825 			}
4826 		}
4827 		if (error == 0)
4828 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4829 		VFS_RELE(zfsvfs->z_vfs);
4830 	} else {
4831 		/* XXX kind of reading contents without owning */
4832 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4833 		if (error != 0)
4834 			return (error);
4835 
4836 		error = dmu_objset_userspace_upgrade(os);
4837 		dmu_objset_rele(os, FTAG);
4838 	}
4839 
4840 	return (error);
4841 }
4842 
/*
 * We don't want to have a hard dependency
 * against some special symbols in sharefs,
 * nfs, and smbsrv.  Determine them if needed when
 * the first file system is shared.
 * None of sharefs, nfs, or smbsrv are unloadable modules.
 */
4850 int (*znfsexport_fs)(void *arg);
4851 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
4852 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
4853 
4854 int zfs_nfsshare_inited;
4855 int zfs_smbshare_inited;
4856 
4857 ddi_modhandle_t nfs_mod;
4858 ddi_modhandle_t sharefs_mod;
4859 ddi_modhandle_t smbsrv_mod;
4860 kmutex_t zfs_share_lock;
4861 
4862 static int
4863 zfs_init_sharefs()
4864 {
4865 	int error;
4866 
4867 	ASSERT(MUTEX_HELD(&zfs_share_lock));
4868 	/* Both NFS and SMB shares also require sharetab support. */
4869 	if (sharefs_mod == NULL && ((sharefs_mod =
4870 	    ddi_modopen("fs/sharefs",
4871 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
4872 		return (SET_ERROR(ENOSYS));
4873 	}
4874 	if (zshare_fs == NULL && ((zshare_fs =
4875 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4876 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4877 		return (SET_ERROR(ENOSYS));
4878 	}
4879 	return (0);
4880 }
4881 
4882 static int
4883 zfs_ioc_share(zfs_cmd_t *zc)
4884 {
4885 	int error;
4886 	int opcode;
4887 
4888 	switch (zc->zc_share.z_sharetype) {
4889 	case ZFS_SHARE_NFS:
4890 	case ZFS_UNSHARE_NFS:
4891 		if (zfs_nfsshare_inited == 0) {
4892 			mutex_enter(&zfs_share_lock);
4893 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4894 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4895 				mutex_exit(&zfs_share_lock);
4896 				return (SET_ERROR(ENOSYS));
4897 			}
4898 			if (znfsexport_fs == NULL &&
4899 			    ((znfsexport_fs = (int (*)(void *))
4900 			    ddi_modsym(nfs_mod,
4901 			    "nfs_export", &error)) == NULL)) {
4902 				mutex_exit(&zfs_share_lock);
4903 				return (SET_ERROR(ENOSYS));
4904 			}
4905 			error = zfs_init_sharefs();
4906 			if (error != 0) {
4907 				mutex_exit(&zfs_share_lock);
4908 				return (SET_ERROR(ENOSYS));
4909 			}
4910 			zfs_nfsshare_inited = 1;
4911 			mutex_exit(&zfs_share_lock);
4912 		}
4913 		break;
4914 	case ZFS_SHARE_SMB:
4915 	case ZFS_UNSHARE_SMB:
4916 		if (zfs_smbshare_inited == 0) {
4917 			mutex_enter(&zfs_share_lock);
4918 			if (smbsrv_mod == NULL && ((smbsrv_mod =
4919 			    ddi_modopen("drv/smbsrv",
4920 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4921 				mutex_exit(&zfs_share_lock);
4922 				return (SET_ERROR(ENOSYS));
4923 			}
4924 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4925 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4926 			    "smb_server_share", &error)) == NULL)) {
4927 				mutex_exit(&zfs_share_lock);
4928 				return (SET_ERROR(ENOSYS));
4929 			}
4930 			error = zfs_init_sharefs();
4931 			if (error != 0) {
4932 				mutex_exit(&zfs_share_lock);
4933 				return (SET_ERROR(ENOSYS));
4934 			}
4935 			zfs_smbshare_inited = 1;
4936 			mutex_exit(&zfs_share_lock);
4937 		}
4938 		break;
4939 	default:
4940 		return (SET_ERROR(EINVAL));
4941 	}
4942 
4943 	switch (zc->zc_share.z_sharetype) {
4944 	case ZFS_SHARE_NFS:
4945 	case ZFS_UNSHARE_NFS:
4946 		if (error =
4947 		    znfsexport_fs((void *)
4948 		    (uintptr_t)zc->zc_share.z_exportdata))
4949 			return (error);
4950 		break;
4951 	case ZFS_SHARE_SMB:
4952 	case ZFS_UNSHARE_SMB:
4953 		if (error = zsmbexport_fs((void *)
4954 		    (uintptr_t)zc->zc_share.z_exportdata,
4955 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4956 		    B_TRUE: B_FALSE)) {
4957 			return (error);
4958 		}
4959 		break;
4960 	}
4961 
4962 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4963 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4964 	    SHAREFS_ADD : SHAREFS_REMOVE;
4965 
4966 	/*
4967 	 * Add or remove share from sharetab
4968 	 */
4969 	error = zshare_fs(opcode,
4970 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
4971 	    zc->zc_share.z_sharemax);
4972 
4973 	return (error);
4974 
4975 }
4976 
/*
 * Single-entry ACL that zfs_ioc_smb_acl() passes to VOP_CREATE() when it
 * creates a share resource file.
 * NOTE(review): presumably this grants all permissions (ACE_ALL_PERMS) to
 * everyone (ACE_EVERYONE) -- confirm against the ace_t field order.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4980 
4981 /*
4982  * inputs:
4983  * zc_name		name of containing filesystem
4984  * zc_obj		object # beyond which we want next in-use object #
4985  *
4986  * outputs:
4987  * zc_obj		next in-use object #
4988  */
4989 static int
4990 zfs_ioc_next_obj(zfs_cmd_t *zc)
4991 {
4992 	objset_t *os = NULL;
4993 	int error;
4994 
4995 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4996 	if (error != 0)
4997 		return (error);
4998 
4999 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
5000 	    dsl_dataset_phys(os->os_dsl_dataset)->ds_prev_snap_txg);
5001 
5002 	dmu_objset_rele(os, FTAG);
5003 	return (error);
5004 }
5005 
5006 /*
5007  * inputs:
5008  * zc_name		name of filesystem
5009  * zc_value		prefix name for snapshot
5010  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
5011  *
5012  * outputs:
5013  * zc_value		short name of new snapshot
5014  */
5015 static int
5016 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
5017 {
5018 	char *snap_name;
5019 	char *hold_name;
5020 	int error;
5021 	minor_t minor;
5022 
5023 	error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
5024 	if (error != 0)
5025 		return (error);
5026 
5027 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
5028 	    (u_longlong_t)ddi_get_lbolt64());
5029 	hold_name = kmem_asprintf("%%%s", zc->zc_value);
5030 
5031 	error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
5032 	    hold_name);
5033 	if (error == 0)
5034 		(void) strcpy(zc->zc_value, snap_name);
5035 	strfree(snap_name);
5036 	strfree(hold_name);
5037 	zfs_onexit_fd_rele(zc->zc_cleanup_fd);
5038 	return (error);
5039 }
5040 
5041 /*
5042  * inputs:
5043  * zc_name		name of "to" snapshot
5044  * zc_value		name of "from" snapshot
5045  * zc_cookie		file descriptor to write diff data on
5046  *
5047  * outputs:
5048  * dmu_diff_record_t's to the file descriptor
5049  */
5050 static int
5051 zfs_ioc_diff(zfs_cmd_t *zc)
5052 {
5053 	file_t *fp;
5054 	offset_t off;
5055 	int error;
5056 
5057 	fp = getf(zc->zc_cookie);
5058 	if (fp == NULL)
5059 		return (SET_ERROR(EBADF));
5060 
5061 	off = fp->f_offset;
5062 
5063 	error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
5064 
5065 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5066 		fp->f_offset = off;
5067 	releasef(zc->zc_cookie);
5068 
5069 	return (error);
5070 }
5071 
5072 /*
5073  * Remove all ACL files in shares dir
5074  */
5075 static int
5076 zfs_smb_acl_purge(znode_t *dzp)
5077 {
5078 	zap_cursor_t	zc;
5079 	zap_attribute_t	zap;
5080 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
5081 	int error;
5082 
5083 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
5084 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
5085 	    zap_cursor_advance(&zc)) {
5086 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
5087 		    NULL, 0)) != 0)
5088 			break;
5089 	}
5090 	zap_cursor_fini(&zc);
5091 	return (error);
5092 }
5093 
5094 static int
5095 zfs_ioc_smb_acl(zfs_cmd_t *zc)
5096 {
5097 	vnode_t *vp;
5098 	znode_t *dzp;
5099 	vnode_t *resourcevp = NULL;
5100 	znode_t *sharedir;
5101 	zfsvfs_t *zfsvfs;
5102 	nvlist_t *nvlist;
5103 	char *src, *target;
5104 	vattr_t vattr;
5105 	vsecattr_t vsec;
5106 	int error = 0;
5107 
5108 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
5109 	    NO_FOLLOW, NULL, &vp)) != 0)
5110 		return (error);
5111 
5112 	/* Now make sure mntpnt and dataset are ZFS */
5113 
5114 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
5115 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
5116 	    zc->zc_name) != 0)) {
5117 		VN_RELE(vp);
5118 		return (SET_ERROR(EINVAL));
5119 	}
5120 
5121 	dzp = VTOZ(vp);
5122 	zfsvfs = dzp->z_zfsvfs;
5123 	ZFS_ENTER(zfsvfs);
5124 
5125 	/*
5126 	 * Create share dir if its missing.
5127 	 */
5128 	mutex_enter(&zfsvfs->z_lock);
5129 	if (zfsvfs->z_shares_dir == 0) {
5130 		dmu_tx_t *tx;
5131 
5132 		tx = dmu_tx_create(zfsvfs->z_os);
5133 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
5134 		    ZFS_SHARES_DIR);
5135 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
5136 		error = dmu_tx_assign(tx, TXG_WAIT);
5137 		if (error != 0) {
5138 			dmu_tx_abort(tx);
5139 		} else {
5140 			error = zfs_create_share_dir(zfsvfs, tx);
5141 			dmu_tx_commit(tx);
5142 		}
5143 		if (error != 0) {
5144 			mutex_exit(&zfsvfs->z_lock);
5145 			VN_RELE(vp);
5146 			ZFS_EXIT(zfsvfs);
5147 			return (error);
5148 		}
5149 	}
5150 	mutex_exit(&zfsvfs->z_lock);
5151 
5152 	ASSERT(zfsvfs->z_shares_dir);
5153 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
5154 		VN_RELE(vp);
5155 		ZFS_EXIT(zfsvfs);
5156 		return (error);
5157 	}
5158 
5159 	switch (zc->zc_cookie) {
5160 	case ZFS_SMB_ACL_ADD:
5161 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
5162 		vattr.va_type = VREG;
5163 		vattr.va_mode = S_IFREG|0777;
5164 		vattr.va_uid = 0;
5165 		vattr.va_gid = 0;
5166 
5167 		vsec.vsa_mask = VSA_ACE;
5168 		vsec.vsa_aclentp = &full_access;
5169 		vsec.vsa_aclentsz = sizeof (full_access);
5170 		vsec.vsa_aclcnt = 1;
5171 
5172 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
5173 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
5174 		if (resourcevp)
5175 			VN_RELE(resourcevp);
5176 		break;
5177 
5178 	case ZFS_SMB_ACL_REMOVE:
5179 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
5180 		    NULL, 0);
5181 		break;
5182 
5183 	case ZFS_SMB_ACL_RENAME:
5184 		if ((error = get_nvlist(zc->zc_nvlist_src,
5185 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
5186 			VN_RELE(vp);
5187 			VN_RELE(ZTOV(sharedir));
5188 			ZFS_EXIT(zfsvfs);
5189 			return (error);
5190 		}
5191 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
5192 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
5193 		    &target)) {
5194 			VN_RELE(vp);
5195 			VN_RELE(ZTOV(sharedir));
5196 			ZFS_EXIT(zfsvfs);
5197 			nvlist_free(nvlist);
5198 			return (error);
5199 		}
5200 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
5201 		    kcred, NULL, 0);
5202 		nvlist_free(nvlist);
5203 		break;
5204 
5205 	case ZFS_SMB_ACL_PURGE:
5206 		error = zfs_smb_acl_purge(sharedir);
5207 		break;
5208 
5209 	default:
5210 		error = SET_ERROR(EINVAL);
5211 		break;
5212 	}
5213 
5214 	VN_RELE(vp);
5215 	VN_RELE(ZTOV(sharedir));
5216 
5217 	ZFS_EXIT(zfsvfs);
5218 
5219 	return (error);
5220 }
5221 
5222 /*
5223  * innvl: {
5224  *     "holds" -> { snapname -> holdname (string), ... }
5225  *     (optional) "cleanup_fd" -> fd (int32)
5226  * }
5227  *
5228  * outnvl: {
5229  *     snapname -> error value (int32)
5230  *     ...
5231  * }
5232  */
5233 /* ARGSUSED */
5234 static int
5235 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5236 {
5237 	nvpair_t *pair;
5238 	nvlist_t *holds;
5239 	int cleanup_fd = -1;
5240 	int error;
5241 	minor_t minor = 0;
5242 
5243 	error = nvlist_lookup_nvlist(args, "holds", &holds);
5244 	if (error != 0)
5245 		return (SET_ERROR(EINVAL));
5246 
5247 	/* make sure the user didn't pass us any invalid (empty) tags */
5248 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
5249 	    pair = nvlist_next_nvpair(holds, pair)) {
5250 		char *htag;
5251 
5252 		error = nvpair_value_string(pair, &htag);
5253 		if (error != 0)
5254 			return (SET_ERROR(error));
5255 
5256 		if (strlen(htag) == 0)
5257 			return (SET_ERROR(EINVAL));
5258 	}
5259 
5260 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5261 		error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5262 		if (error != 0)
5263 			return (error);
5264 	}
5265 
5266 	error = dsl_dataset_user_hold(holds, minor, errlist);
5267 	if (minor != 0)
5268 		zfs_onexit_fd_rele(cleanup_fd);
5269 	return (error);
5270 }
5271 
5272 /*
5273  * innvl is not used.
5274  *
5275  * outnvl: {
5276  *    holdname -> time added (uint64 seconds since epoch)
5277  *    ...
5278  * }
5279  */
5280 /* ARGSUSED */
5281 static int
5282 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5283 {
5284 	return (dsl_dataset_get_holds(snapname, outnvl));
5285 }
5286 
5287 /*
5288  * innvl: {
5289  *     snapname -> { holdname, ... }
5290  *     ...
5291  * }
5292  *
5293  * outnvl: {
5294  *     snapname -> error value (int32)
5295  *     ...
5296  * }
5297  */
5298 /* ARGSUSED */
5299 static int
5300 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5301 {
5302 	return (dsl_dataset_user_release(holds, errlist));
5303 }
5304 
5305 /*
5306  * inputs:
5307  * zc_name		name of new filesystem or snapshot
5308  * zc_value		full name of old snapshot
5309  *
5310  * outputs:
5311  * zc_cookie		space in bytes
5312  * zc_objset_type	compressed space in bytes
5313  * zc_perm_action	uncompressed space in bytes
5314  */
5315 static int
5316 zfs_ioc_space_written(zfs_cmd_t *zc)
5317 {
5318 	int error;
5319 	dsl_pool_t *dp;
5320 	dsl_dataset_t *new, *old;
5321 
5322 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5323 	if (error != 0)
5324 		return (error);
5325 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5326 	if (error != 0) {
5327 		dsl_pool_rele(dp, FTAG);
5328 		return (error);
5329 	}
5330 	error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5331 	if (error != 0) {
5332 		dsl_dataset_rele(new, FTAG);
5333 		dsl_pool_rele(dp, FTAG);
5334 		return (error);
5335 	}
5336 
5337 	error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5338 	    &zc->zc_objset_type, &zc->zc_perm_action);
5339 	dsl_dataset_rele(old, FTAG);
5340 	dsl_dataset_rele(new, FTAG);
5341 	dsl_pool_rele(dp, FTAG);
5342 	return (error);
5343 }
5344 
5345 /*
5346  * innvl: {
5347  *     "firstsnap" -> snapshot name
5348  * }
5349  *
5350  * outnvl: {
5351  *     "used" -> space in bytes
5352  *     "compressed" -> compressed space in bytes
5353  *     "uncompressed" -> uncompressed space in bytes
5354  * }
5355  */
5356 static int
5357 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5358 {
5359 	int error;
5360 	dsl_pool_t *dp;
5361 	dsl_dataset_t *new, *old;
5362 	char *firstsnap;
5363 	uint64_t used, comp, uncomp;
5364 
5365 	if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5366 		return (SET_ERROR(EINVAL));
5367 
5368 	error = dsl_pool_hold(lastsnap, FTAG, &dp);
5369 	if (error != 0)
5370 		return (error);
5371 
5372 	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5373 	if (error == 0 && !new->ds_is_snapshot) {
5374 		dsl_dataset_rele(new, FTAG);
5375 		error = SET_ERROR(EINVAL);
5376 	}
5377 	if (error != 0) {
5378 		dsl_pool_rele(dp, FTAG);
5379 		return (error);
5380 	}
5381 	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5382 	if (error == 0 && !old->ds_is_snapshot) {
5383 		dsl_dataset_rele(old, FTAG);
5384 		error = SET_ERROR(EINVAL);
5385 	}
5386 	if (error != 0) {
5387 		dsl_dataset_rele(new, FTAG);
5388 		dsl_pool_rele(dp, FTAG);
5389 		return (error);
5390 	}
5391 
5392 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5393 	dsl_dataset_rele(old, FTAG);
5394 	dsl_dataset_rele(new, FTAG);
5395 	dsl_pool_rele(dp, FTAG);
5396 	fnvlist_add_uint64(outnvl, "used", used);
5397 	fnvlist_add_uint64(outnvl, "compressed", comp);
5398 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5399 	return (error);
5400 }
5401 
5402 /*
5403  * innvl: {
5404  *     "fd" -> file descriptor to write stream to (int32)
5405  *     (optional) "fromsnap" -> full snap name to send an incremental from
5406  *     (optional) "largeblockok" -> (value ignored)
5407  *         indicates that blocks > 128KB are permitted
5408  *     (optional) "embedok" -> (value ignored)
5409  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
5410  *     (optional) "resume_object" and "resume_offset" -> (uint64)
5411  *         if present, resume send stream from specified object and offset.
5412  * }
5413  *
5414  * outnvl is unused
5415  */
5416 /* ARGSUSED */
5417 static int
5418 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5419 {
5420 	int error;
5421 	offset_t off;
5422 	char *fromname = NULL;
5423 	int fd;
5424 	boolean_t largeblockok;
5425 	boolean_t embedok;
5426 	uint64_t resumeobj = 0;
5427 	uint64_t resumeoff = 0;
5428 
5429 	error = nvlist_lookup_int32(innvl, "fd", &fd);
5430 	if (error != 0)
5431 		return (SET_ERROR(EINVAL));
5432 
5433 	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5434 
5435 	largeblockok = nvlist_exists(innvl, "largeblockok");
5436 	embedok = nvlist_exists(innvl, "embedok");
5437 
5438 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
5439 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
5440 
5441 	file_t *fp = getf(fd);
5442 	if (fp == NULL)
5443 		return (SET_ERROR(EBADF));
5444 
5445 	off = fp->f_offset;
5446 	error = dmu_send(snapname, fromname, embedok, largeblockok, fd,
5447 	    resumeobj, resumeoff, fp->f_vnode, &off);
5448 
5449 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5450 		fp->f_offset = off;
5451 	releasef(fd);
5452 	return (error);
5453 }
5454 
5455 /*
5456  * Determine approximately how large a zfs send stream will be -- the number
5457  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5458  *
5459  * innvl: {
5460  *     (optional) "from" -> full snap or bookmark name to send an incremental
5461  *                          from
5462  * }
5463  *
5464  * outnvl: {
5465  *     "space" -> bytes of space (uint64)
5466  * }
5467  */
5468 static int
5469 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5470 {
5471 	dsl_pool_t *dp;
5472 	dsl_dataset_t *tosnap;
5473 	int error;
5474 	char *fromname;
5475 	uint64_t space;
5476 
5477 	error = dsl_pool_hold(snapname, FTAG, &dp);
5478 	if (error != 0)
5479 		return (error);
5480 
5481 	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5482 	if (error != 0) {
5483 		dsl_pool_rele(dp, FTAG);
5484 		return (error);
5485 	}
5486 
5487 	error = nvlist_lookup_string(innvl, "from", &fromname);
5488 	if (error == 0) {
5489 		if (strchr(fromname, '@') != NULL) {
5490 			/*
5491 			 * If from is a snapshot, hold it and use the more
5492 			 * efficient dmu_send_estimate to estimate send space
5493 			 * size using deadlists.
5494 			 */
5495 			dsl_dataset_t *fromsnap;
5496 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5497 			if (error != 0)
5498 				goto out;
5499 			error = dmu_send_estimate(tosnap, fromsnap, &space);
5500 			dsl_dataset_rele(fromsnap, FTAG);
5501 		} else if (strchr(fromname, '#') != NULL) {
5502 			/*
5503 			 * If from is a bookmark, fetch the creation TXG of the
5504 			 * snapshot it was created from and use that to find
5505 			 * blocks that were born after it.
5506 			 */
5507 			zfs_bookmark_phys_t frombm;
5508 
5509 			error = dsl_bookmark_lookup(dp, fromname, tosnap,
5510 			    &frombm);
5511 			if (error != 0)
5512 				goto out;
5513 			error = dmu_send_estimate_from_txg(tosnap,
5514 			    frombm.zbm_creation_txg, &space);
5515 		} else {
5516 			/*
5517 			 * from is not properly formatted as a snapshot or
5518 			 * bookmark
5519 			 */
5520 			error = SET_ERROR(EINVAL);
5521 			goto out;
5522 		}
5523 	} else {
5524 		// If estimating the size of a full send, use dmu_send_estimate
5525 		error = dmu_send_estimate(tosnap, NULL, &space);
5526 	}
5527 
5528 	fnvlist_add_uint64(outnvl, "space", space);
5529 
5530 out:
5531 	dsl_dataset_rele(tosnap, FTAG);
5532 	dsl_pool_rele(dp, FTAG);
5533 	return (error);
5534 }
5535 
/*
 * Dispatch table for the control device's ioctls, indexed by
 * (ioc - ZFS_IOC_FIRST).  Slots are filled in by the zfs_ioctl_register*()
 * functions and looked up by zfsdev_ioctl().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5537 
5538 static void
5539 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5540     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5541     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5542 {
5543 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5544 
5545 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5546 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5547 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5548 	ASSERT3P(vec->zvec_func, ==, NULL);
5549 
5550 	vec->zvec_legacy_func = func;
5551 	vec->zvec_secpolicy = secpolicy;
5552 	vec->zvec_namecheck = namecheck;
5553 	vec->zvec_allow_log = log_history;
5554 	vec->zvec_pool_check = pool_check;
5555 }
5556 
5557 /*
5558  * See the block comment at the beginning of this file for details on
5559  * each argument to this function.
5560  */
5561 static void
5562 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5563     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5564     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5565     boolean_t allow_log)
5566 {
5567 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5568 
5569 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5570 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
5571 	ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5572 	ASSERT3P(vec->zvec_func, ==, NULL);
5573 
5574 	/* if we are logging, the name must be valid */
5575 	ASSERT(!allow_log || namecheck != NO_NAME);
5576 
5577 	vec->zvec_name = name;
5578 	vec->zvec_func = func;
5579 	vec->zvec_secpolicy = secpolicy;
5580 	vec->zvec_namecheck = namecheck;
5581 	vec->zvec_pool_check = pool_check;
5582 	vec->zvec_smush_outnvlist = smush_outnvlist;
5583 	vec->zvec_allow_log = allow_log;
5584 }
5585 
/*
 * Register a legacy-signature ioctl whose name is checked as a pool name
 * (POOL_NAME).  The caller picks the security policy, whether the ioctl is
 * logged, and which pool-state checks apply.
 */
static void
zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
    zfs_ioc_poolcheck_t pool_check)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    POOL_NAME, log_history, pool_check);
}
5594 
/*
 * Register a legacy-signature ioctl whose name is checked as a dataset name
 * (DATASET_NAME) and which is registered with logging disabled.  The caller
 * picks the security policy and the pool-state checks.
 */
static void
zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_FALSE, pool_check);
}
5602 
/*
 * Register a legacy-signature ioctl that modifies a pool: the name is
 * checked as a pool name, zfs_secpolicy_config is the security policy,
 * logging is enabled, and the pool is checked for being suspended or
 * read-only before the handler runs.
 */
static void
zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
5609 
/*
 * Register a legacy-signature ioctl that is not tied to a named pool or
 * dataset: no name check (NO_NAME), logging disabled, and no pool-state
 * check (POOL_CHECK_NONE).  The caller picks the security policy.
 */
static void
zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
}
5617 
/*
 * Register a legacy-signature ioctl that reads from a dataset using a
 * caller-supplied security policy: the name is checked as a dataset name,
 * logging is disabled, and only the suspended-pool check is applied.
 */
static void
zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
}
5625 
/*
 * Same as zfs_ioctl_register_dataset_read_secpolicy(), with
 * zfs_secpolicy_read as the security policy.
 */
static void
zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
	    zfs_secpolicy_read);
}
5632 
/*
 * Register a legacy-signature ioctl that modifies a dataset: the name is
 * checked as a dataset name, logging is enabled, and the pool is checked
 * for being suspended or read-only before the handler runs.  The caller
 * picks the security policy.
 */
static void
zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
5640 
/*
 * Fill in the ioctl dispatch table.  Called from _init().
 *
 * The first group of entries uses zfs_ioctl_register() and therefore the
 * new-style handler signature (name, input nvlist, output nvlist).  The
 * remaining entries use the legacy signature, mostly through the
 * zfs_ioctl_register_*() convenience wrappers, which fix the name check,
 * logging flag and pool-state checks for each class of ioctl.
 */
static void
zfs_ioctl_init(void)
{
	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * pool destroy, and export don't log the history as part of
	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
	 * does the logging of those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

	/* dataset reads guarded by zfs_secpolicy_read */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* dataset reads with their own security policy */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/* dataset modifications: logged, pool must be writeable */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_none);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* dataset ioctls registered with logging disabled */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
5837 
5838 int
5839 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
5840     zfs_ioc_poolcheck_t check)
5841 {
5842 	spa_t *spa;
5843 	int error;
5844 
5845 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
5846 
5847 	if (check & POOL_CHECK_NONE)
5848 		return (0);
5849 
5850 	error = spa_open(name, &spa, FTAG);
5851 	if (error == 0) {
5852 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
5853 			error = SET_ERROR(EAGAIN);
5854 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
5855 			error = SET_ERROR(EROFS);
5856 		spa_close(spa, FTAG);
5857 	}
5858 	return (error);
5859 }
5860 
5861 /*
5862  * Find a free minor number.
5863  */
5864 minor_t
5865 zfsdev_minor_alloc(void)
5866 {
5867 	static minor_t last_minor;
5868 	minor_t m;
5869 
5870 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5871 
5872 	for (m = last_minor + 1; m != last_minor; m++) {
5873 		if (m > ZFSDEV_MAX_MINOR)
5874 			m = 1;
5875 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
5876 			last_minor = m;
5877 			return (m);
5878 		}
5879 	}
5880 
5881 	return (0);
5882 }
5883 
5884 static int
5885 zfs_ctldev_init(dev_t *devp)
5886 {
5887 	minor_t minor;
5888 	zfs_soft_state_t *zs;
5889 
5890 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5891 	ASSERT(getminor(*devp) == 0);
5892 
5893 	minor = zfsdev_minor_alloc();
5894 	if (minor == 0)
5895 		return (SET_ERROR(ENXIO));
5896 
5897 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5898 		return (SET_ERROR(EAGAIN));
5899 
5900 	*devp = makedevice(getemajor(*devp), minor);
5901 
5902 	zs = ddi_get_soft_state(zfsdev_state, minor);
5903 	zs->zss_type = ZSST_CTLDEV;
5904 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5905 
5906 	return (0);
5907 }
5908 
/*
 * Undo zfs_ctldev_init(): tear down the on-exit data `zo' and release the
 * soft state that was allocated for `minor'.
 *
 * The caller must hold zfsdev_state_lock.
 */
static void
zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
{
	ASSERT(MUTEX_HELD(&zfsdev_state_lock));

	zfs_onexit_destroy(zo);
	ddi_soft_state_free(zfsdev_state, minor);
}
5917 
5918 void *
5919 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5920 {
5921 	zfs_soft_state_t *zp;
5922 
5923 	zp = ddi_get_soft_state(zfsdev_state, minor);
5924 	if (zp == NULL || zp->zss_type != which)
5925 		return (NULL);
5926 
5927 	return (zp->zss_data);
5928 }
5929 
5930 static int
5931 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
5932 {
5933 	int error = 0;
5934 
5935 	if (getminor(*devp) != 0)
5936 		return (zvol_open(devp, flag, otyp, cr));
5937 
5938 	/* This is the control device. Allocate a new minor if requested. */
5939 	if (flag & FEXCL) {
5940 		mutex_enter(&zfsdev_state_lock);
5941 		error = zfs_ctldev_init(devp);
5942 		mutex_exit(&zfsdev_state_lock);
5943 	}
5944 
5945 	return (error);
5946 }
5947 
5948 static int
5949 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
5950 {
5951 	zfs_onexit_t *zo;
5952 	minor_t minor = getminor(dev);
5953 
5954 	if (minor == 0)
5955 		return (0);
5956 
5957 	mutex_enter(&zfsdev_state_lock);
5958 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
5959 	if (zo == NULL) {
5960 		mutex_exit(&zfsdev_state_lock);
5961 		return (zvol_close(dev, flag, otyp, cr));
5962 	}
5963 	zfs_ctldev_destroy(zo, minor);
5964 	mutex_exit(&zfsdev_state_lock);
5965 
5966 	return (0);
5967 }
5968 
5969 static int
5970 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
5971 {
5972 	zfs_cmd_t *zc;
5973 	uint_t vecnum;
5974 	int error, rc, len;
5975 	minor_t minor = getminor(dev);
5976 	const zfs_ioc_vec_t *vec;
5977 	char *saved_poolname = NULL;
5978 	nvlist_t *innvl = NULL;
5979 
5980 	if (minor != 0 &&
5981 	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
5982 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
5983 
5984 	vecnum = cmd - ZFS_IOC_FIRST;
5985 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
5986 
5987 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
5988 		return (SET_ERROR(EINVAL));
5989 	vec = &zfs_ioc_vec[vecnum];
5990 
5991 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
5992 
5993 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
5994 	if (error != 0) {
5995 		error = SET_ERROR(EFAULT);
5996 		goto out;
5997 	}
5998 
5999 	zc->zc_iflags = flag & FKIOCTL;
6000 	if (zc->zc_nvlist_src_size != 0) {
6001 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
6002 		    zc->zc_iflags, &innvl);
6003 		if (error != 0)
6004 			goto out;
6005 	}
6006 
6007 	/*
6008 	 * Ensure that all pool/dataset names are valid before we pass down to
6009 	 * the lower layers.
6010 	 */
6011 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6012 	switch (vec->zvec_namecheck) {
6013 	case POOL_NAME:
6014 		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
6015 			error = SET_ERROR(EINVAL);
6016 		else
6017 			error = pool_status_check(zc->zc_name,
6018 			    vec->zvec_namecheck, vec->zvec_pool_check);
6019 		break;
6020 
6021 	case DATASET_NAME:
6022 		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
6023 			error = SET_ERROR(EINVAL);
6024 		else
6025 			error = pool_status_check(zc->zc_name,
6026 			    vec->zvec_namecheck, vec->zvec_pool_check);
6027 		break;
6028 
6029 	case NO_NAME:
6030 		break;
6031 	}
6032 
6033 
6034 	if (error == 0)
6035 		error = vec->zvec_secpolicy(zc, innvl, cr);
6036 
6037 	if (error != 0)
6038 		goto out;
6039 
6040 	/* legacy ioctls can modify zc_name */
6041 	len = strcspn(zc->zc_name, "/@#") + 1;
6042 	saved_poolname = kmem_alloc(len, KM_SLEEP);
6043 	(void) strlcpy(saved_poolname, zc->zc_name, len);
6044 
6045 	if (vec->zvec_func != NULL) {
6046 		nvlist_t *outnvl;
6047 		int puterror = 0;
6048 		spa_t *spa;
6049 		nvlist_t *lognv = NULL;
6050 
6051 		ASSERT(vec->zvec_legacy_func == NULL);
6052 
6053 		/*
6054 		 * Add the innvl to the lognv before calling the func,
6055 		 * in case the func changes the innvl.
6056 		 */
6057 		if (vec->zvec_allow_log) {
6058 			lognv = fnvlist_alloc();
6059 			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
6060 			    vec->zvec_name);
6061 			if (!nvlist_empty(innvl)) {
6062 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
6063 				    innvl);
6064 			}
6065 		}
6066 
6067 		outnvl = fnvlist_alloc();
6068 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
6069 
6070 		if (error == 0 && vec->zvec_allow_log &&
6071 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
6072 			if (!nvlist_empty(outnvl)) {
6073 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
6074 				    outnvl);
6075 			}
6076 			(void) spa_history_log_nvl(spa, lognv);
6077 			spa_close(spa, FTAG);
6078 		}
6079 		fnvlist_free(lognv);
6080 
6081 		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
6082 			int smusherror = 0;
6083 			if (vec->zvec_smush_outnvlist) {
6084 				smusherror = nvlist_smush(outnvl,
6085 				    zc->zc_nvlist_dst_size);
6086 			}
6087 			if (smusherror == 0)
6088 				puterror = put_nvlist(zc, outnvl);
6089 		}
6090 
6091 		if (puterror != 0)
6092 			error = puterror;
6093 
6094 		nvlist_free(outnvl);
6095 	} else {
6096 		error = vec->zvec_legacy_func(zc);
6097 	}
6098 
6099 out:
6100 	nvlist_free(innvl);
6101 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
6102 	if (error == 0 && rc != 0)
6103 		error = SET_ERROR(EFAULT);
6104 	if (error == 0 && vec->zvec_allow_log) {
6105 		char *s = tsd_get(zfs_allow_log_key);
6106 		if (s != NULL)
6107 			strfree(s);
6108 		(void) tsd_set(zfs_allow_log_key, saved_poolname);
6109 	} else {
6110 		if (saved_poolname != NULL)
6111 			strfree(saved_poolname);
6112 	}
6113 
6114 	kmem_free(zc, sizeof (zfs_cmd_t));
6115 	return (error);
6116 }
6117 
6118 static int
6119 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
6120 {
6121 	if (cmd != DDI_ATTACH)
6122 		return (DDI_FAILURE);
6123 
6124 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
6125 	    DDI_PSEUDO, 0) == DDI_FAILURE)
6126 		return (DDI_FAILURE);
6127 
6128 	zfs_dip = dip;
6129 
6130 	ddi_report_dev(dip);
6131 
6132 	return (DDI_SUCCESS);
6133 }
6134 
6135 static int
6136 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6137 {
6138 	if (spa_busy() || zfs_busy() || zvol_busy())
6139 		return (DDI_FAILURE);
6140 
6141 	if (cmd != DDI_DETACH)
6142 		return (DDI_FAILURE);
6143 
6144 	zfs_dip = NULL;
6145 
6146 	ddi_prop_remove_all(dip);
6147 	ddi_remove_minor_node(dip, NULL);
6148 
6149 	return (DDI_SUCCESS);
6150 }
6151 
6152 /*ARGSUSED*/
6153 static int
6154 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
6155 {
6156 	switch (infocmd) {
6157 	case DDI_INFO_DEVT2DEVINFO:
6158 		*result = zfs_dip;
6159 		return (DDI_SUCCESS);
6160 
6161 	case DDI_INFO_DEVT2INSTANCE:
6162 		*result = (void *)0;
6163 		return (DDI_SUCCESS);
6164 	}
6165 
6166 	return (DDI_FAILURE);
6167 }
6168 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * An exclusive (FEXCL) open of /dev/zfs is also handed a private minor > 0
 * (see zfs_ctldev_init()); such minors are told apart from zvols by the
 * type recorded in their soft state.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
6198 
/*
 * Device operations for the zfs driver: the info, attach and detach entry
 * points defined in this file, plus the character/block entry points in
 * zfs_cb_ops.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
6213 
/*
 * Driver linkage: ties the device operations above to the module
 * framework under the description "ZFS storage pool".
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
6219 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info().
 * It carries two linkage elements: zfs_modlfs (defined outside this
 * section) and the driver linkage zfs_modldrv.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
6226 
/*
 * Destructor for the zfs_allow_log_key thread-specific data: the stored
 * value is a pool name string, which is released with strfree().
 */
static void
zfs_allow_log_destroy(void *arg)
{
	strfree((char *)arg);
}
6233 
6234 int
6235 _init(void)
6236 {
6237 	int error;
6238 
6239 	spa_init(FREAD | FWRITE);
6240 	zfs_init();
6241 	zvol_init();
6242 	zfs_ioctl_init();
6243 
6244 	if ((error = mod_install(&modlinkage)) != 0) {
6245 		zvol_fini();
6246 		zfs_fini();
6247 		spa_fini();
6248 		return (error);
6249 	}
6250 
6251 	tsd_create(&zfs_fsyncer_key, NULL);
6252 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
6253 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
6254 
6255 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
6256 	ASSERT(error == 0);
6257 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
6258 
6259 	return (0);
6260 }
6261 
6262 int
6263 _fini(void)
6264 {
6265 	int error;
6266 
6267 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
6268 		return (SET_ERROR(EBUSY));
6269 
6270 	if ((error = mod_remove(&modlinkage)) != 0)
6271 		return (error);
6272 
6273 	zvol_fini();
6274 	zfs_fini();
6275 	spa_fini();
6276 	if (zfs_nfsshare_inited)
6277 		(void) ddi_modclose(nfs_mod);
6278 	if (zfs_smbshare_inited)
6279 		(void) ddi_modclose(smbsrv_mod);
6280 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
6281 		(void) ddi_modclose(sharefs_mod);
6282 
6283 	tsd_destroy(&zfs_fsyncer_key);
6284 	ldi_ident_release(zfs_li);
6285 	zfs_li = NULL;
6286 	mutex_destroy(&zfs_share_lock);
6287 
6288 	return (error);
6289 }
6290 
/*
 * Module information entry point: reports on this module's linkage via
 * mod_info().
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
6296