xref: /titanic_44/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision cb04b8739c50e3e6d12e89b790fa7b8d0d899865)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/param.h>
27 #include <sys/errno.h>
28 #include <sys/uio.h>
29 #include <sys/buf.h>
30 #include <sys/modctl.h>
31 #include <sys/open.h>
32 #include <sys/file.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/stat.h>
37 #include <sys/zfs_ioctl.h>
38 #include <sys/zfs_vfsops.h>
39 #include <sys/zfs_znode.h>
40 #include <sys/zap.h>
41 #include <sys/spa.h>
42 #include <sys/spa_impl.h>
43 #include <sys/vdev.h>
44 #include <sys/priv_impl.h>
45 #include <sys/dmu.h>
46 #include <sys/dsl_dir.h>
47 #include <sys/dsl_dataset.h>
48 #include <sys/dsl_prop.h>
49 #include <sys/dsl_deleg.h>
50 #include <sys/dmu_objset.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunldi.h>
54 #include <sys/policy.h>
55 #include <sys/zone.h>
56 #include <sys/nvpair.h>
57 #include <sys/pathname.h>
58 #include <sys/mount.h>
59 #include <sys/sdt.h>
60 #include <sys/fs/zfs.h>
61 #include <sys/zfs_ctldir.h>
62 #include <sys/zfs_dir.h>
63 #include <sys/zfs_onexit.h>
64 #include <sys/zvol.h>
65 #include <sys/dsl_scan.h>
66 #include <sharefs/share.h>
67 #include <sys/dmu_objset.h>
68 
69 #include "zfs_namecheck.h"
70 #include "zfs_prop.h"
71 #include "zfs_deleg.h"
72 #include "zfs_comutil.h"
73 
74 extern struct modlfs zfs_modlfs;
75 
76 extern void zfs_init(void);
77 extern void zfs_fini(void);
78 
79 ldi_ident_t zfs_li = NULL;
80 dev_info_t *zfs_dip;
81 
/* Function type of an ioctl handler: receives the command structure. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/* Function type of a security-policy check: command plus credentials. */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);

/* Which kind of name (if any) an ioctl vector entry is associated with. */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;

/*
 * One entry of the ioctl dispatch vector: the handler, its security
 * policy callback, and per-ioctl flags.
 * NOTE(review): the code that consumes zvec_namecheck, zvec_his_log and
 * zvec_pool_check is not visible in this part of the file; presumably
 * they select name validation, history logging and a pool-state check --
 * confirm against the dispatcher.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;
	zfs_secpolicy_func_t	*zvec_secpolicy;
	zfs_ioc_namecheck_t	zvec_namecheck;
	boolean_t		zvec_his_log;
	boolean_t		zvec_pool_check;
} zfs_ioc_vec_t;
98 
/*
 * This array is indexed by zfs_userquota_prop_t.
 *
 * It maps each user/group space property to the delegated permission
 * name required to access it.  Users of this table must bounds-check the
 * index against ZFS_NUM_USERQUOTA_PROPS before dereferencing.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
106 
107 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
108 static int zfs_check_settable(const char *name, nvpair_t *property,
109     cred_t *cr);
110 static int zfs_check_clearable(char *dataset, nvlist_t *props,
111     nvlist_t **errors);
112 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
113     boolean_t *);
114 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
115 
116 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
117 void
118 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
119 {
120 	const char *newfile;
121 	char buf[512];
122 	va_list adx;
123 
124 	/*
125 	 * Get rid of annoying "../common/" prefix to filename.
126 	 */
127 	newfile = strrchr(file, '/');
128 	if (newfile != NULL) {
129 		newfile = newfile + 1; /* Get rid of leading / */
130 	} else {
131 		newfile = file;
132 	}
133 
134 	va_start(adx, fmt);
135 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
136 	va_end(adx);
137 
138 	/*
139 	 * To get this data, use the zfs-dprintf probe as so:
140 	 * dtrace -q -n 'zfs-dprintf \
141 	 *	/stringof(arg0) == "dbuf.c"/ \
142 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
143 	 * arg0 = file name
144 	 * arg1 = function name
145 	 * arg2 = line number
146 	 * arg3 = message
147 	 */
148 	DTRACE_PROBE4(zfs__dprintf,
149 	    char *, newfile, char *, func, int, line, char *, buf);
150 }
151 
/*
 * Release a history record buffer.  The buffer must be exactly
 * HIS_MAX_RECORD_LEN bytes, which is the size history_str_get() allocates.
 */
static void
history_str_free(char *buf)
{
	kmem_free(buf, HIS_MAX_RECORD_LEN);
}
157 
158 static char *
159 history_str_get(zfs_cmd_t *zc)
160 {
161 	char *buf;
162 
163 	if (zc->zc_history == NULL)
164 		return (NULL);
165 
166 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
167 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
168 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
169 		history_str_free(buf);
170 		return (NULL);
171 	}
172 
173 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
174 
175 	return (buf);
176 }
177 
178 /*
179  * Check to see if the named dataset is currently defined as bootable
180  */
181 static boolean_t
182 zfs_is_bootfs(const char *name)
183 {
184 	objset_t *os;
185 
186 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
187 		boolean_t ret;
188 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
189 		dmu_objset_rele(os, FTAG);
190 		return (ret);
191 	}
192 	return (B_FALSE);
193 }
194 
195 /*
196  * zfs_earlier_version
197  *
198  *	Return non-zero if the spa version is less than requested version.
199  */
200 static int
201 zfs_earlier_version(const char *name, int version)
202 {
203 	spa_t *spa;
204 
205 	if (spa_open(name, &spa, FTAG) == 0) {
206 		if (spa_version(spa) < version) {
207 			spa_close(spa, FTAG);
208 			return (1);
209 		}
210 		spa_close(spa, FTAG);
211 	}
212 	return (0);
213 }
214 
215 /*
216  * zpl_earlier_version
217  *
218  * Return TRUE if the ZPL version is less than requested version.
219  */
220 static boolean_t
221 zpl_earlier_version(const char *name, int version)
222 {
223 	objset_t *os;
224 	boolean_t rc = B_TRUE;
225 
226 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
227 		uint64_t zplversion;
228 
229 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
230 			dmu_objset_rele(os, FTAG);
231 			return (B_TRUE);
232 		}
233 		/* XXX reading from non-owned objset */
234 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
235 			rc = zplversion < version;
236 		dmu_objset_rele(os, FTAG);
237 	}
238 	return (rc);
239 }
240 
241 static void
242 zfs_log_history(zfs_cmd_t *zc)
243 {
244 	spa_t *spa;
245 	char *buf;
246 
247 	if ((buf = history_str_get(zc)) == NULL)
248 		return;
249 
250 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
251 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
252 			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
253 		spa_close(spa, FTAG);
254 	}
255 	history_str_free(buf);
256 }
257 
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * Both arguments are ignored; the function always returns 0 (allowed).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
{
	return (0);
}
268 
269 /*
270  * Policy for dataset read operations (list children, get statistics).  Requires
271  * no privileges, but must be visible in the local zone.
272  */
273 /* ARGSUSED */
274 static int
275 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
276 {
277 	if (INGLOBALZONE(curproc) ||
278 	    zone_dataset_visible(zc->zc_name, NULL))
279 		return (0);
280 
281 	return (ENOENT);
282 }
283 
284 static int
285 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
286 {
287 	int writable = 1;
288 
289 	/*
290 	 * The dataset must be visible by this zone -- check this first
291 	 * so they don't see EPERM on something they shouldn't know about.
292 	 */
293 	if (!INGLOBALZONE(curproc) &&
294 	    !zone_dataset_visible(dataset, &writable))
295 		return (ENOENT);
296 
297 	if (INGLOBALZONE(curproc)) {
298 		/*
299 		 * If the fs is zoned, only root can access it from the
300 		 * global zone.
301 		 */
302 		if (secpolicy_zfs(cr) && zoned)
303 			return (EPERM);
304 	} else {
305 		/*
306 		 * If we are in a local zone, the 'zoned' property must be set.
307 		 */
308 		if (!zoned)
309 			return (EPERM);
310 
311 		/* must be writable by this zone */
312 		if (!writable)
313 			return (EPERM);
314 	}
315 	return (0);
316 }
317 
318 static int
319 zfs_dozonecheck(const char *dataset, cred_t *cr)
320 {
321 	uint64_t zoned;
322 
323 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
324 		return (ENOENT);
325 
326 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
327 }
328 
329 static int
330 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
331 {
332 	uint64_t zoned;
333 
334 	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
335 	if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
336 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
337 		return (ENOENT);
338 	}
339 	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
340 
341 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
342 }
343 
344 int
345 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
346 {
347 	int error;
348 
349 	error = zfs_dozonecheck(name, cr);
350 	if (error == 0) {
351 		error = secpolicy_zfs(cr);
352 		if (error)
353 			error = dsl_deleg_access(name, perm, cr);
354 	}
355 	return (error);
356 }
357 
358 int
359 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
360     const char *perm, cred_t *cr)
361 {
362 	int error;
363 
364 	error = zfs_dozonecheck_ds(name, ds, cr);
365 	if (error == 0) {
366 		error = secpolicy_zfs(cr);
367 		if (error)
368 			error = dsl_deleg_access_impl(ds, perm, cr);
369 	}
370 	return (error);
371 }
372 
373 /*
374  * Policy for setting the security label property.
375  *
376  * Returns 0 for success, non-zero for access and other errors.
377  */
378 static int
379 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
380 {
381 	char		ds_hexsl[MAXNAMELEN];
382 	bslabel_t	ds_sl, new_sl;
383 	boolean_t	new_default = FALSE;
384 	uint64_t	zoned;
385 	int		needed_priv = -1;
386 	int		error;
387 
388 	/* First get the existing dataset label. */
389 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
390 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
391 	if (error)
392 		return (EPERM);
393 
394 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
395 		new_default = TRUE;
396 
397 	/* The label must be translatable */
398 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
399 		return (EINVAL);
400 
401 	/*
402 	 * In a non-global zone, disallow attempts to set a label that
403 	 * doesn't match that of the zone; otherwise no other checks
404 	 * are needed.
405 	 */
406 	if (!INGLOBALZONE(curproc)) {
407 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
408 			return (EPERM);
409 		return (0);
410 	}
411 
412 	/*
413 	 * For global-zone datasets (i.e., those whose zoned property is
414 	 * "off", verify that the specified new label is valid for the
415 	 * global zone.
416 	 */
417 	if (dsl_prop_get_integer(name,
418 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
419 		return (EPERM);
420 	if (!zoned) {
421 		if (zfs_check_global_label(name, strval) != 0)
422 			return (EPERM);
423 	}
424 
425 	/*
426 	 * If the existing dataset label is nondefault, check if the
427 	 * dataset is mounted (label cannot be changed while mounted).
428 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
429 	 * mounted (or isn't a dataset, doesn't exist, ...).
430 	 */
431 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
432 		objset_t *os;
433 		static char *setsl_tag = "setsl_tag";
434 
435 		/*
436 		 * Try to own the dataset; abort if there is any error,
437 		 * (e.g., already mounted, in use, or other error).
438 		 */
439 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
440 		    setsl_tag, &os);
441 		if (error)
442 			return (EPERM);
443 
444 		dmu_objset_disown(os, setsl_tag);
445 
446 		if (new_default) {
447 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
448 			goto out_check;
449 		}
450 
451 		if (hexstr_to_label(strval, &new_sl) != 0)
452 			return (EPERM);
453 
454 		if (blstrictdom(&ds_sl, &new_sl))
455 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
456 		else if (blstrictdom(&new_sl, &ds_sl))
457 			needed_priv = PRIV_FILE_UPGRADE_SL;
458 	} else {
459 		/* dataset currently has a default label */
460 		if (!new_default)
461 			needed_priv = PRIV_FILE_UPGRADE_SL;
462 	}
463 
464 out_check:
465 	if (needed_priv != -1)
466 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
467 	return (0);
468 }
469 
470 static int
471 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
472     cred_t *cr)
473 {
474 	char *strval;
475 
476 	/*
477 	 * Check permissions for special properties.
478 	 */
479 	switch (prop) {
480 	case ZFS_PROP_ZONED:
481 		/*
482 		 * Disallow setting of 'zoned' from within a local zone.
483 		 */
484 		if (!INGLOBALZONE(curproc))
485 			return (EPERM);
486 		break;
487 
488 	case ZFS_PROP_QUOTA:
489 		if (!INGLOBALZONE(curproc)) {
490 			uint64_t zoned;
491 			char setpoint[MAXNAMELEN];
492 			/*
493 			 * Unprivileged users are allowed to modify the
494 			 * quota on things *under* (ie. contained by)
495 			 * the thing they own.
496 			 */
497 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
498 			    setpoint))
499 				return (EPERM);
500 			if (!zoned || strlen(dsname) <= strlen(setpoint))
501 				return (EPERM);
502 		}
503 		break;
504 
505 	case ZFS_PROP_MLSLABEL:
506 		if (!is_system_labeled())
507 			return (EPERM);
508 
509 		if (nvpair_value_string(propval, &strval) == 0) {
510 			int err;
511 
512 			err = zfs_set_slabel_policy(dsname, strval, CRED());
513 			if (err != 0)
514 				return (err);
515 		}
516 		break;
517 	}
518 
519 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
520 }
521 
522 int
523 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
524 {
525 	int error;
526 
527 	error = zfs_dozonecheck(zc->zc_name, cr);
528 	if (error)
529 		return (error);
530 
531 	/*
532 	 * permission to set permissions will be evaluated later in
533 	 * dsl_deleg_can_allow()
534 	 */
535 	return (0);
536 }
537 
/*
 * Policy for rollback: the caller needs write permission on zc->zc_name
 * for the ZFS_DELEG_PERM_ROLLBACK permission.
 */
int
zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_ROLLBACK, cr));
}
544 
545 int
546 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
547 {
548 	spa_t *spa;
549 	dsl_pool_t *dp;
550 	dsl_dataset_t *ds;
551 	char *cp;
552 	int error;
553 
554 	/*
555 	 * Generate the current snapshot name from the given objsetid, then
556 	 * use that name for the secpolicy/zone checks.
557 	 */
558 	cp = strchr(zc->zc_name, '@');
559 	if (cp == NULL)
560 		return (EINVAL);
561 	error = spa_open(zc->zc_name, &spa, FTAG);
562 	if (error)
563 		return (error);
564 
565 	dp = spa_get_dsl(spa);
566 	rw_enter(&dp->dp_config_rwlock, RW_READER);
567 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
568 	rw_exit(&dp->dp_config_rwlock);
569 	spa_close(spa, FTAG);
570 	if (error)
571 		return (error);
572 
573 	dsl_dataset_name(ds, zc->zc_name);
574 
575 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
576 	    ZFS_DELEG_PERM_SEND, cr);
577 	dsl_dataset_rele(ds, FTAG);
578 
579 	return (error);
580 }
581 
582 static int
583 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
584 {
585 	vnode_t *vp;
586 	int error;
587 
588 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
589 	    NO_FOLLOW, NULL, &vp)) != 0)
590 		return (error);
591 
592 	/* Now make sure mntpnt and dataset are ZFS */
593 
594 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
595 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
596 	    zc->zc_name) != 0)) {
597 		VN_RELE(vp);
598 		return (EPERM);
599 	}
600 
601 	VN_RELE(vp);
602 	return (dsl_deleg_access(zc->zc_name,
603 	    ZFS_DELEG_PERM_SHARE, cr));
604 }
605 
606 int
607 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
608 {
609 	if (!INGLOBALZONE(curproc))
610 		return (EPERM);
611 
612 	if (secpolicy_nfs(cr) == 0) {
613 		return (0);
614 	} else {
615 		return (zfs_secpolicy_deleg_share(zc, cr));
616 	}
617 }
618 
619 int
620 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
621 {
622 	if (!INGLOBALZONE(curproc))
623 		return (EPERM);
624 
625 	if (secpolicy_smb(cr) == 0) {
626 		return (0);
627 	} else {
628 		return (zfs_secpolicy_deleg_share(zc, cr));
629 	}
630 }
631 
/*
 * Compute the parent of a dataset or snapshot name.
 *
 * datasetname - full name, e.g. "pool/fs/child" or "pool/fs@snap"
 * parent      - output buffer; receives the name with the trailing
 *               "@bla" or "/bla" component removed
 * parentsize  - size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT if the name has neither '@' nor '/'
 * (i.e. it has no parent), or ENAMETOOLONG if the name (including its
 * terminating NUL) does not fit in 'parent'.  A name that does not fit
 * is rejected rather than truncated: truncation could leave the buffer
 * unterminated or yield the parent of a different, shorter name.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0 || strlen(datasetname) >= (size_t)parentsize)
		return (ENAMETOOLONG);

	(void) strcpy(parent, datasetname);

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);

	cp[0] = '\0';
	return (0);
}
653 
654 int
655 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
656 {
657 	int error;
658 
659 	if ((error = zfs_secpolicy_write_perms(name,
660 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
661 		return (error);
662 
663 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
664 }
665 
/*
 * ioctl policy wrapper: applies zfs_secpolicy_destroy_perms() to the
 * dataset named in zc->zc_name.
 */
static int
zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
}
671 
672 /*
673  * Destroying snapshots with delegated permissions requires
674  * descendent mount and destroy permissions.
675  * Reassemble the full filesystem@snap name so dsl_deleg_access()
676  * can do the correct permission check.
677  *
678  * Since this routine is used when doing a recursive destroy of snapshots
679  * and destroying snapshots requires descendent permissions, a successfull
680  * check of the top level snapshot applies to snapshots of all descendent
681  * datasets as well.
682  */
683 static int
684 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
685 {
686 	int error;
687 	char *dsname;
688 
689 	dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
690 
691 	error = zfs_secpolicy_destroy_perms(dsname, cr);
692 
693 	strfree(dsname);
694 	return (error);
695 }
696 
697 int
698 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
699 {
700 	char	parentname[MAXNAMELEN];
701 	int	error;
702 
703 	if ((error = zfs_secpolicy_write_perms(from,
704 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
705 		return (error);
706 
707 	if ((error = zfs_secpolicy_write_perms(from,
708 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
709 		return (error);
710 
711 	if ((error = zfs_get_parent(to, parentname,
712 	    sizeof (parentname))) != 0)
713 		return (error);
714 
715 	if ((error = zfs_secpolicy_write_perms(parentname,
716 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
717 		return (error);
718 
719 	if ((error = zfs_secpolicy_write_perms(parentname,
720 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
721 		return (error);
722 
723 	return (error);
724 }
725 
/*
 * ioctl policy wrapper for rename: zc->zc_name is the source name and
 * zc->zc_value the destination name.
 */
static int
zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
}
731 
/*
 * Policy for promoting the clone named by zc->zc_name.
 *
 * Requires promote permission on the clone, mount permission on the
 * clone, and promote permission on the dataset the clone originated from
 * (looked up through the clone's dd_origin_obj).
 *
 * Returns 0 when allowed, otherwise an errno from the failing step.
 */
static int
zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
{
	char	parentname[MAXNAMELEN];
	objset_t *clone;
	int error;

	error = zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_PROMOTE, cr);
	if (error)
		return (error);

	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);

	if (error == 0) {
		dsl_dataset_t *pclone = NULL;
		dsl_dir_t *dd;
		dd = clone->os_dsl_dataset->ds_dir;

		/* Hold the origin dataset under the pool config lock. */
		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
		error = dsl_dataset_hold_obj(dd->dd_pool,
		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
		rw_exit(&dd->dd_pool->dp_config_rwlock);
		if (error) {
			dmu_objset_rele(clone, FTAG);
			return (error);
		}

		error = zfs_secpolicy_write_perms(zc->zc_name,
		    ZFS_DELEG_PERM_MOUNT, cr);

		/*
		 * Capture the origin's name, then drop both holds before
		 * the final check, which works from the name alone.
		 */
		dsl_dataset_name(pclone, parentname);
		dmu_objset_rele(clone, FTAG);
		dsl_dataset_rele(pclone, FTAG);
		if (error == 0)
			error = zfs_secpolicy_write_perms(parentname,
			    ZFS_DELEG_PERM_PROMOTE, cr);
	}
	return (error);
}
772 
773 static int
774 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
775 {
776 	int error;
777 
778 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
779 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
780 		return (error);
781 
782 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
783 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
784 		return (error);
785 
786 	return (zfs_secpolicy_write_perms(zc->zc_name,
787 	    ZFS_DELEG_PERM_CREATE, cr));
788 }
789 
/*
 * Snapshot permission check: write permission on 'name' for the
 * ZFS_DELEG_PERM_SNAPSHOT permission.
 */
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(name,
	    ZFS_DELEG_PERM_SNAPSHOT, cr));
}
796 
/*
 * ioctl policy wrapper: applies zfs_secpolicy_snapshot_perms() to the
 * dataset named in zc->zc_name.
 */
static int
zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
{

	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
}
803 
804 static int
805 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
806 {
807 	char	parentname[MAXNAMELEN];
808 	int	error;
809 
810 	if ((error = zfs_get_parent(zc->zc_name, parentname,
811 	    sizeof (parentname))) != 0)
812 		return (error);
813 
814 	if (zc->zc_value[0] != '\0') {
815 		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
816 		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
817 			return (error);
818 	}
819 
820 	if ((error = zfs_secpolicy_write_perms(parentname,
821 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
822 		return (error);
823 
824 	error = zfs_secpolicy_write_perms(parentname,
825 	    ZFS_DELEG_PERM_MOUNT, cr);
826 
827 	return (error);
828 }
829 
830 static int
831 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
832 {
833 	int error;
834 
835 	error = secpolicy_fs_unmount(cr, NULL);
836 	if (error) {
837 		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
838 	}
839 	return (error);
840 }
841 
842 /*
843  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
844  * SYS_CONFIG privilege, which is not available in a local zone.
845  */
846 /* ARGSUSED */
847 static int
848 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
849 {
850 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
851 		return (EPERM);
852 
853 	return (0);
854 }
855 
/*
 * Policy for fault injection.  Requires all privileges.
 *
 * The decision is delegated entirely to secpolicy_zinject(); 'zc' is
 * unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
{
	return (secpolicy_zinject(cr));
}
865 
866 static int
867 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
868 {
869 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
870 
871 	if (prop == ZPROP_INVAL) {
872 		if (!zfs_prop_user(zc->zc_value))
873 			return (EINVAL);
874 		return (zfs_secpolicy_write_perms(zc->zc_name,
875 		    ZFS_DELEG_PERM_USERPROP, cr));
876 	} else {
877 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
878 		    NULL, cr));
879 	}
880 }
881 
882 static int
883 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
884 {
885 	int err = zfs_secpolicy_read(zc, cr);
886 	if (err)
887 		return (err);
888 
889 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
890 		return (EINVAL);
891 
892 	if (zc->zc_value[0] == 0) {
893 		/*
894 		 * They are asking about a posix uid/gid.  If it's
895 		 * themself, allow it.
896 		 */
897 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
898 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
899 			if (zc->zc_guid == crgetuid(cr))
900 				return (0);
901 		} else {
902 			if (groupmember(zc->zc_guid, cr))
903 				return (0);
904 		}
905 	}
906 
907 	return (zfs_secpolicy_write_perms(zc->zc_name,
908 	    userquota_perms[zc->zc_objset_type], cr));
909 }
910 
911 static int
912 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
913 {
914 	int err = zfs_secpolicy_read(zc, cr);
915 	if (err)
916 		return (err);
917 
918 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
919 		return (EINVAL);
920 
921 	return (zfs_secpolicy_write_perms(zc->zc_name,
922 	    userquota_perms[zc->zc_objset_type], cr));
923 }
924 
/*
 * Policy for the userspace upgrade operation: treated the same as setting
 * the ZFS_PROP_VERSION property (with no value) on zc->zc_name.
 */
static int
zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
	    NULL, cr));
}
931 
/*
 * Policy for placing a hold: write permission on zc->zc_name for the
 * ZFS_DELEG_PERM_HOLD permission.
 */
static int
zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_HOLD, cr));
}
938 
/*
 * Policy for releasing a hold: write permission on zc->zc_name for the
 * ZFS_DELEG_PERM_RELEASE permission.
 */
static int
zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_RELEASE, cr));
}
945 
946 /*
947  * Returns the nvlist as specified by the user in the zfs_cmd_t.
948  */
949 static int
950 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
951 {
952 	char *packed;
953 	int error;
954 	nvlist_t *list = NULL;
955 
956 	/*
957 	 * Read in and unpack the user-supplied nvlist.
958 	 */
959 	if (size == 0)
960 		return (EINVAL);
961 
962 	packed = kmem_alloc(size, KM_SLEEP);
963 
964 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
965 	    iflag)) != 0) {
966 		kmem_free(packed, size);
967 		return (error);
968 	}
969 
970 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
971 		kmem_free(packed, size);
972 		return (error);
973 	}
974 
975 	kmem_free(packed, size);
976 
977 	*nvp = list;
978 	return (0);
979 }
980 
/*
 * Trim the error list '*errors' until its native-encoded size fits in the
 * caller's destination buffer (zc->zc_nvlist_dst_size).
 *
 * When trimming is needed, pairs are removed one at a time and a
 * ZPROP_N_MORE_ERRORS int32 entry holding the number of removed pairs is
 * left in the list.
 *
 * Returns 0 on success, or ENOMEM when the list does not fit and the
 * destination buffer is smaller than 1024 bytes.
 */
static int
fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
{
	size_t size;

	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);

	if (size > zc->zc_nvlist_dst_size) {
		nvpair_t *more_errors;
		int n = 0;

		if (zc->zc_nvlist_dst_size < 1024)
			return (ENOMEM);

		/*
		 * Add a placeholder counter first so that its space is
		 * included in every size measurement below.
		 * NOTE(review): nvlist_prev_nvpair(list, NULL) is presumed to
		 * return the last pair, i.e. the placeholder just added --
		 * confirm against the nvpair API.
		 */
		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
		more_errors = nvlist_prev_nvpair(*errors, NULL);

		/* Drop the pair preceding the placeholder until the list fits. */
		do {
			nvpair_t *pair = nvlist_prev_nvpair(*errors,
			    more_errors);
			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
			n++;
			VERIFY(nvlist_size(*errors, &size,
			    NV_ENCODE_NATIVE) == 0);
		} while (size > zc->zc_nvlist_dst_size);

		/* Replace the placeholder with the real count of dropped pairs. */
		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
		ASSERT(size <= zc->zc_nvlist_dst_size);
	}

	return (0);
}
1015 
1016 static int
1017 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1018 {
1019 	char *packed = NULL;
1020 	int error = 0;
1021 	size_t size;
1022 
1023 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
1024 
1025 	if (size > zc->zc_nvlist_dst_size) {
1026 		error = ENOMEM;
1027 	} else {
1028 		packed = kmem_alloc(size, KM_SLEEP);
1029 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
1030 		    KM_SLEEP) == 0);
1031 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1032 		    size, zc->zc_iflags) != 0)
1033 			error = EFAULT;
1034 		kmem_free(packed, size);
1035 	}
1036 
1037 	zc->zc_nvlist_dst_size = size;
1038 	return (error);
1039 }
1040 
1041 static int
1042 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1043 {
1044 	objset_t *os;
1045 	int error;
1046 
1047 	error = dmu_objset_hold(dsname, FTAG, &os);
1048 	if (error)
1049 		return (error);
1050 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1051 		dmu_objset_rele(os, FTAG);
1052 		return (EINVAL);
1053 	}
1054 
1055 	mutex_enter(&os->os_user_ptr_lock);
1056 	*zfvp = dmu_objset_get_user(os);
1057 	if (*zfvp) {
1058 		VFS_HOLD((*zfvp)->z_vfs);
1059 	} else {
1060 		error = ESRCH;
1061 	}
1062 	mutex_exit(&os->os_user_ptr_lock);
1063 	dmu_objset_rele(os, FTAG);
1064 	return (error);
1065 }
1066 
/*
 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
 * case its z_vfs will be NULL, and it will be opened as the owner.
 *
 * On success the zfsvfs's z_teardown_lock is held (as writer if 'writer'
 * is set, otherwise as reader) under 'tag'; release with zfsvfs_rele().
 *
 * Returns 0 on success, the error from zfsvfs_create() if no zfsvfs could
 * be found or created, or EBUSY if the zfsvfs is marked unmounted.
 */
static int
zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
{
	int error = 0;

	/* Any getzfsvfs() failure falls back to creating a private zfsvfs. */
	if (getzfsvfs(name, zfvp) != 0)
		error = zfsvfs_create(name, zfvp);
	if (error == 0) {
		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
		    RW_READER, tag);
		if ((*zfvp)->z_unmounted) {
			/*
			 * XXX we could probably try again, since the unmounting
			 * thread should be just about to disassociate the
			 * objset from the zfsvfs.
			 */
			/*
			 * NOTE(review): only the teardown lock is dropped on
			 * this path; the VFS hold taken by getzfsvfs() (or the
			 * zfsvfs made by zfsvfs_create()) is not released
			 * here -- confirm whether that is intentional.
			 */
			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
			return (EBUSY);
		}
	}
	return (error);
}
1093 
1094 static void
1095 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1096 {
1097 	rrw_exit(&zfsvfs->z_teardown_lock, tag);
1098 
1099 	if (zfsvfs->z_vfs) {
1100 		VFS_RELE(zfsvfs->z_vfs);
1101 	} else {
1102 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1103 		zfsvfs_free(zfsvfs);
1104 	}
1105 }
1106 
1107 static int
1108 zfs_ioc_pool_create(zfs_cmd_t *zc)
1109 {
1110 	int error;
1111 	nvlist_t *config, *props = NULL;
1112 	nvlist_t *rootprops = NULL;
1113 	nvlist_t *zplprops = NULL;
1114 	char *buf;
1115 
1116 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1117 	    zc->zc_iflags, &config))
1118 		return (error);
1119 
1120 	if (zc->zc_nvlist_src_size != 0 && (error =
1121 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1122 	    zc->zc_iflags, &props))) {
1123 		nvlist_free(config);
1124 		return (error);
1125 	}
1126 
1127 	if (props) {
1128 		nvlist_t *nvl = NULL;
1129 		uint64_t version = SPA_VERSION;
1130 
1131 		(void) nvlist_lookup_uint64(props,
1132 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1133 		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
1134 			error = EINVAL;
1135 			goto pool_props_bad;
1136 		}
1137 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1138 		if (nvl) {
1139 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1140 			if (error != 0) {
1141 				nvlist_free(config);
1142 				nvlist_free(props);
1143 				return (error);
1144 			}
1145 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1146 		}
1147 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1148 		error = zfs_fill_zplprops_root(version, rootprops,
1149 		    zplprops, NULL);
1150 		if (error)
1151 			goto pool_props_bad;
1152 	}
1153 
1154 	buf = history_str_get(zc);
1155 
1156 	error = spa_create(zc->zc_name, config, props, buf, zplprops);
1157 
1158 	/*
1159 	 * Set the remaining root properties
1160 	 */
1161 	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1162 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1163 		(void) spa_destroy(zc->zc_name);
1164 
1165 	if (buf != NULL)
1166 		history_str_free(buf);
1167 
1168 pool_props_bad:
1169 	nvlist_free(rootprops);
1170 	nvlist_free(zplprops);
1171 	nvlist_free(config);
1172 	nvlist_free(props);
1173 
1174 	return (error);
1175 }
1176 
1177 static int
1178 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1179 {
1180 	int error;
1181 	zfs_log_history(zc);
1182 	error = spa_destroy(zc->zc_name);
1183 	if (error == 0)
1184 		zvol_remove_minors(zc->zc_name);
1185 	return (error);
1186 }
1187 
1188 static int
1189 zfs_ioc_pool_import(zfs_cmd_t *zc)
1190 {
1191 	nvlist_t *config, *props = NULL;
1192 	uint64_t guid;
1193 	int error;
1194 
1195 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1196 	    zc->zc_iflags, &config)) != 0)
1197 		return (error);
1198 
1199 	if (zc->zc_nvlist_src_size != 0 && (error =
1200 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1201 	    zc->zc_iflags, &props))) {
1202 		nvlist_free(config);
1203 		return (error);
1204 	}
1205 
1206 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1207 	    guid != zc->zc_guid)
1208 		error = EINVAL;
1209 	else
1210 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1211 
1212 	if (zc->zc_nvlist_dst != 0) {
1213 		int err;
1214 
1215 		if ((err = put_nvlist(zc, config)) != 0)
1216 			error = err;
1217 	}
1218 
1219 	nvlist_free(config);
1220 
1221 	if (props)
1222 		nvlist_free(props);
1223 
1224 	return (error);
1225 }
1226 
1227 static int
1228 zfs_ioc_pool_export(zfs_cmd_t *zc)
1229 {
1230 	int error;
1231 	boolean_t force = (boolean_t)zc->zc_cookie;
1232 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1233 
1234 	zfs_log_history(zc);
1235 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1236 	if (error == 0)
1237 		zvol_remove_minors(zc->zc_name);
1238 	return (error);
1239 }
1240 
1241 static int
1242 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1243 {
1244 	nvlist_t *configs;
1245 	int error;
1246 
1247 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1248 		return (EEXIST);
1249 
1250 	error = put_nvlist(zc, configs);
1251 
1252 	nvlist_free(configs);
1253 
1254 	return (error);
1255 }
1256 
1257 static int
1258 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1259 {
1260 	nvlist_t *config;
1261 	int error;
1262 	int ret = 0;
1263 
1264 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1265 	    sizeof (zc->zc_value));
1266 
1267 	if (config != NULL) {
1268 		ret = put_nvlist(zc, config);
1269 		nvlist_free(config);
1270 
1271 		/*
1272 		 * The config may be present even if 'error' is non-zero.
1273 		 * In this case we return success, and preserve the real errno
1274 		 * in 'zc_cookie'.
1275 		 */
1276 		zc->zc_cookie = error;
1277 	} else {
1278 		ret = error;
1279 	}
1280 
1281 	return (ret);
1282 }
1283 
1284 /*
1285  * Try to import the given pool, returning pool stats as appropriate so that
1286  * user land knows which devices are available and overall pool health.
1287  */
1288 static int
1289 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1290 {
1291 	nvlist_t *tryconfig, *config;
1292 	int error;
1293 
1294 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1295 	    zc->zc_iflags, &tryconfig)) != 0)
1296 		return (error);
1297 
1298 	config = spa_tryimport(tryconfig);
1299 
1300 	nvlist_free(tryconfig);
1301 
1302 	if (config == NULL)
1303 		return (EINVAL);
1304 
1305 	error = put_nvlist(zc, config);
1306 	nvlist_free(config);
1307 
1308 	return (error);
1309 }
1310 
1311 /*
1312  * inputs:
1313  * zc_name              name of the pool
1314  * zc_cookie            scan func (pool_scan_func_t)
1315  */
1316 static int
1317 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1318 {
1319 	spa_t *spa;
1320 	int error;
1321 
1322 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1323 		return (error);
1324 
1325 	if (zc->zc_cookie == POOL_SCAN_NONE)
1326 		error = spa_scan_stop(spa);
1327 	else
1328 		error = spa_scan(spa, zc->zc_cookie);
1329 
1330 	spa_close(spa, FTAG);
1331 
1332 	return (error);
1333 }
1334 
1335 static int
1336 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1337 {
1338 	spa_t *spa;
1339 	int error;
1340 
1341 	error = spa_open(zc->zc_name, &spa, FTAG);
1342 	if (error == 0) {
1343 		spa_freeze(spa);
1344 		spa_close(spa, FTAG);
1345 	}
1346 	return (error);
1347 }
1348 
1349 static int
1350 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1351 {
1352 	spa_t *spa;
1353 	int error;
1354 
1355 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1356 		return (error);
1357 
1358 	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1359 		spa_close(spa, FTAG);
1360 		return (EINVAL);
1361 	}
1362 
1363 	spa_upgrade(spa, zc->zc_cookie);
1364 	spa_close(spa, FTAG);
1365 
1366 	return (error);
1367 }
1368 
1369 static int
1370 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1371 {
1372 	spa_t *spa;
1373 	char *hist_buf;
1374 	uint64_t size;
1375 	int error;
1376 
1377 	if ((size = zc->zc_history_len) == 0)
1378 		return (EINVAL);
1379 
1380 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1381 		return (error);
1382 
1383 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1384 		spa_close(spa, FTAG);
1385 		return (ENOTSUP);
1386 	}
1387 
1388 	hist_buf = kmem_alloc(size, KM_SLEEP);
1389 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1390 	    &zc->zc_history_len, hist_buf)) == 0) {
1391 		error = ddi_copyout(hist_buf,
1392 		    (void *)(uintptr_t)zc->zc_history,
1393 		    zc->zc_history_len, zc->zc_iflags);
1394 	}
1395 
1396 	spa_close(spa, FTAG);
1397 	kmem_free(hist_buf, size);
1398 	return (error);
1399 }
1400 
1401 static int
1402 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1403 {
1404 	int error;
1405 
1406 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1407 		return (error);
1408 
1409 	return (0);
1410 }
1411 
1412 /*
1413  * inputs:
1414  * zc_name		name of filesystem
1415  * zc_obj		object to find
1416  *
1417  * outputs:
1418  * zc_value		name of object
1419  */
1420 static int
1421 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1422 {
1423 	objset_t *os;
1424 	int error;
1425 
1426 	/* XXX reading from objset not owned */
1427 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1428 		return (error);
1429 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1430 		dmu_objset_rele(os, FTAG);
1431 		return (EINVAL);
1432 	}
1433 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1434 	    sizeof (zc->zc_value));
1435 	dmu_objset_rele(os, FTAG);
1436 
1437 	return (error);
1438 }
1439 
1440 static int
1441 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1442 {
1443 	spa_t *spa;
1444 	int error;
1445 	nvlist_t *config, **l2cache, **spares;
1446 	uint_t nl2cache = 0, nspares = 0;
1447 
1448 	error = spa_open(zc->zc_name, &spa, FTAG);
1449 	if (error != 0)
1450 		return (error);
1451 
1452 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1453 	    zc->zc_iflags, &config);
1454 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1455 	    &l2cache, &nl2cache);
1456 
1457 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1458 	    &spares, &nspares);
1459 
1460 	/*
1461 	 * A root pool with concatenated devices is not supported.
1462 	 * Thus, can not add a device to a root pool.
1463 	 *
1464 	 * Intent log device can not be added to a rootpool because
1465 	 * during mountroot, zil is replayed, a seperated log device
1466 	 * can not be accessed during the mountroot time.
1467 	 *
1468 	 * l2cache and spare devices are ok to be added to a rootpool.
1469 	 */
1470 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1471 		nvlist_free(config);
1472 		spa_close(spa, FTAG);
1473 		return (EDOM);
1474 	}
1475 
1476 	if (error == 0) {
1477 		error = spa_vdev_add(spa, config);
1478 		nvlist_free(config);
1479 	}
1480 	spa_close(spa, FTAG);
1481 	return (error);
1482 }
1483 
1484 /*
1485  * inputs:
1486  * zc_name		name of the pool
1487  * zc_nvlist_conf	nvlist of devices to remove
1488  * zc_cookie		to stop the remove?
1489  */
1490 static int
1491 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1492 {
1493 	spa_t *spa;
1494 	int error;
1495 
1496 	error = spa_open(zc->zc_name, &spa, FTAG);
1497 	if (error != 0)
1498 		return (error);
1499 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1500 	spa_close(spa, FTAG);
1501 	return (error);
1502 }
1503 
1504 static int
1505 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1506 {
1507 	spa_t *spa;
1508 	int error;
1509 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1510 
1511 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1512 		return (error);
1513 	switch (zc->zc_cookie) {
1514 	case VDEV_STATE_ONLINE:
1515 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1516 		break;
1517 
1518 	case VDEV_STATE_OFFLINE:
1519 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1520 		break;
1521 
1522 	case VDEV_STATE_FAULTED:
1523 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1524 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1525 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1526 
1527 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1528 		break;
1529 
1530 	case VDEV_STATE_DEGRADED:
1531 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1532 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1533 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1534 
1535 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1536 		break;
1537 
1538 	default:
1539 		error = EINVAL;
1540 	}
1541 	zc->zc_cookie = newstate;
1542 	spa_close(spa, FTAG);
1543 	return (error);
1544 }
1545 
1546 static int
1547 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1548 {
1549 	spa_t *spa;
1550 	int replacing = zc->zc_cookie;
1551 	nvlist_t *config;
1552 	int error;
1553 
1554 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1555 		return (error);
1556 
1557 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1558 	    zc->zc_iflags, &config)) == 0) {
1559 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1560 		nvlist_free(config);
1561 	}
1562 
1563 	spa_close(spa, FTAG);
1564 	return (error);
1565 }
1566 
1567 static int
1568 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1569 {
1570 	spa_t *spa;
1571 	int error;
1572 
1573 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1574 		return (error);
1575 
1576 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1577 
1578 	spa_close(spa, FTAG);
1579 	return (error);
1580 }
1581 
1582 static int
1583 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1584 {
1585 	spa_t *spa;
1586 	nvlist_t *config, *props = NULL;
1587 	int error;
1588 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1589 
1590 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1591 		return (error);
1592 
1593 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1594 	    zc->zc_iflags, &config)) {
1595 		spa_close(spa, FTAG);
1596 		return (error);
1597 	}
1598 
1599 	if (zc->zc_nvlist_src_size != 0 && (error =
1600 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1601 	    zc->zc_iflags, &props))) {
1602 		spa_close(spa, FTAG);
1603 		nvlist_free(config);
1604 		return (error);
1605 	}
1606 
1607 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1608 
1609 	spa_close(spa, FTAG);
1610 
1611 	nvlist_free(config);
1612 	nvlist_free(props);
1613 
1614 	return (error);
1615 }
1616 
1617 static int
1618 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1619 {
1620 	spa_t *spa;
1621 	char *path = zc->zc_value;
1622 	uint64_t guid = zc->zc_guid;
1623 	int error;
1624 
1625 	error = spa_open(zc->zc_name, &spa, FTAG);
1626 	if (error != 0)
1627 		return (error);
1628 
1629 	error = spa_vdev_setpath(spa, guid, path);
1630 	spa_close(spa, FTAG);
1631 	return (error);
1632 }
1633 
1634 static int
1635 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1636 {
1637 	spa_t *spa;
1638 	char *fru = zc->zc_value;
1639 	uint64_t guid = zc->zc_guid;
1640 	int error;
1641 
1642 	error = spa_open(zc->zc_name, &spa, FTAG);
1643 	if (error != 0)
1644 		return (error);
1645 
1646 	error = spa_vdev_setfru(spa, guid, fru);
1647 	spa_close(spa, FTAG);
1648 	return (error);
1649 }
1650 
1651 static int
1652 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1653 {
1654 	int error = 0;
1655 	nvlist_t *nv;
1656 
1657 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1658 
1659 	if (zc->zc_nvlist_dst != 0 &&
1660 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1661 		dmu_objset_stats(os, nv);
1662 		/*
1663 		 * NB: zvol_get_stats() will read the objset contents,
1664 		 * which we aren't supposed to do with a
1665 		 * DS_MODE_USER hold, because it could be
1666 		 * inconsistent.  So this is a bit of a workaround...
1667 		 * XXX reading with out owning
1668 		 */
1669 		if (!zc->zc_objset_stats.dds_inconsistent) {
1670 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1671 				VERIFY(zvol_get_stats(os, nv) == 0);
1672 		}
1673 		error = put_nvlist(zc, nv);
1674 		nvlist_free(nv);
1675 	}
1676 
1677 	return (error);
1678 }
1679 
1680 /*
1681  * inputs:
1682  * zc_name		name of filesystem
1683  * zc_nvlist_dst_size	size of buffer for property nvlist
1684  *
1685  * outputs:
1686  * zc_objset_stats	stats
1687  * zc_nvlist_dst	property nvlist
1688  * zc_nvlist_dst_size	size of property nvlist
1689  */
1690 static int
1691 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1692 {
1693 	objset_t *os = NULL;
1694 	int error;
1695 
1696 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1697 		return (error);
1698 
1699 	error = zfs_ioc_objset_stats_impl(zc, os);
1700 
1701 	dmu_objset_rele(os, FTAG);
1702 
1703 	return (error);
1704 }
1705 
1706 /*
1707  * inputs:
1708  * zc_name		name of filesystem
1709  * zc_nvlist_dst_size	size of buffer for property nvlist
1710  *
1711  * outputs:
1712  * zc_nvlist_dst	received property nvlist
1713  * zc_nvlist_dst_size	size of received property nvlist
1714  *
1715  * Gets received properties (distinct from local properties on or after
1716  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1717  * local property values.
1718  */
1719 static int
1720 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1721 {
1722 	objset_t *os = NULL;
1723 	int error;
1724 	nvlist_t *nv;
1725 
1726 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1727 		return (error);
1728 
1729 	/*
1730 	 * Without this check, we would return local property values if the
1731 	 * caller has not already received properties on or after
1732 	 * SPA_VERSION_RECVD_PROPS.
1733 	 */
1734 	if (!dsl_prop_get_hasrecvd(os)) {
1735 		dmu_objset_rele(os, FTAG);
1736 		return (ENOTSUP);
1737 	}
1738 
1739 	if (zc->zc_nvlist_dst != 0 &&
1740 	    (error = dsl_prop_get_received(os, &nv)) == 0) {
1741 		error = put_nvlist(zc, nv);
1742 		nvlist_free(nv);
1743 	}
1744 
1745 	dmu_objset_rele(os, FTAG);
1746 	return (error);
1747 }
1748 
1749 static int
1750 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1751 {
1752 	uint64_t value;
1753 	int error;
1754 
1755 	/*
1756 	 * zfs_get_zplprop() will either find a value or give us
1757 	 * the default value (if there is one).
1758 	 */
1759 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1760 		return (error);
1761 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1762 	return (0);
1763 }
1764 
1765 /*
1766  * inputs:
1767  * zc_name		name of filesystem
1768  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1769  *
1770  * outputs:
1771  * zc_nvlist_dst	zpl property nvlist
1772  * zc_nvlist_dst_size	size of zpl property nvlist
1773  */
1774 static int
1775 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1776 {
1777 	objset_t *os;
1778 	int err;
1779 
1780 	/* XXX reading without owning */
1781 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1782 		return (err);
1783 
1784 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1785 
1786 	/*
1787 	 * NB: nvl_add_zplprop() will read the objset contents,
1788 	 * which we aren't supposed to do with a DS_MODE_USER
1789 	 * hold, because it could be inconsistent.
1790 	 */
1791 	if (zc->zc_nvlist_dst != NULL &&
1792 	    !zc->zc_objset_stats.dds_inconsistent &&
1793 	    dmu_objset_type(os) == DMU_OST_ZFS) {
1794 		nvlist_t *nv;
1795 
1796 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1797 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1798 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1799 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1800 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1801 			err = put_nvlist(zc, nv);
1802 		nvlist_free(nv);
1803 	} else {
1804 		err = ENOENT;
1805 	}
1806 	dmu_objset_rele(os, FTAG);
1807 	return (err);
1808 }
1809 
1810 static boolean_t
1811 dataset_name_hidden(const char *name)
1812 {
1813 	/*
1814 	 * Skip over datasets that are not visible in this zone,
1815 	 * internal datasets (which have a $ in their name), and
1816 	 * temporary datasets (which have a % in their name).
1817 	 */
1818 	if (strchr(name, '$') != NULL)
1819 		return (B_TRUE);
1820 	if (strchr(name, '%') != NULL)
1821 		return (B_TRUE);
1822 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1823 		return (B_TRUE);
1824 	return (B_FALSE);
1825 }
1826 
1827 /*
1828  * inputs:
1829  * zc_name		name of filesystem
1830  * zc_cookie		zap cursor
1831  * zc_nvlist_dst_size	size of buffer for property nvlist
1832  *
1833  * outputs:
1834  * zc_name		name of next filesystem
1835  * zc_cookie		zap cursor
1836  * zc_objset_stats	stats
1837  * zc_nvlist_dst	property nvlist
1838  * zc_nvlist_dst_size	size of property nvlist
1839  */
1840 static int
1841 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1842 {
1843 	objset_t *os;
1844 	int error;
1845 	char *p;
1846 	size_t orig_len = strlen(zc->zc_name);
1847 
1848 top:
1849 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1850 		if (error == ENOENT)
1851 			error = ESRCH;
1852 		return (error);
1853 	}
1854 
1855 	p = strrchr(zc->zc_name, '/');
1856 	if (p == NULL || p[1] != '\0')
1857 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1858 	p = zc->zc_name + strlen(zc->zc_name);
1859 
1860 	/*
1861 	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1862 	 * but is not declared void because its called by dmu_objset_find().
1863 	 */
1864 	if (zc->zc_cookie == 0) {
1865 		uint64_t cookie = 0;
1866 		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1867 
1868 		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
1869 			(void) dmu_objset_prefetch(p, NULL);
1870 	}
1871 
1872 	do {
1873 		error = dmu_dir_list_next(os,
1874 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1875 		    NULL, &zc->zc_cookie);
1876 		if (error == ENOENT)
1877 			error = ESRCH;
1878 	} while (error == 0 && dataset_name_hidden(zc->zc_name) &&
1879 	    !(zc->zc_iflags & FKIOCTL));
1880 	dmu_objset_rele(os, FTAG);
1881 
1882 	/*
1883 	 * If it's an internal dataset (ie. with a '$' in its name),
1884 	 * don't try to get stats for it, otherwise we'll return ENOENT.
1885 	 */
1886 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
1887 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1888 		if (error == ENOENT) {
1889 			/* We lost a race with destroy, get the next one. */
1890 			zc->zc_name[orig_len] = '\0';
1891 			goto top;
1892 		}
1893 	}
1894 	return (error);
1895 }
1896 
/*
 * inputs:
 * zc_name		name of filesystem
 * zc_cookie		zap cursor
 * zc_nvlist_dst_size	size of buffer for property nvlist
 *
 * outputs:
 * zc_name		name of next snapshot
 * zc_objset_stats	stats
 * zc_nvlist_dst	property nvlist
 * zc_nvlist_dst_size	size of property nvlist
 *
 * zc_obj and zc_cookie are passed by address to dmu_snapshot_list_next().
 * Returns ESRCH when the filesystem cannot be found, when its name is too
 * long to have snapshots, or when dmu_snapshot_list_next() reports ENOENT.
 * On any failure after the '@' was appended, zc_name is restored to the
 * filesystem name.
 */
static int
zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
{
	objset_t *os;
	int error;

top:
	/* Kick off a prefetch of the snapshots on the first call only. */
	if (zc->zc_cookie == 0)
		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
		    NULL, DS_FIND_SNAPSHOTS);

	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
	if (error)
		return (error == ENOENT ? ESRCH : error);

	/*
	 * A dataset name of maximum length cannot have any snapshots,
	 * so exit immediately.
	 */
	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
		dmu_objset_rele(os, FTAG);
		return (ESRCH);
	}

	/* The snapshot component is written directly after the '@'. */
	error = dmu_snapshot_list_next(os,
	    sizeof (zc->zc_name) - strlen(zc->zc_name),
	    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
	    NULL);

	if (error == 0) {
		dsl_dataset_t *ds;
		dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;

		/*
		 * Since we probably don't have a hold on this snapshot,
		 * it's possible that the objsetid could have been destroyed
		 * and reused for a new objset. It's OK if this happens during
		 * a zfs send operation, since the new createtxg will be
		 * beyond the range we're interested in.
		 */
		rw_enter(&dp->dp_config_rwlock, RW_READER);
		error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
		rw_exit(&dp->dp_config_rwlock);
		if (error) {
			if (error == ENOENT) {
				/* Racing with destroy, get the next one. */
				/* Strip "@snap" and drop the hold before retrying. */
				*strchr(zc->zc_name, '@') = '\0';
				dmu_objset_rele(os, FTAG);
				goto top;
			}
			/* Any other error falls through to the common exit. */
		} else {
			objset_t *ossnap;

			/* Stats are gathered from the snapshot's own objset. */
			error = dmu_objset_from_ds(ds, &ossnap);
			if (error == 0)
				error = zfs_ioc_objset_stats_impl(zc, ossnap);
			dsl_dataset_rele(ds, FTAG);
		}
	} else if (error == ENOENT) {
		error = ESRCH;
	}

	dmu_objset_rele(os, FTAG);
	/* if we failed, undo the @ that we tacked on to zc_name */
	if (error)
		*strchr(zc->zc_name, '@') = '\0';
	return (error);
}
1977 
1978 static int
1979 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
1980 {
1981 	const char *propname = nvpair_name(pair);
1982 	uint64_t *valary;
1983 	unsigned int vallen;
1984 	const char *domain;
1985 	char *dash;
1986 	zfs_userquota_prop_t type;
1987 	uint64_t rid;
1988 	uint64_t quota;
1989 	zfsvfs_t *zfsvfs;
1990 	int err;
1991 
1992 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1993 		nvlist_t *attrs;
1994 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1995 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1996 		    &pair) != 0)
1997 			return (EINVAL);
1998 	}
1999 
2000 	/*
2001 	 * A correctly constructed propname is encoded as
2002 	 * userquota@<rid>-<domain>.
2003 	 */
2004 	if ((dash = strchr(propname, '-')) == NULL ||
2005 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2006 	    vallen != 3)
2007 		return (EINVAL);
2008 
2009 	domain = dash + 1;
2010 	type = valary[0];
2011 	rid = valary[1];
2012 	quota = valary[2];
2013 
2014 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2015 	if (err == 0) {
2016 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2017 		zfsvfs_rele(zfsvfs, FTAG);
2018 	}
2019 
2020 	return (err);
2021 }
2022 
2023 /*
2024  * If the named property is one that has a special function to set its value,
2025  * return 0 on success and a positive error code on failure; otherwise if it is
2026  * not one of the special properties handled by this function, return -1.
2027  *
2028  * XXX: It would be better for callers of the property interface if we handled
2029  * these special cases in dsl_prop.c (in the dsl layer).
2030  */
2031 static int
2032 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2033     nvpair_t *pair)
2034 {
2035 	const char *propname = nvpair_name(pair);
2036 	zfs_prop_t prop = zfs_name_to_prop(propname);
2037 	uint64_t intval;
2038 	int err;
2039 
2040 	if (prop == ZPROP_INVAL) {
2041 		if (zfs_prop_userquota(propname))
2042 			return (zfs_prop_set_userquota(dsname, pair));
2043 		return (-1);
2044 	}
2045 
2046 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2047 		nvlist_t *attrs;
2048 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2049 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2050 		    &pair) == 0);
2051 	}
2052 
2053 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2054 		return (-1);
2055 
2056 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
2057 
2058 	switch (prop) {
2059 	case ZFS_PROP_QUOTA:
2060 		err = dsl_dir_set_quota(dsname, source, intval);
2061 		break;
2062 	case ZFS_PROP_REFQUOTA:
2063 		err = dsl_dataset_set_quota(dsname, source, intval);
2064 		break;
2065 	case ZFS_PROP_RESERVATION:
2066 		err = dsl_dir_set_reservation(dsname, source, intval);
2067 		break;
2068 	case ZFS_PROP_REFRESERVATION:
2069 		err = dsl_dataset_set_reservation(dsname, source, intval);
2070 		break;
2071 	case ZFS_PROP_VOLSIZE:
2072 		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
2073 		    intval);
2074 		break;
2075 	case ZFS_PROP_VERSION:
2076 	{
2077 		zfsvfs_t *zfsvfs;
2078 
2079 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2080 			break;
2081 
2082 		err = zfs_set_version(zfsvfs, intval);
2083 		zfsvfs_rele(zfsvfs, FTAG);
2084 
2085 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2086 			zfs_cmd_t *zc;
2087 
2088 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2089 			(void) strcpy(zc->zc_name, dsname);
2090 			(void) zfs_ioc_userspace_upgrade(zc);
2091 			kmem_free(zc, sizeof (zfs_cmd_t));
2092 		}
2093 		break;
2094 	}
2095 
2096 	default:
2097 		err = -1;
2098 	}
2099 
2100 	return (err);
2101 }
2102 
/*
 * This function is best effort. If it fails to set any of the given properties,
 * it continues to set as many as it can and returns the first error
 * encountered. If the caller provides a non-NULL errlist, it also gives the
 * complete list of names of all the properties it failed to set along with the
 * corresponding error numbers. The caller is responsible for freeing the
 * returned errlist.
 *
 * If every property is set successfully, zero is returned and the list pointed
 * at by errlist is NULL.
 *
 * Processing happens in up to three stages:
 *   1. every pair in 'nvl' is validated and offered to
 *      zfs_prop_set_special(); pairs it declines (-1) are collected in
 *      'genericnvl', pairs it fails on are collected in 'retrynvl';
 *   2. if 'retrynvl' is non-empty, stage 1 is repeated once over it;
 *   3. 'genericnvl' is applied with a single dsl_props_set() call, falling
 *      back to one dsl_prop_set() per property if that call fails.
 * The returned value is the error stored for the first entry of the
 * internal 'errors' list.
 */
int
zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
    nvlist_t **errlist)
{
	nvpair_t *pair;
	nvpair_t *propval;
	int rv = 0;
	uint64_t intval;
	char *strval;
	nvlist_t *genericnvl;	/* properties for the batched dsl_props_set() */
	nvlist_t *errors;	/* property name -> int32 error */
	nvlist_t *retrynvl;	/* special properties to attempt a second time */

	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);

retry:
	/* On the second pass 'nvl' has been replaced by 'retrynvl'. */
	pair = NULL;
	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
		const char *propname = nvpair_name(pair);
		zfs_prop_t prop = zfs_name_to_prop(propname);
		int err = 0;

		/* decode the property value */
		propval = pair;
		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
			nvlist_t *attrs;
			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
			    &propval) != 0)
				err = EINVAL;
		}

		/* Validate value type */
		if (err == 0 && prop == ZPROP_INVAL) {
			/*
			 * Not a native property: user properties must be
			 * strings, userquota properties uint64 arrays.  Any
			 * other unknown name is not rejected here.
			 */
			if (zfs_prop_user(propname)) {
				if (nvpair_type(propval) != DATA_TYPE_STRING)
					err = EINVAL;
			} else if (zfs_prop_userquota(propname)) {
				if (nvpair_type(propval) !=
				    DATA_TYPE_UINT64_ARRAY)
					err = EINVAL;
			}
		} else if (err == 0) {
			/* Native property: value type must match its type. */
			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
					err = EINVAL;
			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
				const char *unused;

				VERIFY(nvpair_value_uint64(propval,
				    &intval) == 0);

				switch (zfs_prop_get_type(prop)) {
				case PROP_TYPE_NUMBER:
					break;
				case PROP_TYPE_STRING:
					err = EINVAL;
					break;
				case PROP_TYPE_INDEX:
					/* The index must map to a known name. */
					if (zfs_prop_index_to_string(prop,
					    intval, &unused) != 0)
						err = EINVAL;
					break;
				default:
					cmn_err(CE_PANIC,
					    "unknown property type");
				}
			} else {
				err = EINVAL;
			}
		}

		/* Validate permissions */
		if (err == 0)
			err = zfs_check_settable(dsname, pair, CRED());

		if (err == 0) {
			err = zfs_prop_set_special(dsname, source, pair);
			if (err == -1) {
				/*
				 * For better performance we build up a list of
				 * properties to set in a single transaction.
				 */
				err = nvlist_add_nvpair(genericnvl, pair);
			} else if (err != 0 && nvl != retrynvl) {
				/*
				 * This may be a spurious error caused by
				 * receiving quota and reservation out of order.
				 * Try again in a second pass.
				 */
				/*
				 * Note that 'err' now holds the result of the
				 * add, so the setter's error is only recorded
				 * if it recurs on the second pass.
				 */
				err = nvlist_add_nvpair(retrynvl, pair);
			}
		}

		if (err != 0)
			VERIFY(nvlist_add_int32(errors, propname, err) == 0);
	}

	/* Run the loop above one more time over the deferred properties. */
	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
		nvl = retrynvl;
		goto retry;
	}

	if (!nvlist_empty(genericnvl) &&
	    dsl_props_set(dsname, source, genericnvl) != 0) {
		/*
		 * If this fails, we still want to set as many properties as we
		 * can, so try setting them individually.
		 */
		pair = NULL;
		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
			const char *propname = nvpair_name(pair);
			int err = 0;

			propval = pair;
			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
				nvlist_t *attrs;
				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
				    &propval) == 0);
			}

			if (nvpair_type(propval) == DATA_TYPE_STRING) {
				VERIFY(nvpair_value_string(propval,
				    &strval) == 0);
				/* Strings are stored with their terminator. */
				err = dsl_prop_set(dsname, propname, source, 1,
				    strlen(strval) + 1, strval);
			} else {
				VERIFY(nvpair_value_uint64(propval,
				    &intval) == 0);
				/* One 8-byte integer. */
				err = dsl_prop_set(dsname, propname, source, 8,
				    1, &intval);
			}

			if (err != 0) {
				VERIFY(nvlist_add_int32(errors, propname,
				    err) == 0);
			}
		}
	}
	nvlist_free(genericnvl);
	nvlist_free(retrynvl);

	/* An empty error list means complete success. */
	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
		nvlist_free(errors);
		errors = NULL;
	} else {
		VERIFY(nvpair_value_int32(pair, &rv) == 0);
	}

	/* Hand the list to the caller if wanted, otherwise discard it. */
	if (errlist == NULL)
		nvlist_free(errors);
	else
		*errlist = errors;

	return (rv);
}
2273 
2274 /*
2275  * Check that all the properties are valid user properties.
2276  */
2277 static int
2278 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2279 {
2280 	nvpair_t *pair = NULL;
2281 	int error = 0;
2282 
2283 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2284 		const char *propname = nvpair_name(pair);
2285 		char *valstr;
2286 
2287 		if (!zfs_prop_user(propname) ||
2288 		    nvpair_type(pair) != DATA_TYPE_STRING)
2289 			return (EINVAL);
2290 
2291 		if (error = zfs_secpolicy_write_perms(fsname,
2292 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2293 			return (error);
2294 
2295 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2296 			return (ENAMETOOLONG);
2297 
2298 		VERIFY(nvpair_value_string(pair, &valstr) == 0);
2299 		if (strlen(valstr) >= ZAP_MAXVALUELEN)
2300 			return (E2BIG);
2301 	}
2302 	return (0);
2303 }
2304 
2305 static void
2306 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2307 {
2308 	nvpair_t *pair;
2309 
2310 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2311 
2312 	pair = NULL;
2313 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2314 		if (nvlist_exists(skipped, nvpair_name(pair)))
2315 			continue;
2316 
2317 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2318 	}
2319 }
2320 
2321 static int
2322 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2323     nvlist_t *skipped)
2324 {
2325 	int err = 0;
2326 	nvlist_t *cleared_props = NULL;
2327 	props_skip(props, skipped, &cleared_props);
2328 	if (!nvlist_empty(cleared_props)) {
2329 		/*
2330 		 * Acts on local properties until the dataset has received
2331 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2332 		 */
2333 		zprop_source_t flags = (ZPROP_SRC_NONE |
2334 		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2335 		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2336 	}
2337 	nvlist_free(cleared_props);
2338 	return (err);
2339 }
2340 
2341 /*
2342  * inputs:
2343  * zc_name		name of filesystem
2344  * zc_value		name of property to set
2345  * zc_nvlist_src{_size}	nvlist of properties to apply
2346  * zc_cookie		received properties flag
2347  *
2348  * outputs:
2349  * zc_nvlist_dst{_size} error for each unapplied received property
2350  */
2351 static int
2352 zfs_ioc_set_prop(zfs_cmd_t *zc)
2353 {
2354 	nvlist_t *nvl;
2355 	boolean_t received = zc->zc_cookie;
2356 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2357 	    ZPROP_SRC_LOCAL);
2358 	nvlist_t *errors = NULL;
2359 	int error;
2360 
2361 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2362 	    zc->zc_iflags, &nvl)) != 0)
2363 		return (error);
2364 
2365 	if (received) {
2366 		nvlist_t *origprops;
2367 		objset_t *os;
2368 
2369 		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2370 			if (dsl_prop_get_received(os, &origprops) == 0) {
2371 				(void) clear_received_props(os,
2372 				    zc->zc_name, origprops, nvl);
2373 				nvlist_free(origprops);
2374 			}
2375 
2376 			dsl_prop_set_hasrecvd(os);
2377 			dmu_objset_rele(os, FTAG);
2378 		}
2379 	}
2380 
2381 	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2382 
2383 	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2384 		(void) put_nvlist(zc, errors);
2385 	}
2386 
2387 	nvlist_free(errors);
2388 	nvlist_free(nvl);
2389 	return (error);
2390 }
2391 
2392 /*
2393  * inputs:
2394  * zc_name		name of filesystem
2395  * zc_value		name of property to inherit
2396  * zc_cookie		revert to received value if TRUE
2397  *
2398  * outputs:		none
2399  */
2400 static int
2401 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2402 {
2403 	const char *propname = zc->zc_value;
2404 	zfs_prop_t prop = zfs_name_to_prop(propname);
2405 	boolean_t received = zc->zc_cookie;
2406 	zprop_source_t source = (received
2407 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2408 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2409 
2410 	if (received) {
2411 		nvlist_t *dummy;
2412 		nvpair_t *pair;
2413 		zprop_type_t type;
2414 		int err;
2415 
2416 		/*
2417 		 * zfs_prop_set_special() expects properties in the form of an
2418 		 * nvpair with type info.
2419 		 */
2420 		if (prop == ZPROP_INVAL) {
2421 			if (!zfs_prop_user(propname))
2422 				return (EINVAL);
2423 
2424 			type = PROP_TYPE_STRING;
2425 		} else if (prop == ZFS_PROP_VOLSIZE ||
2426 		    prop == ZFS_PROP_VERSION) {
2427 			return (EINVAL);
2428 		} else {
2429 			type = zfs_prop_get_type(prop);
2430 		}
2431 
2432 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2433 
2434 		switch (type) {
2435 		case PROP_TYPE_STRING:
2436 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2437 			break;
2438 		case PROP_TYPE_NUMBER:
2439 		case PROP_TYPE_INDEX:
2440 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2441 			break;
2442 		default:
2443 			nvlist_free(dummy);
2444 			return (EINVAL);
2445 		}
2446 
2447 		pair = nvlist_next_nvpair(dummy, NULL);
2448 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2449 		nvlist_free(dummy);
2450 		if (err != -1)
2451 			return (err); /* special property already handled */
2452 	} else {
2453 		/*
2454 		 * Only check this in the non-received case. We want to allow
2455 		 * 'inherit -S' to revert non-inheritable properties like quota
2456 		 * and reservation to the received or default values even though
2457 		 * they are not considered inheritable.
2458 		 */
2459 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2460 			return (EINVAL);
2461 	}
2462 
2463 	/* the property name has been validated by zfs_secpolicy_inherit() */
2464 	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2465 }
2466 
2467 static int
2468 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2469 {
2470 	nvlist_t *props;
2471 	spa_t *spa;
2472 	int error;
2473 	nvpair_t *pair;
2474 
2475 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2476 	    zc->zc_iflags, &props))
2477 		return (error);
2478 
2479 	/*
2480 	 * If the only property is the configfile, then just do a spa_lookup()
2481 	 * to handle the faulted case.
2482 	 */
2483 	pair = nvlist_next_nvpair(props, NULL);
2484 	if (pair != NULL && strcmp(nvpair_name(pair),
2485 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2486 	    nvlist_next_nvpair(props, pair) == NULL) {
2487 		mutex_enter(&spa_namespace_lock);
2488 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2489 			spa_configfile_set(spa, props, B_FALSE);
2490 			spa_config_sync(spa, B_FALSE, B_TRUE);
2491 		}
2492 		mutex_exit(&spa_namespace_lock);
2493 		if (spa != NULL) {
2494 			nvlist_free(props);
2495 			return (0);
2496 		}
2497 	}
2498 
2499 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2500 		nvlist_free(props);
2501 		return (error);
2502 	}
2503 
2504 	error = spa_prop_set(spa, props);
2505 
2506 	nvlist_free(props);
2507 	spa_close(spa, FTAG);
2508 
2509 	return (error);
2510 }
2511 
2512 static int
2513 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2514 {
2515 	spa_t *spa;
2516 	int error;
2517 	nvlist_t *nvp = NULL;
2518 
2519 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2520 		/*
2521 		 * If the pool is faulted, there may be properties we can still
2522 		 * get (such as altroot and cachefile), so attempt to get them
2523 		 * anyway.
2524 		 */
2525 		mutex_enter(&spa_namespace_lock);
2526 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2527 			error = spa_prop_get(spa, &nvp);
2528 		mutex_exit(&spa_namespace_lock);
2529 	} else {
2530 		error = spa_prop_get(spa, &nvp);
2531 		spa_close(spa, FTAG);
2532 	}
2533 
2534 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2535 		error = put_nvlist(zc, nvp);
2536 	else
2537 		error = EFAULT;
2538 
2539 	nvlist_free(nvp);
2540 	return (error);
2541 }
2542 
2543 /*
2544  * inputs:
2545  * zc_name		name of filesystem
2546  * zc_nvlist_src{_size}	nvlist of delegated permissions
2547  * zc_perm_action	allow/unallow flag
2548  *
2549  * outputs:		none
2550  */
2551 static int
2552 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2553 {
2554 	int error;
2555 	nvlist_t *fsaclnv = NULL;
2556 
2557 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2558 	    zc->zc_iflags, &fsaclnv)) != 0)
2559 		return (error);
2560 
2561 	/*
2562 	 * Verify nvlist is constructed correctly
2563 	 */
2564 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2565 		nvlist_free(fsaclnv);
2566 		return (EINVAL);
2567 	}
2568 
2569 	/*
2570 	 * If we don't have PRIV_SYS_MOUNT, then validate
2571 	 * that user is allowed to hand out each permission in
2572 	 * the nvlist(s)
2573 	 */
2574 
2575 	error = secpolicy_zfs(CRED());
2576 	if (error) {
2577 		if (zc->zc_perm_action == B_FALSE) {
2578 			error = dsl_deleg_can_allow(zc->zc_name,
2579 			    fsaclnv, CRED());
2580 		} else {
2581 			error = dsl_deleg_can_unallow(zc->zc_name,
2582 			    fsaclnv, CRED());
2583 		}
2584 	}
2585 
2586 	if (error == 0)
2587 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2588 
2589 	nvlist_free(fsaclnv);
2590 	return (error);
2591 }
2592 
2593 /*
2594  * inputs:
2595  * zc_name		name of filesystem
2596  *
2597  * outputs:
2598  * zc_nvlist_src{_size}	nvlist of delegated permissions
2599  */
2600 static int
2601 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2602 {
2603 	nvlist_t *nvp;
2604 	int error;
2605 
2606 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2607 		error = put_nvlist(zc, nvp);
2608 		nvlist_free(nvp);
2609 	}
2610 
2611 	return (error);
2612 }
2613 
2614 /*
2615  * Search the vfs list for a specified resource.  Returns a pointer to it
2616  * or NULL if no suitable entry is found. The caller of this routine
2617  * is responsible for releasing the returned vfs pointer.
2618  */
2619 static vfs_t *
2620 zfs_get_vfs(const char *resource)
2621 {
2622 	struct vfs *vfsp;
2623 	struct vfs *vfs_found = NULL;
2624 
2625 	vfs_list_read_lock();
2626 	vfsp = rootvfs;
2627 	do {
2628 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2629 			VFS_HOLD(vfsp);
2630 			vfs_found = vfsp;
2631 			break;
2632 		}
2633 		vfsp = vfsp->vfs_next;
2634 	} while (vfsp != rootvfs);
2635 	vfs_list_unlock();
2636 	return (vfs_found);
2637 }
2638 
2639 /* ARGSUSED */
2640 static void
2641 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2642 {
2643 	zfs_creat_t *zct = arg;
2644 
2645 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2646 }
2647 
2648 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2649 
2650 /*
2651  * inputs:
2652  * createprops		list of properties requested by creator
2653  * default_zplver	zpl version to use if unspecified in createprops
2654  * fuids_ok		fuids allowed in this version of the spa?
2655  * os			parent objset pointer (NULL if root fs)
2656  *
2657  * outputs:
2658  * zplprops	values for the zplprops we attach to the master node object
2659  * is_ci	true if requested file system will be purely case-insensitive
2660  *
2661  * Determine the settings for utf8only, normalization and
2662  * casesensitivity.  Specific values may have been requested by the
2663  * creator and/or we can inherit values from the parent dataset.  If
2664  * the file system is of too early a vintage, a creator can not
2665  * request settings for these properties, even if the requested
2666  * setting is the default value.  We don't actually want to create dsl
2667  * properties for these, so remove them from the source nvlist after
2668  * processing.
2669  */
2670 static int
2671 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2672     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2673     nvlist_t *zplprops, boolean_t *is_ci)
2674 {
2675 	uint64_t sense = ZFS_PROP_UNDEFINED;
2676 	uint64_t norm = ZFS_PROP_UNDEFINED;
2677 	uint64_t u8 = ZFS_PROP_UNDEFINED;
2678 
2679 	ASSERT(zplprops != NULL);
2680 
2681 	/*
2682 	 * Pull out creator prop choices, if any.
2683 	 */
2684 	if (createprops) {
2685 		(void) nvlist_lookup_uint64(createprops,
2686 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2687 		(void) nvlist_lookup_uint64(createprops,
2688 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2689 		(void) nvlist_remove_all(createprops,
2690 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2691 		(void) nvlist_lookup_uint64(createprops,
2692 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2693 		(void) nvlist_remove_all(createprops,
2694 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2695 		(void) nvlist_lookup_uint64(createprops,
2696 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2697 		(void) nvlist_remove_all(createprops,
2698 		    zfs_prop_to_name(ZFS_PROP_CASE));
2699 	}
2700 
2701 	/*
2702 	 * If the zpl version requested is whacky or the file system
2703 	 * or pool is version is too "young" to support normalization
2704 	 * and the creator tried to set a value for one of the props,
2705 	 * error out.
2706 	 */
2707 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2708 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2709 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2710 	    (zplver < ZPL_VERSION_NORMALIZATION &&
2711 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2712 	    sense != ZFS_PROP_UNDEFINED)))
2713 		return (ENOTSUP);
2714 
2715 	/*
2716 	 * Put the version in the zplprops
2717 	 */
2718 	VERIFY(nvlist_add_uint64(zplprops,
2719 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2720 
2721 	if (norm == ZFS_PROP_UNDEFINED)
2722 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2723 	VERIFY(nvlist_add_uint64(zplprops,
2724 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2725 
2726 	/*
2727 	 * If we're normalizing, names must always be valid UTF-8 strings.
2728 	 */
2729 	if (norm)
2730 		u8 = 1;
2731 	if (u8 == ZFS_PROP_UNDEFINED)
2732 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2733 	VERIFY(nvlist_add_uint64(zplprops,
2734 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2735 
2736 	if (sense == ZFS_PROP_UNDEFINED)
2737 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2738 	VERIFY(nvlist_add_uint64(zplprops,
2739 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2740 
2741 	if (is_ci)
2742 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
2743 
2744 	return (0);
2745 }
2746 
2747 static int
2748 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2749     nvlist_t *zplprops, boolean_t *is_ci)
2750 {
2751 	boolean_t fuids_ok, sa_ok;
2752 	uint64_t zplver = ZPL_VERSION;
2753 	objset_t *os = NULL;
2754 	char parentname[MAXNAMELEN];
2755 	char *cp;
2756 	spa_t *spa;
2757 	uint64_t spa_vers;
2758 	int error;
2759 
2760 	(void) strlcpy(parentname, dataset, sizeof (parentname));
2761 	cp = strrchr(parentname, '/');
2762 	ASSERT(cp != NULL);
2763 	cp[0] = '\0';
2764 
2765 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2766 		return (error);
2767 
2768 	spa_vers = spa_version(spa);
2769 	spa_close(spa, FTAG);
2770 
2771 	zplver = zfs_zpl_version_map(spa_vers);
2772 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2773 	sa_ok = (zplver >= ZPL_VERSION_SA);
2774 
2775 	/*
2776 	 * Open parent object set so we can inherit zplprop values.
2777 	 */
2778 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2779 		return (error);
2780 
2781 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2782 	    zplprops, is_ci);
2783 	dmu_objset_rele(os, FTAG);
2784 	return (error);
2785 }
2786 
2787 static int
2788 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2789     nvlist_t *zplprops, boolean_t *is_ci)
2790 {
2791 	boolean_t fuids_ok;
2792 	boolean_t sa_ok;
2793 	uint64_t zplver = ZPL_VERSION;
2794 	int error;
2795 
2796 	zplver = zfs_zpl_version_map(spa_vers);
2797 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2798 	sa_ok = (zplver >= ZPL_VERSION_SA);
2799 
2800 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2801 	    createprops, zplprops, is_ci);
2802 	return (error);
2803 }
2804 
2805 /*
2806  * inputs:
2807  * zc_objset_type	type of objset to create (fs vs zvol)
2808  * zc_name		name of new objset
2809  * zc_value		name of snapshot to clone from (may be empty)
2810  * zc_nvlist_src{_size}	nvlist of properties to apply
2811  *
2812  * outputs: none
2813  */
2814 static int
2815 zfs_ioc_create(zfs_cmd_t *zc)
2816 {
2817 	objset_t *clone;
2818 	int error = 0;
2819 	zfs_creat_t zct;
2820 	nvlist_t *nvprops = NULL;
2821 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2822 	dmu_objset_type_t type = zc->zc_objset_type;
2823 
2824 	switch (type) {
2825 
2826 	case DMU_OST_ZFS:
2827 		cbfunc = zfs_create_cb;
2828 		break;
2829 
2830 	case DMU_OST_ZVOL:
2831 		cbfunc = zvol_create_cb;
2832 		break;
2833 
2834 	default:
2835 		cbfunc = NULL;
2836 		break;
2837 	}
2838 	if (strchr(zc->zc_name, '@') ||
2839 	    strchr(zc->zc_name, '%'))
2840 		return (EINVAL);
2841 
2842 	if (zc->zc_nvlist_src != NULL &&
2843 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2844 	    zc->zc_iflags, &nvprops)) != 0)
2845 		return (error);
2846 
2847 	zct.zct_zplprops = NULL;
2848 	zct.zct_props = nvprops;
2849 
2850 	if (zc->zc_value[0] != '\0') {
2851 		/*
2852 		 * We're creating a clone of an existing snapshot.
2853 		 */
2854 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2855 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2856 			nvlist_free(nvprops);
2857 			return (EINVAL);
2858 		}
2859 
2860 		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2861 		if (error) {
2862 			nvlist_free(nvprops);
2863 			return (error);
2864 		}
2865 
2866 		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2867 		dmu_objset_rele(clone, FTAG);
2868 		if (error) {
2869 			nvlist_free(nvprops);
2870 			return (error);
2871 		}
2872 	} else {
2873 		boolean_t is_insensitive = B_FALSE;
2874 
2875 		if (cbfunc == NULL) {
2876 			nvlist_free(nvprops);
2877 			return (EINVAL);
2878 		}
2879 
2880 		if (type == DMU_OST_ZVOL) {
2881 			uint64_t volsize, volblocksize;
2882 
2883 			if (nvprops == NULL ||
2884 			    nvlist_lookup_uint64(nvprops,
2885 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2886 			    &volsize) != 0) {
2887 				nvlist_free(nvprops);
2888 				return (EINVAL);
2889 			}
2890 
2891 			if ((error = nvlist_lookup_uint64(nvprops,
2892 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2893 			    &volblocksize)) != 0 && error != ENOENT) {
2894 				nvlist_free(nvprops);
2895 				return (EINVAL);
2896 			}
2897 
2898 			if (error != 0)
2899 				volblocksize = zfs_prop_default_numeric(
2900 				    ZFS_PROP_VOLBLOCKSIZE);
2901 
2902 			if ((error = zvol_check_volblocksize(
2903 			    volblocksize)) != 0 ||
2904 			    (error = zvol_check_volsize(volsize,
2905 			    volblocksize)) != 0) {
2906 				nvlist_free(nvprops);
2907 				return (error);
2908 			}
2909 		} else if (type == DMU_OST_ZFS) {
2910 			int error;
2911 
2912 			/*
2913 			 * We have to have normalization and
2914 			 * case-folding flags correct when we do the
2915 			 * file system creation, so go figure them out
2916 			 * now.
2917 			 */
2918 			VERIFY(nvlist_alloc(&zct.zct_zplprops,
2919 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2920 			error = zfs_fill_zplprops(zc->zc_name, nvprops,
2921 			    zct.zct_zplprops, &is_insensitive);
2922 			if (error != 0) {
2923 				nvlist_free(nvprops);
2924 				nvlist_free(zct.zct_zplprops);
2925 				return (error);
2926 			}
2927 		}
2928 		error = dmu_objset_create(zc->zc_name, type,
2929 		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
2930 		nvlist_free(zct.zct_zplprops);
2931 	}
2932 
2933 	/*
2934 	 * It would be nice to do this atomically.
2935 	 */
2936 	if (error == 0) {
2937 		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
2938 		    nvprops, NULL);
2939 		if (error != 0)
2940 			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
2941 	}
2942 	nvlist_free(nvprops);
2943 	return (error);
2944 }
2945 
2946 /*
2947  * inputs:
2948  * zc_name	name of filesystem
2949  * zc_value	short name of snapshot
2950  * zc_cookie	recursive flag
2951  * zc_nvlist_src[_size] property list
2952  *
2953  * outputs:
2954  * zc_value	short snapname (i.e. part after the '@')
2955  */
2956 static int
2957 zfs_ioc_snapshot(zfs_cmd_t *zc)
2958 {
2959 	nvlist_t *nvprops = NULL;
2960 	int error;
2961 	boolean_t recursive = zc->zc_cookie;
2962 
2963 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2964 		return (EINVAL);
2965 
2966 	if (zc->zc_nvlist_src != NULL &&
2967 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2968 	    zc->zc_iflags, &nvprops)) != 0)
2969 		return (error);
2970 
2971 	error = zfs_check_userprops(zc->zc_name, nvprops);
2972 	if (error)
2973 		goto out;
2974 
2975 	if (!nvlist_empty(nvprops) &&
2976 	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
2977 		error = ENOTSUP;
2978 		goto out;
2979 	}
2980 
2981 	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value,
2982 	    nvprops, recursive);
2983 
2984 out:
2985 	nvlist_free(nvprops);
2986 	return (error);
2987 }
2988 
2989 int
2990 zfs_unmount_snap(const char *name, void *arg)
2991 {
2992 	vfs_t *vfsp = NULL;
2993 
2994 	if (arg) {
2995 		char *snapname = arg;
2996 		char *fullname = kmem_asprintf("%s@%s", name, snapname);
2997 		vfsp = zfs_get_vfs(fullname);
2998 		strfree(fullname);
2999 	} else if (strchr(name, '@')) {
3000 		vfsp = zfs_get_vfs(name);
3001 	}
3002 
3003 	if (vfsp) {
3004 		/*
3005 		 * Always force the unmount for snapshots.
3006 		 */
3007 		int flag = MS_FORCE;
3008 		int err;
3009 
3010 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
3011 			VFS_RELE(vfsp);
3012 			return (err);
3013 		}
3014 		VFS_RELE(vfsp);
3015 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
3016 			return (err);
3017 	}
3018 	return (0);
3019 }
3020 
3021 /*
3022  * inputs:
3023  * zc_name		name of filesystem
3024  * zc_value		short name of snapshot
3025  * zc_defer_destroy	mark for deferred destroy
3026  *
3027  * outputs:	none
3028  */
3029 static int
3030 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
3031 {
3032 	int err;
3033 
3034 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3035 		return (EINVAL);
3036 	err = dmu_objset_find(zc->zc_name,
3037 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
3038 	if (err)
3039 		return (err);
3040 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
3041 	    zc->zc_defer_destroy));
3042 }
3043 
3044 /*
3045  * inputs:
3046  * zc_name		name of dataset to destroy
3047  * zc_objset_type	type of objset
3048  * zc_defer_destroy	mark for deferred destroy
3049  *
3050  * outputs:		none
3051  */
3052 static int
3053 zfs_ioc_destroy(zfs_cmd_t *zc)
3054 {
3055 	int err;
3056 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
3057 		err = zfs_unmount_snap(zc->zc_name, NULL);
3058 		if (err)
3059 			return (err);
3060 	}
3061 
3062 	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
3063 	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3064 		(void) zvol_remove_minor(zc->zc_name);
3065 	return (err);
3066 }
3067 
3068 /*
3069  * inputs:
3070  * zc_name	name of dataset to rollback (to most recent snapshot)
3071  *
3072  * outputs:	none
3073  */
3074 static int
3075 zfs_ioc_rollback(zfs_cmd_t *zc)
3076 {
3077 	dsl_dataset_t *ds, *clone;
3078 	int error;
3079 	zfsvfs_t *zfsvfs;
3080 	char *clone_name;
3081 
3082 	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
3083 	if (error)
3084 		return (error);
3085 
3086 	/* must not be a snapshot */
3087 	if (dsl_dataset_is_snapshot(ds)) {
3088 		dsl_dataset_rele(ds, FTAG);
3089 		return (EINVAL);
3090 	}
3091 
3092 	/* must have a most recent snapshot */
3093 	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
3094 		dsl_dataset_rele(ds, FTAG);
3095 		return (EINVAL);
3096 	}
3097 
3098 	/*
3099 	 * Create clone of most recent snapshot.
3100 	 */
3101 	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
3102 	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
3103 	if (error)
3104 		goto out;
3105 
3106 	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
3107 	if (error)
3108 		goto out;
3109 
3110 	/*
3111 	 * Do clone swap.
3112 	 */
3113 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3114 		error = zfs_suspend_fs(zfsvfs);
3115 		if (error == 0) {
3116 			int resume_err;
3117 
3118 			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3119 				error = dsl_dataset_clone_swap(clone, ds,
3120 				    B_TRUE);
3121 				dsl_dataset_disown(ds, FTAG);
3122 				ds = NULL;
3123 			} else {
3124 				error = EBUSY;
3125 			}
3126 			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
3127 			error = error ? error : resume_err;
3128 		}
3129 		VFS_RELE(zfsvfs->z_vfs);
3130 	} else {
3131 		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3132 			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
3133 			dsl_dataset_disown(ds, FTAG);
3134 			ds = NULL;
3135 		} else {
3136 			error = EBUSY;
3137 		}
3138 	}
3139 
3140 	/*
3141 	 * Destroy clone (which also closes it).
3142 	 */
3143 	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
3144 
3145 out:
3146 	strfree(clone_name);
3147 	if (ds)
3148 		dsl_dataset_rele(ds, FTAG);
3149 	return (error);
3150 }
3151 
3152 /*
3153  * inputs:
3154  * zc_name	old name of dataset
3155  * zc_value	new name of dataset
3156  * zc_cookie	recursive flag (only valid for snapshots)
3157  *
3158  * outputs:	none
3159  */
3160 static int
3161 zfs_ioc_rename(zfs_cmd_t *zc)
3162 {
3163 	boolean_t recursive = zc->zc_cookie & 1;
3164 
3165 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3166 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3167 	    strchr(zc->zc_value, '%'))
3168 		return (EINVAL);
3169 
3170 	/*
3171 	 * Unmount snapshot unless we're doing a recursive rename,
3172 	 * in which case the dataset code figures out which snapshots
3173 	 * to unmount.
3174 	 */
3175 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3176 	    zc->zc_objset_type == DMU_OST_ZFS) {
3177 		int err = zfs_unmount_snap(zc->zc_name, NULL);
3178 		if (err)
3179 			return (err);
3180 	}
3181 	if (zc->zc_objset_type == DMU_OST_ZVOL)
3182 		(void) zvol_remove_minor(zc->zc_name);
3183 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3184 }
3185 
3186 static int
3187 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3188 {
3189 	const char *propname = nvpair_name(pair);
3190 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3191 	zfs_prop_t prop = zfs_name_to_prop(propname);
3192 	uint64_t intval;
3193 	int err;
3194 
3195 	if (prop == ZPROP_INVAL) {
3196 		if (zfs_prop_user(propname)) {
3197 			if (err = zfs_secpolicy_write_perms(dsname,
3198 			    ZFS_DELEG_PERM_USERPROP, cr))
3199 				return (err);
3200 			return (0);
3201 		}
3202 
3203 		if (!issnap && zfs_prop_userquota(propname)) {
3204 			const char *perm = NULL;
3205 			const char *uq_prefix =
3206 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3207 			const char *gq_prefix =
3208 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3209 
3210 			if (strncmp(propname, uq_prefix,
3211 			    strlen(uq_prefix)) == 0) {
3212 				perm = ZFS_DELEG_PERM_USERQUOTA;
3213 			} else if (strncmp(propname, gq_prefix,
3214 			    strlen(gq_prefix)) == 0) {
3215 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3216 			} else {
3217 				/* USERUSED and GROUPUSED are read-only */
3218 				return (EINVAL);
3219 			}
3220 
3221 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3222 				return (err);
3223 			return (0);
3224 		}
3225 
3226 		return (EINVAL);
3227 	}
3228 
3229 	if (issnap)
3230 		return (EINVAL);
3231 
3232 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3233 		/*
3234 		 * dsl_prop_get_all_impl() returns properties in this
3235 		 * format.
3236 		 */
3237 		nvlist_t *attrs;
3238 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3239 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3240 		    &pair) == 0);
3241 	}
3242 
3243 	/*
3244 	 * Check that this value is valid for this pool version
3245 	 */
3246 	switch (prop) {
3247 	case ZFS_PROP_COMPRESSION:
3248 		/*
3249 		 * If the user specified gzip compression, make sure
3250 		 * the SPA supports it. We ignore any errors here since
3251 		 * we'll catch them later.
3252 		 */
3253 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3254 		    nvpair_value_uint64(pair, &intval) == 0) {
3255 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3256 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3257 			    zfs_earlier_version(dsname,
3258 			    SPA_VERSION_GZIP_COMPRESSION)) {
3259 				return (ENOTSUP);
3260 			}
3261 
3262 			if (intval == ZIO_COMPRESS_ZLE &&
3263 			    zfs_earlier_version(dsname,
3264 			    SPA_VERSION_ZLE_COMPRESSION))
3265 				return (ENOTSUP);
3266 
3267 			/*
3268 			 * If this is a bootable dataset then
3269 			 * verify that the compression algorithm
3270 			 * is supported for booting. We must return
3271 			 * something other than ENOTSUP since it
3272 			 * implies a downrev pool version.
3273 			 */
3274 			if (zfs_is_bootfs(dsname) &&
3275 			    !BOOTFS_COMPRESS_VALID(intval)) {
3276 				return (ERANGE);
3277 			}
3278 		}
3279 		break;
3280 
3281 	case ZFS_PROP_COPIES:
3282 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3283 			return (ENOTSUP);
3284 		break;
3285 
3286 	case ZFS_PROP_DEDUP:
3287 		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3288 			return (ENOTSUP);
3289 		break;
3290 
3291 	case ZFS_PROP_SHARESMB:
3292 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3293 			return (ENOTSUP);
3294 		break;
3295 
3296 	case ZFS_PROP_ACLINHERIT:
3297 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3298 		    nvpair_value_uint64(pair, &intval) == 0) {
3299 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3300 			    zfs_earlier_version(dsname,
3301 			    SPA_VERSION_PASSTHROUGH_X))
3302 				return (ENOTSUP);
3303 		}
3304 		break;
3305 	}
3306 
3307 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3308 }
3309 
3310 /*
3311  * Removes properties from the given props list that fail permission checks
3312  * needed to clear them and to restore them in case of a receive error. For each
3313  * property, make sure we have both set and inherit permissions.
3314  *
3315  * Returns the first error encountered if any permission checks fail. If the
3316  * caller provides a non-NULL errlist, it also gives the complete list of names
3317  * of all the properties that failed a permission check along with the
3318  * corresponding error numbers. The caller is responsible for freeing the
3319  * returned errlist.
3320  *
3321  * If every property checks out successfully, zero is returned and the list
3322  * pointed at by errlist is NULL.
3323  */
3324 static int
3325 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3326 {
3327 	zfs_cmd_t *zc;
3328 	nvpair_t *pair, *next_pair;
3329 	nvlist_t *errors;
3330 	int err, rv = 0;
3331 
3332 	if (props == NULL)
3333 		return (0);
3334 
3335 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3336 
3337 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3338 	(void) strcpy(zc->zc_name, dataset);
3339 	pair = nvlist_next_nvpair(props, NULL);
3340 	while (pair != NULL) {
3341 		next_pair = nvlist_next_nvpair(props, pair);
3342 
3343 		(void) strcpy(zc->zc_value, nvpair_name(pair));
3344 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3345 		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3346 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3347 			VERIFY(nvlist_add_int32(errors,
3348 			    zc->zc_value, err) == 0);
3349 		}
3350 		pair = next_pair;
3351 	}
3352 	kmem_free(zc, sizeof (zfs_cmd_t));
3353 
3354 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3355 		nvlist_free(errors);
3356 		errors = NULL;
3357 	} else {
3358 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
3359 	}
3360 
3361 	if (errlist == NULL)
3362 		nvlist_free(errors);
3363 	else
3364 		*errlist = errors;
3365 
3366 	return (rv);
3367 }
3368 
3369 static boolean_t
3370 propval_equals(nvpair_t *p1, nvpair_t *p2)
3371 {
3372 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3373 		/* dsl_prop_get_all_impl() format */
3374 		nvlist_t *attrs;
3375 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3376 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3377 		    &p1) == 0);
3378 	}
3379 
3380 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3381 		nvlist_t *attrs;
3382 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3383 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3384 		    &p2) == 0);
3385 	}
3386 
3387 	if (nvpair_type(p1) != nvpair_type(p2))
3388 		return (B_FALSE);
3389 
3390 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
3391 		char *valstr1, *valstr2;
3392 
3393 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3394 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3395 		return (strcmp(valstr1, valstr2) == 0);
3396 	} else {
3397 		uint64_t intval1, intval2;
3398 
3399 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3400 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3401 		return (intval1 == intval2);
3402 	}
3403 }
3404 
3405 /*
3406  * Remove properties from props if they are not going to change (as determined
3407  * by comparison with origprops). Remove them from origprops as well, since we
3408  * do not need to clear or restore properties that won't change.
3409  */
3410 static void
3411 props_reduce(nvlist_t *props, nvlist_t *origprops)
3412 {
3413 	nvpair_t *pair, *next_pair;
3414 
3415 	if (origprops == NULL)
3416 		return; /* all props need to be received */
3417 
3418 	pair = nvlist_next_nvpair(props, NULL);
3419 	while (pair != NULL) {
3420 		const char *propname = nvpair_name(pair);
3421 		nvpair_t *match;
3422 
3423 		next_pair = nvlist_next_nvpair(props, pair);
3424 
3425 		if ((nvlist_lookup_nvpair(origprops, propname,
3426 		    &match) != 0) || !propval_equals(pair, match))
3427 			goto next; /* need to set received value */
3428 
3429 		/* don't clear the existing received value */
3430 		(void) nvlist_remove_nvpair(origprops, match);
3431 		/* don't bother receiving the property */
3432 		(void) nvlist_remove_nvpair(props, pair);
3433 next:
3434 		pair = next_pair;
3435 	}
3436 }
3437 
#ifdef	DEBUG
/*
 * Flag that only exists in DEBUG builds.
 * NOTE(review): the name suggests it is used to inject an error into
 * zfs_ioc_recv(); its use is not visible in this part of the file -- confirm.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
3441 
/*
 * Receive a stream from a file descriptor into the snapshot named by
 * zc_value, optionally replacing the target's received properties with the
 * supplied ones.  If the receive fails, an attempt is made to put the
 * previously received properties back.
 *
 * inputs:
 * zc_name		name of containing filesystem
 * zc_nvlist_src{_size}	nvlist of properties to apply
 * zc_value		name of snapshot to create
 * zc_string		name of clone origin (if DRR_FLAG_CLONE)
 * zc_cookie		file descriptor to recv from
 * zc_begin_record	the BEGIN record of the stream (not byteswapped)
 * zc_guid		force flag
 * zc_top_ds		passed through to dmu_recv_begin()
 * zc_cleanup_fd	cleanup-on-exit file descriptor
 * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
 *
 * outputs:
 * zc_cookie		number of bytes read
 * zc_nvlist_dst{_size} error for each unapplied received property
 * zc_obj		zprop_errflags_t
 * zc_action_handle	handle for this guid/ds mapping
 */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
	file_t *fp;
	objset_t *os;
	dmu_recv_cookie_t drc;
	boolean_t force = (boolean_t)zc->zc_guid;
	int fd;
	int error = 0;
	int props_error = 0;
	nvlist_t *errors;
	offset_t off;
	nvlist_t *props = NULL; /* sent properties */
	nvlist_t *origprops = NULL; /* existing properties */
	objset_t *origin = NULL;
	char *tosnap;
	char tofs[ZFS_MAXNAMELEN];
	boolean_t first_recvd_props = B_FALSE;

	/* The target must be a valid "fs@snap" name containing no '%'. */
	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
	    strchr(zc->zc_value, '@') == NULL ||
	    strchr(zc->zc_value, '%'))
		return (EINVAL);

	/* Split the name in place: tofs = filesystem, tosnap = snapshot. */
	(void) strcpy(tofs, zc->zc_value);
	tosnap = strchr(tofs, '@');
	*tosnap++ = '\0';

	if (zc->zc_nvlist_src != NULL &&
	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props)) != 0)
		return (error);

	fd = zc->zc_cookie;
	fp = getf(fd);
	if (fp == NULL) {
		nvlist_free(props);
		return (EBADF);
	}

	/* Collects the per-property errors handed back to the caller. */
	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	/*
	 * This step is skipped when no properties were sent or when the
	 * target filesystem cannot be held (e.g. it does not exist yet).
	 */
	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
		    !dsl_prop_get_hasrecvd(os)) {
			first_recvd_props = B_TRUE;
		}

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties, so stash
		 * away the existing ones.
		 */
		if (dsl_prop_get_received(os, &origprops) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(props, origprops);
			if (zfs_check_clearable(tofs, origprops,
			    &errlist) != 0)
				(void) nvlist_merge(errors, errlist, 0);
			nvlist_free(errlist);
		}

		dmu_objset_rele(os, FTAG);
	}

	/* A non-empty zc_string names the clone origin; hold it for begin. */
	if (zc->zc_string[0]) {
		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
		if (error)
			goto out;
	}

	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
	    &zc->zc_begin_record, force, origin, &drc);
	if (origin)
		dmu_objset_rele(origin, FTAG);
	if (error)
		goto out;

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (props) {
		nvlist_t *errlist;

		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
			if (drc.drc_newfs) {
				if (spa_version(os->os_spa) >=
				    SPA_VERSION_RECVD_PROPS)
					first_recvd_props = B_TRUE;
			} else if (origprops != NULL) {
				if (clear_received_props(os, tofs, origprops,
				    first_recvd_props ? NULL : props) != 0)
					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
			} else {
				/* Existing fs but nothing was stashed. */
				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
			}
			dsl_prop_set_hasrecvd(os);
		} else if (!drc.drc_newfs) {
			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
		}

		/* Failures here are reported per property via 'errors'. */
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
		    props, &errlist);
		(void) nvlist_merge(errors, errlist, 0);
		nvlist_free(errlist);
	}

	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
		/*
		 * Caller made zc->zc_nvlist_dst less than the minimum expected
		 * size or supplied an invalid address.
		 */
		props_error = EINVAL;
	}

	/* Read from the file's current offset; 'off' tracks progress. */
	off = fp->f_offset;
	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
	    &zc->zc_action_handle);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;

		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			int end_err;

			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc);
			if (error == 0)
				error = zfs_resume_fs(zfsvfs, tofs);
			/* A suspend/resume error wins over end_err. */
			error = error ? error : end_err;
			VFS_RELE(zfsvfs->z_vfs);
		} else {
			error = dmu_recv_end(&drc);
		}
	}

	/* Report bytes consumed and advance the file offset to match. */
	zc->zc_cookie = off - fp->f_offset;
	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
		fp->f_offset = off;

#ifdef	DEBUG
	/* One-shot fault injection: pretend the receive failed. */
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif
	/*
	 * On error, restore the original props.
	 */
	if (error && props) {
		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
			if (clear_received_props(os, tofs, props, NULL) != 0) {
				/*
				 * We failed to clear the received properties.
				 * Since we may have left a $recvd value on the
				 * system, we can't clear the $hasrecvd flag.
				 */
				zc->zc_obj |= ZPROP_ERR_NORESTORE;
			} else if (first_recvd_props) {
				dsl_prop_unset_hasrecvd(os);
			}
			dmu_objset_rele(os, FTAG);
		} else if (!drc.drc_newfs) {
			/* We failed to clear the received properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		if (origprops == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origprops != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origprops, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			zc->zc_obj |= ZPROP_ERR_NORESTORE;
		}
	}
out:
	nvlist_free(props);
	nvlist_free(origprops);
	nvlist_free(errors);
	releasef(fd);

	/* A receive error takes precedence over the error-list failure. */
	if (error == 0)
		error = props_error;

	return (error);
}
3676 
3677 /*
3678  * inputs:
3679  * zc_name	name of snapshot to send
3680  * zc_cookie	file descriptor to send stream to
3681  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
3682  * zc_sendobj	objsetid of snapshot to send
3683  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
3684  *
3685  * outputs: none
3686  */
3687 static int
3688 zfs_ioc_send(zfs_cmd_t *zc)
3689 {
3690 	objset_t *fromsnap = NULL;
3691 	objset_t *tosnap;
3692 	file_t *fp;
3693 	int error;
3694 	offset_t off;
3695 	dsl_dataset_t *ds;
3696 	dsl_dataset_t *dsfrom = NULL;
3697 	spa_t *spa;
3698 	dsl_pool_t *dp;
3699 
3700 	error = spa_open(zc->zc_name, &spa, FTAG);
3701 	if (error)
3702 		return (error);
3703 
3704 	dp = spa_get_dsl(spa);
3705 	rw_enter(&dp->dp_config_rwlock, RW_READER);
3706 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
3707 	rw_exit(&dp->dp_config_rwlock);
3708 	if (error) {
3709 		spa_close(spa, FTAG);
3710 		return (error);
3711 	}
3712 
3713 	error = dmu_objset_from_ds(ds, &tosnap);
3714 	if (error) {
3715 		dsl_dataset_rele(ds, FTAG);
3716 		spa_close(spa, FTAG);
3717 		return (error);
3718 	}
3719 
3720 	if (zc->zc_fromobj != 0) {
3721 		rw_enter(&dp->dp_config_rwlock, RW_READER);
3722 		error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
3723 		rw_exit(&dp->dp_config_rwlock);
3724 		spa_close(spa, FTAG);
3725 		if (error) {
3726 			dsl_dataset_rele(ds, FTAG);
3727 			return (error);
3728 		}
3729 		error = dmu_objset_from_ds(dsfrom, &fromsnap);
3730 		if (error) {
3731 			dsl_dataset_rele(dsfrom, FTAG);
3732 			dsl_dataset_rele(ds, FTAG);
3733 			return (error);
3734 		}
3735 	} else {
3736 		spa_close(spa, FTAG);
3737 	}
3738 
3739 	fp = getf(zc->zc_cookie);
3740 	if (fp == NULL) {
3741 		dsl_dataset_rele(ds, FTAG);
3742 		if (dsfrom)
3743 			dsl_dataset_rele(dsfrom, FTAG);
3744 		return (EBADF);
3745 	}
3746 
3747 	off = fp->f_offset;
3748 	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
3749 
3750 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3751 		fp->f_offset = off;
3752 	releasef(zc->zc_cookie);
3753 	if (dsfrom)
3754 		dsl_dataset_rele(dsfrom, FTAG);
3755 	dsl_dataset_rele(ds, FTAG);
3756 	return (error);
3757 }
3758 
3759 static int
3760 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3761 {
3762 	int id, error;
3763 
3764 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3765 	    &zc->zc_inject_record);
3766 
3767 	if (error == 0)
3768 		zc->zc_guid = (uint64_t)id;
3769 
3770 	return (error);
3771 }
3772 
3773 static int
3774 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3775 {
3776 	return (zio_clear_fault((int)zc->zc_guid));
3777 }
3778 
3779 static int
3780 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3781 {
3782 	int id = (int)zc->zc_guid;
3783 	int error;
3784 
3785 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3786 	    &zc->zc_inject_record);
3787 
3788 	zc->zc_guid = id;
3789 
3790 	return (error);
3791 }
3792 
3793 static int
3794 zfs_ioc_error_log(zfs_cmd_t *zc)
3795 {
3796 	spa_t *spa;
3797 	int error;
3798 	size_t count = (size_t)zc->zc_nvlist_dst_size;
3799 
3800 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
3801 		return (error);
3802 
3803 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
3804 	    &count);
3805 	if (error == 0)
3806 		zc->zc_nvlist_dst_size = count;
3807 	else
3808 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
3809 
3810 	spa_close(spa, FTAG);
3811 
3812 	return (error);
3813 }
3814 
3815 static int
3816 zfs_ioc_clear(zfs_cmd_t *zc)
3817 {
3818 	spa_t *spa;
3819 	vdev_t *vd;
3820 	int error;
3821 
3822 	/*
3823 	 * On zpool clear we also fix up missing slogs
3824 	 */
3825 	mutex_enter(&spa_namespace_lock);
3826 	spa = spa_lookup(zc->zc_name);
3827 	if (spa == NULL) {
3828 		mutex_exit(&spa_namespace_lock);
3829 		return (EIO);
3830 	}
3831 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
3832 		/* we need to let spa_open/spa_load clear the chains */
3833 		spa_set_log_state(spa, SPA_LOG_CLEAR);
3834 	}
3835 	spa->spa_last_open_failed = 0;
3836 	mutex_exit(&spa_namespace_lock);
3837 
3838 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
3839 		error = spa_open(zc->zc_name, &spa, FTAG);
3840 	} else {
3841 		nvlist_t *policy;
3842 		nvlist_t *config = NULL;
3843 
3844 		if (zc->zc_nvlist_src == NULL)
3845 			return (EINVAL);
3846 
3847 		if ((error = get_nvlist(zc->zc_nvlist_src,
3848 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
3849 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
3850 			    policy, &config);
3851 			if (config != NULL) {
3852 				int err;
3853 
3854 				if ((err = put_nvlist(zc, config)) != 0)
3855 					error = err;
3856 				nvlist_free(config);
3857 			}
3858 			nvlist_free(policy);
3859 		}
3860 	}
3861 
3862 	if (error)
3863 		return (error);
3864 
3865 	spa_vdev_state_enter(spa, SCL_NONE);
3866 
3867 	if (zc->zc_guid == 0) {
3868 		vd = NULL;
3869 	} else {
3870 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
3871 		if (vd == NULL) {
3872 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
3873 			spa_close(spa, FTAG);
3874 			return (ENODEV);
3875 		}
3876 	}
3877 
3878 	vdev_clear(spa, vd);
3879 
3880 	(void) spa_vdev_state_exit(spa, NULL, 0);
3881 
3882 	/*
3883 	 * Resume any suspended I/Os.
3884 	 */
3885 	if (zio_resume(spa) != 0)
3886 		error = EIO;
3887 
3888 	spa_close(spa, FTAG);
3889 
3890 	return (error);
3891 }
3892 
3893 /*
3894  * inputs:
3895  * zc_name	name of filesystem
3896  * zc_value	name of origin snapshot
3897  *
3898  * outputs:
3899  * zc_string	name of conflicting snapshot, if there is one
3900  */
3901 static int
3902 zfs_ioc_promote(zfs_cmd_t *zc)
3903 {
3904 	char *cp;
3905 
3906 	/*
3907 	 * We don't need to unmount *all* the origin fs's snapshots, but
3908 	 * it's easier.
3909 	 */
3910 	cp = strchr(zc->zc_value, '@');
3911 	if (cp)
3912 		*cp = '\0';
3913 	(void) dmu_objset_find(zc->zc_value,
3914 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
3915 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
3916 }
3917 
3918 /*
3919  * Retrieve a single {user|group}{used|quota}@... property.
3920  *
3921  * inputs:
3922  * zc_name	name of filesystem
3923  * zc_objset_type zfs_userquota_prop_t
3924  * zc_value	domain name (eg. "S-1-234-567-89")
3925  * zc_guid	RID/UID/GID
3926  *
3927  * outputs:
3928  * zc_cookie	property value
3929  */
3930 static int
3931 zfs_ioc_userspace_one(zfs_cmd_t *zc)
3932 {
3933 	zfsvfs_t *zfsvfs;
3934 	int error;
3935 
3936 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
3937 		return (EINVAL);
3938 
3939 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
3940 	if (error)
3941 		return (error);
3942 
3943 	error = zfs_userspace_one(zfsvfs,
3944 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
3945 	zfsvfs_rele(zfsvfs, FTAG);
3946 
3947 	return (error);
3948 }
3949 
3950 /*
3951  * inputs:
3952  * zc_name		name of filesystem
3953  * zc_cookie		zap cursor
3954  * zc_objset_type	zfs_userquota_prop_t
3955  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
3956  *
3957  * outputs:
3958  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
3959  * zc_cookie	zap cursor
3960  */
3961 static int
3962 zfs_ioc_userspace_many(zfs_cmd_t *zc)
3963 {
3964 	zfsvfs_t *zfsvfs;
3965 	int bufsize = zc->zc_nvlist_dst_size;
3966 
3967 	if (bufsize <= 0)
3968 		return (ENOMEM);
3969 
3970 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
3971 	if (error)
3972 		return (error);
3973 
3974 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
3975 
3976 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
3977 	    buf, &zc->zc_nvlist_dst_size);
3978 
3979 	if (error == 0) {
3980 		error = xcopyout(buf,
3981 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
3982 		    zc->zc_nvlist_dst_size);
3983 	}
3984 	kmem_free(buf, bufsize);
3985 	zfsvfs_rele(zfsvfs, FTAG);
3986 
3987 	return (error);
3988 }
3989 
3990 /*
3991  * inputs:
3992  * zc_name		name of filesystem
3993  *
3994  * outputs:
3995  * none
3996  */
3997 static int
3998 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
3999 {
4000 	objset_t *os;
4001 	int error = 0;
4002 	zfsvfs_t *zfsvfs;
4003 
4004 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4005 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4006 			/*
4007 			 * If userused is not enabled, it may be because the
4008 			 * objset needs to be closed & reopened (to grow the
4009 			 * objset_phys_t).  Suspend/resume the fs will do that.
4010 			 */
4011 			error = zfs_suspend_fs(zfsvfs);
4012 			if (error == 0)
4013 				error = zfs_resume_fs(zfsvfs, zc->zc_name);
4014 		}
4015 		if (error == 0)
4016 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4017 		VFS_RELE(zfsvfs->z_vfs);
4018 	} else {
4019 		/* XXX kind of reading contents without owning */
4020 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4021 		if (error)
4022 			return (error);
4023 
4024 		error = dmu_objset_userspace_upgrade(os);
4025 		dmu_objset_rele(os, FTAG);
4026 	}
4027 
4028 	return (error);
4029 }
4030 
/*
 * We don't want to have a hard dependency against some special symbols in
 * sharefs, nfs, and smbsrv.  Determine them if needed when the first file
 * system is shared.  None of sharefs, nfs or smbsrv are unloadable modules.
 */
/* Resolved from "nfs_export" in fs/nfs. */
int (*znfsexport_fs)(void *arg);
/* Resolved from "sharefs_impl" in fs/sharefs. */
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
/* Resolved from "smb_server_share" in drv/smbsrv. */
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the corresponding symbols above have been resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Handles returned by ddi_modopen() for the three modules. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and function pointers above are being set up. */
kmutex_t zfs_share_lock;
4049 
4050 static int
4051 zfs_init_sharefs()
4052 {
4053 	int error;
4054 
4055 	ASSERT(MUTEX_HELD(&zfs_share_lock));
4056 	/* Both NFS and SMB shares also require sharetab support. */
4057 	if (sharefs_mod == NULL && ((sharefs_mod =
4058 	    ddi_modopen("fs/sharefs",
4059 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
4060 		return (ENOSYS);
4061 	}
4062 	if (zshare_fs == NULL && ((zshare_fs =
4063 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4064 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4065 		return (ENOSYS);
4066 	}
4067 	return (0);
4068 }
4069 
4070 static int
4071 zfs_ioc_share(zfs_cmd_t *zc)
4072 {
4073 	int error;
4074 	int opcode;
4075 
4076 	switch (zc->zc_share.z_sharetype) {
4077 	case ZFS_SHARE_NFS:
4078 	case ZFS_UNSHARE_NFS:
4079 		if (zfs_nfsshare_inited == 0) {
4080 			mutex_enter(&zfs_share_lock);
4081 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4082 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4083 				mutex_exit(&zfs_share_lock);
4084 				return (ENOSYS);
4085 			}
4086 			if (znfsexport_fs == NULL &&
4087 			    ((znfsexport_fs = (int (*)(void *))
4088 			    ddi_modsym(nfs_mod,
4089 			    "nfs_export", &error)) == NULL)) {
4090 				mutex_exit(&zfs_share_lock);
4091 				return (ENOSYS);
4092 			}
4093 			error = zfs_init_sharefs();
4094 			if (error) {
4095 				mutex_exit(&zfs_share_lock);
4096 				return (ENOSYS);
4097 			}
4098 			zfs_nfsshare_inited = 1;
4099 			mutex_exit(&zfs_share_lock);
4100 		}
4101 		break;
4102 	case ZFS_SHARE_SMB:
4103 	case ZFS_UNSHARE_SMB:
4104 		if (zfs_smbshare_inited == 0) {
4105 			mutex_enter(&zfs_share_lock);
4106 			if (smbsrv_mod == NULL && ((smbsrv_mod =
4107 			    ddi_modopen("drv/smbsrv",
4108 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
4109 				mutex_exit(&zfs_share_lock);
4110 				return (ENOSYS);
4111 			}
4112 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4113 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4114 			    "smb_server_share", &error)) == NULL)) {
4115 				mutex_exit(&zfs_share_lock);
4116 				return (ENOSYS);
4117 			}
4118 			error = zfs_init_sharefs();
4119 			if (error) {
4120 				mutex_exit(&zfs_share_lock);
4121 				return (ENOSYS);
4122 			}
4123 			zfs_smbshare_inited = 1;
4124 			mutex_exit(&zfs_share_lock);
4125 		}
4126 		break;
4127 	default:
4128 		return (EINVAL);
4129 	}
4130 
4131 	switch (zc->zc_share.z_sharetype) {
4132 	case ZFS_SHARE_NFS:
4133 	case ZFS_UNSHARE_NFS:
4134 		if (error =
4135 		    znfsexport_fs((void *)
4136 		    (uintptr_t)zc->zc_share.z_exportdata))
4137 			return (error);
4138 		break;
4139 	case ZFS_SHARE_SMB:
4140 	case ZFS_UNSHARE_SMB:
4141 		if (error = zsmbexport_fs((void *)
4142 		    (uintptr_t)zc->zc_share.z_exportdata,
4143 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4144 		    B_TRUE: B_FALSE)) {
4145 			return (error);
4146 		}
4147 		break;
4148 	}
4149 
4150 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4151 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4152 	    SHAREFS_ADD : SHAREFS_REMOVE;
4153 
4154 	/*
4155 	 * Add or remove share from sharetab
4156 	 */
4157 	error = zshare_fs(opcode,
4158 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
4159 	    zc->zc_share.z_sharemax);
4160 
4161 	return (error);
4162 
4163 }
4164 
/*
 * Single-entry ACL that zfs_ioc_smb_acl() attaches to the share files it
 * creates.
 * NOTE(review): the entry appears to grant ACE_ALL_PERMS to everyone
 * (ACE_EVERYONE); confirm the field order against the ace_t definition.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4168 
4169 /*
4170  * Remove all ACL files in shares dir
4171  */
4172 static int
4173 zfs_smb_acl_purge(znode_t *dzp)
4174 {
4175 	zap_cursor_t	zc;
4176 	zap_attribute_t	zap;
4177 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4178 	int error;
4179 
4180 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4181 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4182 	    zap_cursor_advance(&zc)) {
4183 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4184 		    NULL, 0)) != 0)
4185 			break;
4186 	}
4187 	zap_cursor_fini(&zc);
4188 	return (error);
4189 }
4190 
4191 static int
4192 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4193 {
4194 	vnode_t *vp;
4195 	znode_t *dzp;
4196 	vnode_t *resourcevp = NULL;
4197 	znode_t *sharedir;
4198 	zfsvfs_t *zfsvfs;
4199 	nvlist_t *nvlist;
4200 	char *src, *target;
4201 	vattr_t vattr;
4202 	vsecattr_t vsec;
4203 	int error = 0;
4204 
4205 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4206 	    NO_FOLLOW, NULL, &vp)) != 0)
4207 		return (error);
4208 
4209 	/* Now make sure mntpnt and dataset are ZFS */
4210 
4211 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4212 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4213 	    zc->zc_name) != 0)) {
4214 		VN_RELE(vp);
4215 		return (EINVAL);
4216 	}
4217 
4218 	dzp = VTOZ(vp);
4219 	zfsvfs = dzp->z_zfsvfs;
4220 	ZFS_ENTER(zfsvfs);
4221 
4222 	/*
4223 	 * Create share dir if its missing.
4224 	 */
4225 	mutex_enter(&zfsvfs->z_lock);
4226 	if (zfsvfs->z_shares_dir == 0) {
4227 		dmu_tx_t *tx;
4228 
4229 		tx = dmu_tx_create(zfsvfs->z_os);
4230 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4231 		    ZFS_SHARES_DIR);
4232 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4233 		error = dmu_tx_assign(tx, TXG_WAIT);
4234 		if (error) {
4235 			dmu_tx_abort(tx);
4236 		} else {
4237 			error = zfs_create_share_dir(zfsvfs, tx);
4238 			dmu_tx_commit(tx);
4239 		}
4240 		if (error) {
4241 			mutex_exit(&zfsvfs->z_lock);
4242 			VN_RELE(vp);
4243 			ZFS_EXIT(zfsvfs);
4244 			return (error);
4245 		}
4246 	}
4247 	mutex_exit(&zfsvfs->z_lock);
4248 
4249 	ASSERT(zfsvfs->z_shares_dir);
4250 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4251 		VN_RELE(vp);
4252 		ZFS_EXIT(zfsvfs);
4253 		return (error);
4254 	}
4255 
4256 	switch (zc->zc_cookie) {
4257 	case ZFS_SMB_ACL_ADD:
4258 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4259 		vattr.va_type = VREG;
4260 		vattr.va_mode = S_IFREG|0777;
4261 		vattr.va_uid = 0;
4262 		vattr.va_gid = 0;
4263 
4264 		vsec.vsa_mask = VSA_ACE;
4265 		vsec.vsa_aclentp = &full_access;
4266 		vsec.vsa_aclentsz = sizeof (full_access);
4267 		vsec.vsa_aclcnt = 1;
4268 
4269 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4270 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4271 		if (resourcevp)
4272 			VN_RELE(resourcevp);
4273 		break;
4274 
4275 	case ZFS_SMB_ACL_REMOVE:
4276 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4277 		    NULL, 0);
4278 		break;
4279 
4280 	case ZFS_SMB_ACL_RENAME:
4281 		if ((error = get_nvlist(zc->zc_nvlist_src,
4282 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4283 			VN_RELE(vp);
4284 			ZFS_EXIT(zfsvfs);
4285 			return (error);
4286 		}
4287 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4288 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4289 		    &target)) {
4290 			VN_RELE(vp);
4291 			VN_RELE(ZTOV(sharedir));
4292 			ZFS_EXIT(zfsvfs);
4293 			nvlist_free(nvlist);
4294 			return (error);
4295 		}
4296 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4297 		    kcred, NULL, 0);
4298 		nvlist_free(nvlist);
4299 		break;
4300 
4301 	case ZFS_SMB_ACL_PURGE:
4302 		error = zfs_smb_acl_purge(sharedir);
4303 		break;
4304 
4305 	default:
4306 		error = EINVAL;
4307 		break;
4308 	}
4309 
4310 	VN_RELE(vp);
4311 	VN_RELE(ZTOV(sharedir));
4312 
4313 	ZFS_EXIT(zfsvfs);
4314 
4315 	return (error);
4316 }
4317 
4318 /*
4319  * inputs:
4320  * zc_name		name of filesystem
4321  * zc_value		short name of snap
4322  * zc_string		user-supplied tag for this hold
4323  * zc_cookie		recursive flag
4324  * zc_temphold		set if hold is temporary
4325  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4326  * zc_sendobj		if non-zero, the objid for zc_name@zc_value
4327  * zc_createtxg		if zc_sendobj is non-zero, snap must have zc_createtxg
4328  *
4329  * outputs:		none
4330  */
4331 static int
4332 zfs_ioc_hold(zfs_cmd_t *zc)
4333 {
4334 	boolean_t recursive = zc->zc_cookie;
4335 	spa_t *spa;
4336 	dsl_pool_t *dp;
4337 	dsl_dataset_t *ds;
4338 	int error;
4339 	minor_t minor = 0;
4340 
4341 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4342 		return (EINVAL);
4343 
4344 	if (zc->zc_sendobj == 0) {
4345 		return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4346 		    zc->zc_string, recursive, zc->zc_temphold,
4347 		    zc->zc_cleanup_fd));
4348 	}
4349 
4350 	if (recursive)
4351 		return (EINVAL);
4352 
4353 	error = spa_open(zc->zc_name, &spa, FTAG);
4354 	if (error)
4355 		return (error);
4356 
4357 	dp = spa_get_dsl(spa);
4358 	rw_enter(&dp->dp_config_rwlock, RW_READER);
4359 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4360 	rw_exit(&dp->dp_config_rwlock);
4361 	spa_close(spa, FTAG);
4362 	if (error)
4363 		return (error);
4364 
4365 	/*
4366 	 * Until we have a hold on this snapshot, it's possible that
4367 	 * zc_sendobj could've been destroyed and reused as part
4368 	 * of a later txg.  Make sure we're looking at the right object.
4369 	 */
4370 	if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
4371 		dsl_dataset_rele(ds, FTAG);
4372 		return (ENOENT);
4373 	}
4374 
4375 	if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
4376 		error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4377 		if (error) {
4378 			dsl_dataset_rele(ds, FTAG);
4379 			return (error);
4380 		}
4381 	}
4382 
4383 	error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
4384 	    zc->zc_temphold);
4385 	if (minor != 0) {
4386 		if (error == 0) {
4387 			dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
4388 			    minor);
4389 		}
4390 		zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4391 	}
4392 	dsl_dataset_rele(ds, FTAG);
4393 
4394 	return (error);
4395 }
4396 
4397 /*
4398  * inputs:
4399  * zc_name	name of dataset from which we're releasing a user hold
4400  * zc_value	short name of snap
4401  * zc_string	user-supplied tag for this hold
4402  * zc_cookie	recursive flag
4403  *
4404  * outputs:	none
4405  */
4406 static int
4407 zfs_ioc_release(zfs_cmd_t *zc)
4408 {
4409 	boolean_t recursive = zc->zc_cookie;
4410 
4411 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4412 		return (EINVAL);
4413 
4414 	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4415 	    zc->zc_string, recursive));
4416 }
4417 
4418 /*
4419  * inputs:
4420  * zc_name		name of filesystem
4421  *
4422  * outputs:
4423  * zc_nvlist_src{_size}	nvlist of snapshot holds
4424  */
4425 static int
4426 zfs_ioc_get_holds(zfs_cmd_t *zc)
4427 {
4428 	nvlist_t *nvp;
4429 	int error;
4430 
4431 	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4432 		error = put_nvlist(zc, nvp);
4433 		nvlist_free(nvp);
4434 	}
4435 
4436 	return (error);
4437 }
4438 
/*
 * Table of ioctl handlers, one entry per ZFS ioctl.
 *
 * NOTE(review): each entry appears to be { handler, security policy
 * callback, kind of name expected in zc_name, two boolean flags }.  The
 * meaning of the two flags comes from the zfs_ioc_vec_t definition, which is
 * not visible here; entries are presumably ordered to match the ioctl
 * numbering, so new entries should not be inserted in the middle without
 * checking.
 *
 * pool create, destroy, and export don't log the history as part of
 * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
 * do the logging of those commands.
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_FALSE },
	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    B_FALSE },
	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
	    B_TRUE},
	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, B_FALSE },
	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, B_FALSE },
	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
	    B_TRUE, B_TRUE },
	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one,
	    DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many,
	    DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    DATASET_NAME, B_FALSE, B_TRUE },
	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE }
};
4546 
4547 int
4548 pool_status_check(const char *name, zfs_ioc_namecheck_t type)
4549 {
4550 	spa_t *spa;
4551 	int error;
4552 
4553 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
4554 
4555 	error = spa_open(name, &spa, FTAG);
4556 	if (error == 0) {
4557 		if (spa_suspended(spa))
4558 			error = EAGAIN;
4559 		spa_close(spa, FTAG);
4560 	}
4561 	return (error);
4562 }
4563 
4564 /*
4565  * Find a free minor number.
4566  */
4567 minor_t
4568 zfsdev_minor_alloc(void)
4569 {
4570 	static minor_t last_minor;
4571 	minor_t m;
4572 
4573 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4574 
4575 	for (m = last_minor + 1; m != last_minor; m++) {
4576 		if (m > ZFSDEV_MAX_MINOR)
4577 			m = 1;
4578 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
4579 			last_minor = m;
4580 			return (m);
4581 		}
4582 	}
4583 
4584 	return (0);
4585 }
4586 
4587 static int
4588 zfs_ctldev_init(dev_t *devp)
4589 {
4590 	minor_t minor;
4591 	zfs_soft_state_t *zs;
4592 
4593 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4594 	ASSERT(getminor(*devp) == 0);
4595 
4596 	minor = zfsdev_minor_alloc();
4597 	if (minor == 0)
4598 		return (ENXIO);
4599 
4600 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
4601 		return (EAGAIN);
4602 
4603 	*devp = makedevice(getemajor(*devp), minor);
4604 
4605 	zs = ddi_get_soft_state(zfsdev_state, minor);
4606 	zs->zss_type = ZSST_CTLDEV;
4607 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
4608 
4609 	return (0);
4610 }
4611 
4612 static void
4613 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
4614 {
4615 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4616 
4617 	zfs_onexit_destroy(zo);
4618 	ddi_soft_state_free(zfsdev_state, minor);
4619 }
4620 
4621 void *
4622 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
4623 {
4624 	zfs_soft_state_t *zp;
4625 
4626 	zp = ddi_get_soft_state(zfsdev_state, minor);
4627 	if (zp == NULL || zp->zss_type != which)
4628 		return (NULL);
4629 
4630 	return (zp->zss_data);
4631 }
4632 
4633 static int
4634 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
4635 {
4636 	int error = 0;
4637 
4638 	if (getminor(*devp) != 0)
4639 		return (zvol_open(devp, flag, otyp, cr));
4640 
4641 	/* This is the control device. Allocate a new minor if requested. */
4642 	if (flag & FEXCL) {
4643 		mutex_enter(&zfsdev_state_lock);
4644 		error = zfs_ctldev_init(devp);
4645 		mutex_exit(&zfsdev_state_lock);
4646 	}
4647 
4648 	return (error);
4649 }
4650 
4651 static int
4652 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
4653 {
4654 	zfs_onexit_t *zo;
4655 	minor_t minor = getminor(dev);
4656 
4657 	if (minor == 0)
4658 		return (0);
4659 
4660 	mutex_enter(&zfsdev_state_lock);
4661 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
4662 	if (zo == NULL) {
4663 		mutex_exit(&zfsdev_state_lock);
4664 		return (zvol_close(dev, flag, otyp, cr));
4665 	}
4666 	zfs_ctldev_destroy(zo, minor);
4667 	mutex_exit(&zfsdev_state_lock);
4668 
4669 	return (0);
4670 }
4671 
4672 static int
4673 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
4674 {
4675 	zfs_cmd_t *zc;
4676 	uint_t vec;
4677 	int error, rc;
4678 	minor_t minor = getminor(dev);
4679 
4680 	if (minor != 0 &&
4681 	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
4682 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
4683 
4684 	vec = cmd - ZFS_IOC;
4685 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
4686 
4687 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
4688 		return (EINVAL);
4689 
4690 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
4691 
4692 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
4693 	if (error != 0)
4694 		error = EFAULT;
4695 
4696 	if ((error == 0) && !(flag & FKIOCTL))
4697 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
4698 
4699 	/*
4700 	 * Ensure that all pool/dataset names are valid before we pass down to
4701 	 * the lower layers.
4702 	 */
4703 	if (error == 0) {
4704 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4705 		zc->zc_iflags = flag & FKIOCTL;
4706 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
4707 		case POOL_NAME:
4708 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
4709 				error = EINVAL;
4710 			if (zfs_ioc_vec[vec].zvec_pool_check)
4711 				error = pool_status_check(zc->zc_name,
4712 				    zfs_ioc_vec[vec].zvec_namecheck);
4713 			break;
4714 
4715 		case DATASET_NAME:
4716 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
4717 				error = EINVAL;
4718 			if (zfs_ioc_vec[vec].zvec_pool_check)
4719 				error = pool_status_check(zc->zc_name,
4720 				    zfs_ioc_vec[vec].zvec_namecheck);
4721 			break;
4722 
4723 		case NO_NAME:
4724 			break;
4725 		}
4726 	}
4727 
4728 	if (error == 0)
4729 		error = zfs_ioc_vec[vec].zvec_func(zc);
4730 
4731 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
4732 	if (error == 0) {
4733 		if (rc != 0)
4734 			error = EFAULT;
4735 		if (zfs_ioc_vec[vec].zvec_his_log)
4736 			zfs_log_history(zc);
4737 	}
4738 
4739 	kmem_free(zc, sizeof (zfs_cmd_t));
4740 	return (error);
4741 }
4742 
4743 static int
4744 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4745 {
4746 	if (cmd != DDI_ATTACH)
4747 		return (DDI_FAILURE);
4748 
4749 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
4750 	    DDI_PSEUDO, 0) == DDI_FAILURE)
4751 		return (DDI_FAILURE);
4752 
4753 	zfs_dip = dip;
4754 
4755 	ddi_report_dev(dip);
4756 
4757 	return (DDI_SUCCESS);
4758 }
4759 
4760 static int
4761 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4762 {
4763 	if (spa_busy() || zfs_busy() || zvol_busy())
4764 		return (DDI_FAILURE);
4765 
4766 	if (cmd != DDI_DETACH)
4767 		return (DDI_FAILURE);
4768 
4769 	zfs_dip = NULL;
4770 
4771 	ddi_prop_remove_all(dip);
4772 	ddi_remove_minor_node(dip, NULL);
4773 
4774 	return (DDI_SUCCESS);
4775 }
4776 
4777 /*ARGSUSED*/
4778 static int
4779 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
4780 {
4781 	switch (infocmd) {
4782 	case DDI_INFO_DEVT2DEVINFO:
4783 		*result = zfs_dip;
4784 		return (DDI_SUCCESS);
4785 
4786 	case DDI_INFO_DEVT2INSTANCE:
4787 		*result = (void *)0;
4788 		return (DDI_SUCCESS);
4789 	}
4790 
4791 	return (DDI_FAILURE);
4792 }
4793 
4794 /*
4795  * OK, so this is a little weird.
4796  *
4797  * /dev/zfs is the control node, i.e. minor 0.
4798  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
4799  *
4800  * /dev/zfs has basically nothing to do except serve up ioctls,
4801  * so most of the standard driver entry points are in zvol.c.
4802  */
4803 static struct cb_ops zfs_cb_ops = {
4804 	zfsdev_open,	/* open */
4805 	zfsdev_close,	/* close */
4806 	zvol_strategy,	/* strategy */
4807 	nodev,		/* print */
4808 	zvol_dump,	/* dump */
4809 	zvol_read,	/* read */
4810 	zvol_write,	/* write */
4811 	zfsdev_ioctl,	/* ioctl */
4812 	nodev,		/* devmap */
4813 	nodev,		/* mmap */
4814 	nodev,		/* segmap */
4815 	nochpoll,	/* poll */
4816 	ddi_prop_op,	/* prop_op */
4817 	NULL,		/* streamtab */
4818 	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
4819 	CB_REV,		/* version */
4820 	nodev,		/* async read */
4821 	nodev,		/* async write */
4822 };
4823 
4824 static struct dev_ops zfs_dev_ops = {
4825 	DEVO_REV,	/* version */
4826 	0,		/* refcnt */
4827 	zfs_info,	/* info */
4828 	nulldev,	/* identify */
4829 	nulldev,	/* probe */
4830 	zfs_attach,	/* attach */
4831 	zfs_detach,	/* detach */
4832 	nodev,		/* reset */
4833 	&zfs_cb_ops,	/* driver operations */
4834 	NULL,		/* no bus operations */
4835 	NULL,		/* power */
4836 	ddi_quiesce_not_needed,	/* quiesce */
4837 };
4838 
4839 static struct modldrv zfs_modldrv = {
4840 	&mod_driverops,
4841 	"ZFS storage pool",
4842 	&zfs_dev_ops
4843 };
4844 
4845 static struct modlinkage modlinkage = {
4846 	MODREV_1,
4847 	(void *)&zfs_modlfs,
4848 	(void *)&zfs_modldrv,
4849 	NULL
4850 };
4851 
4852 
4853 uint_t zfs_fsyncer_key;
4854 extern uint_t rrw_tsd_key;
4855 
4856 int
4857 _init(void)
4858 {
4859 	int error;
4860 
4861 	spa_init(FREAD | FWRITE);
4862 	zfs_init();
4863 	zvol_init();
4864 
4865 	if ((error = mod_install(&modlinkage)) != 0) {
4866 		zvol_fini();
4867 		zfs_fini();
4868 		spa_fini();
4869 		return (error);
4870 	}
4871 
4872 	tsd_create(&zfs_fsyncer_key, NULL);
4873 	tsd_create(&rrw_tsd_key, NULL);
4874 
4875 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
4876 	ASSERT(error == 0);
4877 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
4878 
4879 	return (0);
4880 }
4881 
4882 int
4883 _fini(void)
4884 {
4885 	int error;
4886 
4887 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
4888 		return (EBUSY);
4889 
4890 	if ((error = mod_remove(&modlinkage)) != 0)
4891 		return (error);
4892 
4893 	zvol_fini();
4894 	zfs_fini();
4895 	spa_fini();
4896 	if (zfs_nfsshare_inited)
4897 		(void) ddi_modclose(nfs_mod);
4898 	if (zfs_smbshare_inited)
4899 		(void) ddi_modclose(smbsrv_mod);
4900 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
4901 		(void) ddi_modclose(sharefs_mod);
4902 
4903 	tsd_destroy(&zfs_fsyncer_key);
4904 	ldi_ident_release(zfs_li);
4905 	zfs_li = NULL;
4906 	mutex_destroy(&zfs_share_lock);
4907 
4908 	return (error);
4909 }
4910 
4911 int
4912 _info(struct modinfo *modinfop)
4913 {
4914 	return (mod_info(&modlinkage, modinfop));
4915 }
4916