xref: /titanic_50/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision 80f5ed8169a6e7282d99231044035a818806371c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/param.h>
27 #include <sys/errno.h>
28 #include <sys/uio.h>
29 #include <sys/buf.h>
30 #include <sys/modctl.h>
31 #include <sys/open.h>
32 #include <sys/file.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/stat.h>
37 #include <sys/zfs_ioctl.h>
38 #include <sys/zfs_vfsops.h>
39 #include <sys/zfs_znode.h>
40 #include <sys/zap.h>
41 #include <sys/spa.h>
42 #include <sys/spa_impl.h>
43 #include <sys/vdev.h>
44 #include <sys/priv_impl.h>
45 #include <sys/dmu.h>
46 #include <sys/dsl_dir.h>
47 #include <sys/dsl_dataset.h>
48 #include <sys/dsl_prop.h>
49 #include <sys/dsl_deleg.h>
50 #include <sys/dmu_objset.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunldi.h>
54 #include <sys/policy.h>
55 #include <sys/zone.h>
56 #include <sys/nvpair.h>
57 #include <sys/pathname.h>
58 #include <sys/mount.h>
59 #include <sys/sdt.h>
60 #include <sys/fs/zfs.h>
61 #include <sys/zfs_ctldir.h>
62 #include <sys/zfs_dir.h>
63 #include <sys/zfs_onexit.h>
64 #include <sys/zvol.h>
65 #include <sys/dsl_scan.h>
66 #include <sharefs/share.h>
67 #include <sys/dmu_objset.h>
68 
69 #include "zfs_namecheck.h"
70 #include "zfs_prop.h"
71 #include "zfs_deleg.h"
72 #include "zfs_comutil.h"
73 
/* Module linkage structure for the filesystem; defined outside this file. */
extern struct modlfs zfs_modlfs;

/* ZFS init/fini entry points; defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/* LDI identifier for this driver; starts out NULL. */
ldi_ident_t zfs_li = NULL;
/* devinfo node pointer for this driver (presumably set at attach time). */
dev_info_t *zfs_dip;
81 
/* Signature of an ioctl handler: takes the command, returns an errno. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/* Signature of a security-policy check: command plus caller credential. */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);

/* What kind of name, if any, an ioctl carries (stored in zvec_namecheck). */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;

/*
 * One entry of the ioctl dispatch table: the handler, its security policy
 * and per-ioctl attributes.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;		/* ioctl handler */
	zfs_secpolicy_func_t	*zvec_secpolicy;	/* permission check */
	zfs_ioc_namecheck_t	zvec_namecheck;		/* kind of name carried */
	/* presumably: log this ioctl to pool history -- TODO confirm */
	boolean_t		zvec_his_log;
	/* presumably: verify pool state before dispatch -- TODO confirm */
	boolean_t		zvec_pool_check;
} zfs_ioc_vec_t;
98 
/*
 * This array is indexed by zfs_userquota_prop_t.
 *
 * It maps a user/group quota property to the delegated permission required
 * for it.  The zfs_secpolicy_userspace_*() routines index it with
 * zc_objset_type after checking that value against ZFS_NUM_USERQUOTA_PROPS.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
106 
/*
 * Forward declarations for routines referenced before their definitions.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
/* Not static: visible outside this file. */
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
115 
116 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
117 void
118 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
119 {
120 	const char *newfile;
121 	char buf[512];
122 	va_list adx;
123 
124 	/*
125 	 * Get rid of annoying "../common/" prefix to filename.
126 	 */
127 	newfile = strrchr(file, '/');
128 	if (newfile != NULL) {
129 		newfile = newfile + 1; /* Get rid of leading / */
130 	} else {
131 		newfile = file;
132 	}
133 
134 	va_start(adx, fmt);
135 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
136 	va_end(adx);
137 
138 	/*
139 	 * To get this data, use the zfs-dprintf probe as so:
140 	 * dtrace -q -n 'zfs-dprintf \
141 	 *	/stringof(arg0) == "dbuf.c"/ \
142 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
143 	 * arg0 = file name
144 	 * arg1 = function name
145 	 * arg2 = line number
146 	 * arg3 = message
147 	 */
148 	DTRACE_PROBE4(zfs__dprintf,
149 	    char *, newfile, char *, func, int, line, char *, buf);
150 }
151 
/*
 * Release a history record buffer.  The buffer must have been allocated
 * with size HIS_MAX_RECORD_LEN, as history_str_get() does.
 */
static void
history_str_free(char *buf)
{
	kmem_free(buf, HIS_MAX_RECORD_LEN);
}
157 
158 static char *
159 history_str_get(zfs_cmd_t *zc)
160 {
161 	char *buf;
162 
163 	if (zc->zc_history == NULL)
164 		return (NULL);
165 
166 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
167 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
168 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
169 		history_str_free(buf);
170 		return (NULL);
171 	}
172 
173 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
174 
175 	return (buf);
176 }
177 
178 /*
179  * Check to see if the named dataset is currently defined as bootable
180  */
181 static boolean_t
182 zfs_is_bootfs(const char *name)
183 {
184 	objset_t *os;
185 
186 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
187 		boolean_t ret;
188 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
189 		dmu_objset_rele(os, FTAG);
190 		return (ret);
191 	}
192 	return (B_FALSE);
193 }
194 
195 /*
196  * zfs_earlier_version
197  *
198  *	Return non-zero if the spa version is less than requested version.
199  */
200 static int
201 zfs_earlier_version(const char *name, int version)
202 {
203 	spa_t *spa;
204 
205 	if (spa_open(name, &spa, FTAG) == 0) {
206 		if (spa_version(spa) < version) {
207 			spa_close(spa, FTAG);
208 			return (1);
209 		}
210 		spa_close(spa, FTAG);
211 	}
212 	return (0);
213 }
214 
215 /*
216  * zpl_earlier_version
217  *
218  * Return TRUE if the ZPL version is less than requested version.
219  */
220 static boolean_t
221 zpl_earlier_version(const char *name, int version)
222 {
223 	objset_t *os;
224 	boolean_t rc = B_TRUE;
225 
226 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
227 		uint64_t zplversion;
228 
229 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
230 			dmu_objset_rele(os, FTAG);
231 			return (B_TRUE);
232 		}
233 		/* XXX reading from non-owned objset */
234 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
235 			rc = zplversion < version;
236 		dmu_objset_rele(os, FTAG);
237 	}
238 	return (rc);
239 }
240 
241 static void
242 zfs_log_history(zfs_cmd_t *zc)
243 {
244 	spa_t *spa;
245 	char *buf;
246 
247 	if ((buf = history_str_get(zc)) == NULL)
248 		return;
249 
250 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
251 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
252 			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
253 		spa_close(spa, FTAG);
254 	}
255 	history_str_free(buf);
256 }
257 
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * Both arguments are ignored; always returns 0.
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
{
	return (0);
}
268 
269 /*
270  * Policy for dataset read operations (list children, get statistics).  Requires
271  * no privileges, but must be visible in the local zone.
272  */
273 /* ARGSUSED */
274 static int
275 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
276 {
277 	if (INGLOBALZONE(curproc) ||
278 	    zone_dataset_visible(zc->zc_name, NULL))
279 		return (0);
280 
281 	return (ENOENT);
282 }
283 
284 static int
285 zfs_dozonecheck(const char *dataset, cred_t *cr)
286 {
287 	uint64_t zoned;
288 	int writable = 1;
289 
290 	/*
291 	 * The dataset must be visible by this zone -- check this first
292 	 * so they don't see EPERM on something they shouldn't know about.
293 	 */
294 	if (!INGLOBALZONE(curproc) &&
295 	    !zone_dataset_visible(dataset, &writable))
296 		return (ENOENT);
297 
298 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
299 		return (ENOENT);
300 
301 	if (INGLOBALZONE(curproc)) {
302 		/*
303 		 * If the fs is zoned, only root can access it from the
304 		 * global zone.
305 		 */
306 		if (secpolicy_zfs(cr) && zoned)
307 			return (EPERM);
308 	} else {
309 		/*
310 		 * If we are in a local zone, the 'zoned' property must be set.
311 		 */
312 		if (!zoned)
313 			return (EPERM);
314 
315 		/* must be writable by this zone */
316 		if (!writable)
317 			return (EPERM);
318 	}
319 	return (0);
320 }
321 
322 int
323 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
324 {
325 	int error;
326 
327 	error = zfs_dozonecheck(name, cr);
328 	if (error == 0) {
329 		error = secpolicy_zfs(cr);
330 		if (error)
331 			error = dsl_deleg_access(name, perm, cr);
332 	}
333 	return (error);
334 }
335 
336 /*
337  * Policy for setting the security label property.
338  *
339  * Returns 0 for success, non-zero for access and other errors.
340  */
341 static int
342 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
343 {
344 	char		ds_hexsl[MAXNAMELEN];
345 	bslabel_t	ds_sl, new_sl;
346 	boolean_t	new_default = FALSE;
347 	uint64_t	zoned;
348 	int		needed_priv = -1;
349 	int		error;
350 
351 	/* First get the existing dataset label. */
352 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
353 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
354 	if (error)
355 		return (EPERM);
356 
357 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
358 		new_default = TRUE;
359 
360 	/* The label must be translatable */
361 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
362 		return (EINVAL);
363 
364 	/*
365 	 * In a non-global zone, disallow attempts to set a label that
366 	 * doesn't match that of the zone; otherwise no other checks
367 	 * are needed.
368 	 */
369 	if (!INGLOBALZONE(curproc)) {
370 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
371 			return (EPERM);
372 		return (0);
373 	}
374 
375 	/*
376 	 * For global-zone datasets (i.e., those whose zoned property is
377 	 * "off", verify that the specified new label is valid for the
378 	 * global zone.
379 	 */
380 	if (dsl_prop_get_integer(name,
381 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
382 		return (EPERM);
383 	if (!zoned) {
384 		if (zfs_check_global_label(name, strval) != 0)
385 			return (EPERM);
386 	}
387 
388 	/*
389 	 * If the existing dataset label is nondefault, check if the
390 	 * dataset is mounted (label cannot be changed while mounted).
391 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
392 	 * mounted (or isn't a dataset, doesn't exist, ...).
393 	 */
394 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
395 		objset_t *os;
396 		static char *setsl_tag = "setsl_tag";
397 
398 		/*
399 		 * Try to own the dataset; abort if there is any error,
400 		 * (e.g., already mounted, in use, or other error).
401 		 */
402 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
403 		    setsl_tag, &os);
404 		if (error)
405 			return (EPERM);
406 
407 		dmu_objset_disown(os, setsl_tag);
408 
409 		if (new_default) {
410 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
411 			goto out_check;
412 		}
413 
414 		if (hexstr_to_label(strval, &new_sl) != 0)
415 			return (EPERM);
416 
417 		if (blstrictdom(&ds_sl, &new_sl))
418 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
419 		else if (blstrictdom(&new_sl, &ds_sl))
420 			needed_priv = PRIV_FILE_UPGRADE_SL;
421 	} else {
422 		/* dataset currently has a default label */
423 		if (!new_default)
424 			needed_priv = PRIV_FILE_UPGRADE_SL;
425 	}
426 
427 out_check:
428 	if (needed_priv != -1)
429 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
430 	return (0);
431 }
432 
433 static int
434 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
435     cred_t *cr)
436 {
437 	char *strval;
438 
439 	/*
440 	 * Check permissions for special properties.
441 	 */
442 	switch (prop) {
443 	case ZFS_PROP_ZONED:
444 		/*
445 		 * Disallow setting of 'zoned' from within a local zone.
446 		 */
447 		if (!INGLOBALZONE(curproc))
448 			return (EPERM);
449 		break;
450 
451 	case ZFS_PROP_QUOTA:
452 		if (!INGLOBALZONE(curproc)) {
453 			uint64_t zoned;
454 			char setpoint[MAXNAMELEN];
455 			/*
456 			 * Unprivileged users are allowed to modify the
457 			 * quota on things *under* (ie. contained by)
458 			 * the thing they own.
459 			 */
460 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
461 			    setpoint))
462 				return (EPERM);
463 			if (!zoned || strlen(dsname) <= strlen(setpoint))
464 				return (EPERM);
465 		}
466 		break;
467 
468 	case ZFS_PROP_MLSLABEL:
469 		if (!is_system_labeled())
470 			return (EPERM);
471 
472 		if (nvpair_value_string(propval, &strval) == 0) {
473 			int err;
474 
475 			err = zfs_set_slabel_policy(dsname, strval, CRED());
476 			if (err != 0)
477 				return (err);
478 		}
479 		break;
480 	}
481 
482 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
483 }
484 
485 int
486 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
487 {
488 	int error;
489 
490 	error = zfs_dozonecheck(zc->zc_name, cr);
491 	if (error)
492 		return (error);
493 
494 	/*
495 	 * permission to set permissions will be evaluated later in
496 	 * dsl_deleg_can_allow()
497 	 */
498 	return (0);
499 }
500 
/*
 * Policy for rollback: the caller needs write permission
 * ZFS_DELEG_PERM_ROLLBACK on zc->zc_name.
 */
int
zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_ROLLBACK, cr));
}
507 
/*
 * Policy for send: the caller needs write permission ZFS_DELEG_PERM_SEND
 * on zc->zc_name.
 */
int
zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_SEND, cr));
}
514 
515 static int
516 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
517 {
518 	vnode_t *vp;
519 	int error;
520 
521 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
522 	    NO_FOLLOW, NULL, &vp)) != 0)
523 		return (error);
524 
525 	/* Now make sure mntpnt and dataset are ZFS */
526 
527 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
528 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
529 	    zc->zc_name) != 0)) {
530 		VN_RELE(vp);
531 		return (EPERM);
532 	}
533 
534 	VN_RELE(vp);
535 	return (dsl_deleg_access(zc->zc_name,
536 	    ZFS_DELEG_PERM_SHARE, cr));
537 }
538 
539 int
540 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
541 {
542 	if (!INGLOBALZONE(curproc))
543 		return (EPERM);
544 
545 	if (secpolicy_nfs(cr) == 0) {
546 		return (0);
547 	} else {
548 		return (zfs_secpolicy_deleg_share(zc, cr));
549 	}
550 }
551 
552 int
553 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
554 {
555 	if (!INGLOBALZONE(curproc))
556 		return (EPERM);
557 
558 	if (secpolicy_smb(cr) == 0) {
559 		return (0);
560 	} else {
561 		return (zfs_secpolicy_deleg_share(zc, cr));
562 	}
563 }
564 
/*
 * Compute the parent of a dataset or snapshot name.
 *
 *	datasetname	name such as "pool/fs/child" or "pool/fs@snap"
 *	parent		output buffer receiving the parent name
 *	parentsize	size of the output buffer in bytes
 *
 * For a snapshot the parent is everything before the last '@'; otherwise
 * it is everything before the last '/'.
 *
 * Returns 0 on success, ENOENT if the name has neither '@' nor '/', and
 * ENAMETOOLONG if the name (plus terminator) does not fit in the buffer.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	/*
	 * strncpy() does not NUL-terminate when the source fills the
	 * destination, which would let strrchr() below run off the end of
	 * the caller's buffer.  Reject names that do not fit instead of
	 * operating on a truncated, unterminated copy.
	 */
	if (parentsize <= 0 || strlen(datasetname) >= (size_t)parentsize)
		return (ENAMETOOLONG);

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (ENOENT);
		cp[0] = '\0';
	}

	return (0);
}
586 
587 int
588 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
589 {
590 	int error;
591 
592 	if ((error = zfs_secpolicy_write_perms(name,
593 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
594 		return (error);
595 
596 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
597 }
598 
/*
 * ioctl policy wrapper: apply zfs_secpolicy_destroy_perms() to zc->zc_name.
 */
static int
zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
}
604 
605 /*
606  * Destroying snapshots with delegated permissions requires
607  * descendent mount and destroy permissions.
608  * Reassemble the full filesystem@snap name so dsl_deleg_access()
609  * can do the correct permission check.
610  *
611  * Since this routine is used when doing a recursive destroy of snapshots
612  * and destroying snapshots requires descendent permissions, a successfull
613  * check of the top level snapshot applies to snapshots of all descendent
614  * datasets as well.
615  */
616 static int
617 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
618 {
619 	int error;
620 	char *dsname;
621 
622 	dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
623 
624 	error = zfs_secpolicy_destroy_perms(dsname, cr);
625 
626 	strfree(dsname);
627 	return (error);
628 }
629 
630 int
631 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
632 {
633 	char	parentname[MAXNAMELEN];
634 	int	error;
635 
636 	if ((error = zfs_secpolicy_write_perms(from,
637 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
638 		return (error);
639 
640 	if ((error = zfs_secpolicy_write_perms(from,
641 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
642 		return (error);
643 
644 	if ((error = zfs_get_parent(to, parentname,
645 	    sizeof (parentname))) != 0)
646 		return (error);
647 
648 	if ((error = zfs_secpolicy_write_perms(parentname,
649 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
650 		return (error);
651 
652 	if ((error = zfs_secpolicy_write_perms(parentname,
653 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
654 		return (error);
655 
656 	return (error);
657 }
658 
/*
 * ioctl policy wrapper: check a rename from zc->zc_name to zc->zc_value
 * via zfs_secpolicy_rename_perms().
 */
static int
zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
}
664 
665 static int
666 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
667 {
668 	char	parentname[MAXNAMELEN];
669 	objset_t *clone;
670 	int error;
671 
672 	error = zfs_secpolicy_write_perms(zc->zc_name,
673 	    ZFS_DELEG_PERM_PROMOTE, cr);
674 	if (error)
675 		return (error);
676 
677 	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
678 
679 	if (error == 0) {
680 		dsl_dataset_t *pclone = NULL;
681 		dsl_dir_t *dd;
682 		dd = clone->os_dsl_dataset->ds_dir;
683 
684 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
685 		error = dsl_dataset_hold_obj(dd->dd_pool,
686 		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
687 		rw_exit(&dd->dd_pool->dp_config_rwlock);
688 		if (error) {
689 			dmu_objset_rele(clone, FTAG);
690 			return (error);
691 		}
692 
693 		error = zfs_secpolicy_write_perms(zc->zc_name,
694 		    ZFS_DELEG_PERM_MOUNT, cr);
695 
696 		dsl_dataset_name(pclone, parentname);
697 		dmu_objset_rele(clone, FTAG);
698 		dsl_dataset_rele(pclone, FTAG);
699 		if (error == 0)
700 			error = zfs_secpolicy_write_perms(parentname,
701 			    ZFS_DELEG_PERM_PROMOTE, cr);
702 	}
703 	return (error);
704 }
705 
706 static int
707 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
708 {
709 	int error;
710 
711 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
712 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
713 		return (error);
714 
715 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
716 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
717 		return (error);
718 
719 	return (zfs_secpolicy_write_perms(zc->zc_name,
720 	    ZFS_DELEG_PERM_CREATE, cr));
721 }
722 
/*
 * Permission check for snapshotting `name`: the caller needs write
 * permission ZFS_DELEG_PERM_SNAPSHOT.
 */
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(name,
	    ZFS_DELEG_PERM_SNAPSHOT, cr));
}
729 
/*
 * ioctl policy wrapper: apply zfs_secpolicy_snapshot_perms() to zc->zc_name.
 */
static int
zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
{

	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
}
736 
737 static int
738 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
739 {
740 	char	parentname[MAXNAMELEN];
741 	int	error;
742 
743 	if ((error = zfs_get_parent(zc->zc_name, parentname,
744 	    sizeof (parentname))) != 0)
745 		return (error);
746 
747 	if (zc->zc_value[0] != '\0') {
748 		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
749 		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
750 			return (error);
751 	}
752 
753 	if ((error = zfs_secpolicy_write_perms(parentname,
754 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
755 		return (error);
756 
757 	error = zfs_secpolicy_write_perms(parentname,
758 	    ZFS_DELEG_PERM_MOUNT, cr);
759 
760 	return (error);
761 }
762 
763 static int
764 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
765 {
766 	int error;
767 
768 	error = secpolicy_fs_unmount(cr, NULL);
769 	if (error) {
770 		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
771 	}
772 	return (error);
773 }
774 
775 /*
776  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
777  * SYS_CONFIG privilege, which is not available in a local zone.
778  */
779 /* ARGSUSED */
780 static int
781 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
782 {
783 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
784 		return (EPERM);
785 
786 	return (0);
787 }
788 
/*
 * Policy for fault injection.  Requires all privileges.
 *
 * The decision is delegated entirely to secpolicy_zinject(); the command
 * argument is unused.
 */
/* ARGSUSED */
static int
zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
{
	return (secpolicy_zinject(cr));
}
798 
799 static int
800 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
801 {
802 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
803 
804 	if (prop == ZPROP_INVAL) {
805 		if (!zfs_prop_user(zc->zc_value))
806 			return (EINVAL);
807 		return (zfs_secpolicy_write_perms(zc->zc_name,
808 		    ZFS_DELEG_PERM_USERPROP, cr));
809 	} else {
810 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
811 		    NULL, cr));
812 	}
813 }
814 
815 static int
816 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
817 {
818 	int err = zfs_secpolicy_read(zc, cr);
819 	if (err)
820 		return (err);
821 
822 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
823 		return (EINVAL);
824 
825 	if (zc->zc_value[0] == 0) {
826 		/*
827 		 * They are asking about a posix uid/gid.  If it's
828 		 * themself, allow it.
829 		 */
830 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
831 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
832 			if (zc->zc_guid == crgetuid(cr))
833 				return (0);
834 		} else {
835 			if (groupmember(zc->zc_guid, cr))
836 				return (0);
837 		}
838 	}
839 
840 	return (zfs_secpolicy_write_perms(zc->zc_name,
841 	    userquota_perms[zc->zc_objset_type], cr));
842 }
843 
844 static int
845 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
846 {
847 	int err = zfs_secpolicy_read(zc, cr);
848 	if (err)
849 		return (err);
850 
851 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
852 		return (EINVAL);
853 
854 	return (zfs_secpolicy_write_perms(zc->zc_name,
855 	    userquota_perms[zc->zc_objset_type], cr));
856 }
857 
/*
 * Policy for the userspace upgrade: checked like setting ZFS_PROP_VERSION
 * on zc->zc_name, with no property value supplied.
 */
static int
zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
	    NULL, cr));
}
864 
/*
 * Policy for hold: the caller needs write permission ZFS_DELEG_PERM_HOLD
 * on zc->zc_name.
 */
static int
zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_HOLD, cr));
}
871 
/*
 * Policy for release: the caller needs write permission
 * ZFS_DELEG_PERM_RELEASE on zc->zc_name.
 */
static int
zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
{
	return (zfs_secpolicy_write_perms(zc->zc_name,
	    ZFS_DELEG_PERM_RELEASE, cr));
}
878 
879 /*
880  * Returns the nvlist as specified by the user in the zfs_cmd_t.
881  */
882 static int
883 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
884 {
885 	char *packed;
886 	int error;
887 	nvlist_t *list = NULL;
888 
889 	/*
890 	 * Read in and unpack the user-supplied nvlist.
891 	 */
892 	if (size == 0)
893 		return (EINVAL);
894 
895 	packed = kmem_alloc(size, KM_SLEEP);
896 
897 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
898 	    iflag)) != 0) {
899 		kmem_free(packed, size);
900 		return (error);
901 	}
902 
903 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
904 		kmem_free(packed, size);
905 		return (error);
906 	}
907 
908 	kmem_free(packed, size);
909 
910 	*nvp = list;
911 	return (0);
912 }
913 
/*
 * Shrink the error list *errors until its native-encoded size fits in
 * zc->zc_nvlist_dst_size.
 *
 * If the list already fits, nothing changes.  Otherwise pairs are removed
 * one at a time and a ZPROP_N_MORE_ERRORS int32 pair holding the number of
 * removed pairs is left in the list.
 *
 * Returns 0 on success, or ENOMEM when the destination is smaller than
 * 1024 bytes and the list does not fit.
 */
static int
fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
{
	size_t size;

	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);

	if (size > zc->zc_nvlist_dst_size) {
		nvpair_t *more_errors;
		int n = 0;

		if (zc->zc_nvlist_dst_size < 1024)
			return (ENOMEM);

		/*
		 * Add a placeholder counter first so its own size is
		 * accounted for while trimming.  It is fetched back via
		 * nvlist_prev_nvpair(list, NULL) (presumably the last pair
		 * of the list).
		 */
		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
		more_errors = nvlist_prev_nvpair(*errors, NULL);

		/* Drop the pair just before the counter until the list fits. */
		do {
			nvpair_t *pair = nvlist_prev_nvpair(*errors,
			    more_errors);
			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
			n++;
			VERIFY(nvlist_size(*errors, &size,
			    NV_ENCODE_NATIVE) == 0);
		} while (size > zc->zc_nvlist_dst_size);

		/* Replace the placeholder with the real count of dropped pairs. */
		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
		/*
		 * NOTE(review): the size refresh below sits inside ASSERT, so
		 * it presumably does not run in non-debug builds; it only
		 * feeds the ASSERT that follows it.
		 */
		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
		ASSERT(size <= zc->zc_nvlist_dst_size);
	}

	return (0);
}
948 
949 static int
950 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
951 {
952 	char *packed = NULL;
953 	int error = 0;
954 	size_t size;
955 
956 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
957 
958 	if (size > zc->zc_nvlist_dst_size) {
959 		error = ENOMEM;
960 	} else {
961 		packed = kmem_alloc(size, KM_SLEEP);
962 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
963 		    KM_SLEEP) == 0);
964 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
965 		    size, zc->zc_iflags) != 0)
966 			error = EFAULT;
967 		kmem_free(packed, size);
968 	}
969 
970 	zc->zc_nvlist_dst_size = size;
971 	return (error);
972 }
973 
974 static int
975 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
976 {
977 	objset_t *os;
978 	int error;
979 
980 	error = dmu_objset_hold(dsname, FTAG, &os);
981 	if (error)
982 		return (error);
983 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
984 		dmu_objset_rele(os, FTAG);
985 		return (EINVAL);
986 	}
987 
988 	mutex_enter(&os->os_user_ptr_lock);
989 	*zfvp = dmu_objset_get_user(os);
990 	if (*zfvp) {
991 		VFS_HOLD((*zfvp)->z_vfs);
992 	} else {
993 		error = ESRCH;
994 	}
995 	mutex_exit(&os->os_user_ptr_lock);
996 	dmu_objset_rele(os, FTAG);
997 	return (error);
998 }
999 
1000 /*
1001  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1002  * case its z_vfs will be NULL, and it will be opened as the owner.
1003  */
1004 static int
1005 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1006 {
1007 	int error = 0;
1008 
1009 	if (getzfsvfs(name, zfvp) != 0)
1010 		error = zfsvfs_create(name, zfvp);
1011 	if (error == 0) {
1012 		rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1013 		    RW_READER, tag);
1014 		if ((*zfvp)->z_unmounted) {
1015 			/*
1016 			 * XXX we could probably try again, since the unmounting
1017 			 * thread should be just about to disassociate the
1018 			 * objset from the zfsvfs.
1019 			 */
1020 			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1021 			return (EBUSY);
1022 		}
1023 	}
1024 	return (error);
1025 }
1026 
1027 static void
1028 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1029 {
1030 	rrw_exit(&zfsvfs->z_teardown_lock, tag);
1031 
1032 	if (zfsvfs->z_vfs) {
1033 		VFS_RELE(zfsvfs->z_vfs);
1034 	} else {
1035 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1036 		zfsvfs_free(zfsvfs);
1037 	}
1038 }
1039 
1040 static int
1041 zfs_ioc_pool_create(zfs_cmd_t *zc)
1042 {
1043 	int error;
1044 	nvlist_t *config, *props = NULL;
1045 	nvlist_t *rootprops = NULL;
1046 	nvlist_t *zplprops = NULL;
1047 	char *buf;
1048 
1049 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1050 	    zc->zc_iflags, &config))
1051 		return (error);
1052 
1053 	if (zc->zc_nvlist_src_size != 0 && (error =
1054 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1055 	    zc->zc_iflags, &props))) {
1056 		nvlist_free(config);
1057 		return (error);
1058 	}
1059 
1060 	if (props) {
1061 		nvlist_t *nvl = NULL;
1062 		uint64_t version = SPA_VERSION;
1063 
1064 		(void) nvlist_lookup_uint64(props,
1065 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1066 		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
1067 			error = EINVAL;
1068 			goto pool_props_bad;
1069 		}
1070 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1071 		if (nvl) {
1072 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1073 			if (error != 0) {
1074 				nvlist_free(config);
1075 				nvlist_free(props);
1076 				return (error);
1077 			}
1078 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1079 		}
1080 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1081 		error = zfs_fill_zplprops_root(version, rootprops,
1082 		    zplprops, NULL);
1083 		if (error)
1084 			goto pool_props_bad;
1085 	}
1086 
1087 	buf = history_str_get(zc);
1088 
1089 	error = spa_create(zc->zc_name, config, props, buf, zplprops);
1090 
1091 	/*
1092 	 * Set the remaining root properties
1093 	 */
1094 	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1095 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1096 		(void) spa_destroy(zc->zc_name);
1097 
1098 	if (buf != NULL)
1099 		history_str_free(buf);
1100 
1101 pool_props_bad:
1102 	nvlist_free(rootprops);
1103 	nvlist_free(zplprops);
1104 	nvlist_free(config);
1105 	nvlist_free(props);
1106 
1107 	return (error);
1108 }
1109 
1110 static int
1111 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1112 {
1113 	int error;
1114 	zfs_log_history(zc);
1115 	error = spa_destroy(zc->zc_name);
1116 	if (error == 0)
1117 		zvol_remove_minors(zc->zc_name);
1118 	return (error);
1119 }
1120 
1121 static int
1122 zfs_ioc_pool_import(zfs_cmd_t *zc)
1123 {
1124 	nvlist_t *config, *props = NULL;
1125 	uint64_t guid;
1126 	int error;
1127 
1128 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1129 	    zc->zc_iflags, &config)) != 0)
1130 		return (error);
1131 
1132 	if (zc->zc_nvlist_src_size != 0 && (error =
1133 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1134 	    zc->zc_iflags, &props))) {
1135 		nvlist_free(config);
1136 		return (error);
1137 	}
1138 
1139 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1140 	    guid != zc->zc_guid)
1141 		error = EINVAL;
1142 	else if (zc->zc_cookie)
1143 		error = spa_import_verbatim(zc->zc_name, config, props);
1144 	else
1145 		error = spa_import(zc->zc_name, config, props);
1146 
1147 	if (zc->zc_nvlist_dst != 0)
1148 		(void) put_nvlist(zc, config);
1149 
1150 	nvlist_free(config);
1151 
1152 	if (props)
1153 		nvlist_free(props);
1154 
1155 	return (error);
1156 }
1157 
1158 static int
1159 zfs_ioc_pool_export(zfs_cmd_t *zc)
1160 {
1161 	int error;
1162 	boolean_t force = (boolean_t)zc->zc_cookie;
1163 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1164 
1165 	zfs_log_history(zc);
1166 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1167 	if (error == 0)
1168 		zvol_remove_minors(zc->zc_name);
1169 	return (error);
1170 }
1171 
1172 static int
1173 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1174 {
1175 	nvlist_t *configs;
1176 	int error;
1177 
1178 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1179 		return (EEXIST);
1180 
1181 	error = put_nvlist(zc, configs);
1182 
1183 	nvlist_free(configs);
1184 
1185 	return (error);
1186 }
1187 
1188 static int
1189 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1190 {
1191 	nvlist_t *config;
1192 	int error;
1193 	int ret = 0;
1194 
1195 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1196 	    sizeof (zc->zc_value));
1197 
1198 	if (config != NULL) {
1199 		ret = put_nvlist(zc, config);
1200 		nvlist_free(config);
1201 
1202 		/*
1203 		 * The config may be present even if 'error' is non-zero.
1204 		 * In this case we return success, and preserve the real errno
1205 		 * in 'zc_cookie'.
1206 		 */
1207 		zc->zc_cookie = error;
1208 	} else {
1209 		ret = error;
1210 	}
1211 
1212 	return (ret);
1213 }
1214 
1215 /*
1216  * Try to import the given pool, returning pool stats as appropriate so that
1217  * user land knows which devices are available and overall pool health.
1218  */
1219 static int
1220 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1221 {
1222 	nvlist_t *tryconfig, *config;
1223 	int error;
1224 
1225 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1226 	    zc->zc_iflags, &tryconfig)) != 0)
1227 		return (error);
1228 
1229 	config = spa_tryimport(tryconfig);
1230 
1231 	nvlist_free(tryconfig);
1232 
1233 	if (config == NULL)
1234 		return (EINVAL);
1235 
1236 	error = put_nvlist(zc, config);
1237 	nvlist_free(config);
1238 
1239 	return (error);
1240 }
1241 
1242 /*
1243  * inputs:
1244  * zc_name              name of the pool
1245  * zc_cookie            scan func (pool_scan_func_t)
1246  */
1247 static int
1248 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1249 {
1250 	spa_t *spa;
1251 	int error;
1252 
1253 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1254 		return (error);
1255 
1256 	if (zc->zc_cookie == POOL_SCAN_NONE)
1257 		error = spa_scan_stop(spa);
1258 	else
1259 		error = spa_scan(spa, zc->zc_cookie);
1260 
1261 	spa_close(spa, FTAG);
1262 
1263 	return (error);
1264 }
1265 
1266 static int
1267 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1268 {
1269 	spa_t *spa;
1270 	int error;
1271 
1272 	error = spa_open(zc->zc_name, &spa, FTAG);
1273 	if (error == 0) {
1274 		spa_freeze(spa);
1275 		spa_close(spa, FTAG);
1276 	}
1277 	return (error);
1278 }
1279 
1280 static int
1281 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1282 {
1283 	spa_t *spa;
1284 	int error;
1285 
1286 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1287 		return (error);
1288 
1289 	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1290 		spa_close(spa, FTAG);
1291 		return (EINVAL);
1292 	}
1293 
1294 	spa_upgrade(spa, zc->zc_cookie);
1295 	spa_close(spa, FTAG);
1296 
1297 	return (error);
1298 }
1299 
1300 static int
1301 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1302 {
1303 	spa_t *spa;
1304 	char *hist_buf;
1305 	uint64_t size;
1306 	int error;
1307 
1308 	if ((size = zc->zc_history_len) == 0)
1309 		return (EINVAL);
1310 
1311 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1312 		return (error);
1313 
1314 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1315 		spa_close(spa, FTAG);
1316 		return (ENOTSUP);
1317 	}
1318 
1319 	hist_buf = kmem_alloc(size, KM_SLEEP);
1320 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1321 	    &zc->zc_history_len, hist_buf)) == 0) {
1322 		error = ddi_copyout(hist_buf,
1323 		    (void *)(uintptr_t)zc->zc_history,
1324 		    zc->zc_history_len, zc->zc_iflags);
1325 	}
1326 
1327 	spa_close(spa, FTAG);
1328 	kmem_free(hist_buf, size);
1329 	return (error);
1330 }
1331 
1332 static int
1333 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1334 {
1335 	int error;
1336 
1337 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1338 		return (error);
1339 
1340 	return (0);
1341 }
1342 
1343 /*
1344  * inputs:
1345  * zc_name		name of filesystem
1346  * zc_obj		object to find
1347  *
1348  * outputs:
1349  * zc_value		name of object
1350  */
1351 static int
1352 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1353 {
1354 	objset_t *os;
1355 	int error;
1356 
1357 	/* XXX reading from objset not owned */
1358 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1359 		return (error);
1360 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1361 		dmu_objset_rele(os, FTAG);
1362 		return (EINVAL);
1363 	}
1364 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1365 	    sizeof (zc->zc_value));
1366 	dmu_objset_rele(os, FTAG);
1367 
1368 	return (error);
1369 }
1370 
1371 static int
1372 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1373 {
1374 	spa_t *spa;
1375 	int error;
1376 	nvlist_t *config, **l2cache, **spares;
1377 	uint_t nl2cache = 0, nspares = 0;
1378 
1379 	error = spa_open(zc->zc_name, &spa, FTAG);
1380 	if (error != 0)
1381 		return (error);
1382 
1383 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1384 	    zc->zc_iflags, &config);
1385 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1386 	    &l2cache, &nl2cache);
1387 
1388 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1389 	    &spares, &nspares);
1390 
1391 	/*
1392 	 * A root pool with concatenated devices is not supported.
1393 	 * Thus, can not add a device to a root pool.
1394 	 *
1395 	 * Intent log device can not be added to a rootpool because
1396 	 * during mountroot, zil is replayed, a seperated log device
1397 	 * can not be accessed during the mountroot time.
1398 	 *
1399 	 * l2cache and spare devices are ok to be added to a rootpool.
1400 	 */
1401 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1402 		nvlist_free(config);
1403 		spa_close(spa, FTAG);
1404 		return (EDOM);
1405 	}
1406 
1407 	if (error == 0) {
1408 		error = spa_vdev_add(spa, config);
1409 		nvlist_free(config);
1410 	}
1411 	spa_close(spa, FTAG);
1412 	return (error);
1413 }
1414 
1415 /*
1416  * inputs:
1417  * zc_name		name of the pool
1418  * zc_nvlist_conf	nvlist of devices to remove
1419  * zc_cookie		to stop the remove?
1420  */
1421 static int
1422 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1423 {
1424 	spa_t *spa;
1425 	int error;
1426 
1427 	error = spa_open(zc->zc_name, &spa, FTAG);
1428 	if (error != 0)
1429 		return (error);
1430 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1431 	spa_close(spa, FTAG);
1432 	return (error);
1433 }
1434 
1435 static int
1436 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1437 {
1438 	spa_t *spa;
1439 	int error;
1440 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1441 
1442 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1443 		return (error);
1444 	switch (zc->zc_cookie) {
1445 	case VDEV_STATE_ONLINE:
1446 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1447 		break;
1448 
1449 	case VDEV_STATE_OFFLINE:
1450 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1451 		break;
1452 
1453 	case VDEV_STATE_FAULTED:
1454 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1455 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1456 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1457 
1458 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1459 		break;
1460 
1461 	case VDEV_STATE_DEGRADED:
1462 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1463 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1464 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1465 
1466 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1467 		break;
1468 
1469 	default:
1470 		error = EINVAL;
1471 	}
1472 	zc->zc_cookie = newstate;
1473 	spa_close(spa, FTAG);
1474 	return (error);
1475 }
1476 
1477 static int
1478 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1479 {
1480 	spa_t *spa;
1481 	int replacing = zc->zc_cookie;
1482 	nvlist_t *config;
1483 	int error;
1484 
1485 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1486 		return (error);
1487 
1488 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1489 	    zc->zc_iflags, &config)) == 0) {
1490 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1491 		nvlist_free(config);
1492 	}
1493 
1494 	spa_close(spa, FTAG);
1495 	return (error);
1496 }
1497 
1498 static int
1499 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1500 {
1501 	spa_t *spa;
1502 	int error;
1503 
1504 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1505 		return (error);
1506 
1507 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1508 
1509 	spa_close(spa, FTAG);
1510 	return (error);
1511 }
1512 
1513 static int
1514 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1515 {
1516 	spa_t *spa;
1517 	nvlist_t *config, *props = NULL;
1518 	int error;
1519 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1520 
1521 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1522 		return (error);
1523 
1524 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1525 	    zc->zc_iflags, &config)) {
1526 		spa_close(spa, FTAG);
1527 		return (error);
1528 	}
1529 
1530 	if (zc->zc_nvlist_src_size != 0 && (error =
1531 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1532 	    zc->zc_iflags, &props))) {
1533 		spa_close(spa, FTAG);
1534 		nvlist_free(config);
1535 		return (error);
1536 	}
1537 
1538 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1539 
1540 	spa_close(spa, FTAG);
1541 
1542 	nvlist_free(config);
1543 	nvlist_free(props);
1544 
1545 	return (error);
1546 }
1547 
1548 static int
1549 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1550 {
1551 	spa_t *spa;
1552 	char *path = zc->zc_value;
1553 	uint64_t guid = zc->zc_guid;
1554 	int error;
1555 
1556 	error = spa_open(zc->zc_name, &spa, FTAG);
1557 	if (error != 0)
1558 		return (error);
1559 
1560 	error = spa_vdev_setpath(spa, guid, path);
1561 	spa_close(spa, FTAG);
1562 	return (error);
1563 }
1564 
1565 static int
1566 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1567 {
1568 	spa_t *spa;
1569 	char *fru = zc->zc_value;
1570 	uint64_t guid = zc->zc_guid;
1571 	int error;
1572 
1573 	error = spa_open(zc->zc_name, &spa, FTAG);
1574 	if (error != 0)
1575 		return (error);
1576 
1577 	error = spa_vdev_setfru(spa, guid, fru);
1578 	spa_close(spa, FTAG);
1579 	return (error);
1580 }
1581 
1582 /*
1583  * inputs:
1584  * zc_name		name of filesystem
1585  * zc_nvlist_dst_size	size of buffer for property nvlist
1586  *
1587  * outputs:
1588  * zc_objset_stats	stats
1589  * zc_nvlist_dst	property nvlist
1590  * zc_nvlist_dst_size	size of property nvlist
1591  */
1592 static int
1593 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1594 {
1595 	objset_t *os = NULL;
1596 	int error;
1597 	nvlist_t *nv;
1598 
1599 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1600 		return (error);
1601 
1602 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1603 
1604 	if (zc->zc_nvlist_dst != 0 &&
1605 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1606 		dmu_objset_stats(os, nv);
1607 		/*
1608 		 * NB: zvol_get_stats() will read the objset contents,
1609 		 * which we aren't supposed to do with a
1610 		 * DS_MODE_USER hold, because it could be
1611 		 * inconsistent.  So this is a bit of a workaround...
1612 		 * XXX reading with out owning
1613 		 */
1614 		if (!zc->zc_objset_stats.dds_inconsistent) {
1615 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1616 				VERIFY(zvol_get_stats(os, nv) == 0);
1617 		}
1618 		error = put_nvlist(zc, nv);
1619 		nvlist_free(nv);
1620 	}
1621 
1622 	dmu_objset_rele(os, FTAG);
1623 	return (error);
1624 }
1625 
1626 /*
1627  * inputs:
1628  * zc_name		name of filesystem
1629  * zc_nvlist_dst_size	size of buffer for property nvlist
1630  *
1631  * outputs:
1632  * zc_nvlist_dst	received property nvlist
1633  * zc_nvlist_dst_size	size of received property nvlist
1634  *
1635  * Gets received properties (distinct from local properties on or after
1636  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1637  * local property values.
1638  */
1639 static int
1640 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1641 {
1642 	objset_t *os = NULL;
1643 	int error;
1644 	nvlist_t *nv;
1645 
1646 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1647 		return (error);
1648 
1649 	/*
1650 	 * Without this check, we would return local property values if the
1651 	 * caller has not already received properties on or after
1652 	 * SPA_VERSION_RECVD_PROPS.
1653 	 */
1654 	if (!dsl_prop_get_hasrecvd(os)) {
1655 		dmu_objset_rele(os, FTAG);
1656 		return (ENOTSUP);
1657 	}
1658 
1659 	if (zc->zc_nvlist_dst != 0 &&
1660 	    (error = dsl_prop_get_received(os, &nv)) == 0) {
1661 		error = put_nvlist(zc, nv);
1662 		nvlist_free(nv);
1663 	}
1664 
1665 	dmu_objset_rele(os, FTAG);
1666 	return (error);
1667 }
1668 
1669 static int
1670 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1671 {
1672 	uint64_t value;
1673 	int error;
1674 
1675 	/*
1676 	 * zfs_get_zplprop() will either find a value or give us
1677 	 * the default value (if there is one).
1678 	 */
1679 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1680 		return (error);
1681 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1682 	return (0);
1683 }
1684 
1685 /*
1686  * inputs:
1687  * zc_name		name of filesystem
1688  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1689  *
1690  * outputs:
1691  * zc_nvlist_dst	zpl property nvlist
1692  * zc_nvlist_dst_size	size of zpl property nvlist
1693  */
1694 static int
1695 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1696 {
1697 	objset_t *os;
1698 	int err;
1699 
1700 	/* XXX reading without owning */
1701 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1702 		return (err);
1703 
1704 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1705 
1706 	/*
1707 	 * NB: nvl_add_zplprop() will read the objset contents,
1708 	 * which we aren't supposed to do with a DS_MODE_USER
1709 	 * hold, because it could be inconsistent.
1710 	 */
1711 	if (zc->zc_nvlist_dst != NULL &&
1712 	    !zc->zc_objset_stats.dds_inconsistent &&
1713 	    dmu_objset_type(os) == DMU_OST_ZFS) {
1714 		nvlist_t *nv;
1715 
1716 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1717 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1718 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1719 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1720 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1721 			err = put_nvlist(zc, nv);
1722 		nvlist_free(nv);
1723 	} else {
1724 		err = ENOENT;
1725 	}
1726 	dmu_objset_rele(os, FTAG);
1727 	return (err);
1728 }
1729 
1730 static boolean_t
1731 dataset_name_hidden(const char *name)
1732 {
1733 	/*
1734 	 * Skip over datasets that are not visible in this zone,
1735 	 * internal datasets (which have a $ in their name), and
1736 	 * temporary datasets (which have a % in their name).
1737 	 */
1738 	if (strchr(name, '$') != NULL)
1739 		return (B_TRUE);
1740 	if (strchr(name, '%') != NULL)
1741 		return (B_TRUE);
1742 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1743 		return (B_TRUE);
1744 	return (B_FALSE);
1745 }
1746 
1747 /*
1748  * inputs:
1749  * zc_name		name of filesystem
1750  * zc_cookie		zap cursor
1751  * zc_nvlist_dst_size	size of buffer for property nvlist
1752  *
1753  * outputs:
1754  * zc_name		name of next filesystem
1755  * zc_cookie		zap cursor
1756  * zc_objset_stats	stats
1757  * zc_nvlist_dst	property nvlist
1758  * zc_nvlist_dst_size	size of property nvlist
1759  */
1760 static int
1761 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1762 {
1763 	objset_t *os;
1764 	int error;
1765 	char *p;
1766 	size_t orig_len = strlen(zc->zc_name);
1767 
1768 top:
1769 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1770 		if (error == ENOENT)
1771 			error = ESRCH;
1772 		return (error);
1773 	}
1774 
1775 	p = strrchr(zc->zc_name, '/');
1776 	if (p == NULL || p[1] != '\0')
1777 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1778 	p = zc->zc_name + strlen(zc->zc_name);
1779 
1780 	/*
1781 	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1782 	 * but is not declared void because its called by dmu_objset_find().
1783 	 */
1784 	if (zc->zc_cookie == 0) {
1785 		uint64_t cookie = 0;
1786 		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1787 
1788 		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
1789 			(void) dmu_objset_prefetch(p, NULL);
1790 	}
1791 
1792 	do {
1793 		error = dmu_dir_list_next(os,
1794 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1795 		    NULL, &zc->zc_cookie);
1796 		if (error == ENOENT)
1797 			error = ESRCH;
1798 	} while (error == 0 && dataset_name_hidden(zc->zc_name) &&
1799 	    !(zc->zc_iflags & FKIOCTL));
1800 	dmu_objset_rele(os, FTAG);
1801 
1802 	/*
1803 	 * If it's an internal dataset (ie. with a '$' in its name),
1804 	 * don't try to get stats for it, otherwise we'll return ENOENT.
1805 	 */
1806 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
1807 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1808 		if (error == ENOENT) {
1809 			/* We lost a race with destroy, get the next one. */
1810 			zc->zc_name[orig_len] = '\0';
1811 			goto top;
1812 		}
1813 	}
1814 	return (error);
1815 }
1816 
1817 /*
1818  * inputs:
1819  * zc_name		name of filesystem
1820  * zc_cookie		zap cursor
1821  * zc_nvlist_dst_size	size of buffer for property nvlist
1822  *
1823  * outputs:
1824  * zc_name		name of next snapshot
1825  * zc_objset_stats	stats
1826  * zc_nvlist_dst	property nvlist
1827  * zc_nvlist_dst_size	size of property nvlist
1828  */
1829 static int
1830 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1831 {
1832 	objset_t *os;
1833 	int error;
1834 
1835 top:
1836 	if (zc->zc_cookie == 0)
1837 		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
1838 		    NULL, DS_FIND_SNAPSHOTS);
1839 
1840 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
1841 	if (error)
1842 		return (error == ENOENT ? ESRCH : error);
1843 
1844 	/*
1845 	 * A dataset name of maximum length cannot have any snapshots,
1846 	 * so exit immediately.
1847 	 */
1848 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1849 		dmu_objset_rele(os, FTAG);
1850 		return (ESRCH);
1851 	}
1852 
1853 	error = dmu_snapshot_list_next(os,
1854 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1855 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
1856 	dmu_objset_rele(os, FTAG);
1857 	if (error == 0) {
1858 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1859 		if (error == ENOENT)  {
1860 			/* We lost a race with destroy, get the next one. */
1861 			*strchr(zc->zc_name, '@') = '\0';
1862 			goto top;
1863 		}
1864 	} else if (error == ENOENT) {
1865 		error = ESRCH;
1866 	}
1867 
1868 	/* if we failed, undo the @ that we tacked on to zc_name */
1869 	if (error)
1870 		*strchr(zc->zc_name, '@') = '\0';
1871 	return (error);
1872 }
1873 
1874 static int
1875 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
1876 {
1877 	const char *propname = nvpair_name(pair);
1878 	uint64_t *valary;
1879 	unsigned int vallen;
1880 	const char *domain;
1881 	char *dash;
1882 	zfs_userquota_prop_t type;
1883 	uint64_t rid;
1884 	uint64_t quota;
1885 	zfsvfs_t *zfsvfs;
1886 	int err;
1887 
1888 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1889 		nvlist_t *attrs;
1890 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1891 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1892 		    &pair) != 0)
1893 			return (EINVAL);
1894 	}
1895 
1896 	/*
1897 	 * A correctly constructed propname is encoded as
1898 	 * userquota@<rid>-<domain>.
1899 	 */
1900 	if ((dash = strchr(propname, '-')) == NULL ||
1901 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
1902 	    vallen != 3)
1903 		return (EINVAL);
1904 
1905 	domain = dash + 1;
1906 	type = valary[0];
1907 	rid = valary[1];
1908 	quota = valary[2];
1909 
1910 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
1911 	if (err == 0) {
1912 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
1913 		zfsvfs_rele(zfsvfs, FTAG);
1914 	}
1915 
1916 	return (err);
1917 }
1918 
1919 /*
1920  * If the named property is one that has a special function to set its value,
1921  * return 0 on success and a positive error code on failure; otherwise if it is
1922  * not one of the special properties handled by this function, return -1.
1923  *
1924  * XXX: It would be better for callers of the property interface if we handled
1925  * these special cases in dsl_prop.c (in the dsl layer).
1926  */
1927 static int
1928 zfs_prop_set_special(const char *dsname, zprop_source_t source,
1929     nvpair_t *pair)
1930 {
1931 	const char *propname = nvpair_name(pair);
1932 	zfs_prop_t prop = zfs_name_to_prop(propname);
1933 	uint64_t intval;
1934 	int err;
1935 
1936 	if (prop == ZPROP_INVAL) {
1937 		if (zfs_prop_userquota(propname))
1938 			return (zfs_prop_set_userquota(dsname, pair));
1939 		return (-1);
1940 	}
1941 
1942 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1943 		nvlist_t *attrs;
1944 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1945 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1946 		    &pair) == 0);
1947 	}
1948 
1949 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
1950 		return (-1);
1951 
1952 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
1953 
1954 	switch (prop) {
1955 	case ZFS_PROP_QUOTA:
1956 		err = dsl_dir_set_quota(dsname, source, intval);
1957 		break;
1958 	case ZFS_PROP_REFQUOTA:
1959 		err = dsl_dataset_set_quota(dsname, source, intval);
1960 		break;
1961 	case ZFS_PROP_RESERVATION:
1962 		err = dsl_dir_set_reservation(dsname, source, intval);
1963 		break;
1964 	case ZFS_PROP_REFRESERVATION:
1965 		err = dsl_dataset_set_reservation(dsname, source, intval);
1966 		break;
1967 	case ZFS_PROP_VOLSIZE:
1968 		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
1969 		    intval);
1970 		break;
1971 	case ZFS_PROP_VERSION:
1972 	{
1973 		zfsvfs_t *zfsvfs;
1974 
1975 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
1976 			break;
1977 
1978 		err = zfs_set_version(zfsvfs, intval);
1979 		zfsvfs_rele(zfsvfs, FTAG);
1980 
1981 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
1982 			zfs_cmd_t *zc;
1983 
1984 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
1985 			(void) strcpy(zc->zc_name, dsname);
1986 			(void) zfs_ioc_userspace_upgrade(zc);
1987 			kmem_free(zc, sizeof (zfs_cmd_t));
1988 		}
1989 		break;
1990 	}
1991 
1992 	default:
1993 		err = -1;
1994 	}
1995 
1996 	return (err);
1997 }
1998 
1999 /*
2000  * This function is best effort. If it fails to set any of the given properties,
2001  * it continues to set as many as it can and returns the first error
2002  * encountered. If the caller provides a non-NULL errlist, it also gives the
2003  * complete list of names of all the properties it failed to set along with the
2004  * corresponding error numbers. The caller is responsible for freeing the
2005  * returned errlist.
2006  *
2007  * If every property is set successfully, zero is returned and the list pointed
2008  * at by errlist is NULL.
2009  */
2010 int
2011 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2012     nvlist_t **errlist)
2013 {
2014 	nvpair_t *pair;
2015 	nvpair_t *propval;
2016 	int rv = 0;
2017 	uint64_t intval;
2018 	char *strval;
2019 	nvlist_t *genericnvl;
2020 	nvlist_t *errors;
2021 	nvlist_t *retrynvl;
2022 
2023 	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2024 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2025 	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2026 
2027 retry:
2028 	pair = NULL;
2029 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2030 		const char *propname = nvpair_name(pair);
2031 		zfs_prop_t prop = zfs_name_to_prop(propname);
2032 		int err = 0;
2033 
2034 		/* decode the property value */
2035 		propval = pair;
2036 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2037 			nvlist_t *attrs;
2038 			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2039 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2040 			    &propval) != 0)
2041 				err = EINVAL;
2042 		}
2043 
2044 		/* Validate value type */
2045 		if (err == 0 && prop == ZPROP_INVAL) {
2046 			if (zfs_prop_user(propname)) {
2047 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2048 					err = EINVAL;
2049 			} else if (zfs_prop_userquota(propname)) {
2050 				if (nvpair_type(propval) !=
2051 				    DATA_TYPE_UINT64_ARRAY)
2052 					err = EINVAL;
2053 			}
2054 		} else if (err == 0) {
2055 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2056 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2057 					err = EINVAL;
2058 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2059 				const char *unused;
2060 
2061 				VERIFY(nvpair_value_uint64(propval,
2062 				    &intval) == 0);
2063 
2064 				switch (zfs_prop_get_type(prop)) {
2065 				case PROP_TYPE_NUMBER:
2066 					break;
2067 				case PROP_TYPE_STRING:
2068 					err = EINVAL;
2069 					break;
2070 				case PROP_TYPE_INDEX:
2071 					if (zfs_prop_index_to_string(prop,
2072 					    intval, &unused) != 0)
2073 						err = EINVAL;
2074 					break;
2075 				default:
2076 					cmn_err(CE_PANIC,
2077 					    "unknown property type");
2078 				}
2079 			} else {
2080 				err = EINVAL;
2081 			}
2082 		}
2083 
2084 		/* Validate permissions */
2085 		if (err == 0)
2086 			err = zfs_check_settable(dsname, pair, CRED());
2087 
2088 		if (err == 0) {
2089 			err = zfs_prop_set_special(dsname, source, pair);
2090 			if (err == -1) {
2091 				/*
2092 				 * For better performance we build up a list of
2093 				 * properties to set in a single transaction.
2094 				 */
2095 				err = nvlist_add_nvpair(genericnvl, pair);
2096 			} else if (err != 0 && nvl != retrynvl) {
2097 				/*
2098 				 * This may be a spurious error caused by
2099 				 * receiving quota and reservation out of order.
2100 				 * Try again in a second pass.
2101 				 */
2102 				err = nvlist_add_nvpair(retrynvl, pair);
2103 			}
2104 		}
2105 
2106 		if (err != 0)
2107 			VERIFY(nvlist_add_int32(errors, propname, err) == 0);
2108 	}
2109 
2110 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2111 		nvl = retrynvl;
2112 		goto retry;
2113 	}
2114 
2115 	if (!nvlist_empty(genericnvl) &&
2116 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2117 		/*
2118 		 * If this fails, we still want to set as many properties as we
2119 		 * can, so try setting them individually.
2120 		 */
2121 		pair = NULL;
2122 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2123 			const char *propname = nvpair_name(pair);
2124 			int err = 0;
2125 
2126 			propval = pair;
2127 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2128 				nvlist_t *attrs;
2129 				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2130 				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2131 				    &propval) == 0);
2132 			}
2133 
2134 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2135 				VERIFY(nvpair_value_string(propval,
2136 				    &strval) == 0);
2137 				err = dsl_prop_set(dsname, propname, source, 1,
2138 				    strlen(strval) + 1, strval);
2139 			} else {
2140 				VERIFY(nvpair_value_uint64(propval,
2141 				    &intval) == 0);
2142 				err = dsl_prop_set(dsname, propname, source, 8,
2143 				    1, &intval);
2144 			}
2145 
2146 			if (err != 0) {
2147 				VERIFY(nvlist_add_int32(errors, propname,
2148 				    err) == 0);
2149 			}
2150 		}
2151 	}
2152 	nvlist_free(genericnvl);
2153 	nvlist_free(retrynvl);
2154 
2155 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
2156 		nvlist_free(errors);
2157 		errors = NULL;
2158 	} else {
2159 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
2160 	}
2161 
2162 	if (errlist == NULL)
2163 		nvlist_free(errors);
2164 	else
2165 		*errlist = errors;
2166 
2167 	return (rv);
2168 }
2169 
2170 /*
2171  * Check that all the properties are valid user properties.
2172  */
2173 static int
2174 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2175 {
2176 	nvpair_t *pair = NULL;
2177 	int error = 0;
2178 
2179 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2180 		const char *propname = nvpair_name(pair);
2181 		char *valstr;
2182 
2183 		if (!zfs_prop_user(propname) ||
2184 		    nvpair_type(pair) != DATA_TYPE_STRING)
2185 			return (EINVAL);
2186 
2187 		if (error = zfs_secpolicy_write_perms(fsname,
2188 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2189 			return (error);
2190 
2191 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2192 			return (ENAMETOOLONG);
2193 
2194 		VERIFY(nvpair_value_string(pair, &valstr) == 0);
2195 		if (strlen(valstr) >= ZAP_MAXVALUELEN)
2196 			return (E2BIG);
2197 	}
2198 	return (0);
2199 }
2200 
2201 static void
2202 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2203 {
2204 	nvpair_t *pair;
2205 
2206 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2207 
2208 	pair = NULL;
2209 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2210 		if (nvlist_exists(skipped, nvpair_name(pair)))
2211 			continue;
2212 
2213 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2214 	}
2215 }
2216 
2217 static int
2218 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2219     nvlist_t *skipped)
2220 {
2221 	int err = 0;
2222 	nvlist_t *cleared_props = NULL;
2223 	props_skip(props, skipped, &cleared_props);
2224 	if (!nvlist_empty(cleared_props)) {
2225 		/*
2226 		 * Acts on local properties until the dataset has received
2227 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2228 		 */
2229 		zprop_source_t flags = (ZPROP_SRC_NONE |
2230 		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2231 		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2232 	}
2233 	nvlist_free(cleared_props);
2234 	return (err);
2235 }
2236 
2237 /*
2238  * inputs:
2239  * zc_name		name of filesystem
2240  * zc_value		name of property to set
2241  * zc_nvlist_src{_size}	nvlist of properties to apply
2242  * zc_cookie		received properties flag
2243  *
2244  * outputs:
2245  * zc_nvlist_dst{_size} error for each unapplied received property
2246  */
2247 static int
2248 zfs_ioc_set_prop(zfs_cmd_t *zc)
2249 {
2250 	nvlist_t *nvl;
2251 	boolean_t received = zc->zc_cookie;
2252 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2253 	    ZPROP_SRC_LOCAL);
2254 	nvlist_t *errors = NULL;
2255 	int error;
2256 
2257 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2258 	    zc->zc_iflags, &nvl)) != 0)
2259 		return (error);
2260 
2261 	if (received) {
2262 		nvlist_t *origprops;
2263 		objset_t *os;
2264 
2265 		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2266 			if (dsl_prop_get_received(os, &origprops) == 0) {
2267 				(void) clear_received_props(os,
2268 				    zc->zc_name, origprops, nvl);
2269 				nvlist_free(origprops);
2270 			}
2271 
2272 			dsl_prop_set_hasrecvd(os);
2273 			dmu_objset_rele(os, FTAG);
2274 		}
2275 	}
2276 
2277 	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2278 
2279 	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2280 		(void) put_nvlist(zc, errors);
2281 	}
2282 
2283 	nvlist_free(errors);
2284 	nvlist_free(nvl);
2285 	return (error);
2286 }
2287 
2288 /*
2289  * inputs:
2290  * zc_name		name of filesystem
2291  * zc_value		name of property to inherit
2292  * zc_cookie		revert to received value if TRUE
2293  *
2294  * outputs:		none
2295  */
2296 static int
2297 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2298 {
2299 	const char *propname = zc->zc_value;
2300 	zfs_prop_t prop = zfs_name_to_prop(propname);
2301 	boolean_t received = zc->zc_cookie;
2302 	zprop_source_t source = (received
2303 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2304 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2305 
2306 	if (received) {
2307 		nvlist_t *dummy;
2308 		nvpair_t *pair;
2309 		zprop_type_t type;
2310 		int err;
2311 
2312 		/*
2313 		 * zfs_prop_set_special() expects properties in the form of an
2314 		 * nvpair with type info.
2315 		 */
2316 		if (prop == ZPROP_INVAL) {
2317 			if (!zfs_prop_user(propname))
2318 				return (EINVAL);
2319 
2320 			type = PROP_TYPE_STRING;
2321 		} else if (prop == ZFS_PROP_VOLSIZE ||
2322 		    prop == ZFS_PROP_VERSION) {
2323 			return (EINVAL);
2324 		} else {
2325 			type = zfs_prop_get_type(prop);
2326 		}
2327 
2328 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2329 
2330 		switch (type) {
2331 		case PROP_TYPE_STRING:
2332 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2333 			break;
2334 		case PROP_TYPE_NUMBER:
2335 		case PROP_TYPE_INDEX:
2336 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2337 			break;
2338 		default:
2339 			nvlist_free(dummy);
2340 			return (EINVAL);
2341 		}
2342 
2343 		pair = nvlist_next_nvpair(dummy, NULL);
2344 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2345 		nvlist_free(dummy);
2346 		if (err != -1)
2347 			return (err); /* special property already handled */
2348 	} else {
2349 		/*
2350 		 * Only check this in the non-received case. We want to allow
2351 		 * 'inherit -S' to revert non-inheritable properties like quota
2352 		 * and reservation to the received or default values even though
2353 		 * they are not considered inheritable.
2354 		 */
2355 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2356 			return (EINVAL);
2357 	}
2358 
2359 	/* the property name has been validated by zfs_secpolicy_inherit() */
2360 	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2361 }
2362 
2363 static int
2364 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2365 {
2366 	nvlist_t *props;
2367 	spa_t *spa;
2368 	int error;
2369 	nvpair_t *pair;
2370 
2371 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2372 	    zc->zc_iflags, &props))
2373 		return (error);
2374 
2375 	/*
2376 	 * If the only property is the configfile, then just do a spa_lookup()
2377 	 * to handle the faulted case.
2378 	 */
2379 	pair = nvlist_next_nvpair(props, NULL);
2380 	if (pair != NULL && strcmp(nvpair_name(pair),
2381 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2382 	    nvlist_next_nvpair(props, pair) == NULL) {
2383 		mutex_enter(&spa_namespace_lock);
2384 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2385 			spa_configfile_set(spa, props, B_FALSE);
2386 			spa_config_sync(spa, B_FALSE, B_TRUE);
2387 		}
2388 		mutex_exit(&spa_namespace_lock);
2389 		if (spa != NULL) {
2390 			nvlist_free(props);
2391 			return (0);
2392 		}
2393 	}
2394 
2395 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2396 		nvlist_free(props);
2397 		return (error);
2398 	}
2399 
2400 	error = spa_prop_set(spa, props);
2401 
2402 	nvlist_free(props);
2403 	spa_close(spa, FTAG);
2404 
2405 	return (error);
2406 }
2407 
2408 static int
2409 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2410 {
2411 	spa_t *spa;
2412 	int error;
2413 	nvlist_t *nvp = NULL;
2414 
2415 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2416 		/*
2417 		 * If the pool is faulted, there may be properties we can still
2418 		 * get (such as altroot and cachefile), so attempt to get them
2419 		 * anyway.
2420 		 */
2421 		mutex_enter(&spa_namespace_lock);
2422 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2423 			error = spa_prop_get(spa, &nvp);
2424 		mutex_exit(&spa_namespace_lock);
2425 	} else {
2426 		error = spa_prop_get(spa, &nvp);
2427 		spa_close(spa, FTAG);
2428 	}
2429 
2430 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2431 		error = put_nvlist(zc, nvp);
2432 	else
2433 		error = EFAULT;
2434 
2435 	nvlist_free(nvp);
2436 	return (error);
2437 }
2438 
2439 /*
2440  * inputs:
2441  * zc_name		name of filesystem
2442  * zc_nvlist_src{_size}	nvlist of delegated permissions
2443  * zc_perm_action	allow/unallow flag
2444  *
2445  * outputs:		none
2446  */
2447 static int
2448 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2449 {
2450 	int error;
2451 	nvlist_t *fsaclnv = NULL;
2452 
2453 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2454 	    zc->zc_iflags, &fsaclnv)) != 0)
2455 		return (error);
2456 
2457 	/*
2458 	 * Verify nvlist is constructed correctly
2459 	 */
2460 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2461 		nvlist_free(fsaclnv);
2462 		return (EINVAL);
2463 	}
2464 
2465 	/*
2466 	 * If we don't have PRIV_SYS_MOUNT, then validate
2467 	 * that user is allowed to hand out each permission in
2468 	 * the nvlist(s)
2469 	 */
2470 
2471 	error = secpolicy_zfs(CRED());
2472 	if (error) {
2473 		if (zc->zc_perm_action == B_FALSE) {
2474 			error = dsl_deleg_can_allow(zc->zc_name,
2475 			    fsaclnv, CRED());
2476 		} else {
2477 			error = dsl_deleg_can_unallow(zc->zc_name,
2478 			    fsaclnv, CRED());
2479 		}
2480 	}
2481 
2482 	if (error == 0)
2483 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2484 
2485 	nvlist_free(fsaclnv);
2486 	return (error);
2487 }
2488 
2489 /*
2490  * inputs:
2491  * zc_name		name of filesystem
2492  *
2493  * outputs:
2494  * zc_nvlist_src{_size}	nvlist of delegated permissions
2495  */
2496 static int
2497 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2498 {
2499 	nvlist_t *nvp;
2500 	int error;
2501 
2502 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2503 		error = put_nvlist(zc, nvp);
2504 		nvlist_free(nvp);
2505 	}
2506 
2507 	return (error);
2508 }
2509 
2510 /*
2511  * Search the vfs list for a specified resource.  Returns a pointer to it
2512  * or NULL if no suitable entry is found. The caller of this routine
2513  * is responsible for releasing the returned vfs pointer.
2514  */
2515 static vfs_t *
2516 zfs_get_vfs(const char *resource)
2517 {
2518 	struct vfs *vfsp;
2519 	struct vfs *vfs_found = NULL;
2520 
2521 	vfs_list_read_lock();
2522 	vfsp = rootvfs;
2523 	do {
2524 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2525 			VFS_HOLD(vfsp);
2526 			vfs_found = vfsp;
2527 			break;
2528 		}
2529 		vfsp = vfsp->vfs_next;
2530 	} while (vfsp != rootvfs);
2531 	vfs_list_unlock();
2532 	return (vfs_found);
2533 }
2534 
2535 /* ARGSUSED */
2536 static void
2537 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2538 {
2539 	zfs_creat_t *zct = arg;
2540 
2541 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2542 }
2543 
2544 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2545 
2546 /*
2547  * inputs:
2548  * createprops		list of properties requested by creator
2549  * default_zplver	zpl version to use if unspecified in createprops
2550  * fuids_ok		fuids allowed in this version of the spa?
2551  * os			parent objset pointer (NULL if root fs)
2552  *
2553  * outputs:
2554  * zplprops	values for the zplprops we attach to the master node object
2555  * is_ci	true if requested file system will be purely case-insensitive
2556  *
2557  * Determine the settings for utf8only, normalization and
2558  * casesensitivity.  Specific values may have been requested by the
2559  * creator and/or we can inherit values from the parent dataset.  If
2560  * the file system is of too early a vintage, a creator can not
2561  * request settings for these properties, even if the requested
2562  * setting is the default value.  We don't actually want to create dsl
2563  * properties for these, so remove them from the source nvlist after
2564  * processing.
2565  */
2566 static int
2567 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2568     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2569     nvlist_t *zplprops, boolean_t *is_ci)
2570 {
2571 	uint64_t sense = ZFS_PROP_UNDEFINED;
2572 	uint64_t norm = ZFS_PROP_UNDEFINED;
2573 	uint64_t u8 = ZFS_PROP_UNDEFINED;
2574 
2575 	ASSERT(zplprops != NULL);
2576 
2577 	/*
2578 	 * Pull out creator prop choices, if any.
2579 	 */
2580 	if (createprops) {
2581 		(void) nvlist_lookup_uint64(createprops,
2582 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2583 		(void) nvlist_lookup_uint64(createprops,
2584 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2585 		(void) nvlist_remove_all(createprops,
2586 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2587 		(void) nvlist_lookup_uint64(createprops,
2588 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2589 		(void) nvlist_remove_all(createprops,
2590 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2591 		(void) nvlist_lookup_uint64(createprops,
2592 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2593 		(void) nvlist_remove_all(createprops,
2594 		    zfs_prop_to_name(ZFS_PROP_CASE));
2595 	}
2596 
2597 	/*
2598 	 * If the zpl version requested is whacky or the file system
2599 	 * or pool is version is too "young" to support normalization
2600 	 * and the creator tried to set a value for one of the props,
2601 	 * error out.
2602 	 */
2603 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2604 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2605 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2606 	    (zplver < ZPL_VERSION_NORMALIZATION &&
2607 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2608 	    sense != ZFS_PROP_UNDEFINED)))
2609 		return (ENOTSUP);
2610 
2611 	/*
2612 	 * Put the version in the zplprops
2613 	 */
2614 	VERIFY(nvlist_add_uint64(zplprops,
2615 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2616 
2617 	if (norm == ZFS_PROP_UNDEFINED)
2618 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2619 	VERIFY(nvlist_add_uint64(zplprops,
2620 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2621 
2622 	/*
2623 	 * If we're normalizing, names must always be valid UTF-8 strings.
2624 	 */
2625 	if (norm)
2626 		u8 = 1;
2627 	if (u8 == ZFS_PROP_UNDEFINED)
2628 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2629 	VERIFY(nvlist_add_uint64(zplprops,
2630 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2631 
2632 	if (sense == ZFS_PROP_UNDEFINED)
2633 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2634 	VERIFY(nvlist_add_uint64(zplprops,
2635 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2636 
2637 	if (is_ci)
2638 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
2639 
2640 	return (0);
2641 }
2642 
2643 static int
2644 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2645     nvlist_t *zplprops, boolean_t *is_ci)
2646 {
2647 	boolean_t fuids_ok, sa_ok;
2648 	uint64_t zplver = ZPL_VERSION;
2649 	objset_t *os = NULL;
2650 	char parentname[MAXNAMELEN];
2651 	char *cp;
2652 	spa_t *spa;
2653 	uint64_t spa_vers;
2654 	int error;
2655 
2656 	(void) strlcpy(parentname, dataset, sizeof (parentname));
2657 	cp = strrchr(parentname, '/');
2658 	ASSERT(cp != NULL);
2659 	cp[0] = '\0';
2660 
2661 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2662 		return (error);
2663 
2664 	spa_vers = spa_version(spa);
2665 	spa_close(spa, FTAG);
2666 
2667 	zplver = zfs_zpl_version_map(spa_vers);
2668 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2669 	sa_ok = (zplver >= ZPL_VERSION_SA);
2670 
2671 	/*
2672 	 * Open parent object set so we can inherit zplprop values.
2673 	 */
2674 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2675 		return (error);
2676 
2677 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2678 	    zplprops, is_ci);
2679 	dmu_objset_rele(os, FTAG);
2680 	return (error);
2681 }
2682 
2683 static int
2684 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2685     nvlist_t *zplprops, boolean_t *is_ci)
2686 {
2687 	boolean_t fuids_ok;
2688 	boolean_t sa_ok;
2689 	uint64_t zplver = ZPL_VERSION;
2690 	int error;
2691 
2692 	zplver = zfs_zpl_version_map(spa_vers);
2693 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2694 	sa_ok = (zplver >= ZPL_VERSION_SA);
2695 
2696 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2697 	    createprops, zplprops, is_ci);
2698 	return (error);
2699 }
2700 
2701 /*
2702  * inputs:
2703  * zc_objset_type	type of objset to create (fs vs zvol)
2704  * zc_name		name of new objset
2705  * zc_value		name of snapshot to clone from (may be empty)
2706  * zc_nvlist_src{_size}	nvlist of properties to apply
2707  *
2708  * outputs: none
2709  */
2710 static int
2711 zfs_ioc_create(zfs_cmd_t *zc)
2712 {
2713 	objset_t *clone;
2714 	int error = 0;
2715 	zfs_creat_t zct;
2716 	nvlist_t *nvprops = NULL;
2717 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2718 	dmu_objset_type_t type = zc->zc_objset_type;
2719 
2720 	switch (type) {
2721 
2722 	case DMU_OST_ZFS:
2723 		cbfunc = zfs_create_cb;
2724 		break;
2725 
2726 	case DMU_OST_ZVOL:
2727 		cbfunc = zvol_create_cb;
2728 		break;
2729 
2730 	default:
2731 		cbfunc = NULL;
2732 		break;
2733 	}
2734 	if (strchr(zc->zc_name, '@') ||
2735 	    strchr(zc->zc_name, '%'))
2736 		return (EINVAL);
2737 
2738 	if (zc->zc_nvlist_src != NULL &&
2739 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2740 	    zc->zc_iflags, &nvprops)) != 0)
2741 		return (error);
2742 
2743 	zct.zct_zplprops = NULL;
2744 	zct.zct_props = nvprops;
2745 
2746 	if (zc->zc_value[0] != '\0') {
2747 		/*
2748 		 * We're creating a clone of an existing snapshot.
2749 		 */
2750 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2751 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2752 			nvlist_free(nvprops);
2753 			return (EINVAL);
2754 		}
2755 
2756 		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2757 		if (error) {
2758 			nvlist_free(nvprops);
2759 			return (error);
2760 		}
2761 
2762 		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2763 		dmu_objset_rele(clone, FTAG);
2764 		if (error) {
2765 			nvlist_free(nvprops);
2766 			return (error);
2767 		}
2768 	} else {
2769 		boolean_t is_insensitive = B_FALSE;
2770 
2771 		if (cbfunc == NULL) {
2772 			nvlist_free(nvprops);
2773 			return (EINVAL);
2774 		}
2775 
2776 		if (type == DMU_OST_ZVOL) {
2777 			uint64_t volsize, volblocksize;
2778 
2779 			if (nvprops == NULL ||
2780 			    nvlist_lookup_uint64(nvprops,
2781 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2782 			    &volsize) != 0) {
2783 				nvlist_free(nvprops);
2784 				return (EINVAL);
2785 			}
2786 
2787 			if ((error = nvlist_lookup_uint64(nvprops,
2788 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2789 			    &volblocksize)) != 0 && error != ENOENT) {
2790 				nvlist_free(nvprops);
2791 				return (EINVAL);
2792 			}
2793 
2794 			if (error != 0)
2795 				volblocksize = zfs_prop_default_numeric(
2796 				    ZFS_PROP_VOLBLOCKSIZE);
2797 
2798 			if ((error = zvol_check_volblocksize(
2799 			    volblocksize)) != 0 ||
2800 			    (error = zvol_check_volsize(volsize,
2801 			    volblocksize)) != 0) {
2802 				nvlist_free(nvprops);
2803 				return (error);
2804 			}
2805 		} else if (type == DMU_OST_ZFS) {
2806 			int error;
2807 
2808 			/*
2809 			 * We have to have normalization and
2810 			 * case-folding flags correct when we do the
2811 			 * file system creation, so go figure them out
2812 			 * now.
2813 			 */
2814 			VERIFY(nvlist_alloc(&zct.zct_zplprops,
2815 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2816 			error = zfs_fill_zplprops(zc->zc_name, nvprops,
2817 			    zct.zct_zplprops, &is_insensitive);
2818 			if (error != 0) {
2819 				nvlist_free(nvprops);
2820 				nvlist_free(zct.zct_zplprops);
2821 				return (error);
2822 			}
2823 		}
2824 		error = dmu_objset_create(zc->zc_name, type,
2825 		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
2826 		nvlist_free(zct.zct_zplprops);
2827 	}
2828 
2829 	/*
2830 	 * It would be nice to do this atomically.
2831 	 */
2832 	if (error == 0) {
2833 		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
2834 		    nvprops, NULL);
2835 		if (error != 0)
2836 			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
2837 	}
2838 	nvlist_free(nvprops);
2839 	return (error);
2840 }
2841 
2842 /*
2843  * inputs:
2844  * zc_name	name of filesystem
2845  * zc_value	short name of snapshot
2846  * zc_cookie	recursive flag
2847  * zc_nvlist_src[_size] property list
2848  *
2849  * outputs:
2850  * zc_value	short snapname (i.e. part after the '@')
2851  */
2852 static int
2853 zfs_ioc_snapshot(zfs_cmd_t *zc)
2854 {
2855 	nvlist_t *nvprops = NULL;
2856 	int error;
2857 	boolean_t recursive = zc->zc_cookie;
2858 
2859 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2860 		return (EINVAL);
2861 
2862 	if (zc->zc_nvlist_src != NULL &&
2863 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2864 	    zc->zc_iflags, &nvprops)) != 0)
2865 		return (error);
2866 
2867 	error = zfs_check_userprops(zc->zc_name, nvprops);
2868 	if (error)
2869 		goto out;
2870 
2871 	if (!nvlist_empty(nvprops) &&
2872 	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
2873 		error = ENOTSUP;
2874 		goto out;
2875 	}
2876 
2877 	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value,
2878 	    nvprops, recursive);
2879 
2880 out:
2881 	nvlist_free(nvprops);
2882 	return (error);
2883 }
2884 
2885 int
2886 zfs_unmount_snap(const char *name, void *arg)
2887 {
2888 	vfs_t *vfsp = NULL;
2889 
2890 	if (arg) {
2891 		char *snapname = arg;
2892 		char *fullname = kmem_asprintf("%s@%s", name, snapname);
2893 		vfsp = zfs_get_vfs(fullname);
2894 		strfree(fullname);
2895 	} else if (strchr(name, '@')) {
2896 		vfsp = zfs_get_vfs(name);
2897 	}
2898 
2899 	if (vfsp) {
2900 		/*
2901 		 * Always force the unmount for snapshots.
2902 		 */
2903 		int flag = MS_FORCE;
2904 		int err;
2905 
2906 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
2907 			VFS_RELE(vfsp);
2908 			return (err);
2909 		}
2910 		VFS_RELE(vfsp);
2911 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
2912 			return (err);
2913 	}
2914 	return (0);
2915 }
2916 
2917 /*
2918  * inputs:
2919  * zc_name		name of filesystem
2920  * zc_value		short name of snapshot
2921  * zc_defer_destroy	mark for deferred destroy
2922  *
2923  * outputs:	none
2924  */
2925 static int
2926 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
2927 {
2928 	int err;
2929 
2930 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2931 		return (EINVAL);
2932 	err = dmu_objset_find(zc->zc_name,
2933 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
2934 	if (err)
2935 		return (err);
2936 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
2937 	    zc->zc_defer_destroy));
2938 }
2939 
2940 /*
2941  * inputs:
2942  * zc_name		name of dataset to destroy
2943  * zc_objset_type	type of objset
2944  * zc_defer_destroy	mark for deferred destroy
2945  *
2946  * outputs:		none
2947  */
2948 static int
2949 zfs_ioc_destroy(zfs_cmd_t *zc)
2950 {
2951 	int err;
2952 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
2953 		err = zfs_unmount_snap(zc->zc_name, NULL);
2954 		if (err)
2955 			return (err);
2956 	}
2957 
2958 	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
2959 	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
2960 		(void) zvol_remove_minor(zc->zc_name);
2961 	return (err);
2962 }
2963 
/*
 * Roll a dataset back to its most recent snapshot by cloning that snapshot
 * into a temporary "<name>/%rollback" dataset, swapping the clone with the
 * dataset, and destroying the clone.
 *
 * inputs:
 * zc_name	name of dataset to rollback (to most recent snapshot)
 *
 * outputs:	none
 *
 * Returns 0 on success; EINVAL if zc_name is a snapshot or has no previous
 * snapshot; EBUSY if the dataset cannot be owned for the swap; otherwise
 * the error from the failing step.
 */
static int
zfs_ioc_rollback(zfs_cmd_t *zc)
{
	dsl_dataset_t *ds, *clone;
	int error;
	zfsvfs_t *zfsvfs;
	char *clone_name;

	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
	if (error)
		return (error);

	/* must not be a snapshot */
	if (dsl_dataset_is_snapshot(ds)) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/* must have a most recent snapshot */
	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/*
	 * Create clone of most recent snapshot.
	 */
	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
	if (error)
		goto out;

	/*
	 * NOTE(review): if this fails, the clone created just above is not
	 * destroyed on this path -- confirm whether it is cleaned up elsewhere.
	 */
	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
	if (error)
		goto out;

	/*
	 * Do clone swap.
	 */
	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
		/* getzfsvfs() found a zfsvfs: suspend it around the swap */
		error = zfs_suspend_fs(zfsvfs);
		if (error == 0) {
			int resume_err;

			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
				error = dsl_dataset_clone_swap(clone, ds,
				    B_TRUE);
				dsl_dataset_disown(ds, FTAG);
				/* ds must not be released again at 'out' */
				ds = NULL;
			} else {
				error = EBUSY;
			}
			/* always resume; a swap error takes precedence */
			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
			error = error ? error : resume_err;
		}
		VFS_RELE(zfsvfs->z_vfs);
	} else {
		/* no zfsvfs for this dataset: swap without suspending */
		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
			dsl_dataset_disown(ds, FTAG);
			/* ds must not be released again at 'out' */
			ds = NULL;
		} else {
			error = EBUSY;
		}
	}

	/*
	 * Destroy clone (which also closes it).
	 */
	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);

out:
	strfree(clone_name);
	/* ds is still non-NULL here only if it was never disowned above */
	if (ds)
		dsl_dataset_rele(ds, FTAG);
	return (error);
}
3047 
3048 /*
3049  * inputs:
3050  * zc_name	old name of dataset
3051  * zc_value	new name of dataset
3052  * zc_cookie	recursive flag (only valid for snapshots)
3053  *
3054  * outputs:	none
3055  */
3056 static int
3057 zfs_ioc_rename(zfs_cmd_t *zc)
3058 {
3059 	boolean_t recursive = zc->zc_cookie & 1;
3060 
3061 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3062 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3063 	    strchr(zc->zc_value, '%'))
3064 		return (EINVAL);
3065 
3066 	/*
3067 	 * Unmount snapshot unless we're doing a recursive rename,
3068 	 * in which case the dataset code figures out which snapshots
3069 	 * to unmount.
3070 	 */
3071 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3072 	    zc->zc_objset_type == DMU_OST_ZFS) {
3073 		int err = zfs_unmount_snap(zc->zc_name, NULL);
3074 		if (err)
3075 			return (err);
3076 	}
3077 	if (zc->zc_objset_type == DMU_OST_ZVOL)
3078 		(void) zvol_remove_minor(zc->zc_name);
3079 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3080 }
3081 
3082 static int
3083 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3084 {
3085 	const char *propname = nvpair_name(pair);
3086 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3087 	zfs_prop_t prop = zfs_name_to_prop(propname);
3088 	uint64_t intval;
3089 	int err;
3090 
3091 	if (prop == ZPROP_INVAL) {
3092 		if (zfs_prop_user(propname)) {
3093 			if (err = zfs_secpolicy_write_perms(dsname,
3094 			    ZFS_DELEG_PERM_USERPROP, cr))
3095 				return (err);
3096 			return (0);
3097 		}
3098 
3099 		if (!issnap && zfs_prop_userquota(propname)) {
3100 			const char *perm = NULL;
3101 			const char *uq_prefix =
3102 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3103 			const char *gq_prefix =
3104 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3105 
3106 			if (strncmp(propname, uq_prefix,
3107 			    strlen(uq_prefix)) == 0) {
3108 				perm = ZFS_DELEG_PERM_USERQUOTA;
3109 			} else if (strncmp(propname, gq_prefix,
3110 			    strlen(gq_prefix)) == 0) {
3111 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3112 			} else {
3113 				/* USERUSED and GROUPUSED are read-only */
3114 				return (EINVAL);
3115 			}
3116 
3117 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3118 				return (err);
3119 			return (0);
3120 		}
3121 
3122 		return (EINVAL);
3123 	}
3124 
3125 	if (issnap)
3126 		return (EINVAL);
3127 
3128 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3129 		/*
3130 		 * dsl_prop_get_all_impl() returns properties in this
3131 		 * format.
3132 		 */
3133 		nvlist_t *attrs;
3134 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3135 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3136 		    &pair) == 0);
3137 	}
3138 
3139 	/*
3140 	 * Check that this value is valid for this pool version
3141 	 */
3142 	switch (prop) {
3143 	case ZFS_PROP_COMPRESSION:
3144 		/*
3145 		 * If the user specified gzip compression, make sure
3146 		 * the SPA supports it. We ignore any errors here since
3147 		 * we'll catch them later.
3148 		 */
3149 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3150 		    nvpair_value_uint64(pair, &intval) == 0) {
3151 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3152 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3153 			    zfs_earlier_version(dsname,
3154 			    SPA_VERSION_GZIP_COMPRESSION)) {
3155 				return (ENOTSUP);
3156 			}
3157 
3158 			if (intval == ZIO_COMPRESS_ZLE &&
3159 			    zfs_earlier_version(dsname,
3160 			    SPA_VERSION_ZLE_COMPRESSION))
3161 				return (ENOTSUP);
3162 
3163 			/*
3164 			 * If this is a bootable dataset then
3165 			 * verify that the compression algorithm
3166 			 * is supported for booting. We must return
3167 			 * something other than ENOTSUP since it
3168 			 * implies a downrev pool version.
3169 			 */
3170 			if (zfs_is_bootfs(dsname) &&
3171 			    !BOOTFS_COMPRESS_VALID(intval)) {
3172 				return (ERANGE);
3173 			}
3174 		}
3175 		break;
3176 
3177 	case ZFS_PROP_COPIES:
3178 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3179 			return (ENOTSUP);
3180 		break;
3181 
3182 	case ZFS_PROP_DEDUP:
3183 		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3184 			return (ENOTSUP);
3185 		break;
3186 
3187 	case ZFS_PROP_SHARESMB:
3188 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3189 			return (ENOTSUP);
3190 		break;
3191 
3192 	case ZFS_PROP_ACLINHERIT:
3193 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3194 		    nvpair_value_uint64(pair, &intval) == 0) {
3195 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3196 			    zfs_earlier_version(dsname,
3197 			    SPA_VERSION_PASSTHROUGH_X))
3198 				return (ENOTSUP);
3199 		}
3200 		break;
3201 	}
3202 
3203 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3204 }
3205 
3206 /*
3207  * Removes properties from the given props list that fail permission checks
3208  * needed to clear them and to restore them in case of a receive error. For each
3209  * property, make sure we have both set and inherit permissions.
3210  *
3211  * Returns the first error encountered if any permission checks fail. If the
3212  * caller provides a non-NULL errlist, it also gives the complete list of names
3213  * of all the properties that failed a permission check along with the
3214  * corresponding error numbers. The caller is responsible for freeing the
3215  * returned errlist.
3216  *
3217  * If every property checks out successfully, zero is returned and the list
3218  * pointed at by errlist is NULL.
3219  */
3220 static int
3221 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3222 {
3223 	zfs_cmd_t *zc;
3224 	nvpair_t *pair, *next_pair;
3225 	nvlist_t *errors;
3226 	int err, rv = 0;
3227 
3228 	if (props == NULL)
3229 		return (0);
3230 
3231 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3232 
3233 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3234 	(void) strcpy(zc->zc_name, dataset);
3235 	pair = nvlist_next_nvpair(props, NULL);
3236 	while (pair != NULL) {
3237 		next_pair = nvlist_next_nvpair(props, pair);
3238 
3239 		(void) strcpy(zc->zc_value, nvpair_name(pair));
3240 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3241 		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3242 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3243 			VERIFY(nvlist_add_int32(errors,
3244 			    zc->zc_value, err) == 0);
3245 		}
3246 		pair = next_pair;
3247 	}
3248 	kmem_free(zc, sizeof (zfs_cmd_t));
3249 
3250 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3251 		nvlist_free(errors);
3252 		errors = NULL;
3253 	} else {
3254 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
3255 	}
3256 
3257 	if (errlist == NULL)
3258 		nvlist_free(errors);
3259 	else
3260 		*errlist = errors;
3261 
3262 	return (rv);
3263 }
3264 
3265 static boolean_t
3266 propval_equals(nvpair_t *p1, nvpair_t *p2)
3267 {
3268 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3269 		/* dsl_prop_get_all_impl() format */
3270 		nvlist_t *attrs;
3271 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3272 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3273 		    &p1) == 0);
3274 	}
3275 
3276 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3277 		nvlist_t *attrs;
3278 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3279 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3280 		    &p2) == 0);
3281 	}
3282 
3283 	if (nvpair_type(p1) != nvpair_type(p2))
3284 		return (B_FALSE);
3285 
3286 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
3287 		char *valstr1, *valstr2;
3288 
3289 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3290 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3291 		return (strcmp(valstr1, valstr2) == 0);
3292 	} else {
3293 		uint64_t intval1, intval2;
3294 
3295 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3296 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3297 		return (intval1 == intval2);
3298 	}
3299 }
3300 
3301 /*
3302  * Remove properties from props if they are not going to change (as determined
3303  * by comparison with origprops). Remove them from origprops as well, since we
3304  * do not need to clear or restore properties that won't change.
3305  */
3306 static void
3307 props_reduce(nvlist_t *props, nvlist_t *origprops)
3308 {
3309 	nvpair_t *pair, *next_pair;
3310 
3311 	if (origprops == NULL)
3312 		return; /* all props need to be received */
3313 
3314 	pair = nvlist_next_nvpair(props, NULL);
3315 	while (pair != NULL) {
3316 		const char *propname = nvpair_name(pair);
3317 		nvpair_t *match;
3318 
3319 		next_pair = nvlist_next_nvpair(props, pair);
3320 
3321 		if ((nvlist_lookup_nvpair(origprops, propname,
3322 		    &match) != 0) || !propval_equals(pair, match))
3323 			goto next; /* need to set received value */
3324 
3325 		/* don't clear the existing received value */
3326 		(void) nvlist_remove_nvpair(origprops, match);
3327 		/* don't bother receiving the property */
3328 		(void) nvlist_remove_nvpair(props, pair);
3329 next:
3330 		pair = next_pair;
3331 	}
3332 }
3333 
3334 #ifdef	DEBUG
3335 static boolean_t zfs_ioc_recv_inject_err;
3336 #endif
3337 
3338 /*
3339  * inputs:
3340  * zc_name		name of containing filesystem
3341  * zc_nvlist_src{_size}	nvlist of properties to apply
3342  * zc_value		name of snapshot to create
3343  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
3344  * zc_cookie		file descriptor to recv from
3345  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
3346  * zc_guid		force flag
3347  * zc_cleanup_fd	cleanup-on-exit file descriptor
3348  * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
3349  *
3350  * outputs:
3351  * zc_cookie		number of bytes read
3352  * zc_nvlist_dst{_size} error for each unapplied received property
3353  * zc_obj		zprop_errflags_t
3354  * zc_action_handle	handle for this guid/ds mapping
3355  */
3356 static int
3357 zfs_ioc_recv(zfs_cmd_t *zc)
3358 {
3359 	file_t *fp;
3360 	objset_t *os;
3361 	dmu_recv_cookie_t drc;
3362 	boolean_t force = (boolean_t)zc->zc_guid;
3363 	int fd;
3364 	int error = 0;
3365 	int props_error = 0;
3366 	nvlist_t *errors;
3367 	offset_t off;
3368 	nvlist_t *props = NULL; /* sent properties */
3369 	nvlist_t *origprops = NULL; /* existing properties */
3370 	objset_t *origin = NULL;
3371 	char *tosnap;
3372 	char tofs[ZFS_MAXNAMELEN];
3373 	boolean_t first_recvd_props = B_FALSE;
3374 
3375 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3376 	    strchr(zc->zc_value, '@') == NULL ||
3377 	    strchr(zc->zc_value, '%'))
3378 		return (EINVAL);
3379 
3380 	(void) strcpy(tofs, zc->zc_value);
3381 	tosnap = strchr(tofs, '@');
3382 	*tosnap++ = '\0';
3383 
3384 	if (zc->zc_nvlist_src != NULL &&
3385 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3386 	    zc->zc_iflags, &props)) != 0)
3387 		return (error);
3388 
3389 	fd = zc->zc_cookie;
3390 	fp = getf(fd);
3391 	if (fp == NULL) {
3392 		nvlist_free(props);
3393 		return (EBADF);
3394 	}
3395 
3396 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3397 
3398 	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
3399 		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
3400 		    !dsl_prop_get_hasrecvd(os)) {
3401 			first_recvd_props = B_TRUE;
3402 		}
3403 
3404 		/*
3405 		 * If new received properties are supplied, they are to
3406 		 * completely replace the existing received properties, so stash
3407 		 * away the existing ones.
3408 		 */
3409 		if (dsl_prop_get_received(os, &origprops) == 0) {
3410 			nvlist_t *errlist = NULL;
3411 			/*
3412 			 * Don't bother writing a property if its value won't
3413 			 * change (and avoid the unnecessary security checks).
3414 			 *
3415 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
3416 			 * special case where we blow away all local properties
3417 			 * regardless.
3418 			 */
3419 			if (!first_recvd_props)
3420 				props_reduce(props, origprops);
3421 			if (zfs_check_clearable(tofs, origprops,
3422 			    &errlist) != 0)
3423 				(void) nvlist_merge(errors, errlist, 0);
3424 			nvlist_free(errlist);
3425 		}
3426 
3427 		dmu_objset_rele(os, FTAG);
3428 	}
3429 
3430 	if (zc->zc_string[0]) {
3431 		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
3432 		if (error)
3433 			goto out;
3434 	}
3435 
3436 	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
3437 	    &zc->zc_begin_record, force, origin, &drc);
3438 	if (origin)
3439 		dmu_objset_rele(origin, FTAG);
3440 	if (error)
3441 		goto out;
3442 
3443 	/*
3444 	 * Set properties before we receive the stream so that they are applied
3445 	 * to the new data. Note that we must call dmu_recv_stream() if
3446 	 * dmu_recv_begin() succeeds.
3447 	 */
3448 	if (props) {
3449 		nvlist_t *errlist;
3450 
3451 		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
3452 			if (drc.drc_newfs) {
3453 				if (spa_version(os->os_spa) >=
3454 				    SPA_VERSION_RECVD_PROPS)
3455 					first_recvd_props = B_TRUE;
3456 			} else if (origprops != NULL) {
3457 				if (clear_received_props(os, tofs, origprops,
3458 				    first_recvd_props ? NULL : props) != 0)
3459 					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3460 			} else {
3461 				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3462 			}
3463 			dsl_prop_set_hasrecvd(os);
3464 		} else if (!drc.drc_newfs) {
3465 			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3466 		}
3467 
3468 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
3469 		    props, &errlist);
3470 		(void) nvlist_merge(errors, errlist, 0);
3471 		nvlist_free(errlist);
3472 	}
3473 
3474 	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
3475 		/*
3476 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
3477 		 * size or supplied an invalid address.
3478 		 */
3479 		props_error = EINVAL;
3480 	}
3481 
3482 	off = fp->f_offset;
3483 	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
3484 	    &zc->zc_action_handle);
3485 
3486 	if (error == 0) {
3487 		zfsvfs_t *zfsvfs = NULL;
3488 
3489 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
3490 			/* online recv */
3491 			int end_err;
3492 
3493 			error = zfs_suspend_fs(zfsvfs);
3494 			/*
3495 			 * If the suspend fails, then the recv_end will
3496 			 * likely also fail, and clean up after itself.
3497 			 */
3498 			end_err = dmu_recv_end(&drc);
3499 			if (error == 0)
3500 				error = zfs_resume_fs(zfsvfs, tofs);
3501 			error = error ? error : end_err;
3502 			VFS_RELE(zfsvfs->z_vfs);
3503 		} else {
3504 			error = dmu_recv_end(&drc);
3505 		}
3506 	}
3507 
3508 	zc->zc_cookie = off - fp->f_offset;
3509 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3510 		fp->f_offset = off;
3511 
3512 #ifdef	DEBUG
3513 	if (zfs_ioc_recv_inject_err) {
3514 		zfs_ioc_recv_inject_err = B_FALSE;
3515 		error = 1;
3516 	}
3517 #endif
3518 	/*
3519 	 * On error, restore the original props.
3520 	 */
3521 	if (error && props) {
3522 		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
3523 			if (clear_received_props(os, tofs, props, NULL) != 0) {
3524 				/*
3525 				 * We failed to clear the received properties.
3526 				 * Since we may have left a $recvd value on the
3527 				 * system, we can't clear the $hasrecvd flag.
3528 				 */
3529 				zc->zc_obj |= ZPROP_ERR_NORESTORE;
3530 			} else if (first_recvd_props) {
3531 				dsl_prop_unset_hasrecvd(os);
3532 			}
3533 			dmu_objset_rele(os, FTAG);
3534 		} else if (!drc.drc_newfs) {
3535 			/* We failed to clear the received properties. */
3536 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3537 		}
3538 
3539 		if (origprops == NULL && !drc.drc_newfs) {
3540 			/* We failed to stash the original properties. */
3541 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3542 		}
3543 
3544 		/*
3545 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
3546 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
3548 		 * first new-style receive.
3549 		 */
3550 		if (origprops != NULL &&
3551 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
3552 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
3553 		    origprops, NULL) != 0) {
3554 			/*
3555 			 * We stashed the original properties but failed to
3556 			 * restore them.
3557 			 */
3558 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3559 		}
3560 	}
3561 out:
3562 	nvlist_free(props);
3563 	nvlist_free(origprops);
3564 	nvlist_free(errors);
3565 	releasef(fd);
3566 
3567 	if (error == 0)
3568 		error = props_error;
3569 
3570 	return (error);
3571 }
3572 
3573 /*
3574  * inputs:
3575  * zc_name	name of snapshot to send
3576  * zc_value	short name of incremental fromsnap (may be empty)
3577  * zc_cookie	file descriptor to send stream to
3578  * zc_obj	fromorigin flag (mutually exclusive with zc_value)
3579  *
3580  * outputs: none
3581  */
3582 static int
3583 zfs_ioc_send(zfs_cmd_t *zc)
3584 {
3585 	objset_t *fromsnap = NULL;
3586 	objset_t *tosnap;
3587 	file_t *fp;
3588 	int error;
3589 	offset_t off;
3590 
3591 	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
3592 	if (error)
3593 		return (error);
3594 
3595 	if (zc->zc_value[0] != '\0') {
3596 		char *buf;
3597 		char *cp;
3598 
3599 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3600 		(void) strncpy(buf, zc->zc_name, MAXPATHLEN);
3601 		cp = strchr(buf, '@');
3602 		if (cp)
3603 			*(cp+1) = 0;
3604 		(void) strncat(buf, zc->zc_value, MAXPATHLEN);
3605 		error = dmu_objset_hold(buf, FTAG, &fromsnap);
3606 		kmem_free(buf, MAXPATHLEN);
3607 		if (error) {
3608 			dmu_objset_rele(tosnap, FTAG);
3609 			return (error);
3610 		}
3611 	}
3612 
3613 	fp = getf(zc->zc_cookie);
3614 	if (fp == NULL) {
3615 		dmu_objset_rele(tosnap, FTAG);
3616 		if (fromsnap)
3617 			dmu_objset_rele(fromsnap, FTAG);
3618 		return (EBADF);
3619 	}
3620 
3621 	off = fp->f_offset;
3622 	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
3623 
3624 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3625 		fp->f_offset = off;
3626 	releasef(zc->zc_cookie);
3627 	if (fromsnap)
3628 		dmu_objset_rele(fromsnap, FTAG);
3629 	dmu_objset_rele(tosnap, FTAG);
3630 	return (error);
3631 }
3632 
3633 static int
3634 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3635 {
3636 	int id, error;
3637 
3638 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3639 	    &zc->zc_inject_record);
3640 
3641 	if (error == 0)
3642 		zc->zc_guid = (uint64_t)id;
3643 
3644 	return (error);
3645 }
3646 
3647 static int
3648 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3649 {
3650 	return (zio_clear_fault((int)zc->zc_guid));
3651 }
3652 
3653 static int
3654 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3655 {
3656 	int id = (int)zc->zc_guid;
3657 	int error;
3658 
3659 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3660 	    &zc->zc_inject_record);
3661 
3662 	zc->zc_guid = id;
3663 
3664 	return (error);
3665 }
3666 
3667 static int
3668 zfs_ioc_error_log(zfs_cmd_t *zc)
3669 {
3670 	spa_t *spa;
3671 	int error;
3672 	size_t count = (size_t)zc->zc_nvlist_dst_size;
3673 
3674 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
3675 		return (error);
3676 
3677 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
3678 	    &count);
3679 	if (error == 0)
3680 		zc->zc_nvlist_dst_size = count;
3681 	else
3682 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
3683 
3684 	spa_close(spa, FTAG);
3685 
3686 	return (error);
3687 }
3688 
3689 static int
3690 zfs_ioc_clear(zfs_cmd_t *zc)
3691 {
3692 	spa_t *spa;
3693 	vdev_t *vd;
3694 	int error;
3695 
3696 	/*
3697 	 * On zpool clear we also fix up missing slogs
3698 	 */
3699 	mutex_enter(&spa_namespace_lock);
3700 	spa = spa_lookup(zc->zc_name);
3701 	if (spa == NULL) {
3702 		mutex_exit(&spa_namespace_lock);
3703 		return (EIO);
3704 	}
3705 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
3706 		/* we need to let spa_open/spa_load clear the chains */
3707 		spa_set_log_state(spa, SPA_LOG_CLEAR);
3708 	}
3709 	spa->spa_last_open_failed = 0;
3710 	mutex_exit(&spa_namespace_lock);
3711 
3712 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
3713 		error = spa_open(zc->zc_name, &spa, FTAG);
3714 	} else {
3715 		nvlist_t *policy;
3716 		nvlist_t *config = NULL;
3717 
3718 		if (zc->zc_nvlist_src == NULL)
3719 			return (EINVAL);
3720 
3721 		if ((error = get_nvlist(zc->zc_nvlist_src,
3722 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
3723 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
3724 			    policy, &config);
3725 			if (config != NULL) {
3726 				(void) put_nvlist(zc, config);
3727 				nvlist_free(config);
3728 			}
3729 			nvlist_free(policy);
3730 		}
3731 	}
3732 
3733 	if (error)
3734 		return (error);
3735 
3736 	spa_vdev_state_enter(spa, SCL_NONE);
3737 
3738 	if (zc->zc_guid == 0) {
3739 		vd = NULL;
3740 	} else {
3741 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
3742 		if (vd == NULL) {
3743 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
3744 			spa_close(spa, FTAG);
3745 			return (ENODEV);
3746 		}
3747 	}
3748 
3749 	vdev_clear(spa, vd);
3750 
3751 	(void) spa_vdev_state_exit(spa, NULL, 0);
3752 
3753 	/*
3754 	 * Resume any suspended I/Os.
3755 	 */
3756 	if (zio_resume(spa) != 0)
3757 		error = EIO;
3758 
3759 	spa_close(spa, FTAG);
3760 
3761 	return (error);
3762 }
3763 
3764 /*
3765  * inputs:
3766  * zc_name	name of filesystem
3767  * zc_value	name of origin snapshot
3768  *
3769  * outputs:
3770  * zc_string	name of conflicting snapshot, if there is one
3771  */
3772 static int
3773 zfs_ioc_promote(zfs_cmd_t *zc)
3774 {
3775 	char *cp;
3776 
3777 	/*
3778 	 * We don't need to unmount *all* the origin fs's snapshots, but
3779 	 * it's easier.
3780 	 */
3781 	cp = strchr(zc->zc_value, '@');
3782 	if (cp)
3783 		*cp = '\0';
3784 	(void) dmu_objset_find(zc->zc_value,
3785 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
3786 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
3787 }
3788 
3789 /*
3790  * Retrieve a single {user|group}{used|quota}@... property.
3791  *
3792  * inputs:
3793  * zc_name	name of filesystem
3794  * zc_objset_type zfs_userquota_prop_t
3795  * zc_value	domain name (eg. "S-1-234-567-89")
3796  * zc_guid	RID/UID/GID
3797  *
3798  * outputs:
3799  * zc_cookie	property value
3800  */
3801 static int
3802 zfs_ioc_userspace_one(zfs_cmd_t *zc)
3803 {
3804 	zfsvfs_t *zfsvfs;
3805 	int error;
3806 
3807 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
3808 		return (EINVAL);
3809 
3810 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
3811 	if (error)
3812 		return (error);
3813 
3814 	error = zfs_userspace_one(zfsvfs,
3815 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
3816 	zfsvfs_rele(zfsvfs, FTAG);
3817 
3818 	return (error);
3819 }
3820 
3821 /*
3822  * inputs:
3823  * zc_name		name of filesystem
3824  * zc_cookie		zap cursor
3825  * zc_objset_type	zfs_userquota_prop_t
3826  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
3827  *
3828  * outputs:
3829  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
3830  * zc_cookie	zap cursor
3831  */
3832 static int
3833 zfs_ioc_userspace_many(zfs_cmd_t *zc)
3834 {
3835 	zfsvfs_t *zfsvfs;
3836 	int bufsize = zc->zc_nvlist_dst_size;
3837 
3838 	if (bufsize <= 0)
3839 		return (ENOMEM);
3840 
3841 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
3842 	if (error)
3843 		return (error);
3844 
3845 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
3846 
3847 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
3848 	    buf, &zc->zc_nvlist_dst_size);
3849 
3850 	if (error == 0) {
3851 		error = xcopyout(buf,
3852 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
3853 		    zc->zc_nvlist_dst_size);
3854 	}
3855 	kmem_free(buf, bufsize);
3856 	zfsvfs_rele(zfsvfs, FTAG);
3857 
3858 	return (error);
3859 }
3860 
3861 /*
3862  * inputs:
3863  * zc_name		name of filesystem
3864  *
3865  * outputs:
3866  * none
3867  */
3868 static int
3869 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
3870 {
3871 	objset_t *os;
3872 	int error = 0;
3873 	zfsvfs_t *zfsvfs;
3874 
3875 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3876 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
3877 			/*
3878 			 * If userused is not enabled, it may be because the
3879 			 * objset needs to be closed & reopened (to grow the
3880 			 * objset_phys_t).  Suspend/resume the fs will do that.
3881 			 */
3882 			error = zfs_suspend_fs(zfsvfs);
3883 			if (error == 0)
3884 				error = zfs_resume_fs(zfsvfs, zc->zc_name);
3885 		}
3886 		if (error == 0)
3887 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
3888 		VFS_RELE(zfsvfs->z_vfs);
3889 	} else {
3890 		/* XXX kind of reading contents without owning */
3891 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
3892 		if (error)
3893 			return (error);
3894 
3895 		error = dmu_objset_userspace_upgrade(os);
3896 		dmu_objset_rele(os, FTAG);
3897 	}
3898 
3899 	return (error);
3900 }
3901 
3902 /*
 * We don't want to have a hard dependency
 * on some special symbols in sharefs,
 * nfs, and smbsrv.  Resolve them if needed when
 * the first file system is shared.
 * None of sharefs, nfs, or smbsrv is an unloadable module.
3908  */
/* Bound to "nfs_export" in fs/nfs by zfs_ioc_share(). */
int (*znfsexport_fs)(void *arg);
/* Bound to "sharefs_impl" in fs/sharefs by zfs_init_sharefs(). */
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
/* Bound to "smb_server_share" in drv/smbsrv by zfs_ioc_share(). */
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the NFS (resp. SMB) entry points above have been bound. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Handles returned by ddi_modopen() for the three modules. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and function pointers above are being set up. */
kmutex_t zfs_share_lock;
3920 
3921 static int
3922 zfs_init_sharefs()
3923 {
3924 	int error;
3925 
3926 	ASSERT(MUTEX_HELD(&zfs_share_lock));
3927 	/* Both NFS and SMB shares also require sharetab support. */
3928 	if (sharefs_mod == NULL && ((sharefs_mod =
3929 	    ddi_modopen("fs/sharefs",
3930 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
3931 		return (ENOSYS);
3932 	}
3933 	if (zshare_fs == NULL && ((zshare_fs =
3934 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
3935 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
3936 		return (ENOSYS);
3937 	}
3938 	return (0);
3939 }
3940 
3941 static int
3942 zfs_ioc_share(zfs_cmd_t *zc)
3943 {
3944 	int error;
3945 	int opcode;
3946 
3947 	switch (zc->zc_share.z_sharetype) {
3948 	case ZFS_SHARE_NFS:
3949 	case ZFS_UNSHARE_NFS:
3950 		if (zfs_nfsshare_inited == 0) {
3951 			mutex_enter(&zfs_share_lock);
3952 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
3953 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
3954 				mutex_exit(&zfs_share_lock);
3955 				return (ENOSYS);
3956 			}
3957 			if (znfsexport_fs == NULL &&
3958 			    ((znfsexport_fs = (int (*)(void *))
3959 			    ddi_modsym(nfs_mod,
3960 			    "nfs_export", &error)) == NULL)) {
3961 				mutex_exit(&zfs_share_lock);
3962 				return (ENOSYS);
3963 			}
3964 			error = zfs_init_sharefs();
3965 			if (error) {
3966 				mutex_exit(&zfs_share_lock);
3967 				return (ENOSYS);
3968 			}
3969 			zfs_nfsshare_inited = 1;
3970 			mutex_exit(&zfs_share_lock);
3971 		}
3972 		break;
3973 	case ZFS_SHARE_SMB:
3974 	case ZFS_UNSHARE_SMB:
3975 		if (zfs_smbshare_inited == 0) {
3976 			mutex_enter(&zfs_share_lock);
3977 			if (smbsrv_mod == NULL && ((smbsrv_mod =
3978 			    ddi_modopen("drv/smbsrv",
3979 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
3980 				mutex_exit(&zfs_share_lock);
3981 				return (ENOSYS);
3982 			}
3983 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
3984 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
3985 			    "smb_server_share", &error)) == NULL)) {
3986 				mutex_exit(&zfs_share_lock);
3987 				return (ENOSYS);
3988 			}
3989 			error = zfs_init_sharefs();
3990 			if (error) {
3991 				mutex_exit(&zfs_share_lock);
3992 				return (ENOSYS);
3993 			}
3994 			zfs_smbshare_inited = 1;
3995 			mutex_exit(&zfs_share_lock);
3996 		}
3997 		break;
3998 	default:
3999 		return (EINVAL);
4000 	}
4001 
4002 	switch (zc->zc_share.z_sharetype) {
4003 	case ZFS_SHARE_NFS:
4004 	case ZFS_UNSHARE_NFS:
4005 		if (error =
4006 		    znfsexport_fs((void *)
4007 		    (uintptr_t)zc->zc_share.z_exportdata))
4008 			return (error);
4009 		break;
4010 	case ZFS_SHARE_SMB:
4011 	case ZFS_UNSHARE_SMB:
4012 		if (error = zsmbexport_fs((void *)
4013 		    (uintptr_t)zc->zc_share.z_exportdata,
4014 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4015 		    B_TRUE: B_FALSE)) {
4016 			return (error);
4017 		}
4018 		break;
4019 	}
4020 
4021 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4022 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4023 	    SHAREFS_ADD : SHAREFS_REMOVE;
4024 
4025 	/*
4026 	 * Add or remove share from sharetab
4027 	 */
4028 	error = zshare_fs(opcode,
4029 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
4030 	    zc->zc_share.z_sharemax);
4031 
4032 	return (error);
4033 
4034 }
4035 
/*
 * Single-entry ACL installed on files that zfs_ioc_smb_acl() creates in the
 * shares directory (it is passed to VOP_CREATE() through a vsecattr_t).
 * NOTE(review): the initializer appears to be {who, access mask, flags,
 * type}, i.e. all permissions for everyone -- confirm against ace_t.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4039 
4040 /*
4041  * Remove all ACL files in shares dir
4042  */
4043 static int
4044 zfs_smb_acl_purge(znode_t *dzp)
4045 {
4046 	zap_cursor_t	zc;
4047 	zap_attribute_t	zap;
4048 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4049 	int error;
4050 
4051 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4052 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4053 	    zap_cursor_advance(&zc)) {
4054 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4055 		    NULL, 0)) != 0)
4056 			break;
4057 	}
4058 	zap_cursor_fini(&zc);
4059 	return (error);
4060 }
4061 
4062 static int
4063 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4064 {
4065 	vnode_t *vp;
4066 	znode_t *dzp;
4067 	vnode_t *resourcevp = NULL;
4068 	znode_t *sharedir;
4069 	zfsvfs_t *zfsvfs;
4070 	nvlist_t *nvlist;
4071 	char *src, *target;
4072 	vattr_t vattr;
4073 	vsecattr_t vsec;
4074 	int error = 0;
4075 
4076 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4077 	    NO_FOLLOW, NULL, &vp)) != 0)
4078 		return (error);
4079 
4080 	/* Now make sure mntpnt and dataset are ZFS */
4081 
4082 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4083 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4084 	    zc->zc_name) != 0)) {
4085 		VN_RELE(vp);
4086 		return (EINVAL);
4087 	}
4088 
4089 	dzp = VTOZ(vp);
4090 	zfsvfs = dzp->z_zfsvfs;
4091 	ZFS_ENTER(zfsvfs);
4092 
4093 	/*
4094 	 * Create share dir if its missing.
4095 	 */
4096 	mutex_enter(&zfsvfs->z_lock);
4097 	if (zfsvfs->z_shares_dir == 0) {
4098 		dmu_tx_t *tx;
4099 
4100 		tx = dmu_tx_create(zfsvfs->z_os);
4101 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4102 		    ZFS_SHARES_DIR);
4103 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4104 		error = dmu_tx_assign(tx, TXG_WAIT);
4105 		if (error) {
4106 			dmu_tx_abort(tx);
4107 		} else {
4108 			error = zfs_create_share_dir(zfsvfs, tx);
4109 			dmu_tx_commit(tx);
4110 		}
4111 		if (error) {
4112 			mutex_exit(&zfsvfs->z_lock);
4113 			VN_RELE(vp);
4114 			ZFS_EXIT(zfsvfs);
4115 			return (error);
4116 		}
4117 	}
4118 	mutex_exit(&zfsvfs->z_lock);
4119 
4120 	ASSERT(zfsvfs->z_shares_dir);
4121 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4122 		VN_RELE(vp);
4123 		ZFS_EXIT(zfsvfs);
4124 		return (error);
4125 	}
4126 
4127 	switch (zc->zc_cookie) {
4128 	case ZFS_SMB_ACL_ADD:
4129 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4130 		vattr.va_type = VREG;
4131 		vattr.va_mode = S_IFREG|0777;
4132 		vattr.va_uid = 0;
4133 		vattr.va_gid = 0;
4134 
4135 		vsec.vsa_mask = VSA_ACE;
4136 		vsec.vsa_aclentp = &full_access;
4137 		vsec.vsa_aclentsz = sizeof (full_access);
4138 		vsec.vsa_aclcnt = 1;
4139 
4140 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4141 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4142 		if (resourcevp)
4143 			VN_RELE(resourcevp);
4144 		break;
4145 
4146 	case ZFS_SMB_ACL_REMOVE:
4147 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4148 		    NULL, 0);
4149 		break;
4150 
4151 	case ZFS_SMB_ACL_RENAME:
4152 		if ((error = get_nvlist(zc->zc_nvlist_src,
4153 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4154 			VN_RELE(vp);
4155 			ZFS_EXIT(zfsvfs);
4156 			return (error);
4157 		}
4158 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4159 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4160 		    &target)) {
4161 			VN_RELE(vp);
4162 			VN_RELE(ZTOV(sharedir));
4163 			ZFS_EXIT(zfsvfs);
4164 			nvlist_free(nvlist);
4165 			return (error);
4166 		}
4167 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4168 		    kcred, NULL, 0);
4169 		nvlist_free(nvlist);
4170 		break;
4171 
4172 	case ZFS_SMB_ACL_PURGE:
4173 		error = zfs_smb_acl_purge(sharedir);
4174 		break;
4175 
4176 	default:
4177 		error = EINVAL;
4178 		break;
4179 	}
4180 
4181 	VN_RELE(vp);
4182 	VN_RELE(ZTOV(sharedir));
4183 
4184 	ZFS_EXIT(zfsvfs);
4185 
4186 	return (error);
4187 }
4188 
4189 /*
4190  * inputs:
4191  * zc_name		name of filesystem
4192  * zc_value		short name of snap
4193  * zc_string		user-supplied tag for this hold
4194  * zc_cookie		recursive flag
4195  * zc_temphold		set if hold is temporary
4196  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4197  *
4198  * outputs:		none
4199  */
4200 static int
4201 zfs_ioc_hold(zfs_cmd_t *zc)
4202 {
4203 	boolean_t recursive = zc->zc_cookie;
4204 
4205 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4206 		return (EINVAL);
4207 
4208 	return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4209 	    zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd));
4210 }
4211 
4212 /*
4213  * inputs:
4214  * zc_name	name of dataset from which we're releasing a user hold
4215  * zc_value	short name of snap
4216  * zc_string	user-supplied tag for this hold
4217  * zc_cookie	recursive flag
4218  *
4219  * outputs:	none
4220  */
4221 static int
4222 zfs_ioc_release(zfs_cmd_t *zc)
4223 {
4224 	boolean_t recursive = zc->zc_cookie;
4225 
4226 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4227 		return (EINVAL);
4228 
4229 	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4230 	    zc->zc_string, recursive));
4231 }
4232 
4233 /*
4234  * inputs:
4235  * zc_name		name of filesystem
4236  *
4237  * outputs:
 * zc_nvlist_dst{_size}	nvlist of snapshot holds
4239  */
4240 static int
4241 zfs_ioc_get_holds(zfs_cmd_t *zc)
4242 {
4243 	nvlist_t *nvp;
4244 	int error;
4245 
4246 	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4247 		error = put_nvlist(zc, nvp);
4248 		nvlist_free(nvp);
4249 	}
4250 
4251 	return (error);
4252 }
4253 
4254 /*
4255  * pool create, destroy, and export don't log the history as part of
4256  * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
4257  * do the logging of those commands.
4258  */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
	/*
	 * zfsdev_ioctl() indexes this table with (cmd - ZFS_IOC), so the
	 * position of each entry must match the numbering of the ioctl
	 * commands; new entries belong at the end.
	 *
	 * Each entry lists the handler, the security policy function, the
	 * kind of name expected in zc_name (POOL_NAME, DATASET_NAME or
	 * NO_NAME) and two boolean flags.
	 * NOTE(review): the flags appear to be, in order, "log to pool
	 * history" (zvec_his_log) and "check pool status first"
	 * (zvec_pool_check) -- confirm against the zfs_ioc_vec_t definition.
	 */
	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_FALSE },
	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    B_FALSE },
	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
	    B_TRUE},
	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, B_FALSE },
	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, B_FALSE },
	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
	    B_TRUE, B_TRUE },
	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one,
	    DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many,
	    DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    DATASET_NAME, B_FALSE, B_TRUE },
	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE }
};
4361 
4362 int
4363 pool_status_check(const char *name, zfs_ioc_namecheck_t type)
4364 {
4365 	spa_t *spa;
4366 	int error;
4367 
4368 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
4369 
4370 	error = spa_open(name, &spa, FTAG);
4371 	if (error == 0) {
4372 		if (spa_suspended(spa))
4373 			error = EAGAIN;
4374 		spa_close(spa, FTAG);
4375 	}
4376 	return (error);
4377 }
4378 
4379 /*
4380  * Find a free minor number.
4381  */
4382 minor_t
4383 zfsdev_minor_alloc(void)
4384 {
4385 	static minor_t last_minor;
4386 	minor_t m;
4387 
4388 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4389 
4390 	for (m = last_minor + 1; m != last_minor; m++) {
4391 		if (m > ZFSDEV_MAX_MINOR)
4392 			m = 1;
4393 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
4394 			last_minor = m;
4395 			return (m);
4396 		}
4397 	}
4398 
4399 	return (0);
4400 }
4401 
4402 static int
4403 zfs_ctldev_init(dev_t *devp)
4404 {
4405 	minor_t minor;
4406 	zfs_soft_state_t *zs;
4407 
4408 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4409 	ASSERT(getminor(*devp) == 0);
4410 
4411 	minor = zfsdev_minor_alloc();
4412 	if (minor == 0)
4413 		return (ENXIO);
4414 
4415 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
4416 		return (EAGAIN);
4417 
4418 	*devp = makedevice(getemajor(*devp), minor);
4419 
4420 	zs = ddi_get_soft_state(zfsdev_state, minor);
4421 	zs->zss_type = ZSST_CTLDEV;
4422 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
4423 
4424 	return (0);
4425 }
4426 
4427 static void
4428 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
4429 {
4430 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4431 
4432 	zfs_onexit_destroy(zo);
4433 	ddi_soft_state_free(zfsdev_state, minor);
4434 }
4435 
4436 void *
4437 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
4438 {
4439 	zfs_soft_state_t *zp;
4440 
4441 	zp = ddi_get_soft_state(zfsdev_state, minor);
4442 	if (zp == NULL || zp->zss_type != which)
4443 		return (NULL);
4444 
4445 	return (zp->zss_data);
4446 }
4447 
4448 static int
4449 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
4450 {
4451 	int error = 0;
4452 
4453 	if (getminor(*devp) != 0)
4454 		return (zvol_open(devp, flag, otyp, cr));
4455 
4456 	/* This is the control device. Allocate a new minor if requested. */
4457 	if (flag & FEXCL) {
4458 		mutex_enter(&zfsdev_state_lock);
4459 		error = zfs_ctldev_init(devp);
4460 		mutex_exit(&zfsdev_state_lock);
4461 	}
4462 
4463 	return (error);
4464 }
4465 
4466 static int
4467 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
4468 {
4469 	zfs_onexit_t *zo;
4470 	minor_t minor = getminor(dev);
4471 
4472 	if (minor == 0)
4473 		return (0);
4474 
4475 	mutex_enter(&zfsdev_state_lock);
4476 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
4477 	if (zo == NULL) {
4478 		mutex_exit(&zfsdev_state_lock);
4479 		return (zvol_close(dev, flag, otyp, cr));
4480 	}
4481 	zfs_ctldev_destroy(zo, minor);
4482 	mutex_exit(&zfsdev_state_lock);
4483 
4484 	return (0);
4485 }
4486 
4487 static int
4488 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
4489 {
4490 	zfs_cmd_t *zc;
4491 	uint_t vec;
4492 	int error, rc;
4493 	minor_t minor = getminor(dev);
4494 
4495 	if (minor != 0 &&
4496 	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
4497 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
4498 
4499 	vec = cmd - ZFS_IOC;
4500 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
4501 
4502 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
4503 		return (EINVAL);
4504 
4505 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
4506 
4507 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
4508 	if (error != 0)
4509 		error = EFAULT;
4510 
4511 	if ((error == 0) && !(flag & FKIOCTL))
4512 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
4513 
4514 	/*
4515 	 * Ensure that all pool/dataset names are valid before we pass down to
4516 	 * the lower layers.
4517 	 */
4518 	if (error == 0) {
4519 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4520 		zc->zc_iflags = flag & FKIOCTL;
4521 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
4522 		case POOL_NAME:
4523 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
4524 				error = EINVAL;
4525 			if (zfs_ioc_vec[vec].zvec_pool_check)
4526 				error = pool_status_check(zc->zc_name,
4527 				    zfs_ioc_vec[vec].zvec_namecheck);
4528 			break;
4529 
4530 		case DATASET_NAME:
4531 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
4532 				error = EINVAL;
4533 			if (zfs_ioc_vec[vec].zvec_pool_check)
4534 				error = pool_status_check(zc->zc_name,
4535 				    zfs_ioc_vec[vec].zvec_namecheck);
4536 			break;
4537 
4538 		case NO_NAME:
4539 			break;
4540 		}
4541 	}
4542 
4543 	if (error == 0)
4544 		error = zfs_ioc_vec[vec].zvec_func(zc);
4545 
4546 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
4547 	if (error == 0) {
4548 		if (rc != 0)
4549 			error = EFAULT;
4550 		if (zfs_ioc_vec[vec].zvec_his_log)
4551 			zfs_log_history(zc);
4552 	}
4553 
4554 	kmem_free(zc, sizeof (zfs_cmd_t));
4555 	return (error);
4556 }
4557 
4558 static int
4559 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4560 {
4561 	if (cmd != DDI_ATTACH)
4562 		return (DDI_FAILURE);
4563 
4564 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
4565 	    DDI_PSEUDO, 0) == DDI_FAILURE)
4566 		return (DDI_FAILURE);
4567 
4568 	zfs_dip = dip;
4569 
4570 	ddi_report_dev(dip);
4571 
4572 	return (DDI_SUCCESS);
4573 }
4574 
4575 static int
4576 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4577 {
4578 	if (spa_busy() || zfs_busy() || zvol_busy())
4579 		return (DDI_FAILURE);
4580 
4581 	if (cmd != DDI_DETACH)
4582 		return (DDI_FAILURE);
4583 
4584 	zfs_dip = NULL;
4585 
4586 	ddi_prop_remove_all(dip);
4587 	ddi_remove_minor_node(dip, NULL);
4588 
4589 	return (DDI_SUCCESS);
4590 }
4591 
4592 /*ARGSUSED*/
4593 static int
4594 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
4595 {
4596 	switch (infocmd) {
4597 	case DDI_INFO_DEVT2DEVINFO:
4598 		*result = zfs_dip;
4599 		return (DDI_SUCCESS);
4600 
4601 	case DDI_INFO_DEVT2INSTANCE:
4602 		*result = (void *)0;
4603 		return (DDI_SUCCESS);
4604 	}
4605 
4606 	return (DDI_FAILURE);
4607 }
4608 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * Note that a nonzero minor is not automatically a zvol: zfsdev_close()
 * and zfsdev_ioctl() first look for ZSST_CTLDEV soft state on the minor
 * and only hand the request to the zvol code when there is none.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
4638 
/*
 * Device operations for the zfs driver: autoconfiguration entry points
 * (zfs_info, zfs_attach, zfs_detach) plus a pointer to the character/block
 * entry points in zfs_cb_ops.  No bus operations or power entry point are
 * provided.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
4653 
/*
 * Driver linkage: registers zfs_dev_ops as a device driver under the
 * description "ZFS storage pool".
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
4659 
/*
 * Module linkage handed to mod_install()/mod_remove()/mod_info().  It is a
 * NULL-terminated list carrying two linkage entries: zfs_modlfs (defined
 * elsewhere; presumably the file system linkage) and the driver linkage
 * zfs_modldrv.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
4666 
4667 
/*
 * Thread-specific-data keys.  Both are created with tsd_create() in
 * _init(); rrw_tsd_key is defined in another file.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
4670 
4671 int
4672 _init(void)
4673 {
4674 	int error;
4675 
4676 	spa_init(FREAD | FWRITE);
4677 	zfs_init();
4678 	zvol_init();
4679 
4680 	if ((error = mod_install(&modlinkage)) != 0) {
4681 		zvol_fini();
4682 		zfs_fini();
4683 		spa_fini();
4684 		return (error);
4685 	}
4686 
4687 	tsd_create(&zfs_fsyncer_key, NULL);
4688 	tsd_create(&rrw_tsd_key, NULL);
4689 
4690 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
4691 	ASSERT(error == 0);
4692 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
4693 
4694 	return (0);
4695 }
4696 
4697 int
4698 _fini(void)
4699 {
4700 	int error;
4701 
4702 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
4703 		return (EBUSY);
4704 
4705 	if ((error = mod_remove(&modlinkage)) != 0)
4706 		return (error);
4707 
4708 	zvol_fini();
4709 	zfs_fini();
4710 	spa_fini();
4711 	if (zfs_nfsshare_inited)
4712 		(void) ddi_modclose(nfs_mod);
4713 	if (zfs_smbshare_inited)
4714 		(void) ddi_modclose(smbsrv_mod);
4715 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
4716 		(void) ddi_modclose(sharefs_mod);
4717 
4718 	tsd_destroy(&zfs_fsyncer_key);
4719 	ldi_ident_release(zfs_li);
4720 	zfs_li = NULL;
4721 	mutex_destroy(&zfs_share_lock);
4722 
4723 	return (error);
4724 }
4725 
4726 int
4727 _info(struct modinfo *modinfop)
4728 {
4729 	return (mod_info(&modlinkage, modinfop));
4730 }
4731