xref: /titanic_52/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision e74a1fb927623517f38eb4d4ccab4b4869949ba4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/param.h>
27 #include <sys/errno.h>
28 #include <sys/uio.h>
29 #include <sys/buf.h>
30 #include <sys/modctl.h>
31 #include <sys/open.h>
32 #include <sys/file.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/stat.h>
37 #include <sys/zfs_ioctl.h>
38 #include <sys/zfs_vfsops.h>
39 #include <sys/zfs_znode.h>
40 #include <sys/zap.h>
41 #include <sys/spa.h>
42 #include <sys/spa_impl.h>
43 #include <sys/vdev.h>
44 #include <sys/priv_impl.h>
45 #include <sys/dmu.h>
46 #include <sys/dsl_dir.h>
47 #include <sys/dsl_dataset.h>
48 #include <sys/dsl_prop.h>
49 #include <sys/dsl_deleg.h>
50 #include <sys/dmu_objset.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunldi.h>
54 #include <sys/policy.h>
55 #include <sys/zone.h>
56 #include <sys/nvpair.h>
57 #include <sys/pathname.h>
58 #include <sys/mount.h>
59 #include <sys/sdt.h>
60 #include <sys/fs/zfs.h>
61 #include <sys/zfs_ctldir.h>
62 #include <sys/zfs_dir.h>
63 #include <sys/zfs_onexit.h>
64 #include <sys/zvol.h>
65 #include <sys/dsl_scan.h>
66 #include <sharefs/share.h>
67 #include <sys/dmu_objset.h>
68 
69 #include "zfs_namecheck.h"
70 #include "zfs_prop.h"
71 #include "zfs_deleg.h"
72 #include "zfs_comutil.h"
73 
/* Filesystem module linkage structure; defined outside this file. */
extern struct modlfs zfs_modlfs;

/* ZFS setup and teardown entry points; defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * Driver-wide LDI identity and dev_info pointer.  Neither is assigned in
 * the portion of the file shown here -- presumably they are set up when
 * the driver attaches; confirm against the attach/detach code.
 */
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;
81 
/* Signature of an ioctl handler: takes the command and returns an errno. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/* Signature of a security policy check for an ioctl (0 means allowed). */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);

/*
 * Kind of name an ioctl carries.  The enumerator names suggest this selects
 * how the name is validated (none, pool name, dataset name) -- the code
 * that consumes it is not in this part of the file; confirm there.
 */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;

/*
 * Per-ioctl descriptor: the handler, its security policy function and the
 * name-check kind.  The two boolean flags are consumed outside the portion
 * of the file shown here; judging by their names they control history
 * logging and a pool-state check -- TODO confirm against the dispatcher.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;
	zfs_secpolicy_func_t	*zvec_secpolicy;
	zfs_ioc_namecheck_t	zvec_namecheck;
	boolean_t		zvec_his_log;
	boolean_t		zvec_pool_check;
} zfs_ioc_vec_t;
98 
/*
 * This array is indexed by zfs_userquota_prop_t.
 *
 * It maps each user/group space property to the delegated permission name
 * that the userspace security policies below pass to
 * zfs_secpolicy_write_perms().  Callers bound the index with
 * ZFS_NUM_USERQUOTA_PROPS before using it.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
106 
/*
 * Forward declarations for functions that are referenced before their
 * definitions appear.  zfs_set_prop_nvlist() is not static; it is used by
 * zfs_ioc_pool_create() to apply the root dataset properties.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
115 
116 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
117 void
118 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
119 {
120 	const char *newfile;
121 	char buf[512];
122 	va_list adx;
123 
124 	/*
125 	 * Get rid of annoying "../common/" prefix to filename.
126 	 */
127 	newfile = strrchr(file, '/');
128 	if (newfile != NULL) {
129 		newfile = newfile + 1; /* Get rid of leading / */
130 	} else {
131 		newfile = file;
132 	}
133 
134 	va_start(adx, fmt);
135 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
136 	va_end(adx);
137 
138 	/*
139 	 * To get this data, use the zfs-dprintf probe as so:
140 	 * dtrace -q -n 'zfs-dprintf \
141 	 *	/stringof(arg0) == "dbuf.c"/ \
142 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
143 	 * arg0 = file name
144 	 * arg1 = function name
145 	 * arg2 = line number
146 	 * arg3 = message
147 	 */
148 	DTRACE_PROBE4(zfs__dprintf,
149 	    char *, newfile, char *, func, int, line, char *, buf);
150 }
151 
152 static void
153 history_str_free(char *buf)
154 {
155 	kmem_free(buf, HIS_MAX_RECORD_LEN);
156 }
157 
158 static char *
159 history_str_get(zfs_cmd_t *zc)
160 {
161 	char *buf;
162 
163 	if (zc->zc_history == NULL)
164 		return (NULL);
165 
166 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
167 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
168 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
169 		history_str_free(buf);
170 		return (NULL);
171 	}
172 
173 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
174 
175 	return (buf);
176 }
177 
178 /*
179  * Check to see if the named dataset is currently defined as bootable
180  */
181 static boolean_t
182 zfs_is_bootfs(const char *name)
183 {
184 	objset_t *os;
185 
186 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
187 		boolean_t ret;
188 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
189 		dmu_objset_rele(os, FTAG);
190 		return (ret);
191 	}
192 	return (B_FALSE);
193 }
194 
195 /*
196  * zfs_earlier_version
197  *
198  *	Return non-zero if the spa version is less than requested version.
199  */
200 static int
201 zfs_earlier_version(const char *name, int version)
202 {
203 	spa_t *spa;
204 
205 	if (spa_open(name, &spa, FTAG) == 0) {
206 		if (spa_version(spa) < version) {
207 			spa_close(spa, FTAG);
208 			return (1);
209 		}
210 		spa_close(spa, FTAG);
211 	}
212 	return (0);
213 }
214 
215 /*
216  * zpl_earlier_version
217  *
218  * Return TRUE if the ZPL version is less than requested version.
219  */
220 static boolean_t
221 zpl_earlier_version(const char *name, int version)
222 {
223 	objset_t *os;
224 	boolean_t rc = B_TRUE;
225 
226 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
227 		uint64_t zplversion;
228 
229 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
230 			dmu_objset_rele(os, FTAG);
231 			return (B_TRUE);
232 		}
233 		/* XXX reading from non-owned objset */
234 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
235 			rc = zplversion < version;
236 		dmu_objset_rele(os, FTAG);
237 	}
238 	return (rc);
239 }
240 
241 static void
242 zfs_log_history(zfs_cmd_t *zc)
243 {
244 	spa_t *spa;
245 	char *buf;
246 
247 	if ((buf = history_str_get(zc)) == NULL)
248 		return;
249 
250 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
251 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
252 			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
253 		spa_close(spa, FTAG);
254 	}
255 	history_str_free(buf);
256 }
257 
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * Both arguments are ignored; the result is always 0 (allowed).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
{
	return (0);
}
268 
269 /*
270  * Policy for dataset read operations (list children, get statistics).  Requires
271  * no privileges, but must be visible in the local zone.
272  */
273 /* ARGSUSED */
274 static int
275 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
276 {
277 	if (INGLOBALZONE(curproc) ||
278 	    zone_dataset_visible(zc->zc_name, NULL))
279 		return (0);
280 
281 	return (ENOENT);
282 }
283 
284 static int
285 zfs_dozonecheck(const char *dataset, cred_t *cr)
286 {
287 	uint64_t zoned;
288 	int writable = 1;
289 
290 	/*
291 	 * The dataset must be visible by this zone -- check this first
292 	 * so they don't see EPERM on something they shouldn't know about.
293 	 */
294 	if (!INGLOBALZONE(curproc) &&
295 	    !zone_dataset_visible(dataset, &writable))
296 		return (ENOENT);
297 
298 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
299 		return (ENOENT);
300 
301 	if (INGLOBALZONE(curproc)) {
302 		/*
303 		 * If the fs is zoned, only root can access it from the
304 		 * global zone.
305 		 */
306 		if (secpolicy_zfs(cr) && zoned)
307 			return (EPERM);
308 	} else {
309 		/*
310 		 * If we are in a local zone, the 'zoned' property must be set.
311 		 */
312 		if (!zoned)
313 			return (EPERM);
314 
315 		/* must be writable by this zone */
316 		if (!writable)
317 			return (EPERM);
318 	}
319 	return (0);
320 }
321 
322 int
323 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
324 {
325 	int error;
326 
327 	error = zfs_dozonecheck(name, cr);
328 	if (error == 0) {
329 		error = secpolicy_zfs(cr);
330 		if (error)
331 			error = dsl_deleg_access(name, perm, cr);
332 	}
333 	return (error);
334 }
335 
336 /*
337  * Policy for setting the security label property.
338  *
339  * Returns 0 for success, non-zero for access and other errors.
340  */
341 static int
342 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
343 {
344 	char		ds_hexsl[MAXNAMELEN];
345 	bslabel_t	ds_sl, new_sl;
346 	boolean_t	new_default = FALSE;
347 	uint64_t	zoned;
348 	int		needed_priv = -1;
349 	int		error;
350 
351 	/* First get the existing dataset label. */
352 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
353 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
354 	if (error)
355 		return (EPERM);
356 
357 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
358 		new_default = TRUE;
359 
360 	/* The label must be translatable */
361 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
362 		return (EINVAL);
363 
364 	/*
365 	 * In a non-global zone, disallow attempts to set a label that
366 	 * doesn't match that of the zone; otherwise no other checks
367 	 * are needed.
368 	 */
369 	if (!INGLOBALZONE(curproc)) {
370 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
371 			return (EPERM);
372 		return (0);
373 	}
374 
375 	/*
376 	 * For global-zone datasets (i.e., those whose zoned property is
377 	 * "off", verify that the specified new label is valid for the
378 	 * global zone.
379 	 */
380 	if (dsl_prop_get_integer(name,
381 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
382 		return (EPERM);
383 	if (!zoned) {
384 		if (zfs_check_global_label(name, strval) != 0)
385 			return (EPERM);
386 	}
387 
388 	/*
389 	 * If the existing dataset label is nondefault, check if the
390 	 * dataset is mounted (label cannot be changed while mounted).
391 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
392 	 * mounted (or isn't a dataset, doesn't exist, ...).
393 	 */
394 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
395 		objset_t *os;
396 		static char *setsl_tag = "setsl_tag";
397 
398 		/*
399 		 * Try to own the dataset; abort if there is any error,
400 		 * (e.g., already mounted, in use, or other error).
401 		 */
402 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
403 		    setsl_tag, &os);
404 		if (error)
405 			return (EPERM);
406 
407 		dmu_objset_disown(os, setsl_tag);
408 
409 		if (new_default) {
410 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
411 			goto out_check;
412 		}
413 
414 		if (hexstr_to_label(strval, &new_sl) != 0)
415 			return (EPERM);
416 
417 		if (blstrictdom(&ds_sl, &new_sl))
418 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
419 		else if (blstrictdom(&new_sl, &ds_sl))
420 			needed_priv = PRIV_FILE_UPGRADE_SL;
421 	} else {
422 		/* dataset currently has a default label */
423 		if (!new_default)
424 			needed_priv = PRIV_FILE_UPGRADE_SL;
425 	}
426 
427 out_check:
428 	if (needed_priv != -1)
429 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
430 	return (0);
431 }
432 
433 static int
434 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
435     cred_t *cr)
436 {
437 	char *strval;
438 
439 	/*
440 	 * Check permissions for special properties.
441 	 */
442 	switch (prop) {
443 	case ZFS_PROP_ZONED:
444 		/*
445 		 * Disallow setting of 'zoned' from within a local zone.
446 		 */
447 		if (!INGLOBALZONE(curproc))
448 			return (EPERM);
449 		break;
450 
451 	case ZFS_PROP_QUOTA:
452 		if (!INGLOBALZONE(curproc)) {
453 			uint64_t zoned;
454 			char setpoint[MAXNAMELEN];
455 			/*
456 			 * Unprivileged users are allowed to modify the
457 			 * quota on things *under* (ie. contained by)
458 			 * the thing they own.
459 			 */
460 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
461 			    setpoint))
462 				return (EPERM);
463 			if (!zoned || strlen(dsname) <= strlen(setpoint))
464 				return (EPERM);
465 		}
466 		break;
467 
468 	case ZFS_PROP_MLSLABEL:
469 		if (!is_system_labeled())
470 			return (EPERM);
471 
472 		if (nvpair_value_string(propval, &strval) == 0) {
473 			int err;
474 
475 			err = zfs_set_slabel_policy(dsname, strval, CRED());
476 			if (err != 0)
477 				return (err);
478 		}
479 		break;
480 	}
481 
482 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
483 }
484 
485 int
486 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
487 {
488 	int error;
489 
490 	error = zfs_dozonecheck(zc->zc_name, cr);
491 	if (error)
492 		return (error);
493 
494 	/*
495 	 * permission to set permissions will be evaluated later in
496 	 * dsl_deleg_can_allow()
497 	 */
498 	return (0);
499 }
500 
501 int
502 zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
503 {
504 	return (zfs_secpolicy_write_perms(zc->zc_name,
505 	    ZFS_DELEG_PERM_ROLLBACK, cr));
506 }
507 
508 int
509 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
510 {
511 	return (zfs_secpolicy_write_perms(zc->zc_name,
512 	    ZFS_DELEG_PERM_SEND, cr));
513 }
514 
515 static int
516 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
517 {
518 	vnode_t *vp;
519 	int error;
520 
521 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
522 	    NO_FOLLOW, NULL, &vp)) != 0)
523 		return (error);
524 
525 	/* Now make sure mntpnt and dataset are ZFS */
526 
527 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
528 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
529 	    zc->zc_name) != 0)) {
530 		VN_RELE(vp);
531 		return (EPERM);
532 	}
533 
534 	VN_RELE(vp);
535 	return (dsl_deleg_access(zc->zc_name,
536 	    ZFS_DELEG_PERM_SHARE, cr));
537 }
538 
539 int
540 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
541 {
542 	if (!INGLOBALZONE(curproc))
543 		return (EPERM);
544 
545 	if (secpolicy_nfs(cr) == 0) {
546 		return (0);
547 	} else {
548 		return (zfs_secpolicy_deleg_share(zc, cr));
549 	}
550 }
551 
552 int
553 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
554 {
555 	if (!INGLOBALZONE(curproc))
556 		return (EPERM);
557 
558 	if (secpolicy_smb(cr) == 0) {
559 		return (0);
560 	} else {
561 		return (zfs_secpolicy_deleg_share(zc, cr));
562 	}
563 }
564 
/*
 * Compute the parent of a dataset or snapshot name.
 *
 *	datasetname	name to examine, e.g. "pool/fs" or "pool/fs@snap"
 *	parent		output buffer that receives the parent name
 *	parentsize	size of 'parent' in bytes
 *
 * The parent is the name with its trailing "@bla" removed or, when there
 * is no '@', with its trailing "/bla" removed.
 *
 * Returns 0 on success, ENOENT if the name has neither an '@' nor a '/',
 * and ENAMETOOLONG if the name does not fit in 'parent'.  Names that are
 * too long are rejected rather than truncated: strncpy() would leave the
 * buffer without a terminating NUL, and a truncated name could identify a
 * different dataset.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0 || strlen(datasetname) >= (size_t)parentsize)
		return (ENAMETOOLONG);

	(void) strncpy(parent, datasetname, parentsize);
	parent[parentsize - 1] = '\0';

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);
	cp[0] = '\0';

	return (0);
}
586 
587 int
588 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
589 {
590 	int error;
591 
592 	if ((error = zfs_secpolicy_write_perms(name,
593 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
594 		return (error);
595 
596 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
597 }
598 
599 static int
600 zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
601 {
602 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
603 }
604 
605 /*
606  * Destroying snapshots with delegated permissions requires
607  * descendent mount and destroy permissions.
608  * Reassemble the full filesystem@snap name so dsl_deleg_access()
609  * can do the correct permission check.
610  *
611  * Since this routine is used when doing a recursive destroy of snapshots
612  * and destroying snapshots requires descendent permissions, a successfull
613  * check of the top level snapshot applies to snapshots of all descendent
614  * datasets as well.
615  */
616 static int
617 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
618 {
619 	int error;
620 	char *dsname;
621 
622 	dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
623 
624 	error = zfs_secpolicy_destroy_perms(dsname, cr);
625 
626 	strfree(dsname);
627 	return (error);
628 }
629 
630 int
631 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
632 {
633 	char	parentname[MAXNAMELEN];
634 	int	error;
635 
636 	if ((error = zfs_secpolicy_write_perms(from,
637 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
638 		return (error);
639 
640 	if ((error = zfs_secpolicy_write_perms(from,
641 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
642 		return (error);
643 
644 	if ((error = zfs_get_parent(to, parentname,
645 	    sizeof (parentname))) != 0)
646 		return (error);
647 
648 	if ((error = zfs_secpolicy_write_perms(parentname,
649 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
650 		return (error);
651 
652 	if ((error = zfs_secpolicy_write_perms(parentname,
653 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
654 		return (error);
655 
656 	return (error);
657 }
658 
659 static int
660 zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
661 {
662 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
663 }
664 
665 static int
666 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
667 {
668 	char	parentname[MAXNAMELEN];
669 	objset_t *clone;
670 	int error;
671 
672 	error = zfs_secpolicy_write_perms(zc->zc_name,
673 	    ZFS_DELEG_PERM_PROMOTE, cr);
674 	if (error)
675 		return (error);
676 
677 	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
678 
679 	if (error == 0) {
680 		dsl_dataset_t *pclone = NULL;
681 		dsl_dir_t *dd;
682 		dd = clone->os_dsl_dataset->ds_dir;
683 
684 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
685 		error = dsl_dataset_hold_obj(dd->dd_pool,
686 		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
687 		rw_exit(&dd->dd_pool->dp_config_rwlock);
688 		if (error) {
689 			dmu_objset_rele(clone, FTAG);
690 			return (error);
691 		}
692 
693 		error = zfs_secpolicy_write_perms(zc->zc_name,
694 		    ZFS_DELEG_PERM_MOUNT, cr);
695 
696 		dsl_dataset_name(pclone, parentname);
697 		dmu_objset_rele(clone, FTAG);
698 		dsl_dataset_rele(pclone, FTAG);
699 		if (error == 0)
700 			error = zfs_secpolicy_write_perms(parentname,
701 			    ZFS_DELEG_PERM_PROMOTE, cr);
702 	}
703 	return (error);
704 }
705 
706 static int
707 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
708 {
709 	int error;
710 
711 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
712 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
713 		return (error);
714 
715 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
716 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
717 		return (error);
718 
719 	return (zfs_secpolicy_write_perms(zc->zc_name,
720 	    ZFS_DELEG_PERM_CREATE, cr));
721 }
722 
723 int
724 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
725 {
726 	return (zfs_secpolicy_write_perms(name,
727 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
728 }
729 
730 static int
731 zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
732 {
733 
734 	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
735 }
736 
737 static int
738 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
739 {
740 	char	parentname[MAXNAMELEN];
741 	int	error;
742 
743 	if ((error = zfs_get_parent(zc->zc_name, parentname,
744 	    sizeof (parentname))) != 0)
745 		return (error);
746 
747 	if (zc->zc_value[0] != '\0') {
748 		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
749 		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
750 			return (error);
751 	}
752 
753 	if ((error = zfs_secpolicy_write_perms(parentname,
754 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
755 		return (error);
756 
757 	error = zfs_secpolicy_write_perms(parentname,
758 	    ZFS_DELEG_PERM_MOUNT, cr);
759 
760 	return (error);
761 }
762 
763 static int
764 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
765 {
766 	int error;
767 
768 	error = secpolicy_fs_unmount(cr, NULL);
769 	if (error) {
770 		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
771 	}
772 	return (error);
773 }
774 
775 /*
776  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
777  * SYS_CONFIG privilege, which is not available in a local zone.
778  */
779 /* ARGSUSED */
780 static int
781 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
782 {
783 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
784 		return (EPERM);
785 
786 	return (0);
787 }
788 
789 /*
790  * Policy for fault injection.  Requires all privileges.
791  */
792 /* ARGSUSED */
793 static int
794 zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
795 {
796 	return (secpolicy_zinject(cr));
797 }
798 
799 static int
800 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
801 {
802 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
803 
804 	if (prop == ZPROP_INVAL) {
805 		if (!zfs_prop_user(zc->zc_value))
806 			return (EINVAL);
807 		return (zfs_secpolicy_write_perms(zc->zc_name,
808 		    ZFS_DELEG_PERM_USERPROP, cr));
809 	} else {
810 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
811 		    NULL, cr));
812 	}
813 }
814 
815 static int
816 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
817 {
818 	int err = zfs_secpolicy_read(zc, cr);
819 	if (err)
820 		return (err);
821 
822 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
823 		return (EINVAL);
824 
825 	if (zc->zc_value[0] == 0) {
826 		/*
827 		 * They are asking about a posix uid/gid.  If it's
828 		 * themself, allow it.
829 		 */
830 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
831 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
832 			if (zc->zc_guid == crgetuid(cr))
833 				return (0);
834 		} else {
835 			if (groupmember(zc->zc_guid, cr))
836 				return (0);
837 		}
838 	}
839 
840 	return (zfs_secpolicy_write_perms(zc->zc_name,
841 	    userquota_perms[zc->zc_objset_type], cr));
842 }
843 
844 static int
845 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
846 {
847 	int err = zfs_secpolicy_read(zc, cr);
848 	if (err)
849 		return (err);
850 
851 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
852 		return (EINVAL);
853 
854 	return (zfs_secpolicy_write_perms(zc->zc_name,
855 	    userquota_perms[zc->zc_objset_type], cr));
856 }
857 
858 static int
859 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
860 {
861 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
862 	    NULL, cr));
863 }
864 
865 static int
866 zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
867 {
868 	return (zfs_secpolicy_write_perms(zc->zc_name,
869 	    ZFS_DELEG_PERM_HOLD, cr));
870 }
871 
872 static int
873 zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
874 {
875 	return (zfs_secpolicy_write_perms(zc->zc_name,
876 	    ZFS_DELEG_PERM_RELEASE, cr));
877 }
878 
879 /*
880  * Returns the nvlist as specified by the user in the zfs_cmd_t.
881  */
882 static int
883 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
884 {
885 	char *packed;
886 	int error;
887 	nvlist_t *list = NULL;
888 
889 	/*
890 	 * Read in and unpack the user-supplied nvlist.
891 	 */
892 	if (size == 0)
893 		return (EINVAL);
894 
895 	packed = kmem_alloc(size, KM_SLEEP);
896 
897 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
898 	    iflag)) != 0) {
899 		kmem_free(packed, size);
900 		return (error);
901 	}
902 
903 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
904 		kmem_free(packed, size);
905 		return (error);
906 	}
907 
908 	kmem_free(packed, size);
909 
910 	*nvp = list;
911 	return (0);
912 }
913 
/*
 * Trim the error list so that its packed (native-encoded) size fits in the
 * caller's destination buffer, zc->zc_nvlist_dst_size.
 *
 * If the list already fits, it is left alone.  Otherwise pairs are removed
 * from the list one at a time until it fits, and a ZPROP_N_MORE_ERRORS
 * entry holding the number of removed pairs is left in the list.
 *
 * Returns 0 on success, or ENOMEM without modifying the list if the list
 * does not fit and the destination buffer is smaller than 1024 bytes.
 */
static int
fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
{
	size_t size;

	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);

	if (size > zc->zc_nvlist_dst_size) {
		nvpair_t *more_errors;
		int n = 0;

		if (zc->zc_nvlist_dst_size < 1024)
			return (ENOMEM);

		/*
		 * Add a placeholder count, so that its space is accounted for
		 * while trimming, and locate it.  nvlist_prev_nvpair() with a
		 * NULL pair is presumably the way to fetch the last pair in
		 * the list -- confirm against the nvpair documentation.
		 */
		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
		more_errors = nvlist_prev_nvpair(*errors, NULL);

		/*
		 * Repeatedly drop the pair that precedes the placeholder,
		 * counting the removals, until the list fits.
		 */
		do {
			nvpair_t *pair = nvlist_prev_nvpair(*errors,
			    more_errors);
			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
			n++;
			VERIFY(nvlist_size(*errors, &size,
			    NV_ENCODE_NATIVE) == 0);
		} while (size > zc->zc_nvlist_dst_size);

		/* Replace the placeholder with the real count. */
		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
		/*
		 * Sanity checks only.  NOTE(review): the nvlist_size() call
		 * sits inside ASSERT(), so 'size' is refreshed only where
		 * ASSERT() is compiled in; it is not used after this point.
		 */
		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
		ASSERT(size <= zc->zc_nvlist_dst_size);
	}

	return (0);
}
948 
949 static int
950 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
951 {
952 	char *packed = NULL;
953 	int error = 0;
954 	size_t size;
955 
956 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
957 
958 	if (size > zc->zc_nvlist_dst_size) {
959 		error = ENOMEM;
960 	} else {
961 		packed = kmem_alloc(size, KM_SLEEP);
962 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
963 		    KM_SLEEP) == 0);
964 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
965 		    size, zc->zc_iflags) != 0)
966 			error = EFAULT;
967 		kmem_free(packed, size);
968 	}
969 
970 	zc->zc_nvlist_dst_size = size;
971 	return (error);
972 }
973 
974 static int
975 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
976 {
977 	objset_t *os;
978 	int error;
979 
980 	error = dmu_objset_hold(dsname, FTAG, &os);
981 	if (error)
982 		return (error);
983 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
984 		dmu_objset_rele(os, FTAG);
985 		return (EINVAL);
986 	}
987 
988 	mutex_enter(&os->os_user_ptr_lock);
989 	*zfvp = dmu_objset_get_user(os);
990 	if (*zfvp) {
991 		VFS_HOLD((*zfvp)->z_vfs);
992 	} else {
993 		error = ESRCH;
994 	}
995 	mutex_exit(&os->os_user_ptr_lock);
996 	dmu_objset_rele(os, FTAG);
997 	return (error);
998 }
999 
1000 /*
1001  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1002  * case its z_vfs will be NULL, and it will be opened as the owner.
1003  */
1004 static int
1005 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp)
1006 {
1007 	int error = 0;
1008 
1009 	if (getzfsvfs(name, zfvp) != 0)
1010 		error = zfsvfs_create(name, zfvp);
1011 	if (error == 0) {
1012 		rrw_enter(&(*zfvp)->z_teardown_lock, RW_READER, tag);
1013 		if ((*zfvp)->z_unmounted) {
1014 			/*
1015 			 * XXX we could probably try again, since the unmounting
1016 			 * thread should be just about to disassociate the
1017 			 * objset from the zfsvfs.
1018 			 */
1019 			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1020 			return (EBUSY);
1021 		}
1022 	}
1023 	return (error);
1024 }
1025 
1026 static void
1027 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1028 {
1029 	rrw_exit(&zfsvfs->z_teardown_lock, tag);
1030 
1031 	if (zfsvfs->z_vfs) {
1032 		VFS_RELE(zfsvfs->z_vfs);
1033 	} else {
1034 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1035 		zfsvfs_free(zfsvfs);
1036 	}
1037 }
1038 
/*
 * Create a pool.
 *
 * inputs (as read by this function):
 * zc_name		name of the new pool
 * zc_nvlist_conf	packed config nvlist (required)
 * zc_nvlist_src	packed pool properties nvlist (optional)
 * zc_history		history string passed on to spa_create() (optional)
 *
 * Returns 0 on success, or an errno.  All nvlists obtained here are freed
 * before returning.
 */
static int
zfs_ioc_pool_create(zfs_cmd_t *zc)
{
	int error;
	nvlist_t *config, *props = NULL;
	nvlist_t *rootprops = NULL;
	nvlist_t *zplprops = NULL;
	char *buf;

	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
	    zc->zc_iflags, &config))
		return (error);

	/* Pool properties are optional. */
	if (zc->zc_nvlist_src_size != 0 && (error =
	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props))) {
		nvlist_free(config);
		return (error);
	}

	if (props) {
		nvlist_t *nvl = NULL;
		uint64_t version = SPA_VERSION;

		/* The requested version defaults to SPA_VERSION if absent. */
		(void) nvlist_lookup_uint64(props,
		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
			error = EINVAL;
			goto pool_props_bad;
		}
		/*
		 * Split the root dataset properties out of the pool
		 * properties: keep a private copy and remove the original
		 * entry from 'props'.
		 */
		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
		if (nvl) {
			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
			if (error != 0) {
				nvlist_free(config);
				nvlist_free(props);
				return (error);
			}
			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
		}
		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
		error = zfs_fill_zplprops_root(version, rootprops,
		    zplprops, NULL);
		if (error)
			goto pool_props_bad;
	}

	/* May be NULL if no history string was supplied. */
	buf = history_str_get(zc);

	error = spa_create(zc->zc_name, config, props, buf, zplprops);

	/*
	 * Set the remaining root properties.  If that fails, the newly
	 * created pool is destroyed again.
	 */
	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
		(void) spa_destroy(zc->zc_name);

	if (buf != NULL)
		history_str_free(buf);

pool_props_bad:
	/* Any of these may still be NULL here. */
	nvlist_free(rootprops);
	nvlist_free(zplprops);
	nvlist_free(config);
	nvlist_free(props);

	return (error);
}
1108 
1109 static int
1110 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1111 {
1112 	int error;
1113 	zfs_log_history(zc);
1114 	error = spa_destroy(zc->zc_name);
1115 	if (error == 0)
1116 		zvol_remove_minors(zc->zc_name);
1117 	return (error);
1118 }
1119 
1120 static int
1121 zfs_ioc_pool_import(zfs_cmd_t *zc)
1122 {
1123 	nvlist_t *config, *props = NULL;
1124 	uint64_t guid;
1125 	int error;
1126 
1127 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1128 	    zc->zc_iflags, &config)) != 0)
1129 		return (error);
1130 
1131 	if (zc->zc_nvlist_src_size != 0 && (error =
1132 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1133 	    zc->zc_iflags, &props))) {
1134 		nvlist_free(config);
1135 		return (error);
1136 	}
1137 
1138 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1139 	    guid != zc->zc_guid)
1140 		error = EINVAL;
1141 	else if (zc->zc_cookie)
1142 		error = spa_import_verbatim(zc->zc_name, config, props);
1143 	else
1144 		error = spa_import(zc->zc_name, config, props);
1145 
1146 	if (zc->zc_nvlist_dst != 0)
1147 		(void) put_nvlist(zc, config);
1148 
1149 	nvlist_free(config);
1150 
1151 	if (props)
1152 		nvlist_free(props);
1153 
1154 	return (error);
1155 }
1156 
1157 static int
1158 zfs_ioc_pool_export(zfs_cmd_t *zc)
1159 {
1160 	int error;
1161 	boolean_t force = (boolean_t)zc->zc_cookie;
1162 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1163 
1164 	zfs_log_history(zc);
1165 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1166 	if (error == 0)
1167 		zvol_remove_minors(zc->zc_name);
1168 	return (error);
1169 }
1170 
1171 static int
1172 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1173 {
1174 	nvlist_t *configs;
1175 	int error;
1176 
1177 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1178 		return (EEXIST);
1179 
1180 	error = put_nvlist(zc, configs);
1181 
1182 	nvlist_free(configs);
1183 
1184 	return (error);
1185 }
1186 
1187 static int
1188 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1189 {
1190 	nvlist_t *config;
1191 	int error;
1192 	int ret = 0;
1193 
1194 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1195 	    sizeof (zc->zc_value));
1196 
1197 	if (config != NULL) {
1198 		ret = put_nvlist(zc, config);
1199 		nvlist_free(config);
1200 
1201 		/*
1202 		 * The config may be present even if 'error' is non-zero.
1203 		 * In this case we return success, and preserve the real errno
1204 		 * in 'zc_cookie'.
1205 		 */
1206 		zc->zc_cookie = error;
1207 	} else {
1208 		ret = error;
1209 	}
1210 
1211 	return (ret);
1212 }
1213 
1214 /*
1215  * Try to import the given pool, returning pool stats as appropriate so that
1216  * user land knows which devices are available and overall pool health.
1217  */
1218 static int
1219 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1220 {
1221 	nvlist_t *tryconfig, *config;
1222 	int error;
1223 
1224 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1225 	    zc->zc_iflags, &tryconfig)) != 0)
1226 		return (error);
1227 
1228 	config = spa_tryimport(tryconfig);
1229 
1230 	nvlist_free(tryconfig);
1231 
1232 	if (config == NULL)
1233 		return (EINVAL);
1234 
1235 	error = put_nvlist(zc, config);
1236 	nvlist_free(config);
1237 
1238 	return (error);
1239 }
1240 
1241 /*
1242  * inputs:
1243  * zc_name              name of the pool
1244  * zc_cookie            scan func (pool_scan_func_t)
1245  */
1246 static int
1247 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1248 {
1249 	spa_t *spa;
1250 	int error;
1251 
1252 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1253 		return (error);
1254 
1255 	if (zc->zc_cookie == POOL_SCAN_NONE)
1256 		error = spa_scan_stop(spa);
1257 	else
1258 		error = spa_scan(spa, zc->zc_cookie);
1259 
1260 	spa_close(spa, FTAG);
1261 
1262 	return (error);
1263 }
1264 
1265 static int
1266 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1267 {
1268 	spa_t *spa;
1269 	int error;
1270 
1271 	error = spa_open(zc->zc_name, &spa, FTAG);
1272 	if (error == 0) {
1273 		spa_freeze(spa);
1274 		spa_close(spa, FTAG);
1275 	}
1276 	return (error);
1277 }
1278 
1279 static int
1280 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1281 {
1282 	spa_t *spa;
1283 	int error;
1284 
1285 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1286 		return (error);
1287 
1288 	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1289 		spa_close(spa, FTAG);
1290 		return (EINVAL);
1291 	}
1292 
1293 	spa_upgrade(spa, zc->zc_cookie);
1294 	spa_close(spa, FTAG);
1295 
1296 	return (error);
1297 }
1298 
1299 static int
1300 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1301 {
1302 	spa_t *spa;
1303 	char *hist_buf;
1304 	uint64_t size;
1305 	int error;
1306 
1307 	if ((size = zc->zc_history_len) == 0)
1308 		return (EINVAL);
1309 
1310 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1311 		return (error);
1312 
1313 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1314 		spa_close(spa, FTAG);
1315 		return (ENOTSUP);
1316 	}
1317 
1318 	hist_buf = kmem_alloc(size, KM_SLEEP);
1319 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1320 	    &zc->zc_history_len, hist_buf)) == 0) {
1321 		error = ddi_copyout(hist_buf,
1322 		    (void *)(uintptr_t)zc->zc_history,
1323 		    zc->zc_history_len, zc->zc_iflags);
1324 	}
1325 
1326 	spa_close(spa, FTAG);
1327 	kmem_free(hist_buf, size);
1328 	return (error);
1329 }
1330 
1331 static int
1332 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1333 {
1334 	int error;
1335 
1336 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1337 		return (error);
1338 
1339 	return (0);
1340 }
1341 
1342 /*
1343  * inputs:
1344  * zc_name		name of filesystem
1345  * zc_obj		object to find
1346  *
1347  * outputs:
1348  * zc_value		name of object
1349  */
1350 static int
1351 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1352 {
1353 	objset_t *os;
1354 	int error;
1355 
1356 	/* XXX reading from objset not owned */
1357 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1358 		return (error);
1359 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1360 		dmu_objset_rele(os, FTAG);
1361 		return (EINVAL);
1362 	}
1363 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1364 	    sizeof (zc->zc_value));
1365 	dmu_objset_rele(os, FTAG);
1366 
1367 	return (error);
1368 }
1369 
1370 static int
1371 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1372 {
1373 	spa_t *spa;
1374 	int error;
1375 	nvlist_t *config, **l2cache, **spares;
1376 	uint_t nl2cache = 0, nspares = 0;
1377 
1378 	error = spa_open(zc->zc_name, &spa, FTAG);
1379 	if (error != 0)
1380 		return (error);
1381 
1382 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1383 	    zc->zc_iflags, &config);
1384 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1385 	    &l2cache, &nl2cache);
1386 
1387 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1388 	    &spares, &nspares);
1389 
1390 	/*
1391 	 * A root pool with concatenated devices is not supported.
1392 	 * Thus, can not add a device to a root pool.
1393 	 *
1394 	 * Intent log device can not be added to a rootpool because
1395 	 * during mountroot, zil is replayed, a seperated log device
1396 	 * can not be accessed during the mountroot time.
1397 	 *
1398 	 * l2cache and spare devices are ok to be added to a rootpool.
1399 	 */
1400 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1401 		nvlist_free(config);
1402 		spa_close(spa, FTAG);
1403 		return (EDOM);
1404 	}
1405 
1406 	if (error == 0) {
1407 		error = spa_vdev_add(spa, config);
1408 		nvlist_free(config);
1409 	}
1410 	spa_close(spa, FTAG);
1411 	return (error);
1412 }
1413 
1414 /*
1415  * inputs:
1416  * zc_name		name of the pool
1417  * zc_nvlist_conf	nvlist of devices to remove
1418  * zc_cookie		to stop the remove?
1419  */
1420 static int
1421 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1422 {
1423 	spa_t *spa;
1424 	int error;
1425 
1426 	error = spa_open(zc->zc_name, &spa, FTAG);
1427 	if (error != 0)
1428 		return (error);
1429 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1430 	spa_close(spa, FTAG);
1431 	return (error);
1432 }
1433 
1434 static int
1435 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1436 {
1437 	spa_t *spa;
1438 	int error;
1439 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1440 
1441 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1442 		return (error);
1443 	switch (zc->zc_cookie) {
1444 	case VDEV_STATE_ONLINE:
1445 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1446 		break;
1447 
1448 	case VDEV_STATE_OFFLINE:
1449 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1450 		break;
1451 
1452 	case VDEV_STATE_FAULTED:
1453 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1454 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1455 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1456 
1457 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1458 		break;
1459 
1460 	case VDEV_STATE_DEGRADED:
1461 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1462 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1463 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1464 
1465 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1466 		break;
1467 
1468 	default:
1469 		error = EINVAL;
1470 	}
1471 	zc->zc_cookie = newstate;
1472 	spa_close(spa, FTAG);
1473 	return (error);
1474 }
1475 
1476 static int
1477 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1478 {
1479 	spa_t *spa;
1480 	int replacing = zc->zc_cookie;
1481 	nvlist_t *config;
1482 	int error;
1483 
1484 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1485 		return (error);
1486 
1487 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1488 	    zc->zc_iflags, &config)) == 0) {
1489 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1490 		nvlist_free(config);
1491 	}
1492 
1493 	spa_close(spa, FTAG);
1494 	return (error);
1495 }
1496 
1497 static int
1498 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1499 {
1500 	spa_t *spa;
1501 	int error;
1502 
1503 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1504 		return (error);
1505 
1506 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1507 
1508 	spa_close(spa, FTAG);
1509 	return (error);
1510 }
1511 
1512 static int
1513 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1514 {
1515 	spa_t *spa;
1516 	nvlist_t *config, *props = NULL;
1517 	int error;
1518 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1519 
1520 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1521 		return (error);
1522 
1523 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1524 	    zc->zc_iflags, &config)) {
1525 		spa_close(spa, FTAG);
1526 		return (error);
1527 	}
1528 
1529 	if (zc->zc_nvlist_src_size != 0 && (error =
1530 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1531 	    zc->zc_iflags, &props))) {
1532 		spa_close(spa, FTAG);
1533 		nvlist_free(config);
1534 		return (error);
1535 	}
1536 
1537 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1538 
1539 	spa_close(spa, FTAG);
1540 
1541 	nvlist_free(config);
1542 	nvlist_free(props);
1543 
1544 	return (error);
1545 }
1546 
1547 static int
1548 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1549 {
1550 	spa_t *spa;
1551 	char *path = zc->zc_value;
1552 	uint64_t guid = zc->zc_guid;
1553 	int error;
1554 
1555 	error = spa_open(zc->zc_name, &spa, FTAG);
1556 	if (error != 0)
1557 		return (error);
1558 
1559 	error = spa_vdev_setpath(spa, guid, path);
1560 	spa_close(spa, FTAG);
1561 	return (error);
1562 }
1563 
1564 static int
1565 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1566 {
1567 	spa_t *spa;
1568 	char *fru = zc->zc_value;
1569 	uint64_t guid = zc->zc_guid;
1570 	int error;
1571 
1572 	error = spa_open(zc->zc_name, &spa, FTAG);
1573 	if (error != 0)
1574 		return (error);
1575 
1576 	error = spa_vdev_setfru(spa, guid, fru);
1577 	spa_close(spa, FTAG);
1578 	return (error);
1579 }
1580 
1581 /*
1582  * inputs:
1583  * zc_name		name of filesystem
1584  * zc_nvlist_dst_size	size of buffer for property nvlist
1585  *
1586  * outputs:
1587  * zc_objset_stats	stats
1588  * zc_nvlist_dst	property nvlist
1589  * zc_nvlist_dst_size	size of property nvlist
1590  */
1591 static int
1592 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1593 {
1594 	objset_t *os = NULL;
1595 	int error;
1596 	nvlist_t *nv;
1597 
1598 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1599 		return (error);
1600 
1601 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1602 
1603 	if (zc->zc_nvlist_dst != 0 &&
1604 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1605 		dmu_objset_stats(os, nv);
1606 		/*
1607 		 * NB: zvol_get_stats() will read the objset contents,
1608 		 * which we aren't supposed to do with a
1609 		 * DS_MODE_USER hold, because it could be
1610 		 * inconsistent.  So this is a bit of a workaround...
1611 		 * XXX reading with out owning
1612 		 */
1613 		if (!zc->zc_objset_stats.dds_inconsistent) {
1614 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1615 				VERIFY(zvol_get_stats(os, nv) == 0);
1616 		}
1617 		error = put_nvlist(zc, nv);
1618 		nvlist_free(nv);
1619 	}
1620 
1621 	dmu_objset_rele(os, FTAG);
1622 	return (error);
1623 }
1624 
1625 /*
1626  * inputs:
1627  * zc_name		name of filesystem
1628  * zc_nvlist_dst_size	size of buffer for property nvlist
1629  *
1630  * outputs:
1631  * zc_nvlist_dst	received property nvlist
1632  * zc_nvlist_dst_size	size of received property nvlist
1633  *
1634  * Gets received properties (distinct from local properties on or after
1635  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1636  * local property values.
1637  */
1638 static int
1639 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1640 {
1641 	objset_t *os = NULL;
1642 	int error;
1643 	nvlist_t *nv;
1644 
1645 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1646 		return (error);
1647 
1648 	/*
1649 	 * Without this check, we would return local property values if the
1650 	 * caller has not already received properties on or after
1651 	 * SPA_VERSION_RECVD_PROPS.
1652 	 */
1653 	if (!dsl_prop_get_hasrecvd(os)) {
1654 		dmu_objset_rele(os, FTAG);
1655 		return (ENOTSUP);
1656 	}
1657 
1658 	if (zc->zc_nvlist_dst != 0 &&
1659 	    (error = dsl_prop_get_received(os, &nv)) == 0) {
1660 		error = put_nvlist(zc, nv);
1661 		nvlist_free(nv);
1662 	}
1663 
1664 	dmu_objset_rele(os, FTAG);
1665 	return (error);
1666 }
1667 
1668 static int
1669 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1670 {
1671 	uint64_t value;
1672 	int error;
1673 
1674 	/*
1675 	 * zfs_get_zplprop() will either find a value or give us
1676 	 * the default value (if there is one).
1677 	 */
1678 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1679 		return (error);
1680 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1681 	return (0);
1682 }
1683 
1684 /*
1685  * inputs:
1686  * zc_name		name of filesystem
1687  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1688  *
1689  * outputs:
1690  * zc_nvlist_dst	zpl property nvlist
1691  * zc_nvlist_dst_size	size of zpl property nvlist
1692  */
1693 static int
1694 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1695 {
1696 	objset_t *os;
1697 	int err;
1698 
1699 	/* XXX reading without owning */
1700 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1701 		return (err);
1702 
1703 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1704 
1705 	/*
1706 	 * NB: nvl_add_zplprop() will read the objset contents,
1707 	 * which we aren't supposed to do with a DS_MODE_USER
1708 	 * hold, because it could be inconsistent.
1709 	 */
1710 	if (zc->zc_nvlist_dst != NULL &&
1711 	    !zc->zc_objset_stats.dds_inconsistent &&
1712 	    dmu_objset_type(os) == DMU_OST_ZFS) {
1713 		nvlist_t *nv;
1714 
1715 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1716 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1717 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1718 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1719 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1720 			err = put_nvlist(zc, nv);
1721 		nvlist_free(nv);
1722 	} else {
1723 		err = ENOENT;
1724 	}
1725 	dmu_objset_rele(os, FTAG);
1726 	return (err);
1727 }
1728 
1729 static boolean_t
1730 dataset_name_hidden(const char *name)
1731 {
1732 	/*
1733 	 * Skip over datasets that are not visible in this zone,
1734 	 * internal datasets (which have a $ in their name), and
1735 	 * temporary datasets (which have a % in their name).
1736 	 */
1737 	if (strchr(name, '$') != NULL)
1738 		return (B_TRUE);
1739 	if (strchr(name, '%') != NULL)
1740 		return (B_TRUE);
1741 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1742 		return (B_TRUE);
1743 	return (B_FALSE);
1744 }
1745 
1746 /*
1747  * inputs:
1748  * zc_name		name of filesystem
1749  * zc_cookie		zap cursor
1750  * zc_nvlist_dst_size	size of buffer for property nvlist
1751  *
1752  * outputs:
1753  * zc_name		name of next filesystem
1754  * zc_cookie		zap cursor
1755  * zc_objset_stats	stats
1756  * zc_nvlist_dst	property nvlist
1757  * zc_nvlist_dst_size	size of property nvlist
1758  */
1759 static int
1760 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1761 {
1762 	objset_t *os;
1763 	int error;
1764 	char *p;
1765 	size_t orig_len = strlen(zc->zc_name);
1766 
1767 top:
1768 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1769 		if (error == ENOENT)
1770 			error = ESRCH;
1771 		return (error);
1772 	}
1773 
1774 	p = strrchr(zc->zc_name, '/');
1775 	if (p == NULL || p[1] != '\0')
1776 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1777 	p = zc->zc_name + strlen(zc->zc_name);
1778 
1779 	/*
1780 	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1781 	 * but is not declared void because its called by dmu_objset_find().
1782 	 */
1783 	if (zc->zc_cookie == 0) {
1784 		uint64_t cookie = 0;
1785 		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1786 
1787 		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
1788 			(void) dmu_objset_prefetch(p, NULL);
1789 	}
1790 
1791 	do {
1792 		error = dmu_dir_list_next(os,
1793 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1794 		    NULL, &zc->zc_cookie);
1795 		if (error == ENOENT)
1796 			error = ESRCH;
1797 	} while (error == 0 && dataset_name_hidden(zc->zc_name) &&
1798 	    !(zc->zc_iflags & FKIOCTL));
1799 	dmu_objset_rele(os, FTAG);
1800 
1801 	/*
1802 	 * If it's an internal dataset (ie. with a '$' in its name),
1803 	 * don't try to get stats for it, otherwise we'll return ENOENT.
1804 	 */
1805 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
1806 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1807 		if (error == ENOENT) {
1808 			/* We lost a race with destroy, get the next one. */
1809 			zc->zc_name[orig_len] = '\0';
1810 			goto top;
1811 		}
1812 	}
1813 	return (error);
1814 }
1815 
1816 /*
1817  * inputs:
1818  * zc_name		name of filesystem
1819  * zc_cookie		zap cursor
1820  * zc_nvlist_dst_size	size of buffer for property nvlist
1821  *
1822  * outputs:
1823  * zc_name		name of next snapshot
1824  * zc_objset_stats	stats
1825  * zc_nvlist_dst	property nvlist
1826  * zc_nvlist_dst_size	size of property nvlist
1827  */
1828 static int
1829 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1830 {
1831 	objset_t *os;
1832 	int error;
1833 
1834 top:
1835 	if (zc->zc_cookie == 0)
1836 		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
1837 		    NULL, DS_FIND_SNAPSHOTS);
1838 
1839 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
1840 	if (error)
1841 		return (error == ENOENT ? ESRCH : error);
1842 
1843 	/*
1844 	 * A dataset name of maximum length cannot have any snapshots,
1845 	 * so exit immediately.
1846 	 */
1847 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1848 		dmu_objset_rele(os, FTAG);
1849 		return (ESRCH);
1850 	}
1851 
1852 	error = dmu_snapshot_list_next(os,
1853 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1854 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
1855 	dmu_objset_rele(os, FTAG);
1856 	if (error == 0) {
1857 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1858 		if (error == ENOENT)  {
1859 			/* We lost a race with destroy, get the next one. */
1860 			*strchr(zc->zc_name, '@') = '\0';
1861 			goto top;
1862 		}
1863 	} else if (error == ENOENT) {
1864 		error = ESRCH;
1865 	}
1866 
1867 	/* if we failed, undo the @ that we tacked on to zc_name */
1868 	if (error)
1869 		*strchr(zc->zc_name, '@') = '\0';
1870 	return (error);
1871 }
1872 
1873 static int
1874 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
1875 {
1876 	const char *propname = nvpair_name(pair);
1877 	uint64_t *valary;
1878 	unsigned int vallen;
1879 	const char *domain;
1880 	char *dash;
1881 	zfs_userquota_prop_t type;
1882 	uint64_t rid;
1883 	uint64_t quota;
1884 	zfsvfs_t *zfsvfs;
1885 	int err;
1886 
1887 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1888 		nvlist_t *attrs;
1889 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1890 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1891 		    &pair) != 0)
1892 			return (EINVAL);
1893 	}
1894 
1895 	/*
1896 	 * A correctly constructed propname is encoded as
1897 	 * userquota@<rid>-<domain>.
1898 	 */
1899 	if ((dash = strchr(propname, '-')) == NULL ||
1900 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
1901 	    vallen != 3)
1902 		return (EINVAL);
1903 
1904 	domain = dash + 1;
1905 	type = valary[0];
1906 	rid = valary[1];
1907 	quota = valary[2];
1908 
1909 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs);
1910 	if (err == 0) {
1911 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
1912 		zfsvfs_rele(zfsvfs, FTAG);
1913 	}
1914 
1915 	return (err);
1916 }
1917 
1918 /*
1919  * If the named property is one that has a special function to set its value,
1920  * return 0 on success and a positive error code on failure; otherwise if it is
1921  * not one of the special properties handled by this function, return -1.
1922  *
1923  * XXX: It would be better for callers of the property interface if we handled
1924  * these special cases in dsl_prop.c (in the dsl layer).
1925  */
1926 static int
1927 zfs_prop_set_special(const char *dsname, zprop_source_t source,
1928     nvpair_t *pair)
1929 {
1930 	const char *propname = nvpair_name(pair);
1931 	zfs_prop_t prop = zfs_name_to_prop(propname);
1932 	uint64_t intval;
1933 	int err;
1934 
1935 	if (prop == ZPROP_INVAL) {
1936 		if (zfs_prop_userquota(propname))
1937 			return (zfs_prop_set_userquota(dsname, pair));
1938 		return (-1);
1939 	}
1940 
1941 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1942 		nvlist_t *attrs;
1943 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1944 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1945 		    &pair) == 0);
1946 	}
1947 
1948 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
1949 		return (-1);
1950 
1951 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
1952 
1953 	switch (prop) {
1954 	case ZFS_PROP_QUOTA:
1955 		err = dsl_dir_set_quota(dsname, source, intval);
1956 		break;
1957 	case ZFS_PROP_REFQUOTA:
1958 		err = dsl_dataset_set_quota(dsname, source, intval);
1959 		break;
1960 	case ZFS_PROP_RESERVATION:
1961 		err = dsl_dir_set_reservation(dsname, source, intval);
1962 		break;
1963 	case ZFS_PROP_REFRESERVATION:
1964 		err = dsl_dataset_set_reservation(dsname, source, intval);
1965 		break;
1966 	case ZFS_PROP_VOLSIZE:
1967 		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
1968 		    intval);
1969 		break;
1970 	case ZFS_PROP_VERSION:
1971 	{
1972 		zfsvfs_t *zfsvfs;
1973 
1974 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs)) != 0)
1975 			break;
1976 
1977 		err = zfs_set_version(zfsvfs, intval);
1978 		zfsvfs_rele(zfsvfs, FTAG);
1979 
1980 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
1981 			zfs_cmd_t *zc;
1982 
1983 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
1984 			(void) strcpy(zc->zc_name, dsname);
1985 			(void) zfs_ioc_userspace_upgrade(zc);
1986 			kmem_free(zc, sizeof (zfs_cmd_t));
1987 		}
1988 		break;
1989 	}
1990 
1991 	default:
1992 		err = -1;
1993 	}
1994 
1995 	return (err);
1996 }
1997 
1998 /*
1999  * This function is best effort. If it fails to set any of the given properties,
2000  * it continues to set as many as it can and returns the first error
2001  * encountered. If the caller provides a non-NULL errlist, it also gives the
2002  * complete list of names of all the properties it failed to set along with the
2003  * corresponding error numbers. The caller is responsible for freeing the
2004  * returned errlist.
2005  *
2006  * If every property is set successfully, zero is returned and the list pointed
2007  * at by errlist is NULL.
2008  */
2009 int
2010 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2011     nvlist_t **errlist)
2012 {
2013 	nvpair_t *pair;
2014 	nvpair_t *propval;
2015 	int rv = 0;
2016 	uint64_t intval;
2017 	char *strval;
2018 	nvlist_t *genericnvl;
2019 	nvlist_t *errors;
2020 	nvlist_t *retrynvl;
2021 
2022 	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2023 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2024 	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2025 
2026 retry:
2027 	pair = NULL;
2028 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2029 		const char *propname = nvpair_name(pair);
2030 		zfs_prop_t prop = zfs_name_to_prop(propname);
2031 		int err = 0;
2032 
2033 		/* decode the property value */
2034 		propval = pair;
2035 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2036 			nvlist_t *attrs;
2037 			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2038 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2039 			    &propval) != 0)
2040 				err = EINVAL;
2041 		}
2042 
2043 		/* Validate value type */
2044 		if (err == 0 && prop == ZPROP_INVAL) {
2045 			if (zfs_prop_user(propname)) {
2046 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2047 					err = EINVAL;
2048 			} else if (zfs_prop_userquota(propname)) {
2049 				if (nvpair_type(propval) !=
2050 				    DATA_TYPE_UINT64_ARRAY)
2051 					err = EINVAL;
2052 			}
2053 		} else if (err == 0) {
2054 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2055 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2056 					err = EINVAL;
2057 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2058 				const char *unused;
2059 
2060 				VERIFY(nvpair_value_uint64(propval,
2061 				    &intval) == 0);
2062 
2063 				switch (zfs_prop_get_type(prop)) {
2064 				case PROP_TYPE_NUMBER:
2065 					break;
2066 				case PROP_TYPE_STRING:
2067 					err = EINVAL;
2068 					break;
2069 				case PROP_TYPE_INDEX:
2070 					if (zfs_prop_index_to_string(prop,
2071 					    intval, &unused) != 0)
2072 						err = EINVAL;
2073 					break;
2074 				default:
2075 					cmn_err(CE_PANIC,
2076 					    "unknown property type");
2077 				}
2078 			} else {
2079 				err = EINVAL;
2080 			}
2081 		}
2082 
2083 		/* Validate permissions */
2084 		if (err == 0)
2085 			err = zfs_check_settable(dsname, pair, CRED());
2086 
2087 		if (err == 0) {
2088 			err = zfs_prop_set_special(dsname, source, pair);
2089 			if (err == -1) {
2090 				/*
2091 				 * For better performance we build up a list of
2092 				 * properties to set in a single transaction.
2093 				 */
2094 				err = nvlist_add_nvpair(genericnvl, pair);
2095 			} else if (err != 0 && nvl != retrynvl) {
2096 				/*
2097 				 * This may be a spurious error caused by
2098 				 * receiving quota and reservation out of order.
2099 				 * Try again in a second pass.
2100 				 */
2101 				err = nvlist_add_nvpair(retrynvl, pair);
2102 			}
2103 		}
2104 
2105 		if (err != 0)
2106 			VERIFY(nvlist_add_int32(errors, propname, err) == 0);
2107 	}
2108 
2109 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2110 		nvl = retrynvl;
2111 		goto retry;
2112 	}
2113 
2114 	if (!nvlist_empty(genericnvl) &&
2115 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2116 		/*
2117 		 * If this fails, we still want to set as many properties as we
2118 		 * can, so try setting them individually.
2119 		 */
2120 		pair = NULL;
2121 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2122 			const char *propname = nvpair_name(pair);
2123 			int err = 0;
2124 
2125 			propval = pair;
2126 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2127 				nvlist_t *attrs;
2128 				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2129 				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2130 				    &propval) == 0);
2131 			}
2132 
2133 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2134 				VERIFY(nvpair_value_string(propval,
2135 				    &strval) == 0);
2136 				err = dsl_prop_set(dsname, propname, source, 1,
2137 				    strlen(strval) + 1, strval);
2138 			} else {
2139 				VERIFY(nvpair_value_uint64(propval,
2140 				    &intval) == 0);
2141 				err = dsl_prop_set(dsname, propname, source, 8,
2142 				    1, &intval);
2143 			}
2144 
2145 			if (err != 0) {
2146 				VERIFY(nvlist_add_int32(errors, propname,
2147 				    err) == 0);
2148 			}
2149 		}
2150 	}
2151 	nvlist_free(genericnvl);
2152 	nvlist_free(retrynvl);
2153 
2154 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
2155 		nvlist_free(errors);
2156 		errors = NULL;
2157 	} else {
2158 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
2159 	}
2160 
2161 	if (errlist == NULL)
2162 		nvlist_free(errors);
2163 	else
2164 		*errlist = errors;
2165 
2166 	return (rv);
2167 }
2168 
2169 /*
2170  * Check that all the properties are valid user properties.
2171  */
2172 static int
2173 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2174 {
2175 	nvpair_t *pair = NULL;
2176 	int error = 0;
2177 
2178 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2179 		const char *propname = nvpair_name(pair);
2180 		char *valstr;
2181 
2182 		if (!zfs_prop_user(propname) ||
2183 		    nvpair_type(pair) != DATA_TYPE_STRING)
2184 			return (EINVAL);
2185 
2186 		if (error = zfs_secpolicy_write_perms(fsname,
2187 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2188 			return (error);
2189 
2190 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2191 			return (ENAMETOOLONG);
2192 
2193 		VERIFY(nvpair_value_string(pair, &valstr) == 0);
2194 		if (strlen(valstr) >= ZAP_MAXVALUELEN)
2195 			return (E2BIG);
2196 	}
2197 	return (0);
2198 }
2199 
2200 static void
2201 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2202 {
2203 	nvpair_t *pair;
2204 
2205 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2206 
2207 	pair = NULL;
2208 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2209 		if (nvlist_exists(skipped, nvpair_name(pair)))
2210 			continue;
2211 
2212 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2213 	}
2214 }
2215 
2216 static int
2217 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2218     nvlist_t *skipped)
2219 {
2220 	int err = 0;
2221 	nvlist_t *cleared_props = NULL;
2222 	props_skip(props, skipped, &cleared_props);
2223 	if (!nvlist_empty(cleared_props)) {
2224 		/*
2225 		 * Acts on local properties until the dataset has received
2226 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2227 		 */
2228 		zprop_source_t flags = (ZPROP_SRC_NONE |
2229 		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2230 		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2231 	}
2232 	nvlist_free(cleared_props);
2233 	return (err);
2234 }
2235 
2236 /*
2237  * inputs:
2238  * zc_name		name of filesystem
2239  * zc_value		name of property to set
2240  * zc_nvlist_src{_size}	nvlist of properties to apply
2241  * zc_cookie		received properties flag
2242  *
2243  * outputs:
2244  * zc_nvlist_dst{_size} error for each unapplied received property
2245  */
2246 static int
2247 zfs_ioc_set_prop(zfs_cmd_t *zc)
2248 {
2249 	nvlist_t *nvl;
2250 	boolean_t received = zc->zc_cookie;
2251 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2252 	    ZPROP_SRC_LOCAL);
2253 	nvlist_t *errors = NULL;
2254 	int error;
2255 
2256 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2257 	    zc->zc_iflags, &nvl)) != 0)
2258 		return (error);
2259 
2260 	if (received) {
2261 		nvlist_t *origprops;
2262 		objset_t *os;
2263 
2264 		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2265 			if (dsl_prop_get_received(os, &origprops) == 0) {
2266 				(void) clear_received_props(os,
2267 				    zc->zc_name, origprops, nvl);
2268 				nvlist_free(origprops);
2269 			}
2270 
2271 			dsl_prop_set_hasrecvd(os);
2272 			dmu_objset_rele(os, FTAG);
2273 		}
2274 	}
2275 
2276 	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2277 
2278 	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2279 		(void) put_nvlist(zc, errors);
2280 	}
2281 
2282 	nvlist_free(errors);
2283 	nvlist_free(nvl);
2284 	return (error);
2285 }
2286 
2287 /*
2288  * inputs:
2289  * zc_name		name of filesystem
2290  * zc_value		name of property to inherit
2291  * zc_cookie		revert to received value if TRUE
2292  *
2293  * outputs:		none
2294  */
2295 static int
2296 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2297 {
2298 	const char *propname = zc->zc_value;
2299 	zfs_prop_t prop = zfs_name_to_prop(propname);
2300 	boolean_t received = zc->zc_cookie;
2301 	zprop_source_t source = (received
2302 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2303 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2304 
2305 	if (received) {
2306 		nvlist_t *dummy;
2307 		nvpair_t *pair;
2308 		zprop_type_t type;
2309 		int err;
2310 
2311 		/*
2312 		 * zfs_prop_set_special() expects properties in the form of an
2313 		 * nvpair with type info.
2314 		 */
2315 		if (prop == ZPROP_INVAL) {
2316 			if (!zfs_prop_user(propname))
2317 				return (EINVAL);
2318 
2319 			type = PROP_TYPE_STRING;
2320 		} else if (prop == ZFS_PROP_VOLSIZE ||
2321 		    prop == ZFS_PROP_VERSION) {
2322 			return (EINVAL);
2323 		} else {
2324 			type = zfs_prop_get_type(prop);
2325 		}
2326 
2327 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2328 
2329 		switch (type) {
2330 		case PROP_TYPE_STRING:
2331 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2332 			break;
2333 		case PROP_TYPE_NUMBER:
2334 		case PROP_TYPE_INDEX:
2335 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2336 			break;
2337 		default:
2338 			nvlist_free(dummy);
2339 			return (EINVAL);
2340 		}
2341 
2342 		pair = nvlist_next_nvpair(dummy, NULL);
2343 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2344 		nvlist_free(dummy);
2345 		if (err != -1)
2346 			return (err); /* special property already handled */
2347 	} else {
2348 		/*
2349 		 * Only check this in the non-received case. We want to allow
2350 		 * 'inherit -S' to revert non-inheritable properties like quota
2351 		 * and reservation to the received or default values even though
2352 		 * they are not considered inheritable.
2353 		 */
2354 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2355 			return (EINVAL);
2356 	}
2357 
2358 	/* the property name has been validated by zfs_secpolicy_inherit() */
2359 	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2360 }
2361 
2362 static int
2363 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2364 {
2365 	nvlist_t *props;
2366 	spa_t *spa;
2367 	int error;
2368 	nvpair_t *pair;
2369 
2370 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2371 	    zc->zc_iflags, &props))
2372 		return (error);
2373 
2374 	/*
2375 	 * If the only property is the configfile, then just do a spa_lookup()
2376 	 * to handle the faulted case.
2377 	 */
2378 	pair = nvlist_next_nvpair(props, NULL);
2379 	if (pair != NULL && strcmp(nvpair_name(pair),
2380 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2381 	    nvlist_next_nvpair(props, pair) == NULL) {
2382 		mutex_enter(&spa_namespace_lock);
2383 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2384 			spa_configfile_set(spa, props, B_FALSE);
2385 			spa_config_sync(spa, B_FALSE, B_TRUE);
2386 		}
2387 		mutex_exit(&spa_namespace_lock);
2388 		if (spa != NULL) {
2389 			nvlist_free(props);
2390 			return (0);
2391 		}
2392 	}
2393 
2394 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2395 		nvlist_free(props);
2396 		return (error);
2397 	}
2398 
2399 	error = spa_prop_set(spa, props);
2400 
2401 	nvlist_free(props);
2402 	spa_close(spa, FTAG);
2403 
2404 	return (error);
2405 }
2406 
2407 static int
2408 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2409 {
2410 	spa_t *spa;
2411 	int error;
2412 	nvlist_t *nvp = NULL;
2413 
2414 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2415 		/*
2416 		 * If the pool is faulted, there may be properties we can still
2417 		 * get (such as altroot and cachefile), so attempt to get them
2418 		 * anyway.
2419 		 */
2420 		mutex_enter(&spa_namespace_lock);
2421 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2422 			error = spa_prop_get(spa, &nvp);
2423 		mutex_exit(&spa_namespace_lock);
2424 	} else {
2425 		error = spa_prop_get(spa, &nvp);
2426 		spa_close(spa, FTAG);
2427 	}
2428 
2429 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2430 		error = put_nvlist(zc, nvp);
2431 	else
2432 		error = EFAULT;
2433 
2434 	nvlist_free(nvp);
2435 	return (error);
2436 }
2437 
2438 /*
2439  * inputs:
2440  * zc_name		name of filesystem
2441  * zc_nvlist_src{_size}	nvlist of delegated permissions
2442  * zc_perm_action	allow/unallow flag
2443  *
2444  * outputs:		none
2445  */
2446 static int
2447 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2448 {
2449 	int error;
2450 	nvlist_t *fsaclnv = NULL;
2451 
2452 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2453 	    zc->zc_iflags, &fsaclnv)) != 0)
2454 		return (error);
2455 
2456 	/*
2457 	 * Verify nvlist is constructed correctly
2458 	 */
2459 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2460 		nvlist_free(fsaclnv);
2461 		return (EINVAL);
2462 	}
2463 
2464 	/*
2465 	 * If we don't have PRIV_SYS_MOUNT, then validate
2466 	 * that user is allowed to hand out each permission in
2467 	 * the nvlist(s)
2468 	 */
2469 
2470 	error = secpolicy_zfs(CRED());
2471 	if (error) {
2472 		if (zc->zc_perm_action == B_FALSE) {
2473 			error = dsl_deleg_can_allow(zc->zc_name,
2474 			    fsaclnv, CRED());
2475 		} else {
2476 			error = dsl_deleg_can_unallow(zc->zc_name,
2477 			    fsaclnv, CRED());
2478 		}
2479 	}
2480 
2481 	if (error == 0)
2482 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2483 
2484 	nvlist_free(fsaclnv);
2485 	return (error);
2486 }
2487 
2488 /*
2489  * inputs:
2490  * zc_name		name of filesystem
2491  *
2492  * outputs:
2493  * zc_nvlist_src{_size}	nvlist of delegated permissions
2494  */
2495 static int
2496 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2497 {
2498 	nvlist_t *nvp;
2499 	int error;
2500 
2501 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2502 		error = put_nvlist(zc, nvp);
2503 		nvlist_free(nvp);
2504 	}
2505 
2506 	return (error);
2507 }
2508 
2509 /*
2510  * Search the vfs list for a specified resource.  Returns a pointer to it
2511  * or NULL if no suitable entry is found. The caller of this routine
2512  * is responsible for releasing the returned vfs pointer.
2513  */
2514 static vfs_t *
2515 zfs_get_vfs(const char *resource)
2516 {
2517 	struct vfs *vfsp;
2518 	struct vfs *vfs_found = NULL;
2519 
2520 	vfs_list_read_lock();
2521 	vfsp = rootvfs;
2522 	do {
2523 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2524 			VFS_HOLD(vfsp);
2525 			vfs_found = vfsp;
2526 			break;
2527 		}
2528 		vfsp = vfsp->vfs_next;
2529 	} while (vfsp != rootvfs);
2530 	vfs_list_unlock();
2531 	return (vfs_found);
2532 }
2533 
2534 /* ARGSUSED */
2535 static void
2536 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2537 {
2538 	zfs_creat_t *zct = arg;
2539 
2540 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2541 }
2542 
2543 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2544 
2545 /*
2546  * inputs:
2547  * createprops		list of properties requested by creator
2548  * default_zplver	zpl version to use if unspecified in createprops
2549  * fuids_ok		fuids allowed in this version of the spa?
2550  * os			parent objset pointer (NULL if root fs)
2551  *
2552  * outputs:
2553  * zplprops	values for the zplprops we attach to the master node object
2554  * is_ci	true if requested file system will be purely case-insensitive
2555  *
2556  * Determine the settings for utf8only, normalization and
2557  * casesensitivity.  Specific values may have been requested by the
2558  * creator and/or we can inherit values from the parent dataset.  If
2559  * the file system is of too early a vintage, a creator can not
2560  * request settings for these properties, even if the requested
2561  * setting is the default value.  We don't actually want to create dsl
2562  * properties for these, so remove them from the source nvlist after
2563  * processing.
2564  */
2565 static int
2566 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2567     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2568     nvlist_t *zplprops, boolean_t *is_ci)
2569 {
2570 	uint64_t sense = ZFS_PROP_UNDEFINED;
2571 	uint64_t norm = ZFS_PROP_UNDEFINED;
2572 	uint64_t u8 = ZFS_PROP_UNDEFINED;
2573 
2574 	ASSERT(zplprops != NULL);
2575 
2576 	/*
2577 	 * Pull out creator prop choices, if any.
2578 	 */
2579 	if (createprops) {
2580 		(void) nvlist_lookup_uint64(createprops,
2581 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2582 		(void) nvlist_lookup_uint64(createprops,
2583 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2584 		(void) nvlist_remove_all(createprops,
2585 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2586 		(void) nvlist_lookup_uint64(createprops,
2587 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2588 		(void) nvlist_remove_all(createprops,
2589 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2590 		(void) nvlist_lookup_uint64(createprops,
2591 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2592 		(void) nvlist_remove_all(createprops,
2593 		    zfs_prop_to_name(ZFS_PROP_CASE));
2594 	}
2595 
2596 	/*
2597 	 * If the zpl version requested is whacky or the file system
2598 	 * or pool is version is too "young" to support normalization
2599 	 * and the creator tried to set a value for one of the props,
2600 	 * error out.
2601 	 */
2602 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2603 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2604 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2605 	    (zplver < ZPL_VERSION_NORMALIZATION &&
2606 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2607 	    sense != ZFS_PROP_UNDEFINED)))
2608 		return (ENOTSUP);
2609 
2610 	/*
2611 	 * Put the version in the zplprops
2612 	 */
2613 	VERIFY(nvlist_add_uint64(zplprops,
2614 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2615 
2616 	if (norm == ZFS_PROP_UNDEFINED)
2617 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2618 	VERIFY(nvlist_add_uint64(zplprops,
2619 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2620 
2621 	/*
2622 	 * If we're normalizing, names must always be valid UTF-8 strings.
2623 	 */
2624 	if (norm)
2625 		u8 = 1;
2626 	if (u8 == ZFS_PROP_UNDEFINED)
2627 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2628 	VERIFY(nvlist_add_uint64(zplprops,
2629 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2630 
2631 	if (sense == ZFS_PROP_UNDEFINED)
2632 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2633 	VERIFY(nvlist_add_uint64(zplprops,
2634 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2635 
2636 	if (is_ci)
2637 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
2638 
2639 	return (0);
2640 }
2641 
2642 static int
2643 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2644     nvlist_t *zplprops, boolean_t *is_ci)
2645 {
2646 	boolean_t fuids_ok, sa_ok;
2647 	uint64_t zplver = ZPL_VERSION;
2648 	objset_t *os = NULL;
2649 	char parentname[MAXNAMELEN];
2650 	char *cp;
2651 	spa_t *spa;
2652 	uint64_t spa_vers;
2653 	int error;
2654 
2655 	(void) strlcpy(parentname, dataset, sizeof (parentname));
2656 	cp = strrchr(parentname, '/');
2657 	ASSERT(cp != NULL);
2658 	cp[0] = '\0';
2659 
2660 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2661 		return (error);
2662 
2663 	spa_vers = spa_version(spa);
2664 	spa_close(spa, FTAG);
2665 
2666 	zplver = zfs_zpl_version_map(spa_vers);
2667 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2668 	sa_ok = (zplver >= ZPL_VERSION_SA);
2669 
2670 	/*
2671 	 * Open parent object set so we can inherit zplprop values.
2672 	 */
2673 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2674 		return (error);
2675 
2676 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2677 	    zplprops, is_ci);
2678 	dmu_objset_rele(os, FTAG);
2679 	return (error);
2680 }
2681 
2682 static int
2683 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2684     nvlist_t *zplprops, boolean_t *is_ci)
2685 {
2686 	boolean_t fuids_ok;
2687 	boolean_t sa_ok;
2688 	uint64_t zplver = ZPL_VERSION;
2689 	int error;
2690 
2691 	zplver = zfs_zpl_version_map(spa_vers);
2692 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2693 	sa_ok = (zplver >= ZPL_VERSION_SA);
2694 
2695 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2696 	    createprops, zplprops, is_ci);
2697 	return (error);
2698 }
2699 
2700 /*
2701  * inputs:
2702  * zc_objset_type	type of objset to create (fs vs zvol)
2703  * zc_name		name of new objset
2704  * zc_value		name of snapshot to clone from (may be empty)
2705  * zc_nvlist_src{_size}	nvlist of properties to apply
2706  *
2707  * outputs: none
2708  */
2709 static int
2710 zfs_ioc_create(zfs_cmd_t *zc)
2711 {
2712 	objset_t *clone;
2713 	int error = 0;
2714 	zfs_creat_t zct;
2715 	nvlist_t *nvprops = NULL;
2716 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2717 	dmu_objset_type_t type = zc->zc_objset_type;
2718 
2719 	switch (type) {
2720 
2721 	case DMU_OST_ZFS:
2722 		cbfunc = zfs_create_cb;
2723 		break;
2724 
2725 	case DMU_OST_ZVOL:
2726 		cbfunc = zvol_create_cb;
2727 		break;
2728 
2729 	default:
2730 		cbfunc = NULL;
2731 		break;
2732 	}
2733 	if (strchr(zc->zc_name, '@') ||
2734 	    strchr(zc->zc_name, '%'))
2735 		return (EINVAL);
2736 
2737 	if (zc->zc_nvlist_src != NULL &&
2738 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2739 	    zc->zc_iflags, &nvprops)) != 0)
2740 		return (error);
2741 
2742 	zct.zct_zplprops = NULL;
2743 	zct.zct_props = nvprops;
2744 
2745 	if (zc->zc_value[0] != '\0') {
2746 		/*
2747 		 * We're creating a clone of an existing snapshot.
2748 		 */
2749 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2750 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2751 			nvlist_free(nvprops);
2752 			return (EINVAL);
2753 		}
2754 
2755 		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2756 		if (error) {
2757 			nvlist_free(nvprops);
2758 			return (error);
2759 		}
2760 
2761 		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2762 		dmu_objset_rele(clone, FTAG);
2763 		if (error) {
2764 			nvlist_free(nvprops);
2765 			return (error);
2766 		}
2767 	} else {
2768 		boolean_t is_insensitive = B_FALSE;
2769 
2770 		if (cbfunc == NULL) {
2771 			nvlist_free(nvprops);
2772 			return (EINVAL);
2773 		}
2774 
2775 		if (type == DMU_OST_ZVOL) {
2776 			uint64_t volsize, volblocksize;
2777 
2778 			if (nvprops == NULL ||
2779 			    nvlist_lookup_uint64(nvprops,
2780 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2781 			    &volsize) != 0) {
2782 				nvlist_free(nvprops);
2783 				return (EINVAL);
2784 			}
2785 
2786 			if ((error = nvlist_lookup_uint64(nvprops,
2787 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2788 			    &volblocksize)) != 0 && error != ENOENT) {
2789 				nvlist_free(nvprops);
2790 				return (EINVAL);
2791 			}
2792 
2793 			if (error != 0)
2794 				volblocksize = zfs_prop_default_numeric(
2795 				    ZFS_PROP_VOLBLOCKSIZE);
2796 
2797 			if ((error = zvol_check_volblocksize(
2798 			    volblocksize)) != 0 ||
2799 			    (error = zvol_check_volsize(volsize,
2800 			    volblocksize)) != 0) {
2801 				nvlist_free(nvprops);
2802 				return (error);
2803 			}
2804 		} else if (type == DMU_OST_ZFS) {
2805 			int error;
2806 
2807 			/*
2808 			 * We have to have normalization and
2809 			 * case-folding flags correct when we do the
2810 			 * file system creation, so go figure them out
2811 			 * now.
2812 			 */
2813 			VERIFY(nvlist_alloc(&zct.zct_zplprops,
2814 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2815 			error = zfs_fill_zplprops(zc->zc_name, nvprops,
2816 			    zct.zct_zplprops, &is_insensitive);
2817 			if (error != 0) {
2818 				nvlist_free(nvprops);
2819 				nvlist_free(zct.zct_zplprops);
2820 				return (error);
2821 			}
2822 		}
2823 		error = dmu_objset_create(zc->zc_name, type,
2824 		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
2825 		nvlist_free(zct.zct_zplprops);
2826 	}
2827 
2828 	/*
2829 	 * It would be nice to do this atomically.
2830 	 */
2831 	if (error == 0) {
2832 		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
2833 		    nvprops, NULL);
2834 		if (error != 0)
2835 			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
2836 	}
2837 	nvlist_free(nvprops);
2838 	return (error);
2839 }
2840 
2841 /*
2842  * inputs:
2843  * zc_name	name of filesystem
2844  * zc_value	short name of snapshot
2845  * zc_cookie	recursive flag
2846  * zc_nvlist_src[_size] property list
2847  *
2848  * outputs:
2849  * zc_value	short snapname (i.e. part after the '@')
2850  */
2851 static int
2852 zfs_ioc_snapshot(zfs_cmd_t *zc)
2853 {
2854 	nvlist_t *nvprops = NULL;
2855 	int error;
2856 	boolean_t recursive = zc->zc_cookie;
2857 
2858 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2859 		return (EINVAL);
2860 
2861 	if (zc->zc_nvlist_src != NULL &&
2862 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2863 	    zc->zc_iflags, &nvprops)) != 0)
2864 		return (error);
2865 
2866 	error = zfs_check_userprops(zc->zc_name, nvprops);
2867 	if (error)
2868 		goto out;
2869 
2870 	if (!nvlist_empty(nvprops) &&
2871 	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
2872 		error = ENOTSUP;
2873 		goto out;
2874 	}
2875 
2876 	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value,
2877 	    nvprops, recursive);
2878 
2879 out:
2880 	nvlist_free(nvprops);
2881 	return (error);
2882 }
2883 
2884 int
2885 zfs_unmount_snap(const char *name, void *arg)
2886 {
2887 	vfs_t *vfsp = NULL;
2888 
2889 	if (arg) {
2890 		char *snapname = arg;
2891 		char *fullname = kmem_asprintf("%s@%s", name, snapname);
2892 		vfsp = zfs_get_vfs(fullname);
2893 		strfree(fullname);
2894 	} else if (strchr(name, '@')) {
2895 		vfsp = zfs_get_vfs(name);
2896 	}
2897 
2898 	if (vfsp) {
2899 		/*
2900 		 * Always force the unmount for snapshots.
2901 		 */
2902 		int flag = MS_FORCE;
2903 		int err;
2904 
2905 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
2906 			VFS_RELE(vfsp);
2907 			return (err);
2908 		}
2909 		VFS_RELE(vfsp);
2910 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
2911 			return (err);
2912 	}
2913 	return (0);
2914 }
2915 
2916 /*
2917  * inputs:
2918  * zc_name		name of filesystem
2919  * zc_value		short name of snapshot
2920  * zc_defer_destroy	mark for deferred destroy
2921  *
2922  * outputs:	none
2923  */
2924 static int
2925 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
2926 {
2927 	int err;
2928 
2929 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2930 		return (EINVAL);
2931 	err = dmu_objset_find(zc->zc_name,
2932 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
2933 	if (err)
2934 		return (err);
2935 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
2936 	    zc->zc_defer_destroy));
2937 }
2938 
2939 /*
2940  * inputs:
2941  * zc_name		name of dataset to destroy
2942  * zc_objset_type	type of objset
2943  * zc_defer_destroy	mark for deferred destroy
2944  *
2945  * outputs:		none
2946  */
2947 static int
2948 zfs_ioc_destroy(zfs_cmd_t *zc)
2949 {
2950 	int err;
2951 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
2952 		err = zfs_unmount_snap(zc->zc_name, NULL);
2953 		if (err)
2954 			return (err);
2955 	}
2956 
2957 	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
2958 	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
2959 		(void) zvol_remove_minor(zc->zc_name);
2960 	return (err);
2961 }
2962 
2963 /*
2964  * inputs:
2965  * zc_name	name of dataset to rollback (to most recent snapshot)
2966  *
2967  * outputs:	none
2968  */
2969 static int
2970 zfs_ioc_rollback(zfs_cmd_t *zc)
2971 {
2972 	dsl_dataset_t *ds, *clone;
2973 	int error;
2974 	zfsvfs_t *zfsvfs;
2975 	char *clone_name;
2976 
2977 	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
2978 	if (error)
2979 		return (error);
2980 
2981 	/* must not be a snapshot */
2982 	if (dsl_dataset_is_snapshot(ds)) {
2983 		dsl_dataset_rele(ds, FTAG);
2984 		return (EINVAL);
2985 	}
2986 
2987 	/* must have a most recent snapshot */
2988 	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
2989 		dsl_dataset_rele(ds, FTAG);
2990 		return (EINVAL);
2991 	}
2992 
2993 	/*
2994 	 * Create clone of most recent snapshot.
2995 	 */
2996 	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
2997 	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
2998 	if (error)
2999 		goto out;
3000 
3001 	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
3002 	if (error)
3003 		goto out;
3004 
3005 	/*
3006 	 * Do clone swap.
3007 	 */
3008 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3009 		error = zfs_suspend_fs(zfsvfs);
3010 		if (error == 0) {
3011 			int resume_err;
3012 
3013 			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3014 				error = dsl_dataset_clone_swap(clone, ds,
3015 				    B_TRUE);
3016 				dsl_dataset_disown(ds, FTAG);
3017 				ds = NULL;
3018 			} else {
3019 				error = EBUSY;
3020 			}
3021 			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
3022 			error = error ? error : resume_err;
3023 		}
3024 		VFS_RELE(zfsvfs->z_vfs);
3025 	} else {
3026 		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3027 			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
3028 			dsl_dataset_disown(ds, FTAG);
3029 			ds = NULL;
3030 		} else {
3031 			error = EBUSY;
3032 		}
3033 	}
3034 
3035 	/*
3036 	 * Destroy clone (which also closes it).
3037 	 */
3038 	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
3039 
3040 out:
3041 	strfree(clone_name);
3042 	if (ds)
3043 		dsl_dataset_rele(ds, FTAG);
3044 	return (error);
3045 }
3046 
3047 /*
3048  * inputs:
3049  * zc_name	old name of dataset
3050  * zc_value	new name of dataset
3051  * zc_cookie	recursive flag (only valid for snapshots)
3052  *
3053  * outputs:	none
3054  */
3055 static int
3056 zfs_ioc_rename(zfs_cmd_t *zc)
3057 {
3058 	boolean_t recursive = zc->zc_cookie & 1;
3059 
3060 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3061 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3062 	    strchr(zc->zc_value, '%'))
3063 		return (EINVAL);
3064 
3065 	/*
3066 	 * Unmount snapshot unless we're doing a recursive rename,
3067 	 * in which case the dataset code figures out which snapshots
3068 	 * to unmount.
3069 	 */
3070 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3071 	    zc->zc_objset_type == DMU_OST_ZFS) {
3072 		int err = zfs_unmount_snap(zc->zc_name, NULL);
3073 		if (err)
3074 			return (err);
3075 	}
3076 	if (zc->zc_objset_type == DMU_OST_ZVOL)
3077 		(void) zvol_remove_minor(zc->zc_name);
3078 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3079 }
3080 
3081 static int
3082 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3083 {
3084 	const char *propname = nvpair_name(pair);
3085 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3086 	zfs_prop_t prop = zfs_name_to_prop(propname);
3087 	uint64_t intval;
3088 	int err;
3089 
3090 	if (prop == ZPROP_INVAL) {
3091 		if (zfs_prop_user(propname)) {
3092 			if (err = zfs_secpolicy_write_perms(dsname,
3093 			    ZFS_DELEG_PERM_USERPROP, cr))
3094 				return (err);
3095 			return (0);
3096 		}
3097 
3098 		if (!issnap && zfs_prop_userquota(propname)) {
3099 			const char *perm = NULL;
3100 			const char *uq_prefix =
3101 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3102 			const char *gq_prefix =
3103 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3104 
3105 			if (strncmp(propname, uq_prefix,
3106 			    strlen(uq_prefix)) == 0) {
3107 				perm = ZFS_DELEG_PERM_USERQUOTA;
3108 			} else if (strncmp(propname, gq_prefix,
3109 			    strlen(gq_prefix)) == 0) {
3110 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3111 			} else {
3112 				/* USERUSED and GROUPUSED are read-only */
3113 				return (EINVAL);
3114 			}
3115 
3116 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3117 				return (err);
3118 			return (0);
3119 		}
3120 
3121 		return (EINVAL);
3122 	}
3123 
3124 	if (issnap)
3125 		return (EINVAL);
3126 
3127 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3128 		/*
3129 		 * dsl_prop_get_all_impl() returns properties in this
3130 		 * format.
3131 		 */
3132 		nvlist_t *attrs;
3133 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3134 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3135 		    &pair) == 0);
3136 	}
3137 
3138 	/*
3139 	 * Check that this value is valid for this pool version
3140 	 */
3141 	switch (prop) {
3142 	case ZFS_PROP_COMPRESSION:
3143 		/*
3144 		 * If the user specified gzip compression, make sure
3145 		 * the SPA supports it. We ignore any errors here since
3146 		 * we'll catch them later.
3147 		 */
3148 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3149 		    nvpair_value_uint64(pair, &intval) == 0) {
3150 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3151 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3152 			    zfs_earlier_version(dsname,
3153 			    SPA_VERSION_GZIP_COMPRESSION)) {
3154 				return (ENOTSUP);
3155 			}
3156 
3157 			if (intval == ZIO_COMPRESS_ZLE &&
3158 			    zfs_earlier_version(dsname,
3159 			    SPA_VERSION_ZLE_COMPRESSION))
3160 				return (ENOTSUP);
3161 
3162 			/*
3163 			 * If this is a bootable dataset then
3164 			 * verify that the compression algorithm
3165 			 * is supported for booting. We must return
3166 			 * something other than ENOTSUP since it
3167 			 * implies a downrev pool version.
3168 			 */
3169 			if (zfs_is_bootfs(dsname) &&
3170 			    !BOOTFS_COMPRESS_VALID(intval)) {
3171 				return (ERANGE);
3172 			}
3173 		}
3174 		break;
3175 
3176 	case ZFS_PROP_COPIES:
3177 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3178 			return (ENOTSUP);
3179 		break;
3180 
3181 	case ZFS_PROP_DEDUP:
3182 		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3183 			return (ENOTSUP);
3184 		break;
3185 
3186 	case ZFS_PROP_SHARESMB:
3187 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3188 			return (ENOTSUP);
3189 		break;
3190 
3191 	case ZFS_PROP_ACLINHERIT:
3192 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3193 		    nvpair_value_uint64(pair, &intval) == 0) {
3194 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3195 			    zfs_earlier_version(dsname,
3196 			    SPA_VERSION_PASSTHROUGH_X))
3197 				return (ENOTSUP);
3198 		}
3199 		break;
3200 	}
3201 
3202 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3203 }
3204 
3205 /*
3206  * Removes properties from the given props list that fail permission checks
3207  * needed to clear them and to restore them in case of a receive error. For each
3208  * property, make sure we have both set and inherit permissions.
3209  *
3210  * Returns the first error encountered if any permission checks fail. If the
3211  * caller provides a non-NULL errlist, it also gives the complete list of names
3212  * of all the properties that failed a permission check along with the
3213  * corresponding error numbers. The caller is responsible for freeing the
3214  * returned errlist.
3215  *
3216  * If every property checks out successfully, zero is returned and the list
3217  * pointed at by errlist is NULL.
3218  */
3219 static int
3220 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3221 {
3222 	zfs_cmd_t *zc;
3223 	nvpair_t *pair, *next_pair;
3224 	nvlist_t *errors;
3225 	int err, rv = 0;
3226 
3227 	if (props == NULL)
3228 		return (0);
3229 
3230 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3231 
3232 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3233 	(void) strcpy(zc->zc_name, dataset);
3234 	pair = nvlist_next_nvpair(props, NULL);
3235 	while (pair != NULL) {
3236 		next_pair = nvlist_next_nvpair(props, pair);
3237 
3238 		(void) strcpy(zc->zc_value, nvpair_name(pair));
3239 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3240 		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3241 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3242 			VERIFY(nvlist_add_int32(errors,
3243 			    zc->zc_value, err) == 0);
3244 		}
3245 		pair = next_pair;
3246 	}
3247 	kmem_free(zc, sizeof (zfs_cmd_t));
3248 
3249 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3250 		nvlist_free(errors);
3251 		errors = NULL;
3252 	} else {
3253 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
3254 	}
3255 
3256 	if (errlist == NULL)
3257 		nvlist_free(errors);
3258 	else
3259 		*errlist = errors;
3260 
3261 	return (rv);
3262 }
3263 
3264 static boolean_t
3265 propval_equals(nvpair_t *p1, nvpair_t *p2)
3266 {
3267 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3268 		/* dsl_prop_get_all_impl() format */
3269 		nvlist_t *attrs;
3270 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3271 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3272 		    &p1) == 0);
3273 	}
3274 
3275 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3276 		nvlist_t *attrs;
3277 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3278 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3279 		    &p2) == 0);
3280 	}
3281 
3282 	if (nvpair_type(p1) != nvpair_type(p2))
3283 		return (B_FALSE);
3284 
3285 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
3286 		char *valstr1, *valstr2;
3287 
3288 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3289 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3290 		return (strcmp(valstr1, valstr2) == 0);
3291 	} else {
3292 		uint64_t intval1, intval2;
3293 
3294 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3295 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3296 		return (intval1 == intval2);
3297 	}
3298 }
3299 
3300 /*
3301  * Remove properties from props if they are not going to change (as determined
3302  * by comparison with origprops). Remove them from origprops as well, since we
3303  * do not need to clear or restore properties that won't change.
3304  */
3305 static void
3306 props_reduce(nvlist_t *props, nvlist_t *origprops)
3307 {
3308 	nvpair_t *pair, *next_pair;
3309 
3310 	if (origprops == NULL)
3311 		return; /* all props need to be received */
3312 
3313 	pair = nvlist_next_nvpair(props, NULL);
3314 	while (pair != NULL) {
3315 		const char *propname = nvpair_name(pair);
3316 		nvpair_t *match;
3317 
3318 		next_pair = nvlist_next_nvpair(props, pair);
3319 
3320 		if ((nvlist_lookup_nvpair(origprops, propname,
3321 		    &match) != 0) || !propval_equals(pair, match))
3322 			goto next; /* need to set received value */
3323 
3324 		/* don't clear the existing received value */
3325 		(void) nvlist_remove_nvpair(origprops, match);
3326 		/* don't bother receiving the property */
3327 		(void) nvlist_remove_nvpair(props, pair);
3328 next:
3329 		pair = next_pair;
3330 	}
3331 }
3332 
3333 #ifdef	DEBUG
3334 static boolean_t zfs_ioc_recv_inject_err;
3335 #endif
3336 
3337 /*
3338  * inputs:
3339  * zc_name		name of containing filesystem
3340  * zc_nvlist_src{_size}	nvlist of properties to apply
3341  * zc_value		name of snapshot to create
3342  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
3343  * zc_cookie		file descriptor to recv from
3344  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
3345  * zc_guid		force flag
3346  * zc_cleanup_fd	cleanup-on-exit file descriptor
3347  * zc_action_handle	handle for this guid/ds mapping (or zero on first call)
3348  *
3349  * outputs:
3350  * zc_cookie		number of bytes read
3351  * zc_nvlist_dst{_size} error for each unapplied received property
3352  * zc_obj		zprop_errflags_t
3353  * zc_action_handle	handle for this guid/ds mapping
3354  */
3355 static int
3356 zfs_ioc_recv(zfs_cmd_t *zc)
3357 {
3358 	file_t *fp;
3359 	objset_t *os;
3360 	dmu_recv_cookie_t drc;
3361 	boolean_t force = (boolean_t)zc->zc_guid;
3362 	int fd;
3363 	int error = 0;
3364 	int props_error = 0;
3365 	nvlist_t *errors;
3366 	offset_t off;
3367 	nvlist_t *props = NULL; /* sent properties */
3368 	nvlist_t *origprops = NULL; /* existing properties */
3369 	objset_t *origin = NULL;
3370 	char *tosnap;
3371 	char tofs[ZFS_MAXNAMELEN];
3372 	boolean_t first_recvd_props = B_FALSE;
3373 
3374 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3375 	    strchr(zc->zc_value, '@') == NULL ||
3376 	    strchr(zc->zc_value, '%'))
3377 		return (EINVAL);
3378 
3379 	(void) strcpy(tofs, zc->zc_value);
3380 	tosnap = strchr(tofs, '@');
3381 	*tosnap++ = '\0';
3382 
3383 	if (zc->zc_nvlist_src != NULL &&
3384 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3385 	    zc->zc_iflags, &props)) != 0)
3386 		return (error);
3387 
3388 	fd = zc->zc_cookie;
3389 	fp = getf(fd);
3390 	if (fp == NULL) {
3391 		nvlist_free(props);
3392 		return (EBADF);
3393 	}
3394 
3395 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3396 
3397 	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
3398 		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
3399 		    !dsl_prop_get_hasrecvd(os)) {
3400 			first_recvd_props = B_TRUE;
3401 		}
3402 
3403 		/*
3404 		 * If new received properties are supplied, they are to
3405 		 * completely replace the existing received properties, so stash
3406 		 * away the existing ones.
3407 		 */
3408 		if (dsl_prop_get_received(os, &origprops) == 0) {
3409 			nvlist_t *errlist = NULL;
3410 			/*
3411 			 * Don't bother writing a property if its value won't
3412 			 * change (and avoid the unnecessary security checks).
3413 			 *
3414 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
3415 			 * special case where we blow away all local properties
3416 			 * regardless.
3417 			 */
3418 			if (!first_recvd_props)
3419 				props_reduce(props, origprops);
3420 			if (zfs_check_clearable(tofs, origprops,
3421 			    &errlist) != 0)
3422 				(void) nvlist_merge(errors, errlist, 0);
3423 			nvlist_free(errlist);
3424 		}
3425 
3426 		dmu_objset_rele(os, FTAG);
3427 	}
3428 
3429 	if (zc->zc_string[0]) {
3430 		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
3431 		if (error)
3432 			goto out;
3433 	}
3434 
3435 	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
3436 	    &zc->zc_begin_record, force, origin, &drc);
3437 	if (origin)
3438 		dmu_objset_rele(origin, FTAG);
3439 	if (error)
3440 		goto out;
3441 
3442 	/*
3443 	 * Set properties before we receive the stream so that they are applied
3444 	 * to the new data. Note that we must call dmu_recv_stream() if
3445 	 * dmu_recv_begin() succeeds.
3446 	 */
3447 	if (props) {
3448 		nvlist_t *errlist;
3449 
3450 		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
3451 			if (drc.drc_newfs) {
3452 				if (spa_version(os->os_spa) >=
3453 				    SPA_VERSION_RECVD_PROPS)
3454 					first_recvd_props = B_TRUE;
3455 			} else if (origprops != NULL) {
3456 				if (clear_received_props(os, tofs, origprops,
3457 				    first_recvd_props ? NULL : props) != 0)
3458 					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3459 			} else {
3460 				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3461 			}
3462 			dsl_prop_set_hasrecvd(os);
3463 		} else if (!drc.drc_newfs) {
3464 			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3465 		}
3466 
3467 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
3468 		    props, &errlist);
3469 		(void) nvlist_merge(errors, errlist, 0);
3470 		nvlist_free(errlist);
3471 	}
3472 
3473 	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
3474 		/*
3475 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
3476 		 * size or supplied an invalid address.
3477 		 */
3478 		props_error = EINVAL;
3479 	}
3480 
3481 	off = fp->f_offset;
3482 	error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
3483 	    &zc->zc_action_handle);
3484 
3485 	if (error == 0) {
3486 		zfsvfs_t *zfsvfs = NULL;
3487 
3488 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
3489 			/* online recv */
3490 			int end_err;
3491 
3492 			error = zfs_suspend_fs(zfsvfs);
3493 			/*
3494 			 * If the suspend fails, then the recv_end will
3495 			 * likely also fail, and clean up after itself.
3496 			 */
3497 			end_err = dmu_recv_end(&drc);
3498 			if (error == 0)
3499 				error = zfs_resume_fs(zfsvfs, tofs);
3500 			error = error ? error : end_err;
3501 			VFS_RELE(zfsvfs->z_vfs);
3502 		} else {
3503 			error = dmu_recv_end(&drc);
3504 		}
3505 	}
3506 
3507 	zc->zc_cookie = off - fp->f_offset;
3508 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3509 		fp->f_offset = off;
3510 
3511 #ifdef	DEBUG
3512 	if (zfs_ioc_recv_inject_err) {
3513 		zfs_ioc_recv_inject_err = B_FALSE;
3514 		error = 1;
3515 	}
3516 #endif
3517 	/*
3518 	 * On error, restore the original props.
3519 	 */
3520 	if (error && props) {
3521 		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
3522 			if (clear_received_props(os, tofs, props, NULL) != 0) {
3523 				/*
3524 				 * We failed to clear the received properties.
3525 				 * Since we may have left a $recvd value on the
3526 				 * system, we can't clear the $hasrecvd flag.
3527 				 */
3528 				zc->zc_obj |= ZPROP_ERR_NORESTORE;
3529 			} else if (first_recvd_props) {
3530 				dsl_prop_unset_hasrecvd(os);
3531 			}
3532 			dmu_objset_rele(os, FTAG);
3533 		} else if (!drc.drc_newfs) {
3534 			/* We failed to clear the received properties. */
3535 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3536 		}
3537 
3538 		if (origprops == NULL && !drc.drc_newfs) {
3539 			/* We failed to stash the original properties. */
3540 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3541 		}
3542 
3543 		/*
3544 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
3545 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
3546 		 * explictly if we're restoring local properties cleared in the
3547 		 * first new-style receive.
3548 		 */
3549 		if (origprops != NULL &&
3550 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
3551 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
3552 		    origprops, NULL) != 0) {
3553 			/*
3554 			 * We stashed the original properties but failed to
3555 			 * restore them.
3556 			 */
3557 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3558 		}
3559 	}
3560 out:
3561 	nvlist_free(props);
3562 	nvlist_free(origprops);
3563 	nvlist_free(errors);
3564 	releasef(fd);
3565 
3566 	if (error == 0)
3567 		error = props_error;
3568 
3569 	return (error);
3570 }
3571 
3572 /*
3573  * inputs:
3574  * zc_name	name of snapshot to send
3575  * zc_value	short name of incremental fromsnap (may be empty)
3576  * zc_cookie	file descriptor to send stream to
3577  * zc_obj	fromorigin flag (mutually exclusive with zc_value)
3578  *
3579  * outputs: none
3580  */
3581 static int
3582 zfs_ioc_send(zfs_cmd_t *zc)
3583 {
3584 	objset_t *fromsnap = NULL;
3585 	objset_t *tosnap;
3586 	file_t *fp;
3587 	int error;
3588 	offset_t off;
3589 
3590 	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
3591 	if (error)
3592 		return (error);
3593 
3594 	if (zc->zc_value[0] != '\0') {
3595 		char *buf;
3596 		char *cp;
3597 
3598 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3599 		(void) strncpy(buf, zc->zc_name, MAXPATHLEN);
3600 		cp = strchr(buf, '@');
3601 		if (cp)
3602 			*(cp+1) = 0;
3603 		(void) strncat(buf, zc->zc_value, MAXPATHLEN);
3604 		error = dmu_objset_hold(buf, FTAG, &fromsnap);
3605 		kmem_free(buf, MAXPATHLEN);
3606 		if (error) {
3607 			dmu_objset_rele(tosnap, FTAG);
3608 			return (error);
3609 		}
3610 	}
3611 
3612 	fp = getf(zc->zc_cookie);
3613 	if (fp == NULL) {
3614 		dmu_objset_rele(tosnap, FTAG);
3615 		if (fromsnap)
3616 			dmu_objset_rele(fromsnap, FTAG);
3617 		return (EBADF);
3618 	}
3619 
3620 	off = fp->f_offset;
3621 	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
3622 
3623 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3624 		fp->f_offset = off;
3625 	releasef(zc->zc_cookie);
3626 	if (fromsnap)
3627 		dmu_objset_rele(fromsnap, FTAG);
3628 	dmu_objset_rele(tosnap, FTAG);
3629 	return (error);
3630 }
3631 
3632 static int
3633 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3634 {
3635 	int id, error;
3636 
3637 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3638 	    &zc->zc_inject_record);
3639 
3640 	if (error == 0)
3641 		zc->zc_guid = (uint64_t)id;
3642 
3643 	return (error);
3644 }
3645 
3646 static int
3647 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3648 {
3649 	return (zio_clear_fault((int)zc->zc_guid));
3650 }
3651 
3652 static int
3653 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3654 {
3655 	int id = (int)zc->zc_guid;
3656 	int error;
3657 
3658 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3659 	    &zc->zc_inject_record);
3660 
3661 	zc->zc_guid = id;
3662 
3663 	return (error);
3664 }
3665 
3666 static int
3667 zfs_ioc_error_log(zfs_cmd_t *zc)
3668 {
3669 	spa_t *spa;
3670 	int error;
3671 	size_t count = (size_t)zc->zc_nvlist_dst_size;
3672 
3673 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
3674 		return (error);
3675 
3676 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
3677 	    &count);
3678 	if (error == 0)
3679 		zc->zc_nvlist_dst_size = count;
3680 	else
3681 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
3682 
3683 	spa_close(spa, FTAG);
3684 
3685 	return (error);
3686 }
3687 
3688 static int
3689 zfs_ioc_clear(zfs_cmd_t *zc)
3690 {
3691 	spa_t *spa;
3692 	vdev_t *vd;
3693 	int error;
3694 
3695 	/*
3696 	 * On zpool clear we also fix up missing slogs
3697 	 */
3698 	mutex_enter(&spa_namespace_lock);
3699 	spa = spa_lookup(zc->zc_name);
3700 	if (spa == NULL) {
3701 		mutex_exit(&spa_namespace_lock);
3702 		return (EIO);
3703 	}
3704 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
3705 		/* we need to let spa_open/spa_load clear the chains */
3706 		spa_set_log_state(spa, SPA_LOG_CLEAR);
3707 	}
3708 	spa->spa_last_open_failed = 0;
3709 	mutex_exit(&spa_namespace_lock);
3710 
3711 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
3712 		error = spa_open(zc->zc_name, &spa, FTAG);
3713 	} else {
3714 		nvlist_t *policy;
3715 		nvlist_t *config = NULL;
3716 
3717 		if (zc->zc_nvlist_src == NULL)
3718 			return (EINVAL);
3719 
3720 		if ((error = get_nvlist(zc->zc_nvlist_src,
3721 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
3722 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
3723 			    policy, &config);
3724 			if (config != NULL) {
3725 				(void) put_nvlist(zc, config);
3726 				nvlist_free(config);
3727 			}
3728 			nvlist_free(policy);
3729 		}
3730 	}
3731 
3732 	if (error)
3733 		return (error);
3734 
3735 	spa_vdev_state_enter(spa, SCL_NONE);
3736 
3737 	if (zc->zc_guid == 0) {
3738 		vd = NULL;
3739 	} else {
3740 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
3741 		if (vd == NULL) {
3742 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
3743 			spa_close(spa, FTAG);
3744 			return (ENODEV);
3745 		}
3746 	}
3747 
3748 	vdev_clear(spa, vd);
3749 
3750 	(void) spa_vdev_state_exit(spa, NULL, 0);
3751 
3752 	/*
3753 	 * Resume any suspended I/Os.
3754 	 */
3755 	if (zio_resume(spa) != 0)
3756 		error = EIO;
3757 
3758 	spa_close(spa, FTAG);
3759 
3760 	return (error);
3761 }
3762 
3763 /*
3764  * inputs:
3765  * zc_name	name of filesystem
3766  * zc_value	name of origin snapshot
3767  *
3768  * outputs:
3769  * zc_string	name of conflicting snapshot, if there is one
3770  */
3771 static int
3772 zfs_ioc_promote(zfs_cmd_t *zc)
3773 {
3774 	char *cp;
3775 
3776 	/*
3777 	 * We don't need to unmount *all* the origin fs's snapshots, but
3778 	 * it's easier.
3779 	 */
3780 	cp = strchr(zc->zc_value, '@');
3781 	if (cp)
3782 		*cp = '\0';
3783 	(void) dmu_objset_find(zc->zc_value,
3784 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
3785 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
3786 }
3787 
3788 /*
3789  * Retrieve a single {user|group}{used|quota}@... property.
3790  *
3791  * inputs:
3792  * zc_name	name of filesystem
3793  * zc_objset_type zfs_userquota_prop_t
3794  * zc_value	domain name (eg. "S-1-234-567-89")
3795  * zc_guid	RID/UID/GID
3796  *
3797  * outputs:
3798  * zc_cookie	property value
3799  */
3800 static int
3801 zfs_ioc_userspace_one(zfs_cmd_t *zc)
3802 {
3803 	zfsvfs_t *zfsvfs;
3804 	int error;
3805 
3806 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
3807 		return (EINVAL);
3808 
3809 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
3810 	if (error)
3811 		return (error);
3812 
3813 	error = zfs_userspace_one(zfsvfs,
3814 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
3815 	zfsvfs_rele(zfsvfs, FTAG);
3816 
3817 	return (error);
3818 }
3819 
3820 /*
3821  * inputs:
3822  * zc_name		name of filesystem
3823  * zc_cookie		zap cursor
3824  * zc_objset_type	zfs_userquota_prop_t
3825  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
3826  *
3827  * outputs:
3828  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
3829  * zc_cookie	zap cursor
3830  */
3831 static int
3832 zfs_ioc_userspace_many(zfs_cmd_t *zc)
3833 {
3834 	zfsvfs_t *zfsvfs;
3835 	int bufsize = zc->zc_nvlist_dst_size;
3836 
3837 	if (bufsize <= 0)
3838 		return (ENOMEM);
3839 
3840 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
3841 	if (error)
3842 		return (error);
3843 
3844 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
3845 
3846 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
3847 	    buf, &zc->zc_nvlist_dst_size);
3848 
3849 	if (error == 0) {
3850 		error = xcopyout(buf,
3851 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
3852 		    zc->zc_nvlist_dst_size);
3853 	}
3854 	kmem_free(buf, bufsize);
3855 	zfsvfs_rele(zfsvfs, FTAG);
3856 
3857 	return (error);
3858 }
3859 
3860 /*
3861  * inputs:
3862  * zc_name		name of filesystem
3863  *
3864  * outputs:
3865  * none
3866  */
3867 static int
3868 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
3869 {
3870 	objset_t *os;
3871 	int error = 0;
3872 	zfsvfs_t *zfsvfs;
3873 
3874 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3875 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
3876 			/*
3877 			 * If userused is not enabled, it may be because the
3878 			 * objset needs to be closed & reopened (to grow the
3879 			 * objset_phys_t).  Suspend/resume the fs will do that.
3880 			 */
3881 			error = zfs_suspend_fs(zfsvfs);
3882 			if (error == 0)
3883 				error = zfs_resume_fs(zfsvfs, zc->zc_name);
3884 		}
3885 		if (error == 0)
3886 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
3887 		VFS_RELE(zfsvfs->z_vfs);
3888 	} else {
3889 		/* XXX kind of reading contents without owning */
3890 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
3891 		if (error)
3892 			return (error);
3893 
3894 		error = dmu_objset_userspace_upgrade(os);
3895 		dmu_objset_rele(os, FTAG);
3896 	}
3897 
3898 	return (error);
3899 }
3900 
3901 /*
3902  * We don't want to have a hard dependency
3903  * against some special symbols in sharefs
3904  * nfs, and smbsrv.  Determine them if needed when
3905  * the first file system is shared.
3906  * Neither sharefs, nfs or smbsrv are unloadable modules.
3907  */
3908 int (*znfsexport_fs)(void *arg);
3909 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
3910 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
3911 
3912 int zfs_nfsshare_inited;
3913 int zfs_smbshare_inited;
3914 
3915 ddi_modhandle_t nfs_mod;
3916 ddi_modhandle_t sharefs_mod;
3917 ddi_modhandle_t smbsrv_mod;
3918 kmutex_t zfs_share_lock;
3919 
3920 static int
3921 zfs_init_sharefs()
3922 {
3923 	int error;
3924 
3925 	ASSERT(MUTEX_HELD(&zfs_share_lock));
3926 	/* Both NFS and SMB shares also require sharetab support. */
3927 	if (sharefs_mod == NULL && ((sharefs_mod =
3928 	    ddi_modopen("fs/sharefs",
3929 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
3930 		return (ENOSYS);
3931 	}
3932 	if (zshare_fs == NULL && ((zshare_fs =
3933 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
3934 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
3935 		return (ENOSYS);
3936 	}
3937 	return (0);
3938 }
3939 
3940 static int
3941 zfs_ioc_share(zfs_cmd_t *zc)
3942 {
3943 	int error;
3944 	int opcode;
3945 
3946 	switch (zc->zc_share.z_sharetype) {
3947 	case ZFS_SHARE_NFS:
3948 	case ZFS_UNSHARE_NFS:
3949 		if (zfs_nfsshare_inited == 0) {
3950 			mutex_enter(&zfs_share_lock);
3951 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
3952 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
3953 				mutex_exit(&zfs_share_lock);
3954 				return (ENOSYS);
3955 			}
3956 			if (znfsexport_fs == NULL &&
3957 			    ((znfsexport_fs = (int (*)(void *))
3958 			    ddi_modsym(nfs_mod,
3959 			    "nfs_export", &error)) == NULL)) {
3960 				mutex_exit(&zfs_share_lock);
3961 				return (ENOSYS);
3962 			}
3963 			error = zfs_init_sharefs();
3964 			if (error) {
3965 				mutex_exit(&zfs_share_lock);
3966 				return (ENOSYS);
3967 			}
3968 			zfs_nfsshare_inited = 1;
3969 			mutex_exit(&zfs_share_lock);
3970 		}
3971 		break;
3972 	case ZFS_SHARE_SMB:
3973 	case ZFS_UNSHARE_SMB:
3974 		if (zfs_smbshare_inited == 0) {
3975 			mutex_enter(&zfs_share_lock);
3976 			if (smbsrv_mod == NULL && ((smbsrv_mod =
3977 			    ddi_modopen("drv/smbsrv",
3978 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
3979 				mutex_exit(&zfs_share_lock);
3980 				return (ENOSYS);
3981 			}
3982 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
3983 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
3984 			    "smb_server_share", &error)) == NULL)) {
3985 				mutex_exit(&zfs_share_lock);
3986 				return (ENOSYS);
3987 			}
3988 			error = zfs_init_sharefs();
3989 			if (error) {
3990 				mutex_exit(&zfs_share_lock);
3991 				return (ENOSYS);
3992 			}
3993 			zfs_smbshare_inited = 1;
3994 			mutex_exit(&zfs_share_lock);
3995 		}
3996 		break;
3997 	default:
3998 		return (EINVAL);
3999 	}
4000 
4001 	switch (zc->zc_share.z_sharetype) {
4002 	case ZFS_SHARE_NFS:
4003 	case ZFS_UNSHARE_NFS:
4004 		if (error =
4005 		    znfsexport_fs((void *)
4006 		    (uintptr_t)zc->zc_share.z_exportdata))
4007 			return (error);
4008 		break;
4009 	case ZFS_SHARE_SMB:
4010 	case ZFS_UNSHARE_SMB:
4011 		if (error = zsmbexport_fs((void *)
4012 		    (uintptr_t)zc->zc_share.z_exportdata,
4013 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4014 		    B_TRUE: B_FALSE)) {
4015 			return (error);
4016 		}
4017 		break;
4018 	}
4019 
4020 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4021 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4022 	    SHAREFS_ADD : SHAREFS_REMOVE;
4023 
4024 	/*
4025 	 * Add or remove share from sharetab
4026 	 */
4027 	error = zshare_fs(opcode,
4028 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
4029 	    zc->zc_share.z_sharemax);
4030 
4031 	return (error);
4032 
4033 }
4034 
4035 ace_t full_access[] = {
4036 	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
4037 };
4038 
4039 /*
4040  * Remove all ACL files in shares dir
4041  */
4042 static int
4043 zfs_smb_acl_purge(znode_t *dzp)
4044 {
4045 	zap_cursor_t	zc;
4046 	zap_attribute_t	zap;
4047 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4048 	int error;
4049 
4050 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4051 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4052 	    zap_cursor_advance(&zc)) {
4053 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4054 		    NULL, 0)) != 0)
4055 			break;
4056 	}
4057 	zap_cursor_fini(&zc);
4058 	return (error);
4059 }
4060 
4061 static int
4062 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4063 {
4064 	vnode_t *vp;
4065 	znode_t *dzp;
4066 	vnode_t *resourcevp = NULL;
4067 	znode_t *sharedir;
4068 	zfsvfs_t *zfsvfs;
4069 	nvlist_t *nvlist;
4070 	char *src, *target;
4071 	vattr_t vattr;
4072 	vsecattr_t vsec;
4073 	int error = 0;
4074 
4075 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4076 	    NO_FOLLOW, NULL, &vp)) != 0)
4077 		return (error);
4078 
4079 	/* Now make sure mntpnt and dataset are ZFS */
4080 
4081 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4082 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4083 	    zc->zc_name) != 0)) {
4084 		VN_RELE(vp);
4085 		return (EINVAL);
4086 	}
4087 
4088 	dzp = VTOZ(vp);
4089 	zfsvfs = dzp->z_zfsvfs;
4090 	ZFS_ENTER(zfsvfs);
4091 
4092 	/*
4093 	 * Create share dir if its missing.
4094 	 */
4095 	mutex_enter(&zfsvfs->z_lock);
4096 	if (zfsvfs->z_shares_dir == 0) {
4097 		dmu_tx_t *tx;
4098 
4099 		tx = dmu_tx_create(zfsvfs->z_os);
4100 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4101 		    ZFS_SHARES_DIR);
4102 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4103 		error = dmu_tx_assign(tx, TXG_WAIT);
4104 		if (error) {
4105 			dmu_tx_abort(tx);
4106 		} else {
4107 			error = zfs_create_share_dir(zfsvfs, tx);
4108 			dmu_tx_commit(tx);
4109 		}
4110 		if (error) {
4111 			mutex_exit(&zfsvfs->z_lock);
4112 			VN_RELE(vp);
4113 			ZFS_EXIT(zfsvfs);
4114 			return (error);
4115 		}
4116 	}
4117 	mutex_exit(&zfsvfs->z_lock);
4118 
4119 	ASSERT(zfsvfs->z_shares_dir);
4120 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4121 		VN_RELE(vp);
4122 		ZFS_EXIT(zfsvfs);
4123 		return (error);
4124 	}
4125 
4126 	switch (zc->zc_cookie) {
4127 	case ZFS_SMB_ACL_ADD:
4128 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4129 		vattr.va_type = VREG;
4130 		vattr.va_mode = S_IFREG|0777;
4131 		vattr.va_uid = 0;
4132 		vattr.va_gid = 0;
4133 
4134 		vsec.vsa_mask = VSA_ACE;
4135 		vsec.vsa_aclentp = &full_access;
4136 		vsec.vsa_aclentsz = sizeof (full_access);
4137 		vsec.vsa_aclcnt = 1;
4138 
4139 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4140 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4141 		if (resourcevp)
4142 			VN_RELE(resourcevp);
4143 		break;
4144 
4145 	case ZFS_SMB_ACL_REMOVE:
4146 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4147 		    NULL, 0);
4148 		break;
4149 
4150 	case ZFS_SMB_ACL_RENAME:
4151 		if ((error = get_nvlist(zc->zc_nvlist_src,
4152 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4153 			VN_RELE(vp);
4154 			ZFS_EXIT(zfsvfs);
4155 			return (error);
4156 		}
4157 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4158 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4159 		    &target)) {
4160 			VN_RELE(vp);
4161 			VN_RELE(ZTOV(sharedir));
4162 			ZFS_EXIT(zfsvfs);
4163 			nvlist_free(nvlist);
4164 			return (error);
4165 		}
4166 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4167 		    kcred, NULL, 0);
4168 		nvlist_free(nvlist);
4169 		break;
4170 
4171 	case ZFS_SMB_ACL_PURGE:
4172 		error = zfs_smb_acl_purge(sharedir);
4173 		break;
4174 
4175 	default:
4176 		error = EINVAL;
4177 		break;
4178 	}
4179 
4180 	VN_RELE(vp);
4181 	VN_RELE(ZTOV(sharedir));
4182 
4183 	ZFS_EXIT(zfsvfs);
4184 
4185 	return (error);
4186 }
4187 
4188 /*
4189  * inputs:
4190  * zc_name		name of filesystem
4191  * zc_value		short name of snap
4192  * zc_string		user-supplied tag for this hold
4193  * zc_cookie		recursive flag
4194  * zc_temphold		set if hold is temporary
4195  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
4196  *
4197  * outputs:		none
4198  */
4199 static int
4200 zfs_ioc_hold(zfs_cmd_t *zc)
4201 {
4202 	boolean_t recursive = zc->zc_cookie;
4203 
4204 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4205 		return (EINVAL);
4206 
4207 	return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4208 	    zc->zc_string, recursive, zc->zc_temphold, zc->zc_cleanup_fd));
4209 }
4210 
4211 /*
4212  * inputs:
4213  * zc_name	name of dataset from which we're releasing a user hold
4214  * zc_value	short name of snap
4215  * zc_string	user-supplied tag for this hold
4216  * zc_cookie	recursive flag
4217  *
4218  * outputs:	none
4219  */
4220 static int
4221 zfs_ioc_release(zfs_cmd_t *zc)
4222 {
4223 	boolean_t recursive = zc->zc_cookie;
4224 
4225 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4226 		return (EINVAL);
4227 
4228 	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4229 	    zc->zc_string, recursive));
4230 }
4231 
4232 /*
4233  * inputs:
4234  * zc_name		name of filesystem
4235  *
4236  * outputs:
4237  * zc_nvlist_src{_size}	nvlist of snapshot holds
4238  */
4239 static int
4240 zfs_ioc_get_holds(zfs_cmd_t *zc)
4241 {
4242 	nvlist_t *nvp;
4243 	int error;
4244 
4245 	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4246 		error = put_nvlist(zc, nvp);
4247 		nvlist_free(nvp);
4248 	}
4249 
4250 	return (error);
4251 }
4252 
4253 /*
4254  * pool create, destroy, and export don't log the history as part of
4255  * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
4256  * do the logging of those commands.
4257  */
4258 static zfs_ioc_vec_t zfs_ioc_vec[] = {
4259 	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4260 	    B_FALSE },
4261 	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4262 	    B_FALSE },
4263 	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4264 	    B_FALSE },
4265 	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4266 	    B_FALSE },
4267 	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
4268 	    B_FALSE },
4269 	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
4270 	    B_FALSE },
4271 	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
4272 	    B_FALSE },
4273 	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4274 	    B_TRUE },
4275 	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
4276 	    B_FALSE },
4277 	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
4278 	    B_TRUE },
4279 	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4280 	    B_FALSE },
4281 	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4282 	    B_TRUE },
4283 	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4284 	    B_TRUE },
4285 	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
4286 	    B_FALSE },
4287 	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4288 	    B_TRUE },
4289 	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4290 	    B_TRUE },
4291 	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4292 	    B_TRUE },
4293 	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
4294 	    B_TRUE },
4295 	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4296 	    B_TRUE },
4297 	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4298 	    B_FALSE },
4299 	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4300 	    B_TRUE },
4301 	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4302 	    B_TRUE },
4303 	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, B_TRUE },
4304 	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, B_TRUE },
4305 	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
4306 	    B_TRUE},
4307 	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
4308 	    B_TRUE },
4309 	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE, B_TRUE },
4310 	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, B_TRUE },
4311 	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, B_FALSE },
4312 	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
4313 	    B_FALSE },
4314 	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
4315 	    B_FALSE },
4316 	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
4317 	    B_FALSE },
4318 	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
4319 	    B_FALSE },
4320 	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, B_FALSE },
4321 	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
4322 	    B_TRUE },
4323 	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
4324 	    B_TRUE, B_TRUE },
4325 	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
4326 	    B_TRUE },
4327 	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE,
4328 	    B_FALSE },
4329 	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, DATASET_NAME, B_FALSE,
4330 	    B_TRUE },
4331 	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
4332 	    B_TRUE },
4333 	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
4334 	    B_FALSE },
4335 	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
4336 	    B_TRUE },
4337 	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4338 	    B_FALSE },
4339 	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, B_FALSE },
4340 	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
4341 	    B_TRUE },
4342 	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
4343 	    B_FALSE },
4344 	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one,
4345 	    DATASET_NAME, B_FALSE, B_FALSE },
4346 	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many,
4347 	    DATASET_NAME, B_FALSE, B_FALSE },
4348 	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
4349 	    DATASET_NAME, B_FALSE, B_TRUE },
4350 	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
4351 	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
4352 	    B_TRUE },
4353 	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4354 	    B_TRUE },
4355 	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
4356 	    B_FALSE },
4357 	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
4358 	    B_TRUE }
4359 };
4360 
4361 int
4362 pool_status_check(const char *name, zfs_ioc_namecheck_t type)
4363 {
4364 	spa_t *spa;
4365 	int error;
4366 
4367 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
4368 
4369 	error = spa_open(name, &spa, FTAG);
4370 	if (error == 0) {
4371 		if (spa_suspended(spa))
4372 			error = EAGAIN;
4373 		spa_close(spa, FTAG);
4374 	}
4375 	return (error);
4376 }
4377 
4378 /*
4379  * Find a free minor number.
4380  */
4381 minor_t
4382 zfsdev_minor_alloc(void)
4383 {
4384 	static minor_t last_minor;
4385 	minor_t m;
4386 
4387 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4388 
4389 	for (m = last_minor + 1; m != last_minor; m++) {
4390 		if (m > ZFSDEV_MAX_MINOR)
4391 			m = 1;
4392 		if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
4393 			last_minor = m;
4394 			return (m);
4395 		}
4396 	}
4397 
4398 	return (0);
4399 }
4400 
4401 static int
4402 zfs_ctldev_init(dev_t *devp)
4403 {
4404 	minor_t minor;
4405 	zfs_soft_state_t *zs;
4406 
4407 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4408 	ASSERT(getminor(*devp) == 0);
4409 
4410 	minor = zfsdev_minor_alloc();
4411 	if (minor == 0)
4412 		return (ENXIO);
4413 
4414 	if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
4415 		return (EAGAIN);
4416 
4417 	*devp = makedevice(getemajor(*devp), minor);
4418 
4419 	zs = ddi_get_soft_state(zfsdev_state, minor);
4420 	zs->zss_type = ZSST_CTLDEV;
4421 	zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
4422 
4423 	return (0);
4424 }
4425 
4426 static void
4427 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
4428 {
4429 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4430 
4431 	zfs_onexit_destroy(zo);
4432 	ddi_soft_state_free(zfsdev_state, minor);
4433 }
4434 
4435 void *
4436 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
4437 {
4438 	zfs_soft_state_t *zp;
4439 
4440 	zp = ddi_get_soft_state(zfsdev_state, minor);
4441 	if (zp == NULL || zp->zss_type != which)
4442 		return (NULL);
4443 
4444 	return (zp->zss_data);
4445 }
4446 
4447 static int
4448 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
4449 {
4450 	int error = 0;
4451 
4452 	if (getminor(*devp) != 0)
4453 		return (zvol_open(devp, flag, otyp, cr));
4454 
4455 	/* This is the control device. Allocate a new minor if requested. */
4456 	if (flag & FEXCL) {
4457 		mutex_enter(&zfsdev_state_lock);
4458 		error = zfs_ctldev_init(devp);
4459 		mutex_exit(&zfsdev_state_lock);
4460 	}
4461 
4462 	return (error);
4463 }
4464 
4465 static int
4466 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
4467 {
4468 	zfs_onexit_t *zo;
4469 	minor_t minor = getminor(dev);
4470 
4471 	if (minor == 0)
4472 		return (0);
4473 
4474 	mutex_enter(&zfsdev_state_lock);
4475 	zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
4476 	if (zo == NULL) {
4477 		mutex_exit(&zfsdev_state_lock);
4478 		return (zvol_close(dev, flag, otyp, cr));
4479 	}
4480 	zfs_ctldev_destroy(zo, minor);
4481 	mutex_exit(&zfsdev_state_lock);
4482 
4483 	return (0);
4484 }
4485 
4486 static int
4487 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
4488 {
4489 	zfs_cmd_t *zc;
4490 	uint_t vec;
4491 	int error, rc;
4492 	minor_t minor = getminor(dev);
4493 
4494 	if (minor != 0 &&
4495 	    zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
4496 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
4497 
4498 	vec = cmd - ZFS_IOC;
4499 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
4500 
4501 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
4502 		return (EINVAL);
4503 
4504 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
4505 
4506 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
4507 	if (error != 0)
4508 		error = EFAULT;
4509 
4510 	if ((error == 0) && !(flag & FKIOCTL))
4511 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
4512 
4513 	/*
4514 	 * Ensure that all pool/dataset names are valid before we pass down to
4515 	 * the lower layers.
4516 	 */
4517 	if (error == 0) {
4518 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4519 		zc->zc_iflags = flag & FKIOCTL;
4520 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
4521 		case POOL_NAME:
4522 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
4523 				error = EINVAL;
4524 			if (zfs_ioc_vec[vec].zvec_pool_check)
4525 				error = pool_status_check(zc->zc_name,
4526 				    zfs_ioc_vec[vec].zvec_namecheck);
4527 			break;
4528 
4529 		case DATASET_NAME:
4530 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
4531 				error = EINVAL;
4532 			if (zfs_ioc_vec[vec].zvec_pool_check)
4533 				error = pool_status_check(zc->zc_name,
4534 				    zfs_ioc_vec[vec].zvec_namecheck);
4535 			break;
4536 
4537 		case NO_NAME:
4538 			break;
4539 		}
4540 	}
4541 
4542 	if (error == 0)
4543 		error = zfs_ioc_vec[vec].zvec_func(zc);
4544 
4545 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
4546 	if (error == 0) {
4547 		if (rc != 0)
4548 			error = EFAULT;
4549 		if (zfs_ioc_vec[vec].zvec_his_log)
4550 			zfs_log_history(zc);
4551 	}
4552 
4553 	kmem_free(zc, sizeof (zfs_cmd_t));
4554 	return (error);
4555 }
4556 
4557 static int
4558 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4559 {
4560 	if (cmd != DDI_ATTACH)
4561 		return (DDI_FAILURE);
4562 
4563 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
4564 	    DDI_PSEUDO, 0) == DDI_FAILURE)
4565 		return (DDI_FAILURE);
4566 
4567 	zfs_dip = dip;
4568 
4569 	ddi_report_dev(dip);
4570 
4571 	return (DDI_SUCCESS);
4572 }
4573 
4574 static int
4575 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4576 {
4577 	if (spa_busy() || zfs_busy() || zvol_busy())
4578 		return (DDI_FAILURE);
4579 
4580 	if (cmd != DDI_DETACH)
4581 		return (DDI_FAILURE);
4582 
4583 	zfs_dip = NULL;
4584 
4585 	ddi_prop_remove_all(dip);
4586 	ddi_remove_minor_node(dip, NULL);
4587 
4588 	return (DDI_SUCCESS);
4589 }
4590 
4591 /*ARGSUSED*/
4592 static int
4593 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
4594 {
4595 	switch (infocmd) {
4596 	case DDI_INFO_DEVT2DEVINFO:
4597 		*result = zfs_dip;
4598 		return (DDI_SUCCESS);
4599 
4600 	case DDI_INFO_DEVT2INSTANCE:
4601 		*result = (void *)0;
4602 		return (DDI_SUCCESS);
4603 	}
4604 
4605 	return (DDI_FAILURE);
4606 }
4607 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * Only open, close and ioctl go through zfsdev_*() wrappers.  Note that
 * zfsdev_close() and zfsdev_ioctl() also treat a nonzero minor as a
 * control-device instance when its soft state is of type ZSST_CTLDEV;
 * any other nonzero minor is handed to the corresponding zvol routine.
 */
static struct cb_ops zfs_cb_ops = {
	zfsdev_open,	/* open */
	zfsdev_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
4637 
/*
 * Device operations for the zfs driver: the autoconfiguration entry points
 * (zfs_info, zfs_attach, zfs_detach) plus a pointer to the character/block
 * entry points in zfs_cb_ops.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
4652 
/*
 * Driver linkage record: names the module "ZFS storage pool" and points at
 * zfs_dev_ops.
 */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
4658 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info().
 * It lists two linkage records, zfs_modlfs and zfs_modldrv, followed by
 * the terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
4665 
4666 
/*
 * Thread-specific data keys.  Both are created with tsd_create() in _init();
 * rrw_tsd_key is only declared here (extern).
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
4669 
4670 int
4671 _init(void)
4672 {
4673 	int error;
4674 
4675 	spa_init(FREAD | FWRITE);
4676 	zfs_init();
4677 	zvol_init();
4678 
4679 	if ((error = mod_install(&modlinkage)) != 0) {
4680 		zvol_fini();
4681 		zfs_fini();
4682 		spa_fini();
4683 		return (error);
4684 	}
4685 
4686 	tsd_create(&zfs_fsyncer_key, NULL);
4687 	tsd_create(&rrw_tsd_key, NULL);
4688 
4689 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
4690 	ASSERT(error == 0);
4691 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
4692 
4693 	return (0);
4694 }
4695 
4696 int
4697 _fini(void)
4698 {
4699 	int error;
4700 
4701 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
4702 		return (EBUSY);
4703 
4704 	if ((error = mod_remove(&modlinkage)) != 0)
4705 		return (error);
4706 
4707 	zvol_fini();
4708 	zfs_fini();
4709 	spa_fini();
4710 	if (zfs_nfsshare_inited)
4711 		(void) ddi_modclose(nfs_mod);
4712 	if (zfs_smbshare_inited)
4713 		(void) ddi_modclose(smbsrv_mod);
4714 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
4715 		(void) ddi_modclose(sharefs_mod);
4716 
4717 	tsd_destroy(&zfs_fsyncer_key);
4718 	ldi_ident_release(zfs_li);
4719 	zfs_li = NULL;
4720 	mutex_destroy(&zfs_share_lock);
4721 
4722 	return (error);
4723 }
4724 
4725 int
4726 _info(struct modinfo *modinfop)
4727 {
4728 	return (mod_info(&modlinkage, modinfop));
4729 }
4730