xref: /titanic_52/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision 3f9d6ad73e45c6823b409f93b0c8d4f62861d2d5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/types.h>
26 #include <sys/param.h>
27 #include <sys/errno.h>
28 #include <sys/uio.h>
29 #include <sys/buf.h>
30 #include <sys/modctl.h>
31 #include <sys/open.h>
32 #include <sys/file.h>
33 #include <sys/kmem.h>
34 #include <sys/conf.h>
35 #include <sys/cmn_err.h>
36 #include <sys/stat.h>
37 #include <sys/zfs_ioctl.h>
38 #include <sys/zfs_vfsops.h>
39 #include <sys/zfs_znode.h>
40 #include <sys/zap.h>
41 #include <sys/spa.h>
42 #include <sys/spa_impl.h>
43 #include <sys/vdev.h>
44 #include <sys/priv_impl.h>
45 #include <sys/dmu.h>
46 #include <sys/dsl_dir.h>
47 #include <sys/dsl_dataset.h>
48 #include <sys/dsl_prop.h>
49 #include <sys/dsl_deleg.h>
50 #include <sys/dmu_objset.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunldi.h>
54 #include <sys/policy.h>
55 #include <sys/zone.h>
56 #include <sys/nvpair.h>
57 #include <sys/pathname.h>
58 #include <sys/mount.h>
59 #include <sys/sdt.h>
60 #include <sys/fs/zfs.h>
61 #include <sys/zfs_ctldir.h>
62 #include <sys/zfs_dir.h>
63 #include <sys/zvol.h>
64 #include <sys/dsl_scan.h>
65 #include <sharefs/share.h>
66 #include <sys/dmu_objset.h>
67 
68 #include "zfs_namecheck.h"
69 #include "zfs_prop.h"
70 #include "zfs_deleg.h"
71 #include "zfs_comutil.h"
72 
/* Filesystem module linkage and init/fini hooks, defined outside this file. */
73 extern struct modlfs zfs_modlfs;
74 
75 extern void zfs_init(void);
76 extern void zfs_fini(void);
77 
/* Driver-wide LDI identifier (starts out NULL) and dev_info pointer. */
78 ldi_ident_t zfs_li = NULL;
79 dev_info_t *zfs_dip;
80 
/* Signature of an ioctl handler: takes the command, returns an int status. */
81 typedef int zfs_ioc_func_t(zfs_cmd_t *);
/*
 * Signature of a security policy check.  The policy functions in this file
 * return 0 to allow the operation and an errno value to refuse it.
 */
82 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
83 
/*
 * Kind of name an ioctl carries.  Presumably selects which name validation
 * the ioctl dispatcher applies -- confirm against the dispatch code.
 */
84 typedef enum {
85 	NO_NAME,
86 	POOL_NAME,
87 	DATASET_NAME
88 } zfs_ioc_namecheck_t;
89 
/*
 * Per-ioctl descriptor: the handler, its security policy check and the kind
 * of name it takes.  NOTE(review): zvec_his_log and zvec_pool_check look like
 * "log to pool history" and "check pool state" flags -- confirm where the
 * table is consumed.
 */
90 typedef struct zfs_ioc_vec {
91 	zfs_ioc_func_t		*zvec_func;
92 	zfs_secpolicy_func_t	*zvec_secpolicy;
93 	zfs_ioc_namecheck_t	zvec_namecheck;
94 	boolean_t		zvec_his_log;
95 	boolean_t		zvec_pool_check;
96 } zfs_ioc_vec_t;
97 
98 /* This array is indexed by zfs_userquota_prop_t */
/* Delegated permission name required for each user/group quota property. */
99 static const char *userquota_perms[] = {
100 	ZFS_DELEG_PERM_USERUSED,
101 	ZFS_DELEG_PERM_USERQUOTA,
102 	ZFS_DELEG_PERM_GROUPUSED,
103 	ZFS_DELEG_PERM_GROUPQUOTA,
104 };
105 
/* Prototypes for functions that are not defined ahead of their first use. */
106 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
107 static int zfs_check_settable(const char *name, nvpair_t *property,
108     cred_t *cr);
109 static int zfs_check_clearable(char *dataset, nvlist_t *props,
110     nvlist_t **errors);
111 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
112     boolean_t *);
113 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
114 
115 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
116 void
117 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
118 {
119 	const char *newfile;
120 	char buf[512];
121 	va_list adx;
122 
123 	/*
124 	 * Get rid of annoying "../common/" prefix to filename.
125 	 */
126 	newfile = strrchr(file, '/');
127 	if (newfile != NULL) {
128 		newfile = newfile + 1; /* Get rid of leading / */
129 	} else {
130 		newfile = file;
131 	}
132 
133 	va_start(adx, fmt);
134 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
135 	va_end(adx);
136 
137 	/*
138 	 * To get this data, use the zfs-dprintf probe as so:
139 	 * dtrace -q -n 'zfs-dprintf \
140 	 *	/stringof(arg0) == "dbuf.c"/ \
141 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
142 	 * arg0 = file name
143 	 * arg1 = function name
144 	 * arg2 = line number
145 	 * arg3 = message
146 	 */
147 	DTRACE_PROBE4(zfs__dprintf,
148 	    char *, newfile, char *, func, int, line, char *, buf);
149 }
150 
151 static void
152 history_str_free(char *buf)
153 {
154 	kmem_free(buf, HIS_MAX_RECORD_LEN);
155 }
156 
157 static char *
158 history_str_get(zfs_cmd_t *zc)
159 {
160 	char *buf;
161 
162 	if (zc->zc_history == NULL)
163 		return (NULL);
164 
165 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
166 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
167 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
168 		history_str_free(buf);
169 		return (NULL);
170 	}
171 
172 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
173 
174 	return (buf);
175 }
176 
177 /*
178  * Check to see if the named dataset is currently defined as bootable
179  */
180 static boolean_t
181 zfs_is_bootfs(const char *name)
182 {
183 	objset_t *os;
184 
185 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
186 		boolean_t ret;
187 		ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
188 		dmu_objset_rele(os, FTAG);
189 		return (ret);
190 	}
191 	return (B_FALSE);
192 }
193 
194 /*
195  * zfs_earlier_version
196  *
197  *	Return non-zero if the spa version is less than requested version.
198  */
199 static int
200 zfs_earlier_version(const char *name, int version)
201 {
202 	spa_t *spa;
203 
204 	if (spa_open(name, &spa, FTAG) == 0) {
205 		if (spa_version(spa) < version) {
206 			spa_close(spa, FTAG);
207 			return (1);
208 		}
209 		spa_close(spa, FTAG);
210 	}
211 	return (0);
212 }
213 
214 /*
215  * zpl_earlier_version
216  *
217  * Return TRUE if the ZPL version is less than requested version.
218  */
219 static boolean_t
220 zpl_earlier_version(const char *name, int version)
221 {
222 	objset_t *os;
223 	boolean_t rc = B_TRUE;
224 
225 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
226 		uint64_t zplversion;
227 
228 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
229 			dmu_objset_rele(os, FTAG);
230 			return (B_TRUE);
231 		}
232 		/* XXX reading from non-owned objset */
233 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
234 			rc = zplversion < version;
235 		dmu_objset_rele(os, FTAG);
236 	}
237 	return (rc);
238 }
239 
240 static void
241 zfs_log_history(zfs_cmd_t *zc)
242 {
243 	spa_t *spa;
244 	char *buf;
245 
246 	if ((buf = history_str_get(zc)) == NULL)
247 		return;
248 
249 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
250 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
251 			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
252 		spa_close(spa, FTAG);
253 	}
254 	history_str_free(buf);
255 }
256 
257 /*
258  * Policy for top-level read operations (list pools).  Requires no privileges,
259  * and can be used in the local zone, as there is no associated dataset.
260  */
261 /* ARGSUSED */
262 static int
263 zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
264 {
265 	return (0);
266 }
267 
268 /*
269  * Policy for dataset read operations (list children, get statistics).  Requires
270  * no privileges, but must be visible in the local zone.
271  */
272 /* ARGSUSED */
273 static int
274 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
275 {
276 	if (INGLOBALZONE(curproc) ||
277 	    zone_dataset_visible(zc->zc_name, NULL))
278 		return (0);
279 
280 	return (ENOENT);
281 }
282 
283 static int
284 zfs_dozonecheck(const char *dataset, cred_t *cr)
285 {
286 	uint64_t zoned;
287 	int writable = 1;
288 
289 	/*
290 	 * The dataset must be visible by this zone -- check this first
291 	 * so they don't see EPERM on something they shouldn't know about.
292 	 */
293 	if (!INGLOBALZONE(curproc) &&
294 	    !zone_dataset_visible(dataset, &writable))
295 		return (ENOENT);
296 
297 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
298 		return (ENOENT);
299 
300 	if (INGLOBALZONE(curproc)) {
301 		/*
302 		 * If the fs is zoned, only root can access it from the
303 		 * global zone.
304 		 */
305 		if (secpolicy_zfs(cr) && zoned)
306 			return (EPERM);
307 	} else {
308 		/*
309 		 * If we are in a local zone, the 'zoned' property must be set.
310 		 */
311 		if (!zoned)
312 			return (EPERM);
313 
314 		/* must be writable by this zone */
315 		if (!writable)
316 			return (EPERM);
317 	}
318 	return (0);
319 }
320 
321 int
322 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
323 {
324 	int error;
325 
326 	error = zfs_dozonecheck(name, cr);
327 	if (error == 0) {
328 		error = secpolicy_zfs(cr);
329 		if (error)
330 			error = dsl_deleg_access(name, perm, cr);
331 	}
332 	return (error);
333 }
334 
335 /*
336  * Policy for setting the security label property.
337  *
338  * Returns 0 for success, non-zero for access and other errors.
339  */
340 static int
341 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
342 {
343 	char		ds_hexsl[MAXNAMELEN];
344 	bslabel_t	ds_sl, new_sl;
345 	boolean_t	new_default = FALSE;
346 	uint64_t	zoned;
347 	int		needed_priv = -1;
348 	int		error;
349 
350 	/* First get the existing dataset label. */
351 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
352 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
353 	if (error)
354 		return (EPERM);
355 
356 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
357 		new_default = TRUE;
358 
359 	/* The label must be translatable */
360 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
361 		return (EINVAL);
362 
363 	/*
364 	 * In a non-global zone, disallow attempts to set a label that
365 	 * doesn't match that of the zone; otherwise no other checks
366 	 * are needed.
367 	 */
368 	if (!INGLOBALZONE(curproc)) {
369 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
370 			return (EPERM);
371 		return (0);
372 	}
373 
374 	/*
375 	 * For global-zone datasets (i.e., those whose zoned property is
376 	 * "off", verify that the specified new label is valid for the
377 	 * global zone.
378 	 */
379 	if (dsl_prop_get_integer(name,
380 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
381 		return (EPERM);
382 	if (!zoned) {
383 		if (zfs_check_global_label(name, strval) != 0)
384 			return (EPERM);
385 	}
386 
387 	/*
388 	 * If the existing dataset label is nondefault, check if the
389 	 * dataset is mounted (label cannot be changed while mounted).
390 	 * Get the zfsvfs; if there isn't one, then the dataset isn't
391 	 * mounted (or isn't a dataset, doesn't exist, ...).
392 	 */
393 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
394 		objset_t *os;
395 		static char *setsl_tag = "setsl_tag";
396 
397 		/*
398 		 * Try to own the dataset; abort if there is any error,
399 		 * (e.g., already mounted, in use, or other error).
400 		 */
401 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
402 		    setsl_tag, &os);
403 		if (error)
404 			return (EPERM);
405 
406 		dmu_objset_disown(os, setsl_tag);
407 
408 		if (new_default) {
409 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
410 			goto out_check;
411 		}
412 
413 		if (hexstr_to_label(strval, &new_sl) != 0)
414 			return (EPERM);
415 
416 		if (blstrictdom(&ds_sl, &new_sl))
417 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
418 		else if (blstrictdom(&new_sl, &ds_sl))
419 			needed_priv = PRIV_FILE_UPGRADE_SL;
420 	} else {
421 		/* dataset currently has a default label */
422 		if (!new_default)
423 			needed_priv = PRIV_FILE_UPGRADE_SL;
424 	}
425 
426 out_check:
427 	if (needed_priv != -1)
428 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
429 	return (0);
430 }
431 
432 static int
433 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
434     cred_t *cr)
435 {
436 	char *strval;
437 
438 	/*
439 	 * Check permissions for special properties.
440 	 */
441 	switch (prop) {
442 	case ZFS_PROP_ZONED:
443 		/*
444 		 * Disallow setting of 'zoned' from within a local zone.
445 		 */
446 		if (!INGLOBALZONE(curproc))
447 			return (EPERM);
448 		break;
449 
450 	case ZFS_PROP_QUOTA:
451 		if (!INGLOBALZONE(curproc)) {
452 			uint64_t zoned;
453 			char setpoint[MAXNAMELEN];
454 			/*
455 			 * Unprivileged users are allowed to modify the
456 			 * quota on things *under* (ie. contained by)
457 			 * the thing they own.
458 			 */
459 			if (dsl_prop_get_integer(dsname, "zoned", &zoned,
460 			    setpoint))
461 				return (EPERM);
462 			if (!zoned || strlen(dsname) <= strlen(setpoint))
463 				return (EPERM);
464 		}
465 		break;
466 
467 	case ZFS_PROP_MLSLABEL:
468 		if (!is_system_labeled())
469 			return (EPERM);
470 
471 		if (nvpair_value_string(propval, &strval) == 0) {
472 			int err;
473 
474 			err = zfs_set_slabel_policy(dsname, strval, CRED());
475 			if (err != 0)
476 				return (err);
477 		}
478 		break;
479 	}
480 
481 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
482 }
483 
484 int
485 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
486 {
487 	int error;
488 
489 	error = zfs_dozonecheck(zc->zc_name, cr);
490 	if (error)
491 		return (error);
492 
493 	/*
494 	 * permission to set permissions will be evaluated later in
495 	 * dsl_deleg_can_allow()
496 	 */
497 	return (0);
498 }
499 
500 int
501 zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
502 {
503 	return (zfs_secpolicy_write_perms(zc->zc_name,
504 	    ZFS_DELEG_PERM_ROLLBACK, cr));
505 }
506 
507 int
508 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
509 {
510 	return (zfs_secpolicy_write_perms(zc->zc_name,
511 	    ZFS_DELEG_PERM_SEND, cr));
512 }
513 
514 static int
515 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
516 {
517 	vnode_t *vp;
518 	int error;
519 
520 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
521 	    NO_FOLLOW, NULL, &vp)) != 0)
522 		return (error);
523 
524 	/* Now make sure mntpnt and dataset are ZFS */
525 
526 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
527 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
528 	    zc->zc_name) != 0)) {
529 		VN_RELE(vp);
530 		return (EPERM);
531 	}
532 
533 	VN_RELE(vp);
534 	return (dsl_deleg_access(zc->zc_name,
535 	    ZFS_DELEG_PERM_SHARE, cr));
536 }
537 
538 int
539 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
540 {
541 	if (!INGLOBALZONE(curproc))
542 		return (EPERM);
543 
544 	if (secpolicy_nfs(cr) == 0) {
545 		return (0);
546 	} else {
547 		return (zfs_secpolicy_deleg_share(zc, cr));
548 	}
549 }
550 
551 int
552 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
553 {
554 	if (!INGLOBALZONE(curproc))
555 		return (EPERM);
556 
557 	if (secpolicy_smb(cr) == 0) {
558 		return (0);
559 	} else {
560 		return (zfs_secpolicy_deleg_share(zc, cr));
561 	}
562 }
563 
/*
 * Compute the parent of a dataset or snapshot name by removing the trailing
 * "@bla" or "/bla" component.
 *
 *	datasetname	NUL-terminated name to examine
 *	parent		buffer that receives the parent name
 *	parentsize	size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT if the name contains neither '@' nor '/',
 * EINVAL if parentsize is not positive, and ENAMETOOLONG if the name
 * (including its terminator) does not fit in 'parent'.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	if (parentsize <= 0)
		return (EINVAL);

	/*
	 * A plain strncpy() would leave 'parent' unterminated for a name of
	 * parentsize bytes or more, and the searches below would then run off
	 * the end of the buffer.  A truncated name could also yield the wrong
	 * parent, so refuse such names instead of truncating them.
	 */
	if (strlen(datasetname) >= (size_t)parentsize)
		return (ENAMETOOLONG);
	(void) strcpy(parent, datasetname);

	/* A snapshot separator takes precedence over a path separator. */
	cp = strrchr(parent, '@');
	if (cp == NULL)
		cp = strrchr(parent, '/');
	if (cp == NULL)
		return (ENOENT);
	cp[0] = '\0';

	return (0);
}
585 
586 int
587 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
588 {
589 	int error;
590 
591 	if ((error = zfs_secpolicy_write_perms(name,
592 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
593 		return (error);
594 
595 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
596 }
597 
598 static int
599 zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
600 {
601 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
602 }
603 
604 /*
605  * Destroying snapshots with delegated permissions requires
606  * descendent mount and destroy permissions.
607  * Reassemble the full filesystem@snap name so dsl_deleg_access()
608  * can do the correct permission check.
609  *
610  * Since this routine is used when doing a recursive destroy of snapshots
611  * and destroying snapshots requires descendent permissions, a successfull
612  * check of the top level snapshot applies to snapshots of all descendent
613  * datasets as well.
614  */
615 static int
616 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
617 {
618 	int error;
619 	char *dsname;
620 
621 	dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
622 
623 	error = zfs_secpolicy_destroy_perms(dsname, cr);
624 
625 	strfree(dsname);
626 	return (error);
627 }
628 
629 int
630 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
631 {
632 	char	parentname[MAXNAMELEN];
633 	int	error;
634 
635 	if ((error = zfs_secpolicy_write_perms(from,
636 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
637 		return (error);
638 
639 	if ((error = zfs_secpolicy_write_perms(from,
640 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
641 		return (error);
642 
643 	if ((error = zfs_get_parent(to, parentname,
644 	    sizeof (parentname))) != 0)
645 		return (error);
646 
647 	if ((error = zfs_secpolicy_write_perms(parentname,
648 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
649 		return (error);
650 
651 	if ((error = zfs_secpolicy_write_perms(parentname,
652 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
653 		return (error);
654 
655 	return (error);
656 }
657 
658 static int
659 zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
660 {
661 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
662 }
663 
664 static int
665 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
666 {
667 	char	parentname[MAXNAMELEN];
668 	objset_t *clone;
669 	int error;
670 
671 	error = zfs_secpolicy_write_perms(zc->zc_name,
672 	    ZFS_DELEG_PERM_PROMOTE, cr);
673 	if (error)
674 		return (error);
675 
676 	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
677 
678 	if (error == 0) {
679 		dsl_dataset_t *pclone = NULL;
680 		dsl_dir_t *dd;
681 		dd = clone->os_dsl_dataset->ds_dir;
682 
683 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
684 		error = dsl_dataset_hold_obj(dd->dd_pool,
685 		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
686 		rw_exit(&dd->dd_pool->dp_config_rwlock);
687 		if (error) {
688 			dmu_objset_rele(clone, FTAG);
689 			return (error);
690 		}
691 
692 		error = zfs_secpolicy_write_perms(zc->zc_name,
693 		    ZFS_DELEG_PERM_MOUNT, cr);
694 
695 		dsl_dataset_name(pclone, parentname);
696 		dmu_objset_rele(clone, FTAG);
697 		dsl_dataset_rele(pclone, FTAG);
698 		if (error == 0)
699 			error = zfs_secpolicy_write_perms(parentname,
700 			    ZFS_DELEG_PERM_PROMOTE, cr);
701 	}
702 	return (error);
703 }
704 
705 static int
706 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
707 {
708 	int error;
709 
710 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
711 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
712 		return (error);
713 
714 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
715 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
716 		return (error);
717 
718 	return (zfs_secpolicy_write_perms(zc->zc_name,
719 	    ZFS_DELEG_PERM_CREATE, cr));
720 }
721 
722 int
723 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
724 {
725 	return (zfs_secpolicy_write_perms(name,
726 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
727 }
728 
729 static int
730 zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
731 {
732 
733 	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
734 }
735 
736 static int
737 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
738 {
739 	char	parentname[MAXNAMELEN];
740 	int	error;
741 
742 	if ((error = zfs_get_parent(zc->zc_name, parentname,
743 	    sizeof (parentname))) != 0)
744 		return (error);
745 
746 	if (zc->zc_value[0] != '\0') {
747 		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
748 		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
749 			return (error);
750 	}
751 
752 	if ((error = zfs_secpolicy_write_perms(parentname,
753 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
754 		return (error);
755 
756 	error = zfs_secpolicy_write_perms(parentname,
757 	    ZFS_DELEG_PERM_MOUNT, cr);
758 
759 	return (error);
760 }
761 
762 static int
763 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
764 {
765 	int error;
766 
767 	error = secpolicy_fs_unmount(cr, NULL);
768 	if (error) {
769 		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
770 	}
771 	return (error);
772 }
773 
774 /*
775  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
776  * SYS_CONFIG privilege, which is not available in a local zone.
777  */
778 /* ARGSUSED */
779 static int
780 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
781 {
782 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
783 		return (EPERM);
784 
785 	return (0);
786 }
787 
788 /*
789  * Policy for fault injection.  Requires all privileges.
790  */
791 /* ARGSUSED */
792 static int
793 zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
794 {
795 	return (secpolicy_zinject(cr));
796 }
797 
798 static int
799 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
800 {
801 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
802 
803 	if (prop == ZPROP_INVAL) {
804 		if (!zfs_prop_user(zc->zc_value))
805 			return (EINVAL);
806 		return (zfs_secpolicy_write_perms(zc->zc_name,
807 		    ZFS_DELEG_PERM_USERPROP, cr));
808 	} else {
809 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
810 		    NULL, cr));
811 	}
812 }
813 
814 static int
815 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
816 {
817 	int err = zfs_secpolicy_read(zc, cr);
818 	if (err)
819 		return (err);
820 
821 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
822 		return (EINVAL);
823 
824 	if (zc->zc_value[0] == 0) {
825 		/*
826 		 * They are asking about a posix uid/gid.  If it's
827 		 * themself, allow it.
828 		 */
829 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
830 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
831 			if (zc->zc_guid == crgetuid(cr))
832 				return (0);
833 		} else {
834 			if (groupmember(zc->zc_guid, cr))
835 				return (0);
836 		}
837 	}
838 
839 	return (zfs_secpolicy_write_perms(zc->zc_name,
840 	    userquota_perms[zc->zc_objset_type], cr));
841 }
842 
843 static int
844 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
845 {
846 	int err = zfs_secpolicy_read(zc, cr);
847 	if (err)
848 		return (err);
849 
850 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
851 		return (EINVAL);
852 
853 	return (zfs_secpolicy_write_perms(zc->zc_name,
854 	    userquota_perms[zc->zc_objset_type], cr));
855 }
856 
857 static int
858 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
859 {
860 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
861 	    NULL, cr));
862 }
863 
864 static int
865 zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
866 {
867 	return (zfs_secpolicy_write_perms(zc->zc_name,
868 	    ZFS_DELEG_PERM_HOLD, cr));
869 }
870 
871 static int
872 zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
873 {
874 	return (zfs_secpolicy_write_perms(zc->zc_name,
875 	    ZFS_DELEG_PERM_RELEASE, cr));
876 }
877 
878 /*
879  * Returns the nvlist as specified by the user in the zfs_cmd_t.
880  */
881 static int
882 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
883 {
884 	char *packed;
885 	int error;
886 	nvlist_t *list = NULL;
887 
888 	/*
889 	 * Read in and unpack the user-supplied nvlist.
890 	 */
891 	if (size == 0)
892 		return (EINVAL);
893 
894 	packed = kmem_alloc(size, KM_SLEEP);
895 
896 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
897 	    iflag)) != 0) {
898 		kmem_free(packed, size);
899 		return (error);
900 	}
901 
902 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
903 		kmem_free(packed, size);
904 		return (error);
905 	}
906 
907 	kmem_free(packed, size);
908 
909 	*nvp = list;
910 	return (0);
911 }
912 
913 static int
914 fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
915 {
916 	size_t size;
917 
918 	VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
919 
920 	if (size > zc->zc_nvlist_dst_size) {
921 		nvpair_t *more_errors;
922 		int n = 0;
923 
924 		if (zc->zc_nvlist_dst_size < 1024)
925 			return (ENOMEM);
926 
927 		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
928 		more_errors = nvlist_prev_nvpair(*errors, NULL);
929 
930 		do {
931 			nvpair_t *pair = nvlist_prev_nvpair(*errors,
932 			    more_errors);
933 			VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
934 			n++;
935 			VERIFY(nvlist_size(*errors, &size,
936 			    NV_ENCODE_NATIVE) == 0);
937 		} while (size > zc->zc_nvlist_dst_size);
938 
939 		VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
940 		VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
941 		ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
942 		ASSERT(size <= zc->zc_nvlist_dst_size);
943 	}
944 
945 	return (0);
946 }
947 
948 static int
949 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
950 {
951 	char *packed = NULL;
952 	int error = 0;
953 	size_t size;
954 
955 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
956 
957 	if (size > zc->zc_nvlist_dst_size) {
958 		error = ENOMEM;
959 	} else {
960 		packed = kmem_alloc(size, KM_SLEEP);
961 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
962 		    KM_SLEEP) == 0);
963 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
964 		    size, zc->zc_iflags) != 0)
965 			error = EFAULT;
966 		kmem_free(packed, size);
967 	}
968 
969 	zc->zc_nvlist_dst_size = size;
970 	return (error);
971 }
972 
973 static int
974 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
975 {
976 	objset_t *os;
977 	int error;
978 
979 	error = dmu_objset_hold(dsname, FTAG, &os);
980 	if (error)
981 		return (error);
982 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
983 		dmu_objset_rele(os, FTAG);
984 		return (EINVAL);
985 	}
986 
987 	mutex_enter(&os->os_user_ptr_lock);
988 	*zfvp = dmu_objset_get_user(os);
989 	if (*zfvp) {
990 		VFS_HOLD((*zfvp)->z_vfs);
991 	} else {
992 		error = ESRCH;
993 	}
994 	mutex_exit(&os->os_user_ptr_lock);
995 	dmu_objset_rele(os, FTAG);
996 	return (error);
997 }
998 
999 /*
1000  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1001  * case its z_vfs will be NULL, and it will be opened as the owner.
1002  */
1003 static int
1004 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp)
1005 {
1006 	int error = 0;
1007 
1008 	if (getzfsvfs(name, zfvp) != 0)
1009 		error = zfsvfs_create(name, zfvp);
1010 	if (error == 0) {
1011 		rrw_enter(&(*zfvp)->z_teardown_lock, RW_READER, tag);
1012 		if ((*zfvp)->z_unmounted) {
1013 			/*
1014 			 * XXX we could probably try again, since the unmounting
1015 			 * thread should be just about to disassociate the
1016 			 * objset from the zfsvfs.
1017 			 */
1018 			rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1019 			return (EBUSY);
1020 		}
1021 	}
1022 	return (error);
1023 }
1024 
1025 static void
1026 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1027 {
1028 	rrw_exit(&zfsvfs->z_teardown_lock, tag);
1029 
1030 	if (zfsvfs->z_vfs) {
1031 		VFS_RELE(zfsvfs->z_vfs);
1032 	} else {
1033 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1034 		zfsvfs_free(zfsvfs);
1035 	}
1036 }
1037 
1038 static int
1039 zfs_ioc_pool_create(zfs_cmd_t *zc)
1040 {
1041 	int error;
1042 	nvlist_t *config, *props = NULL;
1043 	nvlist_t *rootprops = NULL;
1044 	nvlist_t *zplprops = NULL;
1045 	char *buf;
1046 
1047 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1048 	    zc->zc_iflags, &config))
1049 		return (error);
1050 
1051 	if (zc->zc_nvlist_src_size != 0 && (error =
1052 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1053 	    zc->zc_iflags, &props))) {
1054 		nvlist_free(config);
1055 		return (error);
1056 	}
1057 
1058 	if (props) {
1059 		nvlist_t *nvl = NULL;
1060 		uint64_t version = SPA_VERSION;
1061 
1062 		(void) nvlist_lookup_uint64(props,
1063 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1064 		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
1065 			error = EINVAL;
1066 			goto pool_props_bad;
1067 		}
1068 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1069 		if (nvl) {
1070 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1071 			if (error != 0) {
1072 				nvlist_free(config);
1073 				nvlist_free(props);
1074 				return (error);
1075 			}
1076 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1077 		}
1078 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1079 		error = zfs_fill_zplprops_root(version, rootprops,
1080 		    zplprops, NULL);
1081 		if (error)
1082 			goto pool_props_bad;
1083 	}
1084 
1085 	buf = history_str_get(zc);
1086 
1087 	error = spa_create(zc->zc_name, config, props, buf, zplprops);
1088 
1089 	/*
1090 	 * Set the remaining root properties
1091 	 */
1092 	if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1093 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1094 		(void) spa_destroy(zc->zc_name);
1095 
1096 	if (buf != NULL)
1097 		history_str_free(buf);
1098 
1099 pool_props_bad:
1100 	nvlist_free(rootprops);
1101 	nvlist_free(zplprops);
1102 	nvlist_free(config);
1103 	nvlist_free(props);
1104 
1105 	return (error);
1106 }
1107 
1108 static int
1109 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1110 {
1111 	int error;
1112 	zfs_log_history(zc);
1113 	error = spa_destroy(zc->zc_name);
1114 	if (error == 0)
1115 		zvol_remove_minors(zc->zc_name);
1116 	return (error);
1117 }
1118 
1119 static int
1120 zfs_ioc_pool_import(zfs_cmd_t *zc)
1121 {
1122 	nvlist_t *config, *props = NULL;
1123 	uint64_t guid;
1124 	int error;
1125 
1126 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1127 	    zc->zc_iflags, &config)) != 0)
1128 		return (error);
1129 
1130 	if (zc->zc_nvlist_src_size != 0 && (error =
1131 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1132 	    zc->zc_iflags, &props))) {
1133 		nvlist_free(config);
1134 		return (error);
1135 	}
1136 
1137 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1138 	    guid != zc->zc_guid)
1139 		error = EINVAL;
1140 	else if (zc->zc_cookie)
1141 		error = spa_import_verbatim(zc->zc_name, config, props);
1142 	else
1143 		error = spa_import(zc->zc_name, config, props);
1144 
1145 	if (zc->zc_nvlist_dst != 0)
1146 		(void) put_nvlist(zc, config);
1147 
1148 	nvlist_free(config);
1149 
1150 	if (props)
1151 		nvlist_free(props);
1152 
1153 	return (error);
1154 }
1155 
1156 static int
1157 zfs_ioc_pool_export(zfs_cmd_t *zc)
1158 {
1159 	int error;
1160 	boolean_t force = (boolean_t)zc->zc_cookie;
1161 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1162 
1163 	zfs_log_history(zc);
1164 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1165 	if (error == 0)
1166 		zvol_remove_minors(zc->zc_name);
1167 	return (error);
1168 }
1169 
1170 static int
1171 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1172 {
1173 	nvlist_t *configs;
1174 	int error;
1175 
1176 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1177 		return (EEXIST);
1178 
1179 	error = put_nvlist(zc, configs);
1180 
1181 	nvlist_free(configs);
1182 
1183 	return (error);
1184 }
1185 
1186 static int
1187 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1188 {
1189 	nvlist_t *config;
1190 	int error;
1191 	int ret = 0;
1192 
1193 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1194 	    sizeof (zc->zc_value));
1195 
1196 	if (config != NULL) {
1197 		ret = put_nvlist(zc, config);
1198 		nvlist_free(config);
1199 
1200 		/*
1201 		 * The config may be present even if 'error' is non-zero.
1202 		 * In this case we return success, and preserve the real errno
1203 		 * in 'zc_cookie'.
1204 		 */
1205 		zc->zc_cookie = error;
1206 	} else {
1207 		ret = error;
1208 	}
1209 
1210 	return (ret);
1211 }
1212 
1213 /*
1214  * Try to import the given pool, returning pool stats as appropriate so that
1215  * user land knows which devices are available and overall pool health.
1216  */
1217 static int
1218 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1219 {
1220 	nvlist_t *tryconfig, *config;
1221 	int error;
1222 
1223 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1224 	    zc->zc_iflags, &tryconfig)) != 0)
1225 		return (error);
1226 
1227 	config = spa_tryimport(tryconfig);
1228 
1229 	nvlist_free(tryconfig);
1230 
1231 	if (config == NULL)
1232 		return (EINVAL);
1233 
1234 	error = put_nvlist(zc, config);
1235 	nvlist_free(config);
1236 
1237 	return (error);
1238 }
1239 
1240 /*
1241  * inputs:
1242  * zc_name              name of the pool
1243  * zc_cookie            scan func (pool_scan_func_t)
1244  */
1245 static int
1246 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1247 {
1248 	spa_t *spa;
1249 	int error;
1250 
1251 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1252 		return (error);
1253 
1254 	if (zc->zc_cookie == POOL_SCAN_NONE)
1255 		error = spa_scan_stop(spa);
1256 	else
1257 		error = spa_scan(spa, zc->zc_cookie);
1258 
1259 	spa_close(spa, FTAG);
1260 
1261 	return (error);
1262 }
1263 
1264 static int
1265 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1266 {
1267 	spa_t *spa;
1268 	int error;
1269 
1270 	error = spa_open(zc->zc_name, &spa, FTAG);
1271 	if (error == 0) {
1272 		spa_freeze(spa);
1273 		spa_close(spa, FTAG);
1274 	}
1275 	return (error);
1276 }
1277 
1278 static int
1279 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1280 {
1281 	spa_t *spa;
1282 	int error;
1283 
1284 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1285 		return (error);
1286 
1287 	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1288 		spa_close(spa, FTAG);
1289 		return (EINVAL);
1290 	}
1291 
1292 	spa_upgrade(spa, zc->zc_cookie);
1293 	spa_close(spa, FTAG);
1294 
1295 	return (error);
1296 }
1297 
1298 static int
1299 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1300 {
1301 	spa_t *spa;
1302 	char *hist_buf;
1303 	uint64_t size;
1304 	int error;
1305 
1306 	if ((size = zc->zc_history_len) == 0)
1307 		return (EINVAL);
1308 
1309 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1310 		return (error);
1311 
1312 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1313 		spa_close(spa, FTAG);
1314 		return (ENOTSUP);
1315 	}
1316 
1317 	hist_buf = kmem_alloc(size, KM_SLEEP);
1318 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1319 	    &zc->zc_history_len, hist_buf)) == 0) {
1320 		error = ddi_copyout(hist_buf,
1321 		    (void *)(uintptr_t)zc->zc_history,
1322 		    zc->zc_history_len, zc->zc_iflags);
1323 	}
1324 
1325 	spa_close(spa, FTAG);
1326 	kmem_free(hist_buf, size);
1327 	return (error);
1328 }
1329 
1330 static int
1331 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1332 {
1333 	int error;
1334 
1335 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1336 		return (error);
1337 
1338 	return (0);
1339 }
1340 
1341 /*
1342  * inputs:
1343  * zc_name		name of filesystem
1344  * zc_obj		object to find
1345  *
1346  * outputs:
1347  * zc_value		name of object
1348  */
1349 static int
1350 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1351 {
1352 	objset_t *os;
1353 	int error;
1354 
1355 	/* XXX reading from objset not owned */
1356 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1357 		return (error);
1358 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1359 		dmu_objset_rele(os, FTAG);
1360 		return (EINVAL);
1361 	}
1362 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1363 	    sizeof (zc->zc_value));
1364 	dmu_objset_rele(os, FTAG);
1365 
1366 	return (error);
1367 }
1368 
1369 static int
1370 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1371 {
1372 	spa_t *spa;
1373 	int error;
1374 	nvlist_t *config, **l2cache, **spares;
1375 	uint_t nl2cache = 0, nspares = 0;
1376 
1377 	error = spa_open(zc->zc_name, &spa, FTAG);
1378 	if (error != 0)
1379 		return (error);
1380 
1381 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1382 	    zc->zc_iflags, &config);
1383 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1384 	    &l2cache, &nl2cache);
1385 
1386 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1387 	    &spares, &nspares);
1388 
1389 	/*
1390 	 * A root pool with concatenated devices is not supported.
1391 	 * Thus, can not add a device to a root pool.
1392 	 *
1393 	 * Intent log device can not be added to a rootpool because
1394 	 * during mountroot, zil is replayed, a seperated log device
1395 	 * can not be accessed during the mountroot time.
1396 	 *
1397 	 * l2cache and spare devices are ok to be added to a rootpool.
1398 	 */
1399 	if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1400 		nvlist_free(config);
1401 		spa_close(spa, FTAG);
1402 		return (EDOM);
1403 	}
1404 
1405 	if (error == 0) {
1406 		error = spa_vdev_add(spa, config);
1407 		nvlist_free(config);
1408 	}
1409 	spa_close(spa, FTAG);
1410 	return (error);
1411 }
1412 
1413 /*
1414  * inputs:
1415  * zc_name		name of the pool
1416  * zc_nvlist_conf	nvlist of devices to remove
1417  * zc_cookie		to stop the remove?
1418  */
1419 static int
1420 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1421 {
1422 	spa_t *spa;
1423 	int error;
1424 
1425 	error = spa_open(zc->zc_name, &spa, FTAG);
1426 	if (error != 0)
1427 		return (error);
1428 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1429 	spa_close(spa, FTAG);
1430 	return (error);
1431 }
1432 
1433 static int
1434 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1435 {
1436 	spa_t *spa;
1437 	int error;
1438 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1439 
1440 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1441 		return (error);
1442 	switch (zc->zc_cookie) {
1443 	case VDEV_STATE_ONLINE:
1444 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1445 		break;
1446 
1447 	case VDEV_STATE_OFFLINE:
1448 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1449 		break;
1450 
1451 	case VDEV_STATE_FAULTED:
1452 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1453 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1454 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1455 
1456 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1457 		break;
1458 
1459 	case VDEV_STATE_DEGRADED:
1460 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1461 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
1462 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1463 
1464 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1465 		break;
1466 
1467 	default:
1468 		error = EINVAL;
1469 	}
1470 	zc->zc_cookie = newstate;
1471 	spa_close(spa, FTAG);
1472 	return (error);
1473 }
1474 
1475 static int
1476 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1477 {
1478 	spa_t *spa;
1479 	int replacing = zc->zc_cookie;
1480 	nvlist_t *config;
1481 	int error;
1482 
1483 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1484 		return (error);
1485 
1486 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1487 	    zc->zc_iflags, &config)) == 0) {
1488 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1489 		nvlist_free(config);
1490 	}
1491 
1492 	spa_close(spa, FTAG);
1493 	return (error);
1494 }
1495 
1496 static int
1497 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1498 {
1499 	spa_t *spa;
1500 	int error;
1501 
1502 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1503 		return (error);
1504 
1505 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1506 
1507 	spa_close(spa, FTAG);
1508 	return (error);
1509 }
1510 
1511 static int
1512 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1513 {
1514 	spa_t *spa;
1515 	nvlist_t *config, *props = NULL;
1516 	int error;
1517 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1518 
1519 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1520 		return (error);
1521 
1522 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1523 	    zc->zc_iflags, &config)) {
1524 		spa_close(spa, FTAG);
1525 		return (error);
1526 	}
1527 
1528 	if (zc->zc_nvlist_src_size != 0 && (error =
1529 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1530 	    zc->zc_iflags, &props))) {
1531 		spa_close(spa, FTAG);
1532 		nvlist_free(config);
1533 		return (error);
1534 	}
1535 
1536 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1537 
1538 	spa_close(spa, FTAG);
1539 
1540 	nvlist_free(config);
1541 	nvlist_free(props);
1542 
1543 	return (error);
1544 }
1545 
1546 static int
1547 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1548 {
1549 	spa_t *spa;
1550 	char *path = zc->zc_value;
1551 	uint64_t guid = zc->zc_guid;
1552 	int error;
1553 
1554 	error = spa_open(zc->zc_name, &spa, FTAG);
1555 	if (error != 0)
1556 		return (error);
1557 
1558 	error = spa_vdev_setpath(spa, guid, path);
1559 	spa_close(spa, FTAG);
1560 	return (error);
1561 }
1562 
1563 static int
1564 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1565 {
1566 	spa_t *spa;
1567 	char *fru = zc->zc_value;
1568 	uint64_t guid = zc->zc_guid;
1569 	int error;
1570 
1571 	error = spa_open(zc->zc_name, &spa, FTAG);
1572 	if (error != 0)
1573 		return (error);
1574 
1575 	error = spa_vdev_setfru(spa, guid, fru);
1576 	spa_close(spa, FTAG);
1577 	return (error);
1578 }
1579 
1580 /*
1581  * inputs:
1582  * zc_name		name of filesystem
1583  * zc_nvlist_dst_size	size of buffer for property nvlist
1584  *
1585  * outputs:
1586  * zc_objset_stats	stats
1587  * zc_nvlist_dst	property nvlist
1588  * zc_nvlist_dst_size	size of property nvlist
1589  */
1590 static int
1591 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1592 {
1593 	objset_t *os = NULL;
1594 	int error;
1595 	nvlist_t *nv;
1596 
1597 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1598 		return (error);
1599 
1600 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1601 
1602 	if (zc->zc_nvlist_dst != 0 &&
1603 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
1604 		dmu_objset_stats(os, nv);
1605 		/*
1606 		 * NB: zvol_get_stats() will read the objset contents,
1607 		 * which we aren't supposed to do with a
1608 		 * DS_MODE_USER hold, because it could be
1609 		 * inconsistent.  So this is a bit of a workaround...
1610 		 * XXX reading with out owning
1611 		 */
1612 		if (!zc->zc_objset_stats.dds_inconsistent) {
1613 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1614 				VERIFY(zvol_get_stats(os, nv) == 0);
1615 		}
1616 		error = put_nvlist(zc, nv);
1617 		nvlist_free(nv);
1618 	}
1619 
1620 	dmu_objset_rele(os, FTAG);
1621 	return (error);
1622 }
1623 
1624 /*
1625  * inputs:
1626  * zc_name		name of filesystem
1627  * zc_nvlist_dst_size	size of buffer for property nvlist
1628  *
1629  * outputs:
1630  * zc_nvlist_dst	received property nvlist
1631  * zc_nvlist_dst_size	size of received property nvlist
1632  *
1633  * Gets received properties (distinct from local properties on or after
1634  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1635  * local property values.
1636  */
1637 static int
1638 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1639 {
1640 	objset_t *os = NULL;
1641 	int error;
1642 	nvlist_t *nv;
1643 
1644 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1645 		return (error);
1646 
1647 	/*
1648 	 * Without this check, we would return local property values if the
1649 	 * caller has not already received properties on or after
1650 	 * SPA_VERSION_RECVD_PROPS.
1651 	 */
1652 	if (!dsl_prop_get_hasrecvd(os)) {
1653 		dmu_objset_rele(os, FTAG);
1654 		return (ENOTSUP);
1655 	}
1656 
1657 	if (zc->zc_nvlist_dst != 0 &&
1658 	    (error = dsl_prop_get_received(os, &nv)) == 0) {
1659 		error = put_nvlist(zc, nv);
1660 		nvlist_free(nv);
1661 	}
1662 
1663 	dmu_objset_rele(os, FTAG);
1664 	return (error);
1665 }
1666 
1667 static int
1668 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1669 {
1670 	uint64_t value;
1671 	int error;
1672 
1673 	/*
1674 	 * zfs_get_zplprop() will either find a value or give us
1675 	 * the default value (if there is one).
1676 	 */
1677 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1678 		return (error);
1679 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1680 	return (0);
1681 }
1682 
1683 /*
1684  * inputs:
1685  * zc_name		name of filesystem
1686  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1687  *
1688  * outputs:
1689  * zc_nvlist_dst	zpl property nvlist
1690  * zc_nvlist_dst_size	size of zpl property nvlist
1691  */
1692 static int
1693 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1694 {
1695 	objset_t *os;
1696 	int err;
1697 
1698 	/* XXX reading without owning */
1699 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1700 		return (err);
1701 
1702 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1703 
1704 	/*
1705 	 * NB: nvl_add_zplprop() will read the objset contents,
1706 	 * which we aren't supposed to do with a DS_MODE_USER
1707 	 * hold, because it could be inconsistent.
1708 	 */
1709 	if (zc->zc_nvlist_dst != NULL &&
1710 	    !zc->zc_objset_stats.dds_inconsistent &&
1711 	    dmu_objset_type(os) == DMU_OST_ZFS) {
1712 		nvlist_t *nv;
1713 
1714 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1715 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1716 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1717 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1718 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1719 			err = put_nvlist(zc, nv);
1720 		nvlist_free(nv);
1721 	} else {
1722 		err = ENOENT;
1723 	}
1724 	dmu_objset_rele(os, FTAG);
1725 	return (err);
1726 }
1727 
1728 static boolean_t
1729 dataset_name_hidden(const char *name)
1730 {
1731 	/*
1732 	 * Skip over datasets that are not visible in this zone,
1733 	 * internal datasets (which have a $ in their name), and
1734 	 * temporary datasets (which have a % in their name).
1735 	 */
1736 	if (strchr(name, '$') != NULL)
1737 		return (B_TRUE);
1738 	if (strchr(name, '%') != NULL)
1739 		return (B_TRUE);
1740 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1741 		return (B_TRUE);
1742 	return (B_FALSE);
1743 }
1744 
1745 /*
1746  * inputs:
1747  * zc_name		name of filesystem
1748  * zc_cookie		zap cursor
1749  * zc_nvlist_dst_size	size of buffer for property nvlist
1750  *
1751  * outputs:
1752  * zc_name		name of next filesystem
1753  * zc_cookie		zap cursor
1754  * zc_objset_stats	stats
1755  * zc_nvlist_dst	property nvlist
1756  * zc_nvlist_dst_size	size of property nvlist
1757  */
1758 static int
1759 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1760 {
1761 	objset_t *os;
1762 	int error;
1763 	char *p;
1764 	size_t orig_len = strlen(zc->zc_name);
1765 
1766 top:
1767 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1768 		if (error == ENOENT)
1769 			error = ESRCH;
1770 		return (error);
1771 	}
1772 
1773 	p = strrchr(zc->zc_name, '/');
1774 	if (p == NULL || p[1] != '\0')
1775 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1776 	p = zc->zc_name + strlen(zc->zc_name);
1777 
1778 	/*
1779 	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1780 	 * but is not declared void because its called by dmu_objset_find().
1781 	 */
1782 	if (zc->zc_cookie == 0) {
1783 		uint64_t cookie = 0;
1784 		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1785 
1786 		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
1787 			(void) dmu_objset_prefetch(p, NULL);
1788 	}
1789 
1790 	do {
1791 		error = dmu_dir_list_next(os,
1792 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1793 		    NULL, &zc->zc_cookie);
1794 		if (error == ENOENT)
1795 			error = ESRCH;
1796 	} while (error == 0 && dataset_name_hidden(zc->zc_name) &&
1797 	    !(zc->zc_iflags & FKIOCTL));
1798 	dmu_objset_rele(os, FTAG);
1799 
1800 	/*
1801 	 * If it's an internal dataset (ie. with a '$' in its name),
1802 	 * don't try to get stats for it, otherwise we'll return ENOENT.
1803 	 */
1804 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
1805 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1806 		if (error == ENOENT) {
1807 			/* We lost a race with destroy, get the next one. */
1808 			zc->zc_name[orig_len] = '\0';
1809 			goto top;
1810 		}
1811 	}
1812 	return (error);
1813 }
1814 
1815 /*
1816  * inputs:
1817  * zc_name		name of filesystem
1818  * zc_cookie		zap cursor
1819  * zc_nvlist_dst_size	size of buffer for property nvlist
1820  *
1821  * outputs:
1822  * zc_name		name of next snapshot
1823  * zc_objset_stats	stats
1824  * zc_nvlist_dst	property nvlist
1825  * zc_nvlist_dst_size	size of property nvlist
1826  */
1827 static int
1828 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1829 {
1830 	objset_t *os;
1831 	int error;
1832 
1833 top:
1834 	if (zc->zc_cookie == 0)
1835 		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
1836 		    NULL, DS_FIND_SNAPSHOTS);
1837 
1838 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
1839 	if (error)
1840 		return (error == ENOENT ? ESRCH : error);
1841 
1842 	/*
1843 	 * A dataset name of maximum length cannot have any snapshots,
1844 	 * so exit immediately.
1845 	 */
1846 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1847 		dmu_objset_rele(os, FTAG);
1848 		return (ESRCH);
1849 	}
1850 
1851 	error = dmu_snapshot_list_next(os,
1852 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1853 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
1854 	dmu_objset_rele(os, FTAG);
1855 	if (error == 0) {
1856 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1857 		if (error == ENOENT)  {
1858 			/* We lost a race with destroy, get the next one. */
1859 			*strchr(zc->zc_name, '@') = '\0';
1860 			goto top;
1861 		}
1862 	} else if (error == ENOENT) {
1863 		error = ESRCH;
1864 	}
1865 
1866 	/* if we failed, undo the @ that we tacked on to zc_name */
1867 	if (error)
1868 		*strchr(zc->zc_name, '@') = '\0';
1869 	return (error);
1870 }
1871 
1872 static int
1873 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
1874 {
1875 	const char *propname = nvpair_name(pair);
1876 	uint64_t *valary;
1877 	unsigned int vallen;
1878 	const char *domain;
1879 	char *dash;
1880 	zfs_userquota_prop_t type;
1881 	uint64_t rid;
1882 	uint64_t quota;
1883 	zfsvfs_t *zfsvfs;
1884 	int err;
1885 
1886 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1887 		nvlist_t *attrs;
1888 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1889 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1890 		    &pair) != 0)
1891 			return (EINVAL);
1892 	}
1893 
1894 	/*
1895 	 * A correctly constructed propname is encoded as
1896 	 * userquota@<rid>-<domain>.
1897 	 */
1898 	if ((dash = strchr(propname, '-')) == NULL ||
1899 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
1900 	    vallen != 3)
1901 		return (EINVAL);
1902 
1903 	domain = dash + 1;
1904 	type = valary[0];
1905 	rid = valary[1];
1906 	quota = valary[2];
1907 
1908 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs);
1909 	if (err == 0) {
1910 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
1911 		zfsvfs_rele(zfsvfs, FTAG);
1912 	}
1913 
1914 	return (err);
1915 }
1916 
1917 /*
1918  * If the named property is one that has a special function to set its value,
1919  * return 0 on success and a positive error code on failure; otherwise if it is
1920  * not one of the special properties handled by this function, return -1.
1921  *
1922  * XXX: It would be better for callers of the property interface if we handled
1923  * these special cases in dsl_prop.c (in the dsl layer).
1924  */
1925 static int
1926 zfs_prop_set_special(const char *dsname, zprop_source_t source,
1927     nvpair_t *pair)
1928 {
1929 	const char *propname = nvpair_name(pair);
1930 	zfs_prop_t prop = zfs_name_to_prop(propname);
1931 	uint64_t intval;
1932 	int err;
1933 
1934 	if (prop == ZPROP_INVAL) {
1935 		if (zfs_prop_userquota(propname))
1936 			return (zfs_prop_set_userquota(dsname, pair));
1937 		return (-1);
1938 	}
1939 
1940 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
1941 		nvlist_t *attrs;
1942 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
1943 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
1944 		    &pair) == 0);
1945 	}
1946 
1947 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
1948 		return (-1);
1949 
1950 	VERIFY(0 == nvpair_value_uint64(pair, &intval));
1951 
1952 	switch (prop) {
1953 	case ZFS_PROP_QUOTA:
1954 		err = dsl_dir_set_quota(dsname, source, intval);
1955 		break;
1956 	case ZFS_PROP_REFQUOTA:
1957 		err = dsl_dataset_set_quota(dsname, source, intval);
1958 		break;
1959 	case ZFS_PROP_RESERVATION:
1960 		err = dsl_dir_set_reservation(dsname, source, intval);
1961 		break;
1962 	case ZFS_PROP_REFRESERVATION:
1963 		err = dsl_dataset_set_reservation(dsname, source, intval);
1964 		break;
1965 	case ZFS_PROP_VOLSIZE:
1966 		err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
1967 		    intval);
1968 		break;
1969 	case ZFS_PROP_VERSION:
1970 	{
1971 		zfsvfs_t *zfsvfs;
1972 
1973 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs)) != 0)
1974 			break;
1975 
1976 		err = zfs_set_version(zfsvfs, intval);
1977 		zfsvfs_rele(zfsvfs, FTAG);
1978 
1979 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
1980 			zfs_cmd_t *zc;
1981 
1982 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
1983 			(void) strcpy(zc->zc_name, dsname);
1984 			(void) zfs_ioc_userspace_upgrade(zc);
1985 			kmem_free(zc, sizeof (zfs_cmd_t));
1986 		}
1987 		break;
1988 	}
1989 
1990 	default:
1991 		err = -1;
1992 	}
1993 
1994 	return (err);
1995 }
1996 
1997 /*
1998  * This function is best effort. If it fails to set any of the given properties,
1999  * it continues to set as many as it can and returns the first error
2000  * encountered. If the caller provides a non-NULL errlist, it also gives the
2001  * complete list of names of all the properties it failed to set along with the
2002  * corresponding error numbers. The caller is responsible for freeing the
2003  * returned errlist.
2004  *
2005  * If every property is set successfully, zero is returned and the list pointed
2006  * at by errlist is NULL.
2007  */
2008 int
2009 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2010     nvlist_t **errlist)
2011 {
2012 	nvpair_t *pair;
2013 	nvpair_t *propval;
2014 	int rv = 0;
2015 	uint64_t intval;
2016 	char *strval;
2017 	nvlist_t *genericnvl;
2018 	nvlist_t *errors;
2019 	nvlist_t *retrynvl;
2020 
2021 	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2022 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2023 	VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2024 
2025 retry:
2026 	pair = NULL;
2027 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2028 		const char *propname = nvpair_name(pair);
2029 		zfs_prop_t prop = zfs_name_to_prop(propname);
2030 		int err = 0;
2031 
2032 		/* decode the property value */
2033 		propval = pair;
2034 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2035 			nvlist_t *attrs;
2036 			VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2037 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2038 			    &propval) != 0)
2039 				err = EINVAL;
2040 		}
2041 
2042 		/* Validate value type */
2043 		if (err == 0 && prop == ZPROP_INVAL) {
2044 			if (zfs_prop_user(propname)) {
2045 				if (nvpair_type(propval) != DATA_TYPE_STRING)
2046 					err = EINVAL;
2047 			} else if (zfs_prop_userquota(propname)) {
2048 				if (nvpair_type(propval) !=
2049 				    DATA_TYPE_UINT64_ARRAY)
2050 					err = EINVAL;
2051 			}
2052 		} else if (err == 0) {
2053 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2054 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2055 					err = EINVAL;
2056 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2057 				const char *unused;
2058 
2059 				VERIFY(nvpair_value_uint64(propval,
2060 				    &intval) == 0);
2061 
2062 				switch (zfs_prop_get_type(prop)) {
2063 				case PROP_TYPE_NUMBER:
2064 					break;
2065 				case PROP_TYPE_STRING:
2066 					err = EINVAL;
2067 					break;
2068 				case PROP_TYPE_INDEX:
2069 					if (zfs_prop_index_to_string(prop,
2070 					    intval, &unused) != 0)
2071 						err = EINVAL;
2072 					break;
2073 				default:
2074 					cmn_err(CE_PANIC,
2075 					    "unknown property type");
2076 				}
2077 			} else {
2078 				err = EINVAL;
2079 			}
2080 		}
2081 
2082 		/* Validate permissions */
2083 		if (err == 0)
2084 			err = zfs_check_settable(dsname, pair, CRED());
2085 
2086 		if (err == 0) {
2087 			err = zfs_prop_set_special(dsname, source, pair);
2088 			if (err == -1) {
2089 				/*
2090 				 * For better performance we build up a list of
2091 				 * properties to set in a single transaction.
2092 				 */
2093 				err = nvlist_add_nvpair(genericnvl, pair);
2094 			} else if (err != 0 && nvl != retrynvl) {
2095 				/*
2096 				 * This may be a spurious error caused by
2097 				 * receiving quota and reservation out of order.
2098 				 * Try again in a second pass.
2099 				 */
2100 				err = nvlist_add_nvpair(retrynvl, pair);
2101 			}
2102 		}
2103 
2104 		if (err != 0)
2105 			VERIFY(nvlist_add_int32(errors, propname, err) == 0);
2106 	}
2107 
2108 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2109 		nvl = retrynvl;
2110 		goto retry;
2111 	}
2112 
2113 	if (!nvlist_empty(genericnvl) &&
2114 	    dsl_props_set(dsname, source, genericnvl) != 0) {
2115 		/*
2116 		 * If this fails, we still want to set as many properties as we
2117 		 * can, so try setting them individually.
2118 		 */
2119 		pair = NULL;
2120 		while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2121 			const char *propname = nvpair_name(pair);
2122 			int err = 0;
2123 
2124 			propval = pair;
2125 			if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2126 				nvlist_t *attrs;
2127 				VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2128 				VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2129 				    &propval) == 0);
2130 			}
2131 
2132 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
2133 				VERIFY(nvpair_value_string(propval,
2134 				    &strval) == 0);
2135 				err = dsl_prop_set(dsname, propname, source, 1,
2136 				    strlen(strval) + 1, strval);
2137 			} else {
2138 				VERIFY(nvpair_value_uint64(propval,
2139 				    &intval) == 0);
2140 				err = dsl_prop_set(dsname, propname, source, 8,
2141 				    1, &intval);
2142 			}
2143 
2144 			if (err != 0) {
2145 				VERIFY(nvlist_add_int32(errors, propname,
2146 				    err) == 0);
2147 			}
2148 		}
2149 	}
2150 	nvlist_free(genericnvl);
2151 	nvlist_free(retrynvl);
2152 
2153 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
2154 		nvlist_free(errors);
2155 		errors = NULL;
2156 	} else {
2157 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
2158 	}
2159 
2160 	if (errlist == NULL)
2161 		nvlist_free(errors);
2162 	else
2163 		*errlist = errors;
2164 
2165 	return (rv);
2166 }
2167 
2168 /*
2169  * Check that all the properties are valid user properties.
2170  */
2171 static int
2172 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2173 {
2174 	nvpair_t *pair = NULL;
2175 	int error = 0;
2176 
2177 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2178 		const char *propname = nvpair_name(pair);
2179 		char *valstr;
2180 
2181 		if (!zfs_prop_user(propname) ||
2182 		    nvpair_type(pair) != DATA_TYPE_STRING)
2183 			return (EINVAL);
2184 
2185 		if (error = zfs_secpolicy_write_perms(fsname,
2186 		    ZFS_DELEG_PERM_USERPROP, CRED()))
2187 			return (error);
2188 
2189 		if (strlen(propname) >= ZAP_MAXNAMELEN)
2190 			return (ENAMETOOLONG);
2191 
2192 		VERIFY(nvpair_value_string(pair, &valstr) == 0);
2193 		if (strlen(valstr) >= ZAP_MAXVALUELEN)
2194 			return (E2BIG);
2195 	}
2196 	return (0);
2197 }
2198 
2199 static void
2200 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2201 {
2202 	nvpair_t *pair;
2203 
2204 	VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2205 
2206 	pair = NULL;
2207 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2208 		if (nvlist_exists(skipped, nvpair_name(pair)))
2209 			continue;
2210 
2211 		VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2212 	}
2213 }
2214 
2215 static int
2216 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2217     nvlist_t *skipped)
2218 {
2219 	int err = 0;
2220 	nvlist_t *cleared_props = NULL;
2221 	props_skip(props, skipped, &cleared_props);
2222 	if (!nvlist_empty(cleared_props)) {
2223 		/*
2224 		 * Acts on local properties until the dataset has received
2225 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2226 		 */
2227 		zprop_source_t flags = (ZPROP_SRC_NONE |
2228 		    (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2229 		err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2230 	}
2231 	nvlist_free(cleared_props);
2232 	return (err);
2233 }
2234 
2235 /*
2236  * inputs:
2237  * zc_name		name of filesystem
2238  * zc_value		name of property to set
2239  * zc_nvlist_src{_size}	nvlist of properties to apply
2240  * zc_cookie		received properties flag
2241  *
2242  * outputs:
2243  * zc_nvlist_dst{_size} error for each unapplied received property
2244  */
2245 static int
2246 zfs_ioc_set_prop(zfs_cmd_t *zc)
2247 {
2248 	nvlist_t *nvl;
2249 	boolean_t received = zc->zc_cookie;
2250 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2251 	    ZPROP_SRC_LOCAL);
2252 	nvlist_t *errors = NULL;
2253 	int error;
2254 
2255 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2256 	    zc->zc_iflags, &nvl)) != 0)
2257 		return (error);
2258 
2259 	if (received) {
2260 		nvlist_t *origprops;
2261 		objset_t *os;
2262 
2263 		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2264 			if (dsl_prop_get_received(os, &origprops) == 0) {
2265 				(void) clear_received_props(os,
2266 				    zc->zc_name, origprops, nvl);
2267 				nvlist_free(origprops);
2268 			}
2269 
2270 			dsl_prop_set_hasrecvd(os);
2271 			dmu_objset_rele(os, FTAG);
2272 		}
2273 	}
2274 
2275 	error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2276 
2277 	if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2278 		(void) put_nvlist(zc, errors);
2279 	}
2280 
2281 	nvlist_free(errors);
2282 	nvlist_free(nvl);
2283 	return (error);
2284 }
2285 
2286 /*
2287  * inputs:
2288  * zc_name		name of filesystem
2289  * zc_value		name of property to inherit
2290  * zc_cookie		revert to received value if TRUE
2291  *
2292  * outputs:		none
2293  */
2294 static int
2295 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2296 {
2297 	const char *propname = zc->zc_value;
2298 	zfs_prop_t prop = zfs_name_to_prop(propname);
2299 	boolean_t received = zc->zc_cookie;
2300 	zprop_source_t source = (received
2301 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
2302 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
2303 
2304 	if (received) {
2305 		nvlist_t *dummy;
2306 		nvpair_t *pair;
2307 		zprop_type_t type;
2308 		int err;
2309 
2310 		/*
2311 		 * zfs_prop_set_special() expects properties in the form of an
2312 		 * nvpair with type info.
2313 		 */
2314 		if (prop == ZPROP_INVAL) {
2315 			if (!zfs_prop_user(propname))
2316 				return (EINVAL);
2317 
2318 			type = PROP_TYPE_STRING;
2319 		} else if (prop == ZFS_PROP_VOLSIZE ||
2320 		    prop == ZFS_PROP_VERSION) {
2321 			return (EINVAL);
2322 		} else {
2323 			type = zfs_prop_get_type(prop);
2324 		}
2325 
2326 		VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2327 
2328 		switch (type) {
2329 		case PROP_TYPE_STRING:
2330 			VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2331 			break;
2332 		case PROP_TYPE_NUMBER:
2333 		case PROP_TYPE_INDEX:
2334 			VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2335 			break;
2336 		default:
2337 			nvlist_free(dummy);
2338 			return (EINVAL);
2339 		}
2340 
2341 		pair = nvlist_next_nvpair(dummy, NULL);
2342 		err = zfs_prop_set_special(zc->zc_name, source, pair);
2343 		nvlist_free(dummy);
2344 		if (err != -1)
2345 			return (err); /* special property already handled */
2346 	} else {
2347 		/*
2348 		 * Only check this in the non-received case. We want to allow
2349 		 * 'inherit -S' to revert non-inheritable properties like quota
2350 		 * and reservation to the received or default values even though
2351 		 * they are not considered inheritable.
2352 		 */
2353 		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2354 			return (EINVAL);
2355 	}
2356 
2357 	/* the property name has been validated by zfs_secpolicy_inherit() */
2358 	return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2359 }
2360 
2361 static int
2362 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2363 {
2364 	nvlist_t *props;
2365 	spa_t *spa;
2366 	int error;
2367 	nvpair_t *pair;
2368 
2369 	if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2370 	    zc->zc_iflags, &props))
2371 		return (error);
2372 
2373 	/*
2374 	 * If the only property is the configfile, then just do a spa_lookup()
2375 	 * to handle the faulted case.
2376 	 */
2377 	pair = nvlist_next_nvpair(props, NULL);
2378 	if (pair != NULL && strcmp(nvpair_name(pair),
2379 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2380 	    nvlist_next_nvpair(props, pair) == NULL) {
2381 		mutex_enter(&spa_namespace_lock);
2382 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2383 			spa_configfile_set(spa, props, B_FALSE);
2384 			spa_config_sync(spa, B_FALSE, B_TRUE);
2385 		}
2386 		mutex_exit(&spa_namespace_lock);
2387 		if (spa != NULL) {
2388 			nvlist_free(props);
2389 			return (0);
2390 		}
2391 	}
2392 
2393 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2394 		nvlist_free(props);
2395 		return (error);
2396 	}
2397 
2398 	error = spa_prop_set(spa, props);
2399 
2400 	nvlist_free(props);
2401 	spa_close(spa, FTAG);
2402 
2403 	return (error);
2404 }
2405 
2406 static int
2407 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2408 {
2409 	spa_t *spa;
2410 	int error;
2411 	nvlist_t *nvp = NULL;
2412 
2413 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2414 		/*
2415 		 * If the pool is faulted, there may be properties we can still
2416 		 * get (such as altroot and cachefile), so attempt to get them
2417 		 * anyway.
2418 		 */
2419 		mutex_enter(&spa_namespace_lock);
2420 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2421 			error = spa_prop_get(spa, &nvp);
2422 		mutex_exit(&spa_namespace_lock);
2423 	} else {
2424 		error = spa_prop_get(spa, &nvp);
2425 		spa_close(spa, FTAG);
2426 	}
2427 
2428 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2429 		error = put_nvlist(zc, nvp);
2430 	else
2431 		error = EFAULT;
2432 
2433 	nvlist_free(nvp);
2434 	return (error);
2435 }
2436 
2437 /*
2438  * inputs:
2439  * zc_name		name of filesystem
2440  * zc_nvlist_src{_size}	nvlist of delegated permissions
2441  * zc_perm_action	allow/unallow flag
2442  *
2443  * outputs:		none
2444  */
2445 static int
2446 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2447 {
2448 	int error;
2449 	nvlist_t *fsaclnv = NULL;
2450 
2451 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2452 	    zc->zc_iflags, &fsaclnv)) != 0)
2453 		return (error);
2454 
2455 	/*
2456 	 * Verify nvlist is constructed correctly
2457 	 */
2458 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2459 		nvlist_free(fsaclnv);
2460 		return (EINVAL);
2461 	}
2462 
2463 	/*
2464 	 * If we don't have PRIV_SYS_MOUNT, then validate
2465 	 * that user is allowed to hand out each permission in
2466 	 * the nvlist(s)
2467 	 */
2468 
2469 	error = secpolicy_zfs(CRED());
2470 	if (error) {
2471 		if (zc->zc_perm_action == B_FALSE) {
2472 			error = dsl_deleg_can_allow(zc->zc_name,
2473 			    fsaclnv, CRED());
2474 		} else {
2475 			error = dsl_deleg_can_unallow(zc->zc_name,
2476 			    fsaclnv, CRED());
2477 		}
2478 	}
2479 
2480 	if (error == 0)
2481 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2482 
2483 	nvlist_free(fsaclnv);
2484 	return (error);
2485 }
2486 
2487 /*
2488  * inputs:
2489  * zc_name		name of filesystem
2490  *
2491  * outputs:
2492  * zc_nvlist_src{_size}	nvlist of delegated permissions
2493  */
2494 static int
2495 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2496 {
2497 	nvlist_t *nvp;
2498 	int error;
2499 
2500 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2501 		error = put_nvlist(zc, nvp);
2502 		nvlist_free(nvp);
2503 	}
2504 
2505 	return (error);
2506 }
2507 
2508 /*
2509  * Search the vfs list for a specified resource.  Returns a pointer to it
2510  * or NULL if no suitable entry is found. The caller of this routine
2511  * is responsible for releasing the returned vfs pointer.
2512  */
2513 static vfs_t *
2514 zfs_get_vfs(const char *resource)
2515 {
2516 	struct vfs *vfsp;
2517 	struct vfs *vfs_found = NULL;
2518 
2519 	vfs_list_read_lock();
2520 	vfsp = rootvfs;
2521 	do {
2522 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2523 			VFS_HOLD(vfsp);
2524 			vfs_found = vfsp;
2525 			break;
2526 		}
2527 		vfsp = vfsp->vfs_next;
2528 	} while (vfsp != rootvfs);
2529 	vfs_list_unlock();
2530 	return (vfs_found);
2531 }
2532 
2533 /* ARGSUSED */
2534 static void
2535 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2536 {
2537 	zfs_creat_t *zct = arg;
2538 
2539 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2540 }
2541 
2542 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2543 
2544 /*
2545  * inputs:
2546  * createprops		list of properties requested by creator
2547  * default_zplver	zpl version to use if unspecified in createprops
2548  * fuids_ok		fuids allowed in this version of the spa?
2549  * os			parent objset pointer (NULL if root fs)
2550  *
2551  * outputs:
2552  * zplprops	values for the zplprops we attach to the master node object
2553  * is_ci	true if requested file system will be purely case-insensitive
2554  *
2555  * Determine the settings for utf8only, normalization and
2556  * casesensitivity.  Specific values may have been requested by the
2557  * creator and/or we can inherit values from the parent dataset.  If
2558  * the file system is of too early a vintage, a creator can not
2559  * request settings for these properties, even if the requested
2560  * setting is the default value.  We don't actually want to create dsl
2561  * properties for these, so remove them from the source nvlist after
2562  * processing.
2563  */
2564 static int
2565 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2566     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2567     nvlist_t *zplprops, boolean_t *is_ci)
2568 {
2569 	uint64_t sense = ZFS_PROP_UNDEFINED;
2570 	uint64_t norm = ZFS_PROP_UNDEFINED;
2571 	uint64_t u8 = ZFS_PROP_UNDEFINED;
2572 
2573 	ASSERT(zplprops != NULL);
2574 
2575 	/*
2576 	 * Pull out creator prop choices, if any.
2577 	 */
2578 	if (createprops) {
2579 		(void) nvlist_lookup_uint64(createprops,
2580 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2581 		(void) nvlist_lookup_uint64(createprops,
2582 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2583 		(void) nvlist_remove_all(createprops,
2584 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2585 		(void) nvlist_lookup_uint64(createprops,
2586 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2587 		(void) nvlist_remove_all(createprops,
2588 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2589 		(void) nvlist_lookup_uint64(createprops,
2590 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2591 		(void) nvlist_remove_all(createprops,
2592 		    zfs_prop_to_name(ZFS_PROP_CASE));
2593 	}
2594 
2595 	/*
2596 	 * If the zpl version requested is whacky or the file system
2597 	 * or pool is version is too "young" to support normalization
2598 	 * and the creator tried to set a value for one of the props,
2599 	 * error out.
2600 	 */
2601 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2602 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2603 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2604 	    (zplver < ZPL_VERSION_NORMALIZATION &&
2605 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2606 	    sense != ZFS_PROP_UNDEFINED)))
2607 		return (ENOTSUP);
2608 
2609 	/*
2610 	 * Put the version in the zplprops
2611 	 */
2612 	VERIFY(nvlist_add_uint64(zplprops,
2613 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2614 
2615 	if (norm == ZFS_PROP_UNDEFINED)
2616 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2617 	VERIFY(nvlist_add_uint64(zplprops,
2618 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2619 
2620 	/*
2621 	 * If we're normalizing, names must always be valid UTF-8 strings.
2622 	 */
2623 	if (norm)
2624 		u8 = 1;
2625 	if (u8 == ZFS_PROP_UNDEFINED)
2626 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2627 	VERIFY(nvlist_add_uint64(zplprops,
2628 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2629 
2630 	if (sense == ZFS_PROP_UNDEFINED)
2631 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2632 	VERIFY(nvlist_add_uint64(zplprops,
2633 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2634 
2635 	if (is_ci)
2636 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
2637 
2638 	return (0);
2639 }
2640 
2641 static int
2642 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2643     nvlist_t *zplprops, boolean_t *is_ci)
2644 {
2645 	boolean_t fuids_ok, sa_ok;
2646 	uint64_t zplver = ZPL_VERSION;
2647 	objset_t *os = NULL;
2648 	char parentname[MAXNAMELEN];
2649 	char *cp;
2650 	spa_t *spa;
2651 	uint64_t spa_vers;
2652 	int error;
2653 
2654 	(void) strlcpy(parentname, dataset, sizeof (parentname));
2655 	cp = strrchr(parentname, '/');
2656 	ASSERT(cp != NULL);
2657 	cp[0] = '\0';
2658 
2659 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2660 		return (error);
2661 
2662 	spa_vers = spa_version(spa);
2663 	spa_close(spa, FTAG);
2664 
2665 	zplver = zfs_zpl_version_map(spa_vers);
2666 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2667 	sa_ok = (zplver >= ZPL_VERSION_SA);
2668 
2669 	/*
2670 	 * Open parent object set so we can inherit zplprop values.
2671 	 */
2672 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2673 		return (error);
2674 
2675 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2676 	    zplprops, is_ci);
2677 	dmu_objset_rele(os, FTAG);
2678 	return (error);
2679 }
2680 
2681 static int
2682 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2683     nvlist_t *zplprops, boolean_t *is_ci)
2684 {
2685 	boolean_t fuids_ok;
2686 	boolean_t sa_ok;
2687 	uint64_t zplver = ZPL_VERSION;
2688 	int error;
2689 
2690 	zplver = zfs_zpl_version_map(spa_vers);
2691 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
2692 	sa_ok = (zplver >= ZPL_VERSION_SA);
2693 
2694 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2695 	    createprops, zplprops, is_ci);
2696 	return (error);
2697 }
2698 
2699 /*
2700  * inputs:
2701  * zc_objset_type	type of objset to create (fs vs zvol)
2702  * zc_name		name of new objset
2703  * zc_value		name of snapshot to clone from (may be empty)
2704  * zc_nvlist_src{_size}	nvlist of properties to apply
2705  *
2706  * outputs: none
2707  */
2708 static int
2709 zfs_ioc_create(zfs_cmd_t *zc)
2710 {
2711 	objset_t *clone;
2712 	int error = 0;
2713 	zfs_creat_t zct;
2714 	nvlist_t *nvprops = NULL;
2715 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2716 	dmu_objset_type_t type = zc->zc_objset_type;
2717 
2718 	switch (type) {
2719 
2720 	case DMU_OST_ZFS:
2721 		cbfunc = zfs_create_cb;
2722 		break;
2723 
2724 	case DMU_OST_ZVOL:
2725 		cbfunc = zvol_create_cb;
2726 		break;
2727 
2728 	default:
2729 		cbfunc = NULL;
2730 		break;
2731 	}
2732 	if (strchr(zc->zc_name, '@') ||
2733 	    strchr(zc->zc_name, '%'))
2734 		return (EINVAL);
2735 
2736 	if (zc->zc_nvlist_src != NULL &&
2737 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2738 	    zc->zc_iflags, &nvprops)) != 0)
2739 		return (error);
2740 
2741 	zct.zct_zplprops = NULL;
2742 	zct.zct_props = nvprops;
2743 
2744 	if (zc->zc_value[0] != '\0') {
2745 		/*
2746 		 * We're creating a clone of an existing snapshot.
2747 		 */
2748 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2749 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2750 			nvlist_free(nvprops);
2751 			return (EINVAL);
2752 		}
2753 
2754 		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2755 		if (error) {
2756 			nvlist_free(nvprops);
2757 			return (error);
2758 		}
2759 
2760 		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2761 		dmu_objset_rele(clone, FTAG);
2762 		if (error) {
2763 			nvlist_free(nvprops);
2764 			return (error);
2765 		}
2766 	} else {
2767 		boolean_t is_insensitive = B_FALSE;
2768 
2769 		if (cbfunc == NULL) {
2770 			nvlist_free(nvprops);
2771 			return (EINVAL);
2772 		}
2773 
2774 		if (type == DMU_OST_ZVOL) {
2775 			uint64_t volsize, volblocksize;
2776 
2777 			if (nvprops == NULL ||
2778 			    nvlist_lookup_uint64(nvprops,
2779 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2780 			    &volsize) != 0) {
2781 				nvlist_free(nvprops);
2782 				return (EINVAL);
2783 			}
2784 
2785 			if ((error = nvlist_lookup_uint64(nvprops,
2786 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2787 			    &volblocksize)) != 0 && error != ENOENT) {
2788 				nvlist_free(nvprops);
2789 				return (EINVAL);
2790 			}
2791 
2792 			if (error != 0)
2793 				volblocksize = zfs_prop_default_numeric(
2794 				    ZFS_PROP_VOLBLOCKSIZE);
2795 
2796 			if ((error = zvol_check_volblocksize(
2797 			    volblocksize)) != 0 ||
2798 			    (error = zvol_check_volsize(volsize,
2799 			    volblocksize)) != 0) {
2800 				nvlist_free(nvprops);
2801 				return (error);
2802 			}
2803 		} else if (type == DMU_OST_ZFS) {
2804 			int error;
2805 
2806 			/*
2807 			 * We have to have normalization and
2808 			 * case-folding flags correct when we do the
2809 			 * file system creation, so go figure them out
2810 			 * now.
2811 			 */
2812 			VERIFY(nvlist_alloc(&zct.zct_zplprops,
2813 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2814 			error = zfs_fill_zplprops(zc->zc_name, nvprops,
2815 			    zct.zct_zplprops, &is_insensitive);
2816 			if (error != 0) {
2817 				nvlist_free(nvprops);
2818 				nvlist_free(zct.zct_zplprops);
2819 				return (error);
2820 			}
2821 		}
2822 		error = dmu_objset_create(zc->zc_name, type,
2823 		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
2824 		nvlist_free(zct.zct_zplprops);
2825 	}
2826 
2827 	/*
2828 	 * It would be nice to do this atomically.
2829 	 */
2830 	if (error == 0) {
2831 		error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
2832 		    nvprops, NULL);
2833 		if (error != 0)
2834 			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
2835 	}
2836 	nvlist_free(nvprops);
2837 	return (error);
2838 }
2839 
2840 /*
2841  * inputs:
2842  * zc_name	name of filesystem
2843  * zc_value	short name of snapshot
2844  * zc_cookie	recursive flag
2845  * zc_nvlist_src[_size] property list
2846  *
2847  * outputs:
2848  * zc_value	short snapname (i.e. part after the '@')
2849  */
2850 static int
2851 zfs_ioc_snapshot(zfs_cmd_t *zc)
2852 {
2853 	nvlist_t *nvprops = NULL;
2854 	int error;
2855 	boolean_t recursive = zc->zc_cookie;
2856 
2857 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2858 		return (EINVAL);
2859 
2860 	if (zc->zc_nvlist_src != NULL &&
2861 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2862 	    zc->zc_iflags, &nvprops)) != 0)
2863 		return (error);
2864 
2865 	error = zfs_check_userprops(zc->zc_name, nvprops);
2866 	if (error)
2867 		goto out;
2868 
2869 	if (!nvlist_empty(nvprops) &&
2870 	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
2871 		error = ENOTSUP;
2872 		goto out;
2873 	}
2874 
2875 	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value,
2876 	    nvprops, recursive);
2877 
2878 out:
2879 	nvlist_free(nvprops);
2880 	return (error);
2881 }
2882 
2883 int
2884 zfs_unmount_snap(const char *name, void *arg)
2885 {
2886 	vfs_t *vfsp = NULL;
2887 
2888 	if (arg) {
2889 		char *snapname = arg;
2890 		char *fullname = kmem_asprintf("%s@%s", name, snapname);
2891 		vfsp = zfs_get_vfs(fullname);
2892 		strfree(fullname);
2893 	} else if (strchr(name, '@')) {
2894 		vfsp = zfs_get_vfs(name);
2895 	}
2896 
2897 	if (vfsp) {
2898 		/*
2899 		 * Always force the unmount for snapshots.
2900 		 */
2901 		int flag = MS_FORCE;
2902 		int err;
2903 
2904 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
2905 			VFS_RELE(vfsp);
2906 			return (err);
2907 		}
2908 		VFS_RELE(vfsp);
2909 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
2910 			return (err);
2911 	}
2912 	return (0);
2913 }
2914 
2915 /*
2916  * inputs:
2917  * zc_name		name of filesystem
2918  * zc_value		short name of snapshot
2919  * zc_defer_destroy	mark for deferred destroy
2920  *
2921  * outputs:	none
2922  */
2923 static int
2924 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
2925 {
2926 	int err;
2927 
2928 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2929 		return (EINVAL);
2930 	err = dmu_objset_find(zc->zc_name,
2931 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
2932 	if (err)
2933 		return (err);
2934 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
2935 	    zc->zc_defer_destroy));
2936 }
2937 
2938 /*
2939  * inputs:
2940  * zc_name		name of dataset to destroy
2941  * zc_objset_type	type of objset
2942  * zc_defer_destroy	mark for deferred destroy
2943  *
2944  * outputs:		none
2945  */
2946 static int
2947 zfs_ioc_destroy(zfs_cmd_t *zc)
2948 {
2949 	int err;
2950 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
2951 		err = zfs_unmount_snap(zc->zc_name, NULL);
2952 		if (err)
2953 			return (err);
2954 	}
2955 
2956 	err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
2957 	if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
2958 		(void) zvol_remove_minor(zc->zc_name);
2959 	return (err);
2960 }
2961 
2962 /*
2963  * inputs:
2964  * zc_name	name of dataset to rollback (to most recent snapshot)
2965  *
2966  * outputs:	none
2967  */
2968 static int
2969 zfs_ioc_rollback(zfs_cmd_t *zc)
2970 {
2971 	dsl_dataset_t *ds, *clone;
2972 	int error;
2973 	zfsvfs_t *zfsvfs;
2974 	char *clone_name;
2975 
2976 	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
2977 	if (error)
2978 		return (error);
2979 
2980 	/* must not be a snapshot */
2981 	if (dsl_dataset_is_snapshot(ds)) {
2982 		dsl_dataset_rele(ds, FTAG);
2983 		return (EINVAL);
2984 	}
2985 
2986 	/* must have a most recent snapshot */
2987 	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
2988 		dsl_dataset_rele(ds, FTAG);
2989 		return (EINVAL);
2990 	}
2991 
2992 	/*
2993 	 * Create clone of most recent snapshot.
2994 	 */
2995 	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
2996 	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
2997 	if (error)
2998 		goto out;
2999 
3000 	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
3001 	if (error)
3002 		goto out;
3003 
3004 	/*
3005 	 * Do clone swap.
3006 	 */
3007 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3008 		error = zfs_suspend_fs(zfsvfs);
3009 		if (error == 0) {
3010 			int resume_err;
3011 
3012 			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3013 				error = dsl_dataset_clone_swap(clone, ds,
3014 				    B_TRUE);
3015 				dsl_dataset_disown(ds, FTAG);
3016 				ds = NULL;
3017 			} else {
3018 				error = EBUSY;
3019 			}
3020 			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
3021 			error = error ? error : resume_err;
3022 		}
3023 		VFS_RELE(zfsvfs->z_vfs);
3024 	} else {
3025 		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3026 			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
3027 			dsl_dataset_disown(ds, FTAG);
3028 			ds = NULL;
3029 		} else {
3030 			error = EBUSY;
3031 		}
3032 	}
3033 
3034 	/*
3035 	 * Destroy clone (which also closes it).
3036 	 */
3037 	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
3038 
3039 out:
3040 	strfree(clone_name);
3041 	if (ds)
3042 		dsl_dataset_rele(ds, FTAG);
3043 	return (error);
3044 }
3045 
3046 /*
3047  * inputs:
3048  * zc_name	old name of dataset
3049  * zc_value	new name of dataset
3050  * zc_cookie	recursive flag (only valid for snapshots)
3051  *
3052  * outputs:	none
3053  */
3054 static int
3055 zfs_ioc_rename(zfs_cmd_t *zc)
3056 {
3057 	boolean_t recursive = zc->zc_cookie & 1;
3058 
3059 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3060 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3061 	    strchr(zc->zc_value, '%'))
3062 		return (EINVAL);
3063 
3064 	/*
3065 	 * Unmount snapshot unless we're doing a recursive rename,
3066 	 * in which case the dataset code figures out which snapshots
3067 	 * to unmount.
3068 	 */
3069 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3070 	    zc->zc_objset_type == DMU_OST_ZFS) {
3071 		int err = zfs_unmount_snap(zc->zc_name, NULL);
3072 		if (err)
3073 			return (err);
3074 	}
3075 	if (zc->zc_objset_type == DMU_OST_ZVOL)
3076 		(void) zvol_remove_minor(zc->zc_name);
3077 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3078 }
3079 
3080 static int
3081 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3082 {
3083 	const char *propname = nvpair_name(pair);
3084 	boolean_t issnap = (strchr(dsname, '@') != NULL);
3085 	zfs_prop_t prop = zfs_name_to_prop(propname);
3086 	uint64_t intval;
3087 	int err;
3088 
3089 	if (prop == ZPROP_INVAL) {
3090 		if (zfs_prop_user(propname)) {
3091 			if (err = zfs_secpolicy_write_perms(dsname,
3092 			    ZFS_DELEG_PERM_USERPROP, cr))
3093 				return (err);
3094 			return (0);
3095 		}
3096 
3097 		if (!issnap && zfs_prop_userquota(propname)) {
3098 			const char *perm = NULL;
3099 			const char *uq_prefix =
3100 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3101 			const char *gq_prefix =
3102 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3103 
3104 			if (strncmp(propname, uq_prefix,
3105 			    strlen(uq_prefix)) == 0) {
3106 				perm = ZFS_DELEG_PERM_USERQUOTA;
3107 			} else if (strncmp(propname, gq_prefix,
3108 			    strlen(gq_prefix)) == 0) {
3109 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
3110 			} else {
3111 				/* USERUSED and GROUPUSED are read-only */
3112 				return (EINVAL);
3113 			}
3114 
3115 			if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3116 				return (err);
3117 			return (0);
3118 		}
3119 
3120 		return (EINVAL);
3121 	}
3122 
3123 	if (issnap)
3124 		return (EINVAL);
3125 
3126 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3127 		/*
3128 		 * dsl_prop_get_all_impl() returns properties in this
3129 		 * format.
3130 		 */
3131 		nvlist_t *attrs;
3132 		VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3133 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3134 		    &pair) == 0);
3135 	}
3136 
3137 	/*
3138 	 * Check that this value is valid for this pool version
3139 	 */
3140 	switch (prop) {
3141 	case ZFS_PROP_COMPRESSION:
3142 		/*
3143 		 * If the user specified gzip compression, make sure
3144 		 * the SPA supports it. We ignore any errors here since
3145 		 * we'll catch them later.
3146 		 */
3147 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3148 		    nvpair_value_uint64(pair, &intval) == 0) {
3149 			if (intval >= ZIO_COMPRESS_GZIP_1 &&
3150 			    intval <= ZIO_COMPRESS_GZIP_9 &&
3151 			    zfs_earlier_version(dsname,
3152 			    SPA_VERSION_GZIP_COMPRESSION)) {
3153 				return (ENOTSUP);
3154 			}
3155 
3156 			if (intval == ZIO_COMPRESS_ZLE &&
3157 			    zfs_earlier_version(dsname,
3158 			    SPA_VERSION_ZLE_COMPRESSION))
3159 				return (ENOTSUP);
3160 
3161 			/*
3162 			 * If this is a bootable dataset then
3163 			 * verify that the compression algorithm
3164 			 * is supported for booting. We must return
3165 			 * something other than ENOTSUP since it
3166 			 * implies a downrev pool version.
3167 			 */
3168 			if (zfs_is_bootfs(dsname) &&
3169 			    !BOOTFS_COMPRESS_VALID(intval)) {
3170 				return (ERANGE);
3171 			}
3172 		}
3173 		break;
3174 
3175 	case ZFS_PROP_COPIES:
3176 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3177 			return (ENOTSUP);
3178 		break;
3179 
3180 	case ZFS_PROP_DEDUP:
3181 		if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3182 			return (ENOTSUP);
3183 		break;
3184 
3185 	case ZFS_PROP_SHARESMB:
3186 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3187 			return (ENOTSUP);
3188 		break;
3189 
3190 	case ZFS_PROP_ACLINHERIT:
3191 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3192 		    nvpair_value_uint64(pair, &intval) == 0) {
3193 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
3194 			    zfs_earlier_version(dsname,
3195 			    SPA_VERSION_PASSTHROUGH_X))
3196 				return (ENOTSUP);
3197 		}
3198 		break;
3199 	}
3200 
3201 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3202 }
3203 
3204 /*
3205  * Removes properties from the given props list that fail permission checks
3206  * needed to clear them and to restore them in case of a receive error. For each
3207  * property, make sure we have both set and inherit permissions.
3208  *
3209  * Returns the first error encountered if any permission checks fail. If the
3210  * caller provides a non-NULL errlist, it also gives the complete list of names
3211  * of all the properties that failed a permission check along with the
3212  * corresponding error numbers. The caller is responsible for freeing the
3213  * returned errlist.
3214  *
3215  * If every property checks out successfully, zero is returned and the list
3216  * pointed at by errlist is NULL.
3217  */
3218 static int
3219 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3220 {
3221 	zfs_cmd_t *zc;
3222 	nvpair_t *pair, *next_pair;
3223 	nvlist_t *errors;
3224 	int err, rv = 0;
3225 
3226 	if (props == NULL)
3227 		return (0);
3228 
3229 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3230 
3231 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3232 	(void) strcpy(zc->zc_name, dataset);
3233 	pair = nvlist_next_nvpair(props, NULL);
3234 	while (pair != NULL) {
3235 		next_pair = nvlist_next_nvpair(props, pair);
3236 
3237 		(void) strcpy(zc->zc_value, nvpair_name(pair));
3238 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3239 		    (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3240 			VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3241 			VERIFY(nvlist_add_int32(errors,
3242 			    zc->zc_value, err) == 0);
3243 		}
3244 		pair = next_pair;
3245 	}
3246 	kmem_free(zc, sizeof (zfs_cmd_t));
3247 
3248 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3249 		nvlist_free(errors);
3250 		errors = NULL;
3251 	} else {
3252 		VERIFY(nvpair_value_int32(pair, &rv) == 0);
3253 	}
3254 
3255 	if (errlist == NULL)
3256 		nvlist_free(errors);
3257 	else
3258 		*errlist = errors;
3259 
3260 	return (rv);
3261 }
3262 
3263 static boolean_t
3264 propval_equals(nvpair_t *p1, nvpair_t *p2)
3265 {
3266 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3267 		/* dsl_prop_get_all_impl() format */
3268 		nvlist_t *attrs;
3269 		VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3270 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3271 		    &p1) == 0);
3272 	}
3273 
3274 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3275 		nvlist_t *attrs;
3276 		VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3277 		VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3278 		    &p2) == 0);
3279 	}
3280 
3281 	if (nvpair_type(p1) != nvpair_type(p2))
3282 		return (B_FALSE);
3283 
3284 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
3285 		char *valstr1, *valstr2;
3286 
3287 		VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3288 		VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3289 		return (strcmp(valstr1, valstr2) == 0);
3290 	} else {
3291 		uint64_t intval1, intval2;
3292 
3293 		VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3294 		VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3295 		return (intval1 == intval2);
3296 	}
3297 }
3298 
3299 /*
3300  * Remove properties from props if they are not going to change (as determined
3301  * by comparison with origprops). Remove them from origprops as well, since we
3302  * do not need to clear or restore properties that won't change.
3303  */
3304 static void
3305 props_reduce(nvlist_t *props, nvlist_t *origprops)
3306 {
3307 	nvpair_t *pair, *next_pair;
3308 
3309 	if (origprops == NULL)
3310 		return; /* all props need to be received */
3311 
3312 	pair = nvlist_next_nvpair(props, NULL);
3313 	while (pair != NULL) {
3314 		const char *propname = nvpair_name(pair);
3315 		nvpair_t *match;
3316 
3317 		next_pair = nvlist_next_nvpair(props, pair);
3318 
3319 		if ((nvlist_lookup_nvpair(origprops, propname,
3320 		    &match) != 0) || !propval_equals(pair, match))
3321 			goto next; /* need to set received value */
3322 
3323 		/* don't clear the existing received value */
3324 		(void) nvlist_remove_nvpair(origprops, match);
3325 		/* don't bother receiving the property */
3326 		(void) nvlist_remove_nvpair(props, pair);
3327 next:
3328 		pair = next_pair;
3329 	}
3330 }
3331 
3332 #ifdef	DEBUG
3333 static boolean_t zfs_ioc_recv_inject_err;
3334 #endif
3335 
3336 /*
3337  * inputs:
3338  * zc_name		name of containing filesystem
3339  * zc_nvlist_src{_size}	nvlist of properties to apply
3340  * zc_value		name of snapshot to create
3341  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
3342  * zc_cookie		file descriptor to recv from
3343  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
3344  * zc_guid		force flag
3345  *
3346  * outputs:
3347  * zc_cookie		number of bytes read
3348  * zc_nvlist_dst{_size} error for each unapplied received property
3349  * zc_obj		zprop_errflags_t
3350  */
3351 static int
3352 zfs_ioc_recv(zfs_cmd_t *zc)
3353 {
3354 	file_t *fp;
3355 	objset_t *os;
3356 	dmu_recv_cookie_t drc;
3357 	boolean_t force = (boolean_t)zc->zc_guid;
3358 	int fd;
3359 	int error = 0;
3360 	int props_error = 0;
3361 	nvlist_t *errors;
3362 	offset_t off;
3363 	nvlist_t *props = NULL; /* sent properties */
3364 	nvlist_t *origprops = NULL; /* existing properties */
3365 	objset_t *origin = NULL;
3366 	char *tosnap;
3367 	char tofs[ZFS_MAXNAMELEN];
3368 	boolean_t first_recvd_props = B_FALSE;
3369 
3370 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3371 	    strchr(zc->zc_value, '@') == NULL ||
3372 	    strchr(zc->zc_value, '%'))
3373 		return (EINVAL);
3374 
3375 	(void) strcpy(tofs, zc->zc_value);
3376 	tosnap = strchr(tofs, '@');
3377 	*tosnap++ = '\0';
3378 
3379 	if (zc->zc_nvlist_src != NULL &&
3380 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3381 	    zc->zc_iflags, &props)) != 0)
3382 		return (error);
3383 
3384 	fd = zc->zc_cookie;
3385 	fp = getf(fd);
3386 	if (fp == NULL) {
3387 		nvlist_free(props);
3388 		return (EBADF);
3389 	}
3390 
3391 	VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3392 
3393 	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
3394 		if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
3395 		    !dsl_prop_get_hasrecvd(os)) {
3396 			first_recvd_props = B_TRUE;
3397 		}
3398 
3399 		/*
3400 		 * If new received properties are supplied, they are to
3401 		 * completely replace the existing received properties, so stash
3402 		 * away the existing ones.
3403 		 */
3404 		if (dsl_prop_get_received(os, &origprops) == 0) {
3405 			nvlist_t *errlist = NULL;
3406 			/*
3407 			 * Don't bother writing a property if its value won't
3408 			 * change (and avoid the unnecessary security checks).
3409 			 *
3410 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
3411 			 * special case where we blow away all local properties
3412 			 * regardless.
3413 			 */
3414 			if (!first_recvd_props)
3415 				props_reduce(props, origprops);
3416 			if (zfs_check_clearable(tofs, origprops,
3417 			    &errlist) != 0)
3418 				(void) nvlist_merge(errors, errlist, 0);
3419 			nvlist_free(errlist);
3420 		}
3421 
3422 		dmu_objset_rele(os, FTAG);
3423 	}
3424 
3425 	if (zc->zc_string[0]) {
3426 		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
3427 		if (error)
3428 			goto out;
3429 	}
3430 
3431 	error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
3432 	    &zc->zc_begin_record, force, origin, &drc);
3433 	if (origin)
3434 		dmu_objset_rele(origin, FTAG);
3435 	if (error)
3436 		goto out;
3437 
3438 	/*
3439 	 * Set properties before we receive the stream so that they are applied
3440 	 * to the new data. Note that we must call dmu_recv_stream() if
3441 	 * dmu_recv_begin() succeeds.
3442 	 */
3443 	if (props) {
3444 		nvlist_t *errlist;
3445 
3446 		if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
3447 			if (drc.drc_newfs) {
3448 				if (spa_version(os->os_spa) >=
3449 				    SPA_VERSION_RECVD_PROPS)
3450 					first_recvd_props = B_TRUE;
3451 			} else if (origprops != NULL) {
3452 				if (clear_received_props(os, tofs, origprops,
3453 				    first_recvd_props ? NULL : props) != 0)
3454 					zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3455 			} else {
3456 				zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3457 			}
3458 			dsl_prop_set_hasrecvd(os);
3459 		} else if (!drc.drc_newfs) {
3460 			zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3461 		}
3462 
3463 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
3464 		    props, &errlist);
3465 		(void) nvlist_merge(errors, errlist, 0);
3466 		nvlist_free(errlist);
3467 	}
3468 
3469 	if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
3470 		/*
3471 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
3472 		 * size or supplied an invalid address.
3473 		 */
3474 		props_error = EINVAL;
3475 	}
3476 
3477 	off = fp->f_offset;
3478 	error = dmu_recv_stream(&drc, fp->f_vnode, &off);
3479 
3480 	if (error == 0) {
3481 		zfsvfs_t *zfsvfs = NULL;
3482 
3483 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
3484 			/* online recv */
3485 			int end_err;
3486 
3487 			error = zfs_suspend_fs(zfsvfs);
3488 			/*
3489 			 * If the suspend fails, then the recv_end will
3490 			 * likely also fail, and clean up after itself.
3491 			 */
3492 			end_err = dmu_recv_end(&drc);
3493 			if (error == 0)
3494 				error = zfs_resume_fs(zfsvfs, tofs);
3495 			error = error ? error : end_err;
3496 			VFS_RELE(zfsvfs->z_vfs);
3497 		} else {
3498 			error = dmu_recv_end(&drc);
3499 		}
3500 	}
3501 
3502 	zc->zc_cookie = off - fp->f_offset;
3503 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3504 		fp->f_offset = off;
3505 
3506 #ifdef	DEBUG
3507 	if (zfs_ioc_recv_inject_err) {
3508 		zfs_ioc_recv_inject_err = B_FALSE;
3509 		error = 1;
3510 	}
3511 #endif
3512 	/*
3513 	 * On error, restore the original props.
3514 	 */
3515 	if (error && props) {
3516 		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
3517 			if (clear_received_props(os, tofs, props, NULL) != 0) {
3518 				/*
3519 				 * We failed to clear the received properties.
3520 				 * Since we may have left a $recvd value on the
3521 				 * system, we can't clear the $hasrecvd flag.
3522 				 */
3523 				zc->zc_obj |= ZPROP_ERR_NORESTORE;
3524 			} else if (first_recvd_props) {
3525 				dsl_prop_unset_hasrecvd(os);
3526 			}
3527 			dmu_objset_rele(os, FTAG);
3528 		} else if (!drc.drc_newfs) {
3529 			/* We failed to clear the received properties. */
3530 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3531 		}
3532 
3533 		if (origprops == NULL && !drc.drc_newfs) {
3534 			/* We failed to stash the original properties. */
3535 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3536 		}
3537 
3538 		/*
3539 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
3540 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
3542 		 * first new-style receive.
3543 		 */
3544 		if (origprops != NULL &&
3545 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
3546 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
3547 		    origprops, NULL) != 0) {
3548 			/*
3549 			 * We stashed the original properties but failed to
3550 			 * restore them.
3551 			 */
3552 			zc->zc_obj |= ZPROP_ERR_NORESTORE;
3553 		}
3554 	}
3555 out:
3556 	nvlist_free(props);
3557 	nvlist_free(origprops);
3558 	nvlist_free(errors);
3559 	releasef(fd);
3560 
3561 	if (error == 0)
3562 		error = props_error;
3563 
3564 	return (error);
3565 }
3566 
3567 /*
3568  * inputs:
3569  * zc_name	name of snapshot to send
3570  * zc_value	short name of incremental fromsnap (may be empty)
3571  * zc_cookie	file descriptor to send stream to
3572  * zc_obj	fromorigin flag (mutually exclusive with zc_value)
3573  *
3574  * outputs: none
3575  */
3576 static int
3577 zfs_ioc_send(zfs_cmd_t *zc)
3578 {
3579 	objset_t *fromsnap = NULL;
3580 	objset_t *tosnap;
3581 	file_t *fp;
3582 	int error;
3583 	offset_t off;
3584 
3585 	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
3586 	if (error)
3587 		return (error);
3588 
3589 	if (zc->zc_value[0] != '\0') {
3590 		char *buf;
3591 		char *cp;
3592 
3593 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3594 		(void) strncpy(buf, zc->zc_name, MAXPATHLEN);
3595 		cp = strchr(buf, '@');
3596 		if (cp)
3597 			*(cp+1) = 0;
3598 		(void) strncat(buf, zc->zc_value, MAXPATHLEN);
3599 		error = dmu_objset_hold(buf, FTAG, &fromsnap);
3600 		kmem_free(buf, MAXPATHLEN);
3601 		if (error) {
3602 			dmu_objset_rele(tosnap, FTAG);
3603 			return (error);
3604 		}
3605 	}
3606 
3607 	fp = getf(zc->zc_cookie);
3608 	if (fp == NULL) {
3609 		dmu_objset_rele(tosnap, FTAG);
3610 		if (fromsnap)
3611 			dmu_objset_rele(fromsnap, FTAG);
3612 		return (EBADF);
3613 	}
3614 
3615 	off = fp->f_offset;
3616 	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
3617 
3618 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3619 		fp->f_offset = off;
3620 	releasef(zc->zc_cookie);
3621 	if (fromsnap)
3622 		dmu_objset_rele(fromsnap, FTAG);
3623 	dmu_objset_rele(tosnap, FTAG);
3624 	return (error);
3625 }
3626 
3627 static int
3628 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3629 {
3630 	int id, error;
3631 
3632 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3633 	    &zc->zc_inject_record);
3634 
3635 	if (error == 0)
3636 		zc->zc_guid = (uint64_t)id;
3637 
3638 	return (error);
3639 }
3640 
3641 static int
3642 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3643 {
3644 	return (zio_clear_fault((int)zc->zc_guid));
3645 }
3646 
3647 static int
3648 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3649 {
3650 	int id = (int)zc->zc_guid;
3651 	int error;
3652 
3653 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3654 	    &zc->zc_inject_record);
3655 
3656 	zc->zc_guid = id;
3657 
3658 	return (error);
3659 }
3660 
3661 static int
3662 zfs_ioc_error_log(zfs_cmd_t *zc)
3663 {
3664 	spa_t *spa;
3665 	int error;
3666 	size_t count = (size_t)zc->zc_nvlist_dst_size;
3667 
3668 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
3669 		return (error);
3670 
3671 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
3672 	    &count);
3673 	if (error == 0)
3674 		zc->zc_nvlist_dst_size = count;
3675 	else
3676 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
3677 
3678 	spa_close(spa, FTAG);
3679 
3680 	return (error);
3681 }
3682 
/*
 * Clear error state on a whole pool or on one vdev, then resume any
 * suspended I/O.
 *
 * inputs:
 * zc_name		name of pool
 * zc_guid		guid of the vdev to clear; 0 passes NULL to
 *			vdev_clear()
 * zc_cookie		ZPOOL_NO_REWIND selects a plain spa_open();
 *			otherwise spa_open_rewind() is used
 * zc_nvlist_src{_size}	rewind policy (required unless ZPOOL_NO_REWIND)
 *
 * outputs:
 * zc_nvlist_dst	config handed back by spa_open_rewind(), if any
 *
 * Returns EIO if the pool is not known or zio_resume() fails, EINVAL if a
 * rewind was requested without a policy nvlist, ENODEV if zc_guid does not
 * match a vdev, or the error from opening the pool.
 */
static int
zfs_ioc_clear(zfs_cmd_t *zc)
{
	spa_t *spa;
	vdev_t *vd;
	int error;

	/*
	 * On zpool clear we also fix up missing slogs
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_lookup(zc->zc_name);
	if (spa == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EIO);
	}
	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
		/* we need to let spa_open/spa_load clear the chains */
		spa_set_log_state(spa, SPA_LOG_CLEAR);
	}
	/* Reset the flag while the namespace lock is still held. */
	spa->spa_last_open_failed = 0;
	mutex_exit(&spa_namespace_lock);

	/* Take an open reference; spa is re-assigned by the open call. */
	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
		error = spa_open(zc->zc_name, &spa, FTAG);
	} else {
		nvlist_t *policy;
		nvlist_t *config = NULL;

		if (zc->zc_nvlist_src == NULL)
			return (EINVAL);

		if ((error = get_nvlist(zc->zc_nvlist_src,
		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
			    policy, &config);
			/*
			 * A config may be handed back even when the open
			 * failed, so it is copied out before error is checked.
			 */
			if (config != NULL) {
				(void) put_nvlist(zc, config);
				nvlist_free(config);
			}
			nvlist_free(policy);
		}
	}

	if (error)
		return (error);

	spa_vdev_state_enter(spa, SCL_NONE);

	if (zc->zc_guid == 0) {
		vd = NULL;
	} else {
		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
		if (vd == NULL) {
			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
			spa_close(spa, FTAG);
			return (ENODEV);
		}
	}

	vdev_clear(spa, vd);

	(void) spa_vdev_state_exit(spa, NULL, 0);

	/*
	 * Resume any suspended I/Os.
	 */
	if (zio_resume(spa) != 0)
		error = EIO;

	spa_close(spa, FTAG);

	return (error);
}
3757 
3758 /*
3759  * inputs:
3760  * zc_name	name of filesystem
3761  * zc_value	name of origin snapshot
3762  *
3763  * outputs:
3764  * zc_string	name of conflicting snapshot, if there is one
3765  */
3766 static int
3767 zfs_ioc_promote(zfs_cmd_t *zc)
3768 {
3769 	char *cp;
3770 
3771 	/*
3772 	 * We don't need to unmount *all* the origin fs's snapshots, but
3773 	 * it's easier.
3774 	 */
3775 	cp = strchr(zc->zc_value, '@');
3776 	if (cp)
3777 		*cp = '\0';
3778 	(void) dmu_objset_find(zc->zc_value,
3779 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
3780 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
3781 }
3782 
3783 /*
3784  * Retrieve a single {user|group}{used|quota}@... property.
3785  *
3786  * inputs:
3787  * zc_name	name of filesystem
3788  * zc_objset_type zfs_userquota_prop_t
3789  * zc_value	domain name (eg. "S-1-234-567-89")
3790  * zc_guid	RID/UID/GID
3791  *
3792  * outputs:
3793  * zc_cookie	property value
3794  */
3795 static int
3796 zfs_ioc_userspace_one(zfs_cmd_t *zc)
3797 {
3798 	zfsvfs_t *zfsvfs;
3799 	int error;
3800 
3801 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
3802 		return (EINVAL);
3803 
3804 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
3805 	if (error)
3806 		return (error);
3807 
3808 	error = zfs_userspace_one(zfsvfs,
3809 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
3810 	zfsvfs_rele(zfsvfs, FTAG);
3811 
3812 	return (error);
3813 }
3814 
3815 /*
3816  * inputs:
3817  * zc_name		name of filesystem
3818  * zc_cookie		zap cursor
3819  * zc_objset_type	zfs_userquota_prop_t
3820  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
3821  *
3822  * outputs:
3823  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
3824  * zc_cookie	zap cursor
3825  */
3826 static int
3827 zfs_ioc_userspace_many(zfs_cmd_t *zc)
3828 {
3829 	zfsvfs_t *zfsvfs;
3830 	int bufsize = zc->zc_nvlist_dst_size;
3831 
3832 	if (bufsize <= 0)
3833 		return (ENOMEM);
3834 
3835 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
3836 	if (error)
3837 		return (error);
3838 
3839 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
3840 
3841 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
3842 	    buf, &zc->zc_nvlist_dst_size);
3843 
3844 	if (error == 0) {
3845 		error = xcopyout(buf,
3846 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
3847 		    zc->zc_nvlist_dst_size);
3848 	}
3849 	kmem_free(buf, bufsize);
3850 	zfsvfs_rele(zfsvfs, FTAG);
3851 
3852 	return (error);
3853 }
3854 
3855 /*
3856  * inputs:
3857  * zc_name		name of filesystem
3858  *
3859  * outputs:
3860  * none
3861  */
3862 static int
3863 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
3864 {
3865 	objset_t *os;
3866 	int error = 0;
3867 	zfsvfs_t *zfsvfs;
3868 
3869 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3870 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
3871 			/*
3872 			 * If userused is not enabled, it may be because the
3873 			 * objset needs to be closed & reopened (to grow the
3874 			 * objset_phys_t).  Suspend/resume the fs will do that.
3875 			 */
3876 			error = zfs_suspend_fs(zfsvfs);
3877 			if (error == 0)
3878 				error = zfs_resume_fs(zfsvfs, zc->zc_name);
3879 		}
3880 		if (error == 0)
3881 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
3882 		VFS_RELE(zfsvfs->z_vfs);
3883 	} else {
3884 		/* XXX kind of reading contents without owning */
3885 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
3886 		if (error)
3887 			return (error);
3888 
3889 		error = dmu_objset_userspace_upgrade(os);
3890 		dmu_objset_rele(os, FTAG);
3891 	}
3892 
3893 	return (error);
3894 }
3895 
/*
 * We don't want to have a hard dependency against some special symbols
 * in sharefs, nfs, and smbsrv.  Determine them if needed when the first
 * file system is shared.
 * None of sharefs, nfs or smbsrv are unloadable modules.
 */
/* Entry points resolved with ddi_modsym() the first time they are needed. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the corresponding lookups have all succeeded. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Handles returned by ddi_modopen() for the modules named above. */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and function pointers above are being set up. */
kmutex_t zfs_share_lock;
3914 
3915 static int
3916 zfs_init_sharefs()
3917 {
3918 	int error;
3919 
3920 	ASSERT(MUTEX_HELD(&zfs_share_lock));
3921 	/* Both NFS and SMB shares also require sharetab support. */
3922 	if (sharefs_mod == NULL && ((sharefs_mod =
3923 	    ddi_modopen("fs/sharefs",
3924 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
3925 		return (ENOSYS);
3926 	}
3927 	if (zshare_fs == NULL && ((zshare_fs =
3928 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
3929 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
3930 		return (ENOSYS);
3931 	}
3932 	return (0);
3933 }
3934 
/*
 * Share or unshare a filesystem over NFS or SMB.
 *
 * inputs (all in zc_share):
 * z_sharetype	ZFS_SHARE_NFS, ZFS_UNSHARE_NFS, ZFS_SHARE_SMB or
 *		ZFS_UNSHARE_SMB
 * z_exportdata	user address passed through to the protocol's export hook
 * z_sharedata	user address passed through to the sharetab hook
 * z_sharemax	passed through to the sharetab hook
 *
 * Returns EINVAL for an unknown share type, ENOSYS if a required module or
 * symbol cannot be found, otherwise the error from the export hook or, if
 * that succeeded, from the sharetab hook.
 */
static int
zfs_ioc_share(zfs_cmd_t *zc)
{
	int error;
	int opcode;

	/*
	 * First pass: make sure the hooks for the requested protocol have
	 * been resolved.  The *_inited flag is tested without the lock and
	 * set, under the lock, only after every lookup succeeded.
	 */
	switch (zc->zc_share.z_sharetype) {
	case ZFS_SHARE_NFS:
	case ZFS_UNSHARE_NFS:
		if (zfs_nfsshare_inited == 0) {
			mutex_enter(&zfs_share_lock);
			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
			    KRTLD_MODE_FIRST, &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			if (znfsexport_fs == NULL &&
			    ((znfsexport_fs = (int (*)(void *))
			    ddi_modsym(nfs_mod,
			    "nfs_export", &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			error = zfs_init_sharefs();
			if (error) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			zfs_nfsshare_inited = 1;
			mutex_exit(&zfs_share_lock);
		}
		break;
	case ZFS_SHARE_SMB:
	case ZFS_UNSHARE_SMB:
		if (zfs_smbshare_inited == 0) {
			mutex_enter(&zfs_share_lock);
			if (smbsrv_mod == NULL && ((smbsrv_mod =
			    ddi_modopen("drv/smbsrv",
			    KRTLD_MODE_FIRST, &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
			    "smb_server_share", &error)) == NULL)) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			error = zfs_init_sharefs();
			if (error) {
				mutex_exit(&zfs_share_lock);
				return (ENOSYS);
			}
			zfs_smbshare_inited = 1;
			mutex_exit(&zfs_share_lock);
		}
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Second pass: call the protocol's export hook.  Unknown types were
	 * already rejected above, so no default case is needed here.
	 */
	switch (zc->zc_share.z_sharetype) {
	case ZFS_SHARE_NFS:
	case ZFS_UNSHARE_NFS:
		if (error =
		    znfsexport_fs((void *)
		    (uintptr_t)zc->zc_share.z_exportdata))
			return (error);
		break;
	case ZFS_SHARE_SMB:
	case ZFS_UNSHARE_SMB:
		/* The boolean tells the hook whether this is an add. */
		if (error = zsmbexport_fs((void *)
		    (uintptr_t)zc->zc_share.z_exportdata,
		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
		    B_TRUE: B_FALSE)) {
			return (error);
		}
		break;
	}

	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
	    SHAREFS_ADD : SHAREFS_REMOVE;

	/*
	 * Add or remove share from sharetab
	 */
	error = zshare_fs(opcode,
	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
	    zc->zc_share.z_sharemax);

	return (error);

}
4029 
/*
 * Single-entry ACL passed to VOP_CREATE() when zfs_ioc_smb_acl() creates a
 * share's ACL file (ZFS_SMB_ACL_ADD).
 * NOTE(review): the initializer reads as "everyone, all permissions";
 * confirm the member order against the ace_t definition.
 */
ace_t full_access[] = {
	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4033 
4034 /*
4035  * Remove all ACL files in shares dir
4036  */
4037 static int
4038 zfs_smb_acl_purge(znode_t *dzp)
4039 {
4040 	zap_cursor_t	zc;
4041 	zap_attribute_t	zap;
4042 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4043 	int error;
4044 
4045 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4046 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4047 	    zap_cursor_advance(&zc)) {
4048 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4049 		    NULL, 0)) != 0)
4050 			break;
4051 	}
4052 	zap_cursor_fini(&zc);
4053 	return (error);
4054 }
4055 
4056 static int
4057 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4058 {
4059 	vnode_t *vp;
4060 	znode_t *dzp;
4061 	vnode_t *resourcevp = NULL;
4062 	znode_t *sharedir;
4063 	zfsvfs_t *zfsvfs;
4064 	nvlist_t *nvlist;
4065 	char *src, *target;
4066 	vattr_t vattr;
4067 	vsecattr_t vsec;
4068 	int error = 0;
4069 
4070 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4071 	    NO_FOLLOW, NULL, &vp)) != 0)
4072 		return (error);
4073 
4074 	/* Now make sure mntpnt and dataset are ZFS */
4075 
4076 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4077 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4078 	    zc->zc_name) != 0)) {
4079 		VN_RELE(vp);
4080 		return (EINVAL);
4081 	}
4082 
4083 	dzp = VTOZ(vp);
4084 	zfsvfs = dzp->z_zfsvfs;
4085 	ZFS_ENTER(zfsvfs);
4086 
4087 	/*
4088 	 * Create share dir if its missing.
4089 	 */
4090 	mutex_enter(&zfsvfs->z_lock);
4091 	if (zfsvfs->z_shares_dir == 0) {
4092 		dmu_tx_t *tx;
4093 
4094 		tx = dmu_tx_create(zfsvfs->z_os);
4095 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4096 		    ZFS_SHARES_DIR);
4097 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4098 		error = dmu_tx_assign(tx, TXG_WAIT);
4099 		if (error) {
4100 			dmu_tx_abort(tx);
4101 		} else {
4102 			error = zfs_create_share_dir(zfsvfs, tx);
4103 			dmu_tx_commit(tx);
4104 		}
4105 		if (error) {
4106 			mutex_exit(&zfsvfs->z_lock);
4107 			VN_RELE(vp);
4108 			ZFS_EXIT(zfsvfs);
4109 			return (error);
4110 		}
4111 	}
4112 	mutex_exit(&zfsvfs->z_lock);
4113 
4114 	ASSERT(zfsvfs->z_shares_dir);
4115 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4116 		VN_RELE(vp);
4117 		ZFS_EXIT(zfsvfs);
4118 		return (error);
4119 	}
4120 
4121 	switch (zc->zc_cookie) {
4122 	case ZFS_SMB_ACL_ADD:
4123 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4124 		vattr.va_type = VREG;
4125 		vattr.va_mode = S_IFREG|0777;
4126 		vattr.va_uid = 0;
4127 		vattr.va_gid = 0;
4128 
4129 		vsec.vsa_mask = VSA_ACE;
4130 		vsec.vsa_aclentp = &full_access;
4131 		vsec.vsa_aclentsz = sizeof (full_access);
4132 		vsec.vsa_aclcnt = 1;
4133 
4134 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4135 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4136 		if (resourcevp)
4137 			VN_RELE(resourcevp);
4138 		break;
4139 
4140 	case ZFS_SMB_ACL_REMOVE:
4141 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4142 		    NULL, 0);
4143 		break;
4144 
4145 	case ZFS_SMB_ACL_RENAME:
4146 		if ((error = get_nvlist(zc->zc_nvlist_src,
4147 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4148 			VN_RELE(vp);
4149 			ZFS_EXIT(zfsvfs);
4150 			return (error);
4151 		}
4152 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4153 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4154 		    &target)) {
4155 			VN_RELE(vp);
4156 			VN_RELE(ZTOV(sharedir));
4157 			ZFS_EXIT(zfsvfs);
4158 			nvlist_free(nvlist);
4159 			return (error);
4160 		}
4161 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4162 		    kcred, NULL, 0);
4163 		nvlist_free(nvlist);
4164 		break;
4165 
4166 	case ZFS_SMB_ACL_PURGE:
4167 		error = zfs_smb_acl_purge(sharedir);
4168 		break;
4169 
4170 	default:
4171 		error = EINVAL;
4172 		break;
4173 	}
4174 
4175 	VN_RELE(vp);
4176 	VN_RELE(ZTOV(sharedir));
4177 
4178 	ZFS_EXIT(zfsvfs);
4179 
4180 	return (error);
4181 }
4182 
4183 /*
4184  * inputs:
4185  * zc_name	name of filesystem
4186  * zc_value	short name of snap
4187  * zc_string	user-supplied tag for this reference
4188  * zc_cookie	recursive flag
4189  * zc_temphold	set if hold is temporary
4190  *
4191  * outputs:		none
4192  */
4193 static int
4194 zfs_ioc_hold(zfs_cmd_t *zc)
4195 {
4196 	boolean_t recursive = zc->zc_cookie;
4197 
4198 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4199 		return (EINVAL);
4200 
4201 	return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4202 	    zc->zc_string, recursive, zc->zc_temphold));
4203 }
4204 
4205 /*
4206  * inputs:
4207  * zc_name	name of dataset from which we're releasing a user reference
4208  * zc_value	short name of snap
4209  * zc_string	user-supplied tag for this reference
4210  * zc_cookie	recursive flag
4211  *
4212  * outputs:		none
4213  */
4214 static int
4215 zfs_ioc_release(zfs_cmd_t *zc)
4216 {
4217 	boolean_t recursive = zc->zc_cookie;
4218 
4219 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4220 		return (EINVAL);
4221 
4222 	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4223 	    zc->zc_string, recursive));
4224 }
4225 
4226 /*
4227  * inputs:
4228  * zc_name		name of filesystem
4229  *
4230  * outputs:
4231  * zc_nvlist_src{_size}	nvlist of snapshot holds
4232  */
4233 static int
4234 zfs_ioc_get_holds(zfs_cmd_t *zc)
4235 {
4236 	nvlist_t *nvp;
4237 	int error;
4238 
4239 	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4240 		error = put_nvlist(zc, nvp);
4241 		nvlist_free(nvp);
4242 	}
4243 
4244 	return (error);
4245 }
4246 
/*
 * Dispatch table for the /dev/zfs ioctls.  zfsdev_ioctl() indexes it with
 * (cmd - ZFS_IOC), so the position of each entry has to match its command
 * number; new entries belong at the end.
 *
 * Each entry gives the handler, the security policy callback, the kind of
 * name check applied to zc_name (POOL_NAME, DATASET_NAME or NO_NAME), and
 * two booleans that zfsdev_ioctl() reads as zvec_his_log and
 * zvec_pool_check (presumably in that order -- confirm against the
 * zfs_ioc_vec_t definition).
 *
 * pool create, destroy, and export don't log the history as part of
 * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
 * do the logging of those commands.
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_FALSE },
	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    B_FALSE },
	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
	    B_TRUE},
	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, B_FALSE },
	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, B_FALSE },
	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
	    B_TRUE, B_TRUE },
	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one,
	    DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many,
	    DATASET_NAME, B_FALSE, B_FALSE },
	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    DATASET_NAME, B_FALSE, B_TRUE },
	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
	    B_TRUE },
	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_TRUE },
	{ zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
	    B_FALSE },
	{ zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
	    B_TRUE }
};
4354 
4355 int
4356 pool_status_check(const char *name, zfs_ioc_namecheck_t type)
4357 {
4358 	spa_t *spa;
4359 	int error;
4360 
4361 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
4362 
4363 	error = spa_open(name, &spa, FTAG);
4364 	if (error == 0) {
4365 		if (spa_suspended(spa))
4366 			error = EAGAIN;
4367 		spa_close(spa, FTAG);
4368 	}
4369 	return (error);
4370 }
4371 
4372 static int
4373 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
4374 {
4375 	zfs_cmd_t *zc;
4376 	uint_t vec;
4377 	int error, rc;
4378 
4379 	if (getminor(dev) != 0)
4380 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
4381 
4382 	vec = cmd - ZFS_IOC;
4383 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
4384 
4385 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
4386 		return (EINVAL);
4387 
4388 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
4389 
4390 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
4391 	if (error != 0)
4392 		error = EFAULT;
4393 
4394 	if ((error == 0) && !(flag & FKIOCTL))
4395 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
4396 
4397 	/*
4398 	 * Ensure that all pool/dataset names are valid before we pass down to
4399 	 * the lower layers.
4400 	 */
4401 	if (error == 0) {
4402 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
4403 		zc->zc_iflags = flag & FKIOCTL;
4404 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
4405 		case POOL_NAME:
4406 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
4407 				error = EINVAL;
4408 			if (zfs_ioc_vec[vec].zvec_pool_check)
4409 				error = pool_status_check(zc->zc_name,
4410 				    zfs_ioc_vec[vec].zvec_namecheck);
4411 			break;
4412 
4413 		case DATASET_NAME:
4414 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
4415 				error = EINVAL;
4416 			if (zfs_ioc_vec[vec].zvec_pool_check)
4417 				error = pool_status_check(zc->zc_name,
4418 				    zfs_ioc_vec[vec].zvec_namecheck);
4419 			break;
4420 
4421 		case NO_NAME:
4422 			break;
4423 		}
4424 	}
4425 
4426 	if (error == 0)
4427 		error = zfs_ioc_vec[vec].zvec_func(zc);
4428 
4429 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
4430 	if (error == 0) {
4431 		if (rc != 0)
4432 			error = EFAULT;
4433 		if (zfs_ioc_vec[vec].zvec_his_log)
4434 			zfs_log_history(zc);
4435 	}
4436 
4437 	kmem_free(zc, sizeof (zfs_cmd_t));
4438 	return (error);
4439 }
4440 
4441 static int
4442 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4443 {
4444 	if (cmd != DDI_ATTACH)
4445 		return (DDI_FAILURE);
4446 
4447 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
4448 	    DDI_PSEUDO, 0) == DDI_FAILURE)
4449 		return (DDI_FAILURE);
4450 
4451 	zfs_dip = dip;
4452 
4453 	ddi_report_dev(dip);
4454 
4455 	return (DDI_SUCCESS);
4456 }
4457 
4458 static int
4459 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
4460 {
4461 	if (spa_busy() || zfs_busy() || zvol_busy())
4462 		return (DDI_FAILURE);
4463 
4464 	if (cmd != DDI_DETACH)
4465 		return (DDI_FAILURE);
4466 
4467 	zfs_dip = NULL;
4468 
4469 	ddi_prop_remove_all(dip);
4470 	ddi_remove_minor_node(dip, NULL);
4471 
4472 	return (DDI_SUCCESS);
4473 }
4474 
4475 /*ARGSUSED*/
4476 static int
4477 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
4478 {
4479 	switch (infocmd) {
4480 	case DDI_INFO_DEVT2DEVINFO:
4481 		*result = zfs_dip;
4482 		return (DDI_SUCCESS);
4483 
4484 	case DDI_INFO_DEVT2INSTANCE:
4485 		*result = (void *)0;
4486 		return (DDI_SUCCESS);
4487 	}
4488 
4489 	return (DDI_FAILURE);
4490 }
4491 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * The ioctl entry is the exception: zfsdev_ioctl() handles minor 0 itself
 * and forwards every other minor to zvol_ioctl().
 */
static struct cb_ops zfs_cb_ops = {
	zvol_open,	/* open */
	zvol_close,	/* close */
	zvol_strategy,	/* strategy */
	nodev,		/* print */
	zvol_dump,	/* dump */
	zvol_read,	/* read */
	zvol_write,	/* write */
	zfsdev_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	nodev,		/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab */
	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
	CB_REV,		/* version */
	nodev,		/* async read */
	nodev,		/* async write */
};
4521 
/*
 * Device operations for the zfs driver: wires up the info/attach/detach
 * entry points defined in this file and the character/block operations in
 * zfs_cb_ops.
 */
static struct dev_ops zfs_dev_ops = {
	DEVO_REV,	/* version */
	0,		/* refcnt */
	zfs_info,	/* info */
	nulldev,	/* identify */
	nulldev,	/* probe */
	zfs_attach,	/* attach */
	zfs_detach,	/* detach */
	nodev,		/* reset */
	&zfs_cb_ops,	/* driver operations */
	NULL,		/* no bus operations */
	NULL,		/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
4536 
/* Driver linkage: description string plus the dev_ops defined above. */
static struct modldrv zfs_modldrv = {
	&mod_driverops,
	"ZFS storage pool",
	&zfs_dev_ops
};
4542 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info().
 * It lists two linkage elements, zfs_modlfs and zfs_modldrv, followed by
 * the terminating NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&zfs_modlfs,
	(void *)&zfs_modldrv,
	NULL
};
4549 
4550 
/* Thread-specific-data keys; both are created by tsd_create() in _init(). */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
4553 
4554 int
4555 _init(void)
4556 {
4557 	int error;
4558 
4559 	spa_init(FREAD | FWRITE);
4560 	zfs_init();
4561 	zvol_init();
4562 
4563 	if ((error = mod_install(&modlinkage)) != 0) {
4564 		zvol_fini();
4565 		zfs_fini();
4566 		spa_fini();
4567 		return (error);
4568 	}
4569 
4570 	tsd_create(&zfs_fsyncer_key, NULL);
4571 	tsd_create(&rrw_tsd_key, NULL);
4572 
4573 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
4574 	ASSERT(error == 0);
4575 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
4576 
4577 	return (0);
4578 }
4579 
4580 int
4581 _fini(void)
4582 {
4583 	int error;
4584 
4585 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
4586 		return (EBUSY);
4587 
4588 	if ((error = mod_remove(&modlinkage)) != 0)
4589 		return (error);
4590 
4591 	zvol_fini();
4592 	zfs_fini();
4593 	spa_fini();
4594 	if (zfs_nfsshare_inited)
4595 		(void) ddi_modclose(nfs_mod);
4596 	if (zfs_smbshare_inited)
4597 		(void) ddi_modclose(smbsrv_mod);
4598 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
4599 		(void) ddi_modclose(sharefs_mod);
4600 
4601 	tsd_destroy(&zfs_fsyncer_key);
4602 	ldi_ident_release(zfs_li);
4603 	zfs_li = NULL;
4604 	mutex_destroy(&zfs_share_lock);
4605 
4606 	return (error);
4607 }
4608 
4609 int
4610 _info(struct modinfo *modinfop)
4611 {
4612 	return (mod_info(&modlinkage, modinfop));
4613 }
4614