xref: /titanic_50/usr/src/uts/common/fs/zfs/zfs_ioctl.c (revision ac83d6047a381a86653d795f3dea67ae64eb5549)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/errno.h>
29 #include <sys/uio.h>
30 #include <sys/buf.h>
31 #include <sys/modctl.h>
32 #include <sys/open.h>
33 #include <sys/file.h>
34 #include <sys/kmem.h>
35 #include <sys/conf.h>
36 #include <sys/cmn_err.h>
37 #include <sys/stat.h>
38 #include <sys/zfs_ioctl.h>
39 #include <sys/zfs_znode.h>
40 #include <sys/zap.h>
41 #include <sys/spa.h>
42 #include <sys/spa_impl.h>
43 #include <sys/vdev.h>
44 #include <sys/vdev_impl.h>
45 #include <sys/dmu.h>
46 #include <sys/dsl_dir.h>
47 #include <sys/dsl_dataset.h>
48 #include <sys/dsl_prop.h>
49 #include <sys/dsl_deleg.h>
50 #include <sys/dmu_objset.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/sunldi.h>
54 #include <sys/policy.h>
55 #include <sys/zone.h>
56 #include <sys/nvpair.h>
57 #include <sys/pathname.h>
58 #include <sys/mount.h>
59 #include <sys/sdt.h>
60 #include <sys/fs/zfs.h>
61 #include <sys/zfs_ctldir.h>
62 #include <sys/zfs_dir.h>
63 #include <sys/zvol.h>
64 #include <sharefs/share.h>
65 #include <sys/dmu_objset.h>
66 
67 #include "zfs_namecheck.h"
68 #include "zfs_prop.h"
69 #include "zfs_deleg.h"
70 
/* Filesystem module linkage structure; declared extern here. */
extern struct modlfs zfs_modlfs;

/* Declared extern here; presumably the ZFS setup/teardown hooks -- confirm. */
extern void zfs_init(void);
extern void zfs_fini(void);

/* LDI identifier for this driver; starts out NULL. */
ldi_ident_t zfs_li = NULL;
/* Device info pointer for this driver (no initializer here). */
dev_info_t *zfs_dip;
78 
/*
 * Signature of an ioctl handler: takes the command structure and returns
 * an int (the handlers in this file return 0 or an errno value).
 */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/*
 * Signature of a security policy check: takes the command structure and the
 * caller's credentials and returns an int (0 or an errno value in this file).
 */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);

/*
 * Kind of name a command carries.  NOTE(review): presumably selects how
 * zc_name is validated before dispatch -- the consumer is not visible here.
 */
typedef enum {
	NO_NAME,
	POOL_NAME,
	DATASET_NAME
} zfs_ioc_namecheck_t;

/*
 * One entry describing an ioctl: its handler, its security policy check and
 * its name-check class.  NOTE(review): zvec_his_log and zvec_pool_check look
 * like per-ioctl flags for history logging and pool checking; their consumer
 * is not visible in this part of the file -- confirm.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;
	zfs_secpolicy_func_t	*zvec_secpolicy;
	zfs_ioc_namecheck_t	zvec_namecheck;
	boolean_t		zvec_his_log;
	boolean_t		zvec_pool_check;
} zfs_ioc_vec_t;
95 
/*
 * This array is indexed by zfs_userquota_prop_t.
 *
 * The userspace policy checks below index it with zc_objset_type (after
 * range-checking against ZFS_NUM_USERQUOTA_PROPS) to obtain the delegated
 * permission name to test.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};

/*
 * Forward declarations.  zfs_fill_zplprops_root() and zfs_set_prop_nvlist()
 * are called by zfs_ioc_pool_create() ahead of any definition in this file.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static void clear_props(char *dataset, nvlist_t *props, nvlist_t *newprops);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, nvlist_t *);
109 
110 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
111 void
112 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
113 {
114 	const char *newfile;
115 	char buf[256];
116 	va_list adx;
117 
118 	/*
119 	 * Get rid of annoying "../common/" prefix to filename.
120 	 */
121 	newfile = strrchr(file, '/');
122 	if (newfile != NULL) {
123 		newfile = newfile + 1; /* Get rid of leading / */
124 	} else {
125 		newfile = file;
126 	}
127 
128 	va_start(adx, fmt);
129 	(void) vsnprintf(buf, sizeof (buf), fmt, adx);
130 	va_end(adx);
131 
132 	/*
133 	 * To get this data, use the zfs-dprintf probe as so:
134 	 * dtrace -q -n 'zfs-dprintf \
135 	 *	/stringof(arg0) == "dbuf.c"/ \
136 	 *	{printf("%s: %s", stringof(arg1), stringof(arg3))}'
137 	 * arg0 = file name
138 	 * arg1 = function name
139 	 * arg2 = line number
140 	 * arg3 = message
141 	 */
142 	DTRACE_PROBE4(zfs__dprintf,
143 	    char *, newfile, char *, func, int, line, char *, buf);
144 }
145 
146 static void
147 history_str_free(char *buf)
148 {
149 	kmem_free(buf, HIS_MAX_RECORD_LEN);
150 }
151 
152 static char *
153 history_str_get(zfs_cmd_t *zc)
154 {
155 	char *buf;
156 
157 	if (zc->zc_history == NULL)
158 		return (NULL);
159 
160 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
161 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
162 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
163 		history_str_free(buf);
164 		return (NULL);
165 	}
166 
167 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
168 
169 	return (buf);
170 }
171 
172 /*
173  * Check to see if the named dataset is currently defined as bootable
174  */
175 static boolean_t
176 zfs_is_bootfs(const char *name)
177 {
178 	objset_t *os;
179 
180 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
181 		boolean_t ret;
182 		ret = (dmu_objset_id(os) == dmu_objset_spa(os)->spa_bootfs);
183 		dmu_objset_rele(os, FTAG);
184 		return (ret);
185 	}
186 	return (B_FALSE);
187 }
188 
189 /*
190  * zfs_earlier_version
191  *
192  *	Return non-zero if the spa version is less than requested version.
193  */
194 static int
195 zfs_earlier_version(const char *name, int version)
196 {
197 	spa_t *spa;
198 
199 	if (spa_open(name, &spa, FTAG) == 0) {
200 		if (spa_version(spa) < version) {
201 			spa_close(spa, FTAG);
202 			return (1);
203 		}
204 		spa_close(spa, FTAG);
205 	}
206 	return (0);
207 }
208 
209 /*
210  * zpl_earlier_version
211  *
212  * Return TRUE if the ZPL version is less than requested version.
213  */
214 static boolean_t
215 zpl_earlier_version(const char *name, int version)
216 {
217 	objset_t *os;
218 	boolean_t rc = B_TRUE;
219 
220 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
221 		uint64_t zplversion;
222 
223 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
224 			dmu_objset_rele(os, FTAG);
225 			return (B_TRUE);
226 		}
227 		/* XXX reading from non-owned objset */
228 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
229 			rc = zplversion < version;
230 		dmu_objset_rele(os, FTAG);
231 	}
232 	return (rc);
233 }
234 
235 static void
236 zfs_log_history(zfs_cmd_t *zc)
237 {
238 	spa_t *spa;
239 	char *buf;
240 
241 	if ((buf = history_str_get(zc)) == NULL)
242 		return;
243 
244 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
245 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
246 			(void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
247 		spa_close(spa, FTAG);
248 	}
249 	history_str_free(buf);
250 }
251 
252 /*
253  * Policy for top-level read operations (list pools).  Requires no privileges,
254  * and can be used in the local zone, as there is no associated dataset.
255  */
256 /* ARGSUSED */
257 static int
258 zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
259 {
260 	return (0);
261 }
262 
263 /*
264  * Policy for dataset read operations (list children, get statistics).  Requires
265  * no privileges, but must be visible in the local zone.
266  */
267 /* ARGSUSED */
268 static int
269 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
270 {
271 	if (INGLOBALZONE(curproc) ||
272 	    zone_dataset_visible(zc->zc_name, NULL))
273 		return (0);
274 
275 	return (ENOENT);
276 }
277 
278 static int
279 zfs_dozonecheck(const char *dataset, cred_t *cr)
280 {
281 	uint64_t zoned;
282 	int writable = 1;
283 
284 	/*
285 	 * The dataset must be visible by this zone -- check this first
286 	 * so they don't see EPERM on something they shouldn't know about.
287 	 */
288 	if (!INGLOBALZONE(curproc) &&
289 	    !zone_dataset_visible(dataset, &writable))
290 		return (ENOENT);
291 
292 	if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
293 		return (ENOENT);
294 
295 	if (INGLOBALZONE(curproc)) {
296 		/*
297 		 * If the fs is zoned, only root can access it from the
298 		 * global zone.
299 		 */
300 		if (secpolicy_zfs(cr) && zoned)
301 			return (EPERM);
302 	} else {
303 		/*
304 		 * If we are in a local zone, the 'zoned' property must be set.
305 		 */
306 		if (!zoned)
307 			return (EPERM);
308 
309 		/* must be writable by this zone */
310 		if (!writable)
311 			return (EPERM);
312 	}
313 	return (0);
314 }
315 
316 int
317 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
318 {
319 	int error;
320 
321 	error = zfs_dozonecheck(name, cr);
322 	if (error == 0) {
323 		error = secpolicy_zfs(cr);
324 		if (error)
325 			error = dsl_deleg_access(name, perm, cr);
326 	}
327 	return (error);
328 }
329 
330 static int
331 zfs_secpolicy_setprop(const char *name, zfs_prop_t prop, cred_t *cr)
332 {
333 	/*
334 	 * Check permissions for special properties.
335 	 */
336 	switch (prop) {
337 	case ZFS_PROP_ZONED:
338 		/*
339 		 * Disallow setting of 'zoned' from within a local zone.
340 		 */
341 		if (!INGLOBALZONE(curproc))
342 			return (EPERM);
343 		break;
344 
345 	case ZFS_PROP_QUOTA:
346 		if (!INGLOBALZONE(curproc)) {
347 			uint64_t zoned;
348 			char setpoint[MAXNAMELEN];
349 			/*
350 			 * Unprivileged users are allowed to modify the
351 			 * quota on things *under* (ie. contained by)
352 			 * the thing they own.
353 			 */
354 			if (dsl_prop_get_integer(name, "zoned", &zoned,
355 			    setpoint))
356 				return (EPERM);
357 			if (!zoned || strlen(name) <= strlen(setpoint))
358 				return (EPERM);
359 		}
360 		break;
361 	}
362 
363 	return (zfs_secpolicy_write_perms(name, zfs_prop_to_name(prop), cr));
364 }
365 
366 int
367 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
368 {
369 	int error;
370 
371 	error = zfs_dozonecheck(zc->zc_name, cr);
372 	if (error)
373 		return (error);
374 
375 	/*
376 	 * permission to set permissions will be evaluated later in
377 	 * dsl_deleg_can_allow()
378 	 */
379 	return (0);
380 }
381 
382 int
383 zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
384 {
385 	int error;
386 	error = zfs_secpolicy_write_perms(zc->zc_name,
387 	    ZFS_DELEG_PERM_ROLLBACK, cr);
388 	if (error == 0)
389 		error = zfs_secpolicy_write_perms(zc->zc_name,
390 		    ZFS_DELEG_PERM_MOUNT, cr);
391 	return (error);
392 }
393 
394 int
395 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
396 {
397 	return (zfs_secpolicy_write_perms(zc->zc_name,
398 	    ZFS_DELEG_PERM_SEND, cr));
399 }
400 
401 static int
402 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
403 {
404 	vnode_t *vp;
405 	int error;
406 
407 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
408 	    NO_FOLLOW, NULL, &vp)) != 0)
409 		return (error);
410 
411 	/* Now make sure mntpnt and dataset are ZFS */
412 
413 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
414 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
415 	    zc->zc_name) != 0)) {
416 		VN_RELE(vp);
417 		return (EPERM);
418 	}
419 
420 	VN_RELE(vp);
421 	return (dsl_deleg_access(zc->zc_name,
422 	    ZFS_DELEG_PERM_SHARE, cr));
423 }
424 
425 int
426 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
427 {
428 	if (!INGLOBALZONE(curproc))
429 		return (EPERM);
430 
431 	if (secpolicy_nfs(cr) == 0) {
432 		return (0);
433 	} else {
434 		return (zfs_secpolicy_deleg_share(zc, cr));
435 	}
436 }
437 
438 int
439 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
440 {
441 	if (!INGLOBALZONE(curproc))
442 		return (EPERM);
443 
444 	if (secpolicy_smb(cr) == 0) {
445 		return (0);
446 	} else {
447 		return (zfs_secpolicy_deleg_share(zc, cr));
448 	}
449 }
450 
/*
 * Compute the parent of a dataset or snapshot name.
 *
 * datasetname - NUL-terminated name, e.g. "pool/fs" or "pool/fs@snap"
 * parent      - output buffer that receives the parent name
 * parentsize  - size of 'parent' in bytes
 *
 * Returns 0 on success, ENOENT when the name has neither an '@' nor a '/'
 * component to strip, and ENAMETOOLONG when the name (including its
 * terminator) does not fit in 'parent'.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *cp;

	/*
	 * strncpy() does not terminate the destination when the source fills
	 * it, which would send the strrchr() calls below past the end of
	 * 'parent'.  Refuse names that do not fit rather than evaluating a
	 * silently truncated (and therefore different) name.
	 */
	if (parentsize <= 0 || strlen(datasetname) >= (size_t)parentsize)
		return (ENAMETOOLONG);

	/*
	 * Remove the @bla or /bla from the end of the name to get the parent.
	 */
	(void) strncpy(parent, datasetname, parentsize);
	cp = strrchr(parent, '@');
	if (cp != NULL) {
		cp[0] = '\0';
	} else {
		cp = strrchr(parent, '/');
		if (cp == NULL)
			return (ENOENT);
		cp[0] = '\0';
	}

	return (0);
}
472 
473 int
474 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
475 {
476 	int error;
477 
478 	if ((error = zfs_secpolicy_write_perms(name,
479 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
480 		return (error);
481 
482 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
483 }
484 
485 static int
486 zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
487 {
488 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
489 }
490 
491 /*
492  * Must have sys_config privilege to check the iscsi permission
493  */
494 /* ARGSUSED */
495 static int
496 zfs_secpolicy_iscsi(zfs_cmd_t *zc, cred_t *cr)
497 {
498 	return (secpolicy_zfs(cr));
499 }
500 
501 int
502 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
503 {
504 	char 	parentname[MAXNAMELEN];
505 	int	error;
506 
507 	if ((error = zfs_secpolicy_write_perms(from,
508 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
509 		return (error);
510 
511 	if ((error = zfs_secpolicy_write_perms(from,
512 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
513 		return (error);
514 
515 	if ((error = zfs_get_parent(to, parentname,
516 	    sizeof (parentname))) != 0)
517 		return (error);
518 
519 	if ((error = zfs_secpolicy_write_perms(parentname,
520 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
521 		return (error);
522 
523 	if ((error = zfs_secpolicy_write_perms(parentname,
524 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
525 		return (error);
526 
527 	return (error);
528 }
529 
530 static int
531 zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
532 {
533 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
534 }
535 
536 static int
537 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
538 {
539 	char 	parentname[MAXNAMELEN];
540 	objset_t *clone;
541 	int error;
542 
543 	error = zfs_secpolicy_write_perms(zc->zc_name,
544 	    ZFS_DELEG_PERM_PROMOTE, cr);
545 	if (error)
546 		return (error);
547 
548 	error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
549 
550 	if (error == 0) {
551 		dsl_dataset_t *pclone = NULL;
552 		dsl_dir_t *dd;
553 		dd = clone->os_dsl_dataset->ds_dir;
554 
555 		rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
556 		error = dsl_dataset_hold_obj(dd->dd_pool,
557 		    dd->dd_phys->dd_origin_obj, FTAG, &pclone);
558 		rw_exit(&dd->dd_pool->dp_config_rwlock);
559 		if (error) {
560 			dmu_objset_rele(clone, FTAG);
561 			return (error);
562 		}
563 
564 		error = zfs_secpolicy_write_perms(zc->zc_name,
565 		    ZFS_DELEG_PERM_MOUNT, cr);
566 
567 		dsl_dataset_name(pclone, parentname);
568 		dmu_objset_rele(clone, FTAG);
569 		dsl_dataset_rele(pclone, FTAG);
570 		if (error == 0)
571 			error = zfs_secpolicy_write_perms(parentname,
572 			    ZFS_DELEG_PERM_PROMOTE, cr);
573 	}
574 	return (error);
575 }
576 
577 static int
578 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
579 {
580 	int error;
581 
582 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
583 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
584 		return (error);
585 
586 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
587 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
588 		return (error);
589 
590 	return (zfs_secpolicy_write_perms(zc->zc_name,
591 	    ZFS_DELEG_PERM_CREATE, cr));
592 }
593 
594 int
595 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
596 {
597 	int error;
598 
599 	if ((error = zfs_secpolicy_write_perms(name,
600 	    ZFS_DELEG_PERM_SNAPSHOT, cr)) != 0)
601 		return (error);
602 
603 	error = zfs_secpolicy_write_perms(name,
604 	    ZFS_DELEG_PERM_MOUNT, cr);
605 
606 	return (error);
607 }
608 
609 static int
610 zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
611 {
612 
613 	return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
614 }
615 
616 static int
617 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
618 {
619 	char 	parentname[MAXNAMELEN];
620 	int 	error;
621 
622 	if ((error = zfs_get_parent(zc->zc_name, parentname,
623 	    sizeof (parentname))) != 0)
624 		return (error);
625 
626 	if (zc->zc_value[0] != '\0') {
627 		if ((error = zfs_secpolicy_write_perms(zc->zc_value,
628 		    ZFS_DELEG_PERM_CLONE, cr)) != 0)
629 			return (error);
630 	}
631 
632 	if ((error = zfs_secpolicy_write_perms(parentname,
633 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
634 		return (error);
635 
636 	error = zfs_secpolicy_write_perms(parentname,
637 	    ZFS_DELEG_PERM_MOUNT, cr);
638 
639 	return (error);
640 }
641 
642 static int
643 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
644 {
645 	int error;
646 
647 	error = secpolicy_fs_unmount(cr, NULL);
648 	if (error) {
649 		error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
650 	}
651 	return (error);
652 }
653 
654 /*
655  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
656  * SYS_CONFIG privilege, which is not available in a local zone.
657  */
658 /* ARGSUSED */
659 static int
660 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
661 {
662 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
663 		return (EPERM);
664 
665 	return (0);
666 }
667 
668 /*
669  * Just like zfs_secpolicy_config, except that we will check for
670  * mount permission on the dataset for permission to create/remove
671  * the minor nodes.
672  */
673 static int
674 zfs_secpolicy_minor(zfs_cmd_t *zc, cred_t *cr)
675 {
676 	if (secpolicy_sys_config(cr, B_FALSE) != 0) {
677 		return (dsl_deleg_access(zc->zc_name,
678 		    ZFS_DELEG_PERM_MOUNT, cr));
679 	}
680 
681 	return (0);
682 }
683 
684 /*
685  * Policy for fault injection.  Requires all privileges.
686  */
687 /* ARGSUSED */
688 static int
689 zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
690 {
691 	return (secpolicy_zinject(cr));
692 }
693 
694 static int
695 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
696 {
697 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
698 
699 	if (prop == ZPROP_INVAL) {
700 		if (!zfs_prop_user(zc->zc_value))
701 			return (EINVAL);
702 		return (zfs_secpolicy_write_perms(zc->zc_name,
703 		    ZFS_DELEG_PERM_USERPROP, cr));
704 	} else {
705 		if (!zfs_prop_inheritable(prop))
706 			return (EINVAL);
707 		return (zfs_secpolicy_setprop(zc->zc_name, prop, cr));
708 	}
709 }
710 
711 static int
712 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
713 {
714 	int err = zfs_secpolicy_read(zc, cr);
715 	if (err)
716 		return (err);
717 
718 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
719 		return (EINVAL);
720 
721 	if (zc->zc_value[0] == 0) {
722 		/*
723 		 * They are asking about a posix uid/gid.  If it's
724 		 * themself, allow it.
725 		 */
726 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
727 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
728 			if (zc->zc_guid == crgetuid(cr))
729 				return (0);
730 		} else {
731 			if (groupmember(zc->zc_guid, cr))
732 				return (0);
733 		}
734 	}
735 
736 	return (zfs_secpolicy_write_perms(zc->zc_name,
737 	    userquota_perms[zc->zc_objset_type], cr));
738 }
739 
740 static int
741 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
742 {
743 	int err = zfs_secpolicy_read(zc, cr);
744 	if (err)
745 		return (err);
746 
747 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
748 		return (EINVAL);
749 
750 	return (zfs_secpolicy_write_perms(zc->zc_name,
751 	    userquota_perms[zc->zc_objset_type], cr));
752 }
753 
754 static int
755 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
756 {
757 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION, cr));
758 }
759 
760 static int
761 zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
762 {
763 	return (zfs_secpolicy_write_perms(zc->zc_name,
764 	    ZFS_DELEG_PERM_HOLD, cr));
765 }
766 
767 static int
768 zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
769 {
770 	return (zfs_secpolicy_write_perms(zc->zc_name,
771 	    ZFS_DELEG_PERM_RELEASE, cr));
772 }
773 
774 /*
775  * Returns the nvlist as specified by the user in the zfs_cmd_t.
776  */
777 static int
778 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
779 {
780 	char *packed;
781 	int error;
782 	nvlist_t *list = NULL;
783 
784 	/*
785 	 * Read in and unpack the user-supplied nvlist.
786 	 */
787 	if (size == 0)
788 		return (EINVAL);
789 
790 	packed = kmem_alloc(size, KM_SLEEP);
791 
792 	if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
793 	    iflag)) != 0) {
794 		kmem_free(packed, size);
795 		return (error);
796 	}
797 
798 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
799 		kmem_free(packed, size);
800 		return (error);
801 	}
802 
803 	kmem_free(packed, size);
804 
805 	*nvp = list;
806 	return (0);
807 }
808 
809 static int
810 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
811 {
812 	char *packed = NULL;
813 	size_t size;
814 	int error;
815 
816 	VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
817 
818 	if (size > zc->zc_nvlist_dst_size) {
819 		error = ENOMEM;
820 	} else {
821 		packed = kmem_alloc(size, KM_SLEEP);
822 		VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
823 		    KM_SLEEP) == 0);
824 		error = ddi_copyout(packed,
825 		    (void *)(uintptr_t)zc->zc_nvlist_dst, size, zc->zc_iflags);
826 		kmem_free(packed, size);
827 	}
828 
829 	zc->zc_nvlist_dst_size = size;
830 	return (error);
831 }
832 
833 static int
834 getzfsvfs(const char *dsname, zfsvfs_t **zvp)
835 {
836 	objset_t *os;
837 	int error;
838 
839 	error = dmu_objset_hold(dsname, FTAG, &os);
840 	if (error)
841 		return (error);
842 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
843 		dmu_objset_rele(os, FTAG);
844 		return (EINVAL);
845 	}
846 
847 	mutex_enter(&os->os_user_ptr_lock);
848 	*zvp = dmu_objset_get_user(os);
849 	if (*zvp) {
850 		VFS_HOLD((*zvp)->z_vfs);
851 	} else {
852 		error = ESRCH;
853 	}
854 	mutex_exit(&os->os_user_ptr_lock);
855 	dmu_objset_rele(os, FTAG);
856 	return (error);
857 }
858 
859 /*
860  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
861  * case its z_vfs will be NULL, and it will be opened as the owner.
862  */
863 static int
864 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zvp)
865 {
866 	int error = 0;
867 
868 	if (getzfsvfs(name, zvp) != 0)
869 		error = zfsvfs_create(name, zvp);
870 	if (error == 0) {
871 		rrw_enter(&(*zvp)->z_teardown_lock, RW_READER, tag);
872 		if ((*zvp)->z_unmounted) {
873 			/*
874 			 * XXX we could probably try again, since the unmounting
875 			 * thread should be just about to disassociate the
876 			 * objset from the zfsvfs.
877 			 */
878 			rrw_exit(&(*zvp)->z_teardown_lock, tag);
879 			return (EBUSY);
880 		}
881 	}
882 	return (error);
883 }
884 
885 static void
886 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
887 {
888 	rrw_exit(&zfsvfs->z_teardown_lock, tag);
889 
890 	if (zfsvfs->z_vfs) {
891 		VFS_RELE(zfsvfs->z_vfs);
892 	} else {
893 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
894 		zfsvfs_free(zfsvfs);
895 	}
896 }
897 
898 static int
899 zfs_ioc_pool_create(zfs_cmd_t *zc)
900 {
901 	int error;
902 	nvlist_t *config, *props = NULL;
903 	nvlist_t *rootprops = NULL;
904 	nvlist_t *zplprops = NULL;
905 	char *buf;
906 
907 	if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
908 	    zc->zc_iflags, &config))
909 		return (error);
910 
911 	if (zc->zc_nvlist_src_size != 0 && (error =
912 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
913 	    zc->zc_iflags, &props))) {
914 		nvlist_free(config);
915 		return (error);
916 	}
917 
918 	if (props) {
919 		nvlist_t *nvl = NULL;
920 		uint64_t version = SPA_VERSION;
921 
922 		(void) nvlist_lookup_uint64(props,
923 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
924 		if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
925 			error = EINVAL;
926 			goto pool_props_bad;
927 		}
928 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
929 		if (nvl) {
930 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
931 			if (error != 0) {
932 				nvlist_free(config);
933 				nvlist_free(props);
934 				return (error);
935 			}
936 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
937 		}
938 		VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
939 		error = zfs_fill_zplprops_root(version, rootprops,
940 		    zplprops, NULL);
941 		if (error)
942 			goto pool_props_bad;
943 	}
944 
945 	buf = history_str_get(zc);
946 
947 	error = spa_create(zc->zc_name, config, props, buf, zplprops);
948 
949 	/*
950 	 * Set the remaining root properties
951 	 */
952 	if (!error &&
953 	    (error = zfs_set_prop_nvlist(zc->zc_name, rootprops)) != 0)
954 		(void) spa_destroy(zc->zc_name);
955 
956 	if (buf != NULL)
957 		history_str_free(buf);
958 
959 pool_props_bad:
960 	nvlist_free(rootprops);
961 	nvlist_free(zplprops);
962 	nvlist_free(config);
963 	nvlist_free(props);
964 
965 	return (error);
966 }
967 
968 static int
969 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
970 {
971 	int error;
972 	zfs_log_history(zc);
973 	error = spa_destroy(zc->zc_name);
974 	return (error);
975 }
976 
977 static int
978 zfs_ioc_pool_import(zfs_cmd_t *zc)
979 {
980 	int error;
981 	nvlist_t *config, *props = NULL;
982 	uint64_t guid;
983 
984 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
985 	    zc->zc_iflags, &config)) != 0)
986 		return (error);
987 
988 	if (zc->zc_nvlist_src_size != 0 && (error =
989 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
990 	    zc->zc_iflags, &props))) {
991 		nvlist_free(config);
992 		return (error);
993 	}
994 
995 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
996 	    guid != zc->zc_guid)
997 		error = EINVAL;
998 	else if (zc->zc_cookie)
999 		error = spa_import_verbatim(zc->zc_name, config,
1000 		    props);
1001 	else
1002 		error = spa_import(zc->zc_name, config, props);
1003 
1004 	nvlist_free(config);
1005 
1006 	if (props)
1007 		nvlist_free(props);
1008 
1009 	return (error);
1010 }
1011 
1012 static int
1013 zfs_ioc_pool_export(zfs_cmd_t *zc)
1014 {
1015 	int error;
1016 	boolean_t force = (boolean_t)zc->zc_cookie;
1017 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1018 
1019 	zfs_log_history(zc);
1020 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1021 	return (error);
1022 }
1023 
1024 static int
1025 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1026 {
1027 	nvlist_t *configs;
1028 	int error;
1029 
1030 	if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1031 		return (EEXIST);
1032 
1033 	error = put_nvlist(zc, configs);
1034 
1035 	nvlist_free(configs);
1036 
1037 	return (error);
1038 }
1039 
1040 static int
1041 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1042 {
1043 	nvlist_t *config;
1044 	int error;
1045 	int ret = 0;
1046 
1047 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1048 	    sizeof (zc->zc_value));
1049 
1050 	if (config != NULL) {
1051 		ret = put_nvlist(zc, config);
1052 		nvlist_free(config);
1053 
1054 		/*
1055 		 * The config may be present even if 'error' is non-zero.
1056 		 * In this case we return success, and preserve the real errno
1057 		 * in 'zc_cookie'.
1058 		 */
1059 		zc->zc_cookie = error;
1060 	} else {
1061 		ret = error;
1062 	}
1063 
1064 	return (ret);
1065 }
1066 
1067 /*
1068  * Try to import the given pool, returning pool stats as appropriate so that
1069  * user land knows which devices are available and overall pool health.
1070  */
1071 static int
1072 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1073 {
1074 	nvlist_t *tryconfig, *config;
1075 	int error;
1076 
1077 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1078 	    zc->zc_iflags, &tryconfig)) != 0)
1079 		return (error);
1080 
1081 	config = spa_tryimport(tryconfig);
1082 
1083 	nvlist_free(tryconfig);
1084 
1085 	if (config == NULL)
1086 		return (EINVAL);
1087 
1088 	error = put_nvlist(zc, config);
1089 	nvlist_free(config);
1090 
1091 	return (error);
1092 }
1093 
1094 static int
1095 zfs_ioc_pool_scrub(zfs_cmd_t *zc)
1096 {
1097 	spa_t *spa;
1098 	int error;
1099 
1100 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1101 		return (error);
1102 
1103 	error = spa_scrub(spa, zc->zc_cookie);
1104 
1105 	spa_close(spa, FTAG);
1106 
1107 	return (error);
1108 }
1109 
1110 static int
1111 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1112 {
1113 	spa_t *spa;
1114 	int error;
1115 
1116 	error = spa_open(zc->zc_name, &spa, FTAG);
1117 	if (error == 0) {
1118 		spa_freeze(spa);
1119 		spa_close(spa, FTAG);
1120 	}
1121 	return (error);
1122 }
1123 
1124 static int
1125 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1126 {
1127 	spa_t *spa;
1128 	int error;
1129 
1130 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1131 		return (error);
1132 
1133 	if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
1134 		spa_close(spa, FTAG);
1135 		return (EINVAL);
1136 	}
1137 
1138 	spa_upgrade(spa, zc->zc_cookie);
1139 	spa_close(spa, FTAG);
1140 
1141 	return (error);
1142 }
1143 
1144 static int
1145 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1146 {
1147 	spa_t *spa;
1148 	char *hist_buf;
1149 	uint64_t size;
1150 	int error;
1151 
1152 	if ((size = zc->zc_history_len) == 0)
1153 		return (EINVAL);
1154 
1155 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1156 		return (error);
1157 
1158 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1159 		spa_close(spa, FTAG);
1160 		return (ENOTSUP);
1161 	}
1162 
1163 	hist_buf = kmem_alloc(size, KM_SLEEP);
1164 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
1165 	    &zc->zc_history_len, hist_buf)) == 0) {
1166 		error = ddi_copyout(hist_buf,
1167 		    (void *)(uintptr_t)zc->zc_history,
1168 		    zc->zc_history_len, zc->zc_iflags);
1169 	}
1170 
1171 	spa_close(spa, FTAG);
1172 	kmem_free(hist_buf, size);
1173 	return (error);
1174 }
1175 
1176 static int
1177 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1178 {
1179 	int error;
1180 
1181 	if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1182 		return (error);
1183 
1184 	return (0);
1185 }
1186 
1187 /*
1188  * inputs:
1189  * zc_name		name of filesystem
1190  * zc_obj		object to find
1191  *
1192  * outputs:
1193  * zc_value		name of object
1194  */
1195 static int
1196 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1197 {
1198 	objset_t *os;
1199 	int error;
1200 
1201 	/* XXX reading from objset not owned */
1202 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1203 		return (error);
1204 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1205 		dmu_objset_rele(os, FTAG);
1206 		return (EINVAL);
1207 	}
1208 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1209 	    sizeof (zc->zc_value));
1210 	dmu_objset_rele(os, FTAG);
1211 
1212 	return (error);
1213 }
1214 
1215 static int
1216 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1217 {
1218 	spa_t *spa;
1219 	int error;
1220 	nvlist_t *config, **l2cache, **spares;
1221 	uint_t nl2cache = 0, nspares = 0;
1222 
1223 	error = spa_open(zc->zc_name, &spa, FTAG);
1224 	if (error != 0)
1225 		return (error);
1226 
1227 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1228 	    zc->zc_iflags, &config);
1229 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1230 	    &l2cache, &nl2cache);
1231 
1232 	(void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1233 	    &spares, &nspares);
1234 
1235 	/*
1236 	 * A root pool with concatenated devices is not supported.
1237 	 * Thus, can not add a device to a root pool.
1238 	 *
1239 	 * Intent log device can not be added to a rootpool because
1240 	 * during mountroot, zil is replayed, a seperated log device
1241 	 * can not be accessed during the mountroot time.
1242 	 *
1243 	 * l2cache and spare devices are ok to be added to a rootpool.
1244 	 */
1245 	if (spa->spa_bootfs != 0 && nl2cache == 0 && nspares == 0) {
1246 		spa_close(spa, FTAG);
1247 		return (EDOM);
1248 	}
1249 
1250 	if (error == 0) {
1251 		error = spa_vdev_add(spa, config);
1252 		nvlist_free(config);
1253 	}
1254 	spa_close(spa, FTAG);
1255 	return (error);
1256 }
1257 
1258 static int
1259 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1260 {
1261 	spa_t *spa;
1262 	int error;
1263 
1264 	error = spa_open(zc->zc_name, &spa, FTAG);
1265 	if (error != 0)
1266 		return (error);
1267 	error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1268 	spa_close(spa, FTAG);
1269 	return (error);
1270 }
1271 
1272 static int
1273 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1274 {
1275 	spa_t *spa;
1276 	int error;
1277 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1278 
1279 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1280 		return (error);
1281 	switch (zc->zc_cookie) {
1282 	case VDEV_STATE_ONLINE:
1283 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1284 		break;
1285 
1286 	case VDEV_STATE_OFFLINE:
1287 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1288 		break;
1289 
1290 	case VDEV_STATE_FAULTED:
1291 		error = vdev_fault(spa, zc->zc_guid);
1292 		break;
1293 
1294 	case VDEV_STATE_DEGRADED:
1295 		error = vdev_degrade(spa, zc->zc_guid);
1296 		break;
1297 
1298 	default:
1299 		error = EINVAL;
1300 	}
1301 	zc->zc_cookie = newstate;
1302 	spa_close(spa, FTAG);
1303 	return (error);
1304 }
1305 
1306 static int
1307 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1308 {
1309 	spa_t *spa;
1310 	int replacing = zc->zc_cookie;
1311 	nvlist_t *config;
1312 	int error;
1313 
1314 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1315 		return (error);
1316 
1317 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1318 	    zc->zc_iflags, &config)) == 0) {
1319 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1320 		nvlist_free(config);
1321 	}
1322 
1323 	spa_close(spa, FTAG);
1324 	return (error);
1325 }
1326 
1327 static int
1328 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1329 {
1330 	spa_t *spa;
1331 	int error;
1332 
1333 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1334 		return (error);
1335 
1336 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1337 
1338 	spa_close(spa, FTAG);
1339 	return (error);
1340 }
1341 
1342 static int
1343 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1344 {
1345 	spa_t *spa;
1346 	char *path = zc->zc_value;
1347 	uint64_t guid = zc->zc_guid;
1348 	int error;
1349 
1350 	error = spa_open(zc->zc_name, &spa, FTAG);
1351 	if (error != 0)
1352 		return (error);
1353 
1354 	error = spa_vdev_setpath(spa, guid, path);
1355 	spa_close(spa, FTAG);
1356 	return (error);
1357 }
1358 
1359 static int
1360 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1361 {
1362 	spa_t *spa;
1363 	char *fru = zc->zc_value;
1364 	uint64_t guid = zc->zc_guid;
1365 	int error;
1366 
1367 	error = spa_open(zc->zc_name, &spa, FTAG);
1368 	if (error != 0)
1369 		return (error);
1370 
1371 	error = spa_vdev_setfru(spa, guid, fru);
1372 	spa_close(spa, FTAG);
1373 	return (error);
1374 }
1375 
1376 /*
1377  * inputs:
1378  * zc_name		name of filesystem
1379  * zc_nvlist_dst_size	size of buffer for property nvlist
1380  *
1381  * outputs:
1382  * zc_objset_stats	stats
1383  * zc_nvlist_dst	property nvlist
1384  * zc_nvlist_dst_size	size of property nvlist
1385  */
1386 static int
1387 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1388 {
1389 	objset_t *os = NULL;
1390 	int error;
1391 	nvlist_t *nv;
1392 
1393 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1394 		return (error);
1395 
1396 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1397 
1398 	if (zc->zc_nvlist_dst != 0 &&
1399 	    (error = dsl_prop_get_all(os, &nv, FALSE)) == 0) {
1400 		dmu_objset_stats(os, nv);
1401 		/*
1402 		 * NB: zvol_get_stats() will read the objset contents,
1403 		 * which we aren't supposed to do with a
1404 		 * DS_MODE_USER hold, because it could be
1405 		 * inconsistent.  So this is a bit of a workaround...
1406 		 * XXX reading with out owning
1407 		 */
1408 		if (!zc->zc_objset_stats.dds_inconsistent) {
1409 			if (dmu_objset_type(os) == DMU_OST_ZVOL)
1410 				VERIFY(zvol_get_stats(os, nv) == 0);
1411 		}
1412 		error = put_nvlist(zc, nv);
1413 		nvlist_free(nv);
1414 	}
1415 
1416 	dmu_objset_rele(os, FTAG);
1417 	return (error);
1418 }
1419 
1420 static int
1421 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1422 {
1423 	uint64_t value;
1424 	int error;
1425 
1426 	/*
1427 	 * zfs_get_zplprop() will either find a value or give us
1428 	 * the default value (if there is one).
1429 	 */
1430 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1431 		return (error);
1432 	VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1433 	return (0);
1434 }
1435 
1436 /*
1437  * inputs:
1438  * zc_name		name of filesystem
1439  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
1440  *
1441  * outputs:
1442  * zc_nvlist_dst	zpl property nvlist
1443  * zc_nvlist_dst_size	size of zpl property nvlist
1444  */
1445 static int
1446 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1447 {
1448 	objset_t *os;
1449 	int err;
1450 
1451 	/* XXX reading without owning */
1452 	if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1453 		return (err);
1454 
1455 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1456 
1457 	/*
1458 	 * NB: nvl_add_zplprop() will read the objset contents,
1459 	 * which we aren't supposed to do with a DS_MODE_USER
1460 	 * hold, because it could be inconsistent.
1461 	 */
1462 	if (zc->zc_nvlist_dst != NULL &&
1463 	    !zc->zc_objset_stats.dds_inconsistent &&
1464 	    dmu_objset_type(os) == DMU_OST_ZFS) {
1465 		nvlist_t *nv;
1466 
1467 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1468 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1469 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1470 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1471 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1472 			err = put_nvlist(zc, nv);
1473 		nvlist_free(nv);
1474 	} else {
1475 		err = ENOENT;
1476 	}
1477 	dmu_objset_rele(os, FTAG);
1478 	return (err);
1479 }
1480 
1481 static boolean_t
1482 dataset_name_hidden(const char *name)
1483 {
1484 	/*
1485 	 * Skip over datasets that are not visible in this zone,
1486 	 * internal datasets (which have a $ in their name), and
1487 	 * temporary datasets (which have a % in their name).
1488 	 */
1489 	if (strchr(name, '$') != NULL)
1490 		return (B_TRUE);
1491 	if (strchr(name, '%') != NULL)
1492 		return (B_TRUE);
1493 	if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1494 		return (B_TRUE);
1495 	return (B_FALSE);
1496 }
1497 
1498 /*
1499  * inputs:
1500  * zc_name		name of filesystem
1501  * zc_cookie		zap cursor
1502  * zc_nvlist_dst_size	size of buffer for property nvlist
1503  *
1504  * outputs:
1505  * zc_name		name of next filesystem
1506  * zc_cookie		zap cursor
1507  * zc_objset_stats	stats
1508  * zc_nvlist_dst	property nvlist
1509  * zc_nvlist_dst_size	size of property nvlist
1510  */
1511 static int
1512 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1513 {
1514 	objset_t *os;
1515 	int error;
1516 	char *p;
1517 
1518 	if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1519 		if (error == ENOENT)
1520 			error = ESRCH;
1521 		return (error);
1522 	}
1523 
1524 	p = strrchr(zc->zc_name, '/');
1525 	if (p == NULL || p[1] != '\0')
1526 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1527 	p = zc->zc_name + strlen(zc->zc_name);
1528 
1529 	/*
1530 	 * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1531 	 * but is not declared void because its called by dmu_objset_find().
1532 	 */
1533 	if (zc->zc_cookie == 0) {
1534 		uint64_t cookie = 0;
1535 		int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1536 
1537 		while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
1538 			(void) dmu_objset_prefetch(p, NULL);
1539 	}
1540 
1541 	do {
1542 		error = dmu_dir_list_next(os,
1543 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
1544 		    NULL, &zc->zc_cookie);
1545 		if (error == ENOENT)
1546 			error = ESRCH;
1547 	} while (error == 0 && dataset_name_hidden(zc->zc_name));
1548 	dmu_objset_rele(os, FTAG);
1549 
1550 	if (error == 0)
1551 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1552 
1553 	return (error);
1554 }
1555 
1556 /*
1557  * inputs:
1558  * zc_name		name of filesystem
1559  * zc_cookie		zap cursor
1560  * zc_nvlist_dst_size	size of buffer for property nvlist
1561  *
1562  * outputs:
1563  * zc_name		name of next snapshot
1564  * zc_objset_stats	stats
1565  * zc_nvlist_dst	property nvlist
1566  * zc_nvlist_dst_size	size of property nvlist
1567  */
1568 static int
1569 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
1570 {
1571 	objset_t *os;
1572 	int error;
1573 
1574 	if (zc->zc_cookie == 0)
1575 		(void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
1576 		    NULL, DS_FIND_SNAPSHOTS);
1577 
1578 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
1579 	if (error)
1580 		return (error == ENOENT ? ESRCH : error);
1581 
1582 	/*
1583 	 * A dataset name of maximum length cannot have any snapshots,
1584 	 * so exit immediately.
1585 	 */
1586 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
1587 		dmu_objset_rele(os, FTAG);
1588 		return (ESRCH);
1589 	}
1590 
1591 	error = dmu_snapshot_list_next(os,
1592 	    sizeof (zc->zc_name) - strlen(zc->zc_name),
1593 	    zc->zc_name + strlen(zc->zc_name), NULL, &zc->zc_cookie, NULL);
1594 	dmu_objset_rele(os, FTAG);
1595 	if (error == 0)
1596 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
1597 	else if (error == ENOENT)
1598 		error = ESRCH;
1599 
1600 	/* if we failed, undo the @ that we tacked on to zc_name */
1601 	if (error)
1602 		*strchr(zc->zc_name, '@') = '\0';
1603 	return (error);
1604 }
1605 
1606 int
1607 zfs_set_prop_nvlist(const char *name, nvlist_t *nvl)
1608 {
1609 	nvpair_t *elem;
1610 	int error = 0;
1611 	uint64_t intval;
1612 	char *strval;
1613 	nvlist_t *genericnvl;
1614 	boolean_t issnap = (strchr(name, '@') != NULL);
1615 
1616 	/*
1617 	 * First validate permission to set all of the properties
1618 	 */
1619 	elem = NULL;
1620 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1621 		const char *propname = nvpair_name(elem);
1622 		zfs_prop_t prop = zfs_name_to_prop(propname);
1623 
1624 		if (prop == ZPROP_INVAL) {
1625 			/*
1626 			 * If this is a user-defined property, it must be a
1627 			 * string, and there is no further validation to do.
1628 			 */
1629 			if (zfs_prop_user(propname) &&
1630 			    nvpair_type(elem) == DATA_TYPE_STRING) {
1631 				if (error = zfs_secpolicy_write_perms(name,
1632 				    ZFS_DELEG_PERM_USERPROP, CRED()))
1633 					return (error);
1634 				continue;
1635 			}
1636 
1637 			if (!issnap && zfs_prop_userquota(propname) &&
1638 			    nvpair_type(elem) == DATA_TYPE_UINT64_ARRAY) {
1639 				const char *perm;
1640 				const char *up = zfs_userquota_prop_prefixes
1641 				    [ZFS_PROP_USERQUOTA];
1642 				if (strncmp(propname, up, strlen(up)) == 0)
1643 					perm = ZFS_DELEG_PERM_USERQUOTA;
1644 				else
1645 					perm = ZFS_DELEG_PERM_GROUPQUOTA;
1646 				if (error = zfs_secpolicy_write_perms(name,
1647 				    perm, CRED()))
1648 					return (error);
1649 				continue;
1650 			}
1651 
1652 			return (EINVAL);
1653 		}
1654 
1655 		if (issnap)
1656 			return (EINVAL);
1657 
1658 		if ((error = zfs_secpolicy_setprop(name, prop, CRED())) != 0)
1659 			return (error);
1660 
1661 		/*
1662 		 * Check that this value is valid for this pool version
1663 		 */
1664 		switch (prop) {
1665 		case ZFS_PROP_COMPRESSION:
1666 			/*
1667 			 * If the user specified gzip compression, make sure
1668 			 * the SPA supports it. We ignore any errors here since
1669 			 * we'll catch them later.
1670 			 */
1671 			if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
1672 			    nvpair_value_uint64(elem, &intval) == 0) {
1673 				if (intval >= ZIO_COMPRESS_GZIP_1 &&
1674 				    intval <= ZIO_COMPRESS_GZIP_9 &&
1675 				    zfs_earlier_version(name,
1676 				    SPA_VERSION_GZIP_COMPRESSION))
1677 					return (ENOTSUP);
1678 
1679 				/*
1680 				 * If this is a bootable dataset then
1681 				 * verify that the compression algorithm
1682 				 * is supported for booting. We must return
1683 				 * something other than ENOTSUP since it
1684 				 * implies a downrev pool version.
1685 				 */
1686 				if (zfs_is_bootfs(name) &&
1687 				    !BOOTFS_COMPRESS_VALID(intval))
1688 					return (ERANGE);
1689 			}
1690 			break;
1691 
1692 		case ZFS_PROP_COPIES:
1693 			if (zfs_earlier_version(name, SPA_VERSION_DITTO_BLOCKS))
1694 				return (ENOTSUP);
1695 			break;
1696 
1697 		case ZFS_PROP_SHARESMB:
1698 			if (zpl_earlier_version(name, ZPL_VERSION_FUID))
1699 				return (ENOTSUP);
1700 			break;
1701 
1702 		case ZFS_PROP_ACLINHERIT:
1703 			if (nvpair_type(elem) == DATA_TYPE_UINT64 &&
1704 			    nvpair_value_uint64(elem, &intval) == 0)
1705 				if (intval == ZFS_ACL_PASSTHROUGH_X &&
1706 				    zfs_earlier_version(name,
1707 				    SPA_VERSION_PASSTHROUGH_X))
1708 					return (ENOTSUP);
1709 		}
1710 	}
1711 
1712 	VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1713 	elem = NULL;
1714 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1715 		const char *propname = nvpair_name(elem);
1716 		zfs_prop_t prop = zfs_name_to_prop(propname);
1717 
1718 		if (prop == ZPROP_INVAL) {
1719 			if (zfs_prop_userquota(propname)) {
1720 				uint64_t *valary;
1721 				unsigned int vallen;
1722 				const char *domain;
1723 				zfs_userquota_prop_t type;
1724 				uint64_t rid;
1725 				uint64_t quota;
1726 				zfsvfs_t *zfsvfs;
1727 
1728 				VERIFY(nvpair_value_uint64_array(elem,
1729 				    &valary, &vallen) == 0);
1730 				VERIFY(vallen == 3);
1731 				type = valary[0];
1732 				rid = valary[1];
1733 				quota = valary[2];
1734 				domain = propname +
1735 				    strlen(zfs_userquota_prop_prefixes[type]);
1736 
1737 				error = zfsvfs_hold(name, FTAG, &zfsvfs);
1738 				if (error == 0) {
1739 					error = zfs_set_userquota(zfsvfs,
1740 					    type, domain, rid, quota);
1741 					zfsvfs_rele(zfsvfs, FTAG);
1742 				}
1743 				if (error == 0)
1744 					continue;
1745 				else
1746 					goto out;
1747 			} else if (zfs_prop_user(propname)) {
1748 				VERIFY(nvpair_value_string(elem, &strval) == 0);
1749 				error = dsl_prop_set(name, propname, 1,
1750 				    strlen(strval) + 1, strval);
1751 				if (error == 0)
1752 					continue;
1753 				else
1754 					goto out;
1755 			}
1756 		}
1757 
1758 		switch (prop) {
1759 		case ZFS_PROP_QUOTA:
1760 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1761 			    (error = dsl_dir_set_quota(name, intval)) != 0)
1762 				goto out;
1763 			break;
1764 
1765 		case ZFS_PROP_REFQUOTA:
1766 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1767 			    (error = dsl_dataset_set_quota(name, intval)) != 0)
1768 				goto out;
1769 			break;
1770 
1771 		case ZFS_PROP_RESERVATION:
1772 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1773 			    (error = dsl_dir_set_reservation(name,
1774 			    intval)) != 0)
1775 				goto out;
1776 			break;
1777 
1778 		case ZFS_PROP_REFRESERVATION:
1779 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1780 			    (error = dsl_dataset_set_reservation(name,
1781 			    intval)) != 0)
1782 				goto out;
1783 			break;
1784 
1785 		case ZFS_PROP_VOLSIZE:
1786 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1787 			    (error = zvol_set_volsize(name,
1788 			    ddi_driver_major(zfs_dip), intval)) != 0)
1789 				goto out;
1790 			break;
1791 
1792 		case ZFS_PROP_VOLBLOCKSIZE:
1793 			if ((error = nvpair_value_uint64(elem, &intval)) != 0 ||
1794 			    (error = zvol_set_volblocksize(name, intval)) != 0)
1795 				goto out;
1796 			break;
1797 
1798 		case ZFS_PROP_VERSION:
1799 		{
1800 			zfsvfs_t *zfsvfs;
1801 
1802 			if ((error = nvpair_value_uint64(elem, &intval)) != 0)
1803 				goto out;
1804 			if ((error = zfsvfs_hold(name, FTAG, &zfsvfs)) != 0)
1805 				goto out;
1806 			error = zfs_set_version(zfsvfs, intval);
1807 			zfsvfs_rele(zfsvfs, FTAG);
1808 
1809 			if (error == 0 && intval >= ZPL_VERSION_USERSPACE) {
1810 				zfs_cmd_t zc = { 0 };
1811 				(void) strcpy(zc.zc_name, name);
1812 				(void) zfs_ioc_userspace_upgrade(&zc);
1813 			}
1814 			if (error)
1815 				goto out;
1816 			break;
1817 		}
1818 
1819 		default:
1820 			if (nvpair_type(elem) == DATA_TYPE_STRING) {
1821 				if (zfs_prop_get_type(prop) !=
1822 				    PROP_TYPE_STRING) {
1823 					error = EINVAL;
1824 					goto out;
1825 				}
1826 			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
1827 				const char *unused;
1828 
1829 				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
1830 
1831 				switch (zfs_prop_get_type(prop)) {
1832 				case PROP_TYPE_NUMBER:
1833 					break;
1834 				case PROP_TYPE_STRING:
1835 					error = EINVAL;
1836 					goto out;
1837 				case PROP_TYPE_INDEX:
1838 					if (zfs_prop_index_to_string(prop,
1839 					    intval, &unused) != 0) {
1840 						error = EINVAL;
1841 						goto out;
1842 					}
1843 					break;
1844 				default:
1845 					cmn_err(CE_PANIC,
1846 					    "unknown property type");
1847 					break;
1848 				}
1849 			} else {
1850 				error = EINVAL;
1851 				goto out;
1852 			}
1853 			if ((error = nvlist_add_nvpair(genericnvl, elem)) != 0)
1854 				goto out;
1855 		}
1856 	}
1857 
1858 	if (nvlist_next_nvpair(genericnvl, NULL) != NULL) {
1859 		error = dsl_props_set(name, genericnvl);
1860 	}
1861 out:
1862 	nvlist_free(genericnvl);
1863 	return (error);
1864 }
1865 
1866 /*
1867  * Check that all the properties are valid user properties.
1868  */
1869 static int
1870 zfs_check_userprops(char *fsname, nvlist_t *nvl)
1871 {
1872 	nvpair_t *elem = NULL;
1873 	int error = 0;
1874 
1875 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
1876 		const char *propname = nvpair_name(elem);
1877 		char *valstr;
1878 
1879 		if (!zfs_prop_user(propname) ||
1880 		    nvpair_type(elem) != DATA_TYPE_STRING)
1881 			return (EINVAL);
1882 
1883 		if (error = zfs_secpolicy_write_perms(fsname,
1884 		    ZFS_DELEG_PERM_USERPROP, CRED()))
1885 			return (error);
1886 
1887 		if (strlen(propname) >= ZAP_MAXNAMELEN)
1888 			return (ENAMETOOLONG);
1889 
1890 		VERIFY(nvpair_value_string(elem, &valstr) == 0);
1891 		if (strlen(valstr) >= ZAP_MAXVALUELEN)
1892 			return (E2BIG);
1893 	}
1894 	return (0);
1895 }
1896 
1897 /*
1898  * inputs:
1899  * zc_name		name of filesystem
1900  * zc_value		name of property to set
1901  * zc_nvlist_src{_size}	nvlist of properties to apply
1902  * zc_cookie		clear existing local props?
1903  *
1904  * outputs:		none
1905  */
1906 static int
1907 zfs_ioc_set_prop(zfs_cmd_t *zc)
1908 {
1909 	nvlist_t *nvl;
1910 	int error;
1911 
1912 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1913 	    zc->zc_iflags, &nvl)) != 0)
1914 		return (error);
1915 
1916 	if (zc->zc_cookie) {
1917 		nvlist_t *origprops;
1918 		objset_t *os;
1919 
1920 		if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
1921 			if (dsl_prop_get_all(os, &origprops, TRUE) == 0) {
1922 				clear_props(zc->zc_name, origprops, nvl);
1923 				nvlist_free(origprops);
1924 			}
1925 			dmu_objset_rele(os, FTAG);
1926 		}
1927 
1928 	}
1929 
1930 	error = zfs_set_prop_nvlist(zc->zc_name, nvl);
1931 
1932 	nvlist_free(nvl);
1933 	return (error);
1934 }
1935 
1936 /*
1937  * inputs:
1938  * zc_name		name of filesystem
1939  * zc_value		name of property to inherit
1940  *
1941  * outputs:		none
1942  */
1943 static int
1944 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
1945 {
1946 	/* the property name has been validated by zfs_secpolicy_inherit() */
1947 	return (dsl_prop_set(zc->zc_name, zc->zc_value, 0, 0, NULL));
1948 }
1949 
1950 static int
1951 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
1952 {
1953 	nvlist_t *props;
1954 	spa_t *spa;
1955 	int error;
1956 	nvpair_t *elem;
1957 
1958 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1959 	    zc->zc_iflags, &props)))
1960 		return (error);
1961 
1962 	/*
1963 	 * If the only property is the configfile, then just do a spa_lookup()
1964 	 * to handle the faulted case.
1965 	 */
1966 	elem = nvlist_next_nvpair(props, NULL);
1967 	if (elem != NULL && strcmp(nvpair_name(elem),
1968 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
1969 	    nvlist_next_nvpair(props, elem) == NULL) {
1970 		mutex_enter(&spa_namespace_lock);
1971 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
1972 			spa_configfile_set(spa, props, B_FALSE);
1973 			spa_config_sync(spa, B_FALSE, B_TRUE);
1974 		}
1975 		mutex_exit(&spa_namespace_lock);
1976 		if (spa != NULL)
1977 			return (0);
1978 	}
1979 
1980 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
1981 		nvlist_free(props);
1982 		return (error);
1983 	}
1984 
1985 	error = spa_prop_set(spa, props);
1986 
1987 	nvlist_free(props);
1988 	spa_close(spa, FTAG);
1989 
1990 	return (error);
1991 }
1992 
1993 static int
1994 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
1995 {
1996 	spa_t *spa;
1997 	int error;
1998 	nvlist_t *nvp = NULL;
1999 
2000 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2001 		/*
2002 		 * If the pool is faulted, there may be properties we can still
2003 		 * get (such as altroot and cachefile), so attempt to get them
2004 		 * anyway.
2005 		 */
2006 		mutex_enter(&spa_namespace_lock);
2007 		if ((spa = spa_lookup(zc->zc_name)) != NULL)
2008 			error = spa_prop_get(spa, &nvp);
2009 		mutex_exit(&spa_namespace_lock);
2010 	} else {
2011 		error = spa_prop_get(spa, &nvp);
2012 		spa_close(spa, FTAG);
2013 	}
2014 
2015 	if (error == 0 && zc->zc_nvlist_dst != NULL)
2016 		error = put_nvlist(zc, nvp);
2017 	else
2018 		error = EFAULT;
2019 
2020 	nvlist_free(nvp);
2021 	return (error);
2022 }
2023 
2024 static int
2025 zfs_ioc_iscsi_perm_check(zfs_cmd_t *zc)
2026 {
2027 	nvlist_t *nvp;
2028 	int error;
2029 	uint32_t uid;
2030 	uint32_t gid;
2031 	uint32_t *groups;
2032 	uint_t group_cnt;
2033 	cred_t	*usercred;
2034 
2035 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2036 	    zc->zc_iflags, &nvp)) != 0) {
2037 		return (error);
2038 	}
2039 
2040 	if ((error = nvlist_lookup_uint32(nvp,
2041 	    ZFS_DELEG_PERM_UID, &uid)) != 0) {
2042 		nvlist_free(nvp);
2043 		return (EPERM);
2044 	}
2045 
2046 	if ((error = nvlist_lookup_uint32(nvp,
2047 	    ZFS_DELEG_PERM_GID, &gid)) != 0) {
2048 		nvlist_free(nvp);
2049 		return (EPERM);
2050 	}
2051 
2052 	if ((error = nvlist_lookup_uint32_array(nvp, ZFS_DELEG_PERM_GROUPS,
2053 	    &groups, &group_cnt)) != 0) {
2054 		nvlist_free(nvp);
2055 		return (EPERM);
2056 	}
2057 	usercred = cralloc();
2058 	if ((crsetugid(usercred, uid, gid) != 0) ||
2059 	    (crsetgroups(usercred, group_cnt, (gid_t *)groups) != 0)) {
2060 		nvlist_free(nvp);
2061 		crfree(usercred);
2062 		return (EPERM);
2063 	}
2064 	nvlist_free(nvp);
2065 	error = dsl_deleg_access(zc->zc_name,
2066 	    zfs_prop_to_name(ZFS_PROP_SHAREISCSI), usercred);
2067 	crfree(usercred);
2068 	return (error);
2069 }
2070 
2071 /*
2072  * inputs:
2073  * zc_name		name of filesystem
2074  * zc_nvlist_src{_size}	nvlist of delegated permissions
2075  * zc_perm_action	allow/unallow flag
2076  *
2077  * outputs:		none
2078  */
2079 static int
2080 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2081 {
2082 	int error;
2083 	nvlist_t *fsaclnv = NULL;
2084 
2085 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2086 	    zc->zc_iflags, &fsaclnv)) != 0)
2087 		return (error);
2088 
2089 	/*
2090 	 * Verify nvlist is constructed correctly
2091 	 */
2092 	if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2093 		nvlist_free(fsaclnv);
2094 		return (EINVAL);
2095 	}
2096 
2097 	/*
2098 	 * If we don't have PRIV_SYS_MOUNT, then validate
2099 	 * that user is allowed to hand out each permission in
2100 	 * the nvlist(s)
2101 	 */
2102 
2103 	error = secpolicy_zfs(CRED());
2104 	if (error) {
2105 		if (zc->zc_perm_action == B_FALSE) {
2106 			error = dsl_deleg_can_allow(zc->zc_name,
2107 			    fsaclnv, CRED());
2108 		} else {
2109 			error = dsl_deleg_can_unallow(zc->zc_name,
2110 			    fsaclnv, CRED());
2111 		}
2112 	}
2113 
2114 	if (error == 0)
2115 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2116 
2117 	nvlist_free(fsaclnv);
2118 	return (error);
2119 }
2120 
2121 /*
2122  * inputs:
2123  * zc_name		name of filesystem
2124  *
2125  * outputs:
2126  * zc_nvlist_src{_size}	nvlist of delegated permissions
2127  */
2128 static int
2129 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2130 {
2131 	nvlist_t *nvp;
2132 	int error;
2133 
2134 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2135 		error = put_nvlist(zc, nvp);
2136 		nvlist_free(nvp);
2137 	}
2138 
2139 	return (error);
2140 }
2141 
2142 /*
2143  * inputs:
2144  * zc_name		name of volume
2145  *
2146  * outputs:		none
2147  */
2148 static int
2149 zfs_ioc_create_minor(zfs_cmd_t *zc)
2150 {
2151 	return (zvol_create_minor(zc->zc_name, ddi_driver_major(zfs_dip)));
2152 }
2153 
2154 /*
2155  * inputs:
2156  * zc_name		name of volume
2157  *
2158  * outputs:		none
2159  */
2160 static int
2161 zfs_ioc_remove_minor(zfs_cmd_t *zc)
2162 {
2163 	return (zvol_remove_minor(zc->zc_name));
2164 }
2165 
2166 /*
2167  * Search the vfs list for a specified resource.  Returns a pointer to it
2168  * or NULL if no suitable entry is found. The caller of this routine
2169  * is responsible for releasing the returned vfs pointer.
2170  */
2171 static vfs_t *
2172 zfs_get_vfs(const char *resource)
2173 {
2174 	struct vfs *vfsp;
2175 	struct vfs *vfs_found = NULL;
2176 
2177 	vfs_list_read_lock();
2178 	vfsp = rootvfs;
2179 	do {
2180 		if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2181 			VFS_HOLD(vfsp);
2182 			vfs_found = vfsp;
2183 			break;
2184 		}
2185 		vfsp = vfsp->vfs_next;
2186 	} while (vfsp != rootvfs);
2187 	vfs_list_unlock();
2188 	return (vfs_found);
2189 }
2190 
2191 /* ARGSUSED */
2192 static void
2193 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2194 {
2195 	zfs_creat_t *zct = arg;
2196 
2197 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2198 }
2199 
2200 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
2201 
2202 /*
2203  * inputs:
2204  * createprops		list of properties requested by creator
2205  * default_zplver	zpl version to use if unspecified in createprops
2206  * fuids_ok		fuids allowed in this version of the spa?
2207  * os			parent objset pointer (NULL if root fs)
2208  *
2209  * outputs:
2210  * zplprops	values for the zplprops we attach to the master node object
2211  * is_ci	true if requested file system will be purely case-insensitive
2212  *
2213  * Determine the settings for utf8only, normalization and
2214  * casesensitivity.  Specific values may have been requested by the
2215  * creator and/or we can inherit values from the parent dataset.  If
2216  * the file system is of too early a vintage, a creator can not
2217  * request settings for these properties, even if the requested
2218  * setting is the default value.  We don't actually want to create dsl
2219  * properties for these, so remove them from the source nvlist after
2220  * processing.
2221  */
2222 static int
2223 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2224     boolean_t fuids_ok, nvlist_t *createprops, nvlist_t *zplprops,
2225     boolean_t *is_ci)
2226 {
2227 	uint64_t sense = ZFS_PROP_UNDEFINED;
2228 	uint64_t norm = ZFS_PROP_UNDEFINED;
2229 	uint64_t u8 = ZFS_PROP_UNDEFINED;
2230 
2231 	ASSERT(zplprops != NULL);
2232 
2233 	/*
2234 	 * Pull out creator prop choices, if any.
2235 	 */
2236 	if (createprops) {
2237 		(void) nvlist_lookup_uint64(createprops,
2238 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2239 		(void) nvlist_lookup_uint64(createprops,
2240 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2241 		(void) nvlist_remove_all(createprops,
2242 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2243 		(void) nvlist_lookup_uint64(createprops,
2244 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2245 		(void) nvlist_remove_all(createprops,
2246 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2247 		(void) nvlist_lookup_uint64(createprops,
2248 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2249 		(void) nvlist_remove_all(createprops,
2250 		    zfs_prop_to_name(ZFS_PROP_CASE));
2251 	}
2252 
2253 	/*
2254 	 * If the zpl version requested is whacky or the file system
2255 	 * or pool is version is too "young" to support normalization
2256 	 * and the creator tried to set a value for one of the props,
2257 	 * error out.
2258 	 */
2259 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2260 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2261 	    (zplver < ZPL_VERSION_NORMALIZATION &&
2262 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2263 	    sense != ZFS_PROP_UNDEFINED)))
2264 		return (ENOTSUP);
2265 
2266 	/*
2267 	 * Put the version in the zplprops
2268 	 */
2269 	VERIFY(nvlist_add_uint64(zplprops,
2270 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2271 
2272 	if (norm == ZFS_PROP_UNDEFINED)
2273 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2274 	VERIFY(nvlist_add_uint64(zplprops,
2275 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2276 
2277 	/*
2278 	 * If we're normalizing, names must always be valid UTF-8 strings.
2279 	 */
2280 	if (norm)
2281 		u8 = 1;
2282 	if (u8 == ZFS_PROP_UNDEFINED)
2283 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2284 	VERIFY(nvlist_add_uint64(zplprops,
2285 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2286 
2287 	if (sense == ZFS_PROP_UNDEFINED)
2288 		VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2289 	VERIFY(nvlist_add_uint64(zplprops,
2290 	    zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2291 
2292 	if (is_ci)
2293 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
2294 
2295 	return (0);
2296 }
2297 
2298 static int
2299 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2300     nvlist_t *zplprops, boolean_t *is_ci)
2301 {
2302 	boolean_t fuids_ok = B_TRUE;
2303 	uint64_t zplver = ZPL_VERSION;
2304 	objset_t *os = NULL;
2305 	char parentname[MAXNAMELEN];
2306 	char *cp;
2307 	int error;
2308 
2309 	(void) strlcpy(parentname, dataset, sizeof (parentname));
2310 	cp = strrchr(parentname, '/');
2311 	ASSERT(cp != NULL);
2312 	cp[0] = '\0';
2313 
2314 	if (zfs_earlier_version(dataset, SPA_VERSION_USERSPACE))
2315 		zplver = ZPL_VERSION_USERSPACE - 1;
2316 	if (zfs_earlier_version(dataset, SPA_VERSION_FUID)) {
2317 		zplver = ZPL_VERSION_FUID - 1;
2318 		fuids_ok = B_FALSE;
2319 	}
2320 
2321 	/*
2322 	 * Open parent object set so we can inherit zplprop values.
2323 	 */
2324 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2325 		return (error);
2326 
2327 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, createprops,
2328 	    zplprops, is_ci);
2329 	dmu_objset_rele(os, FTAG);
2330 	return (error);
2331 }
2332 
2333 static int
2334 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2335     nvlist_t *zplprops, boolean_t *is_ci)
2336 {
2337 	boolean_t fuids_ok = B_TRUE;
2338 	uint64_t zplver = ZPL_VERSION;
2339 	int error;
2340 
2341 	if (spa_vers < SPA_VERSION_FUID) {
2342 		zplver = ZPL_VERSION_FUID - 1;
2343 		fuids_ok = B_FALSE;
2344 	}
2345 
2346 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, createprops,
2347 	    zplprops, is_ci);
2348 	return (error);
2349 }
2350 
2351 /*
2352  * inputs:
2353  * zc_objset_type	type of objset to create (fs vs zvol)
2354  * zc_name		name of new objset
2355  * zc_value		name of snapshot to clone from (may be empty)
2356  * zc_nvlist_src{_size}	nvlist of properties to apply
2357  *
2358  * outputs: none
2359  */
2360 static int
2361 zfs_ioc_create(zfs_cmd_t *zc)
2362 {
2363 	objset_t *clone;
2364 	int error = 0;
2365 	zfs_creat_t zct;
2366 	nvlist_t *nvprops = NULL;
2367 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2368 	dmu_objset_type_t type = zc->zc_objset_type;
2369 
2370 	switch (type) {
2371 
2372 	case DMU_OST_ZFS:
2373 		cbfunc = zfs_create_cb;
2374 		break;
2375 
2376 	case DMU_OST_ZVOL:
2377 		cbfunc = zvol_create_cb;
2378 		break;
2379 
2380 	default:
2381 		cbfunc = NULL;
2382 		break;
2383 	}
2384 	if (strchr(zc->zc_name, '@') ||
2385 	    strchr(zc->zc_name, '%'))
2386 		return (EINVAL);
2387 
2388 	if (zc->zc_nvlist_src != NULL &&
2389 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2390 	    zc->zc_iflags, &nvprops)) != 0)
2391 		return (error);
2392 
2393 	zct.zct_zplprops = NULL;
2394 	zct.zct_props = nvprops;
2395 
2396 	if (zc->zc_value[0] != '\0') {
2397 		/*
2398 		 * We're creating a clone of an existing snapshot.
2399 		 */
2400 		zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2401 		if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2402 			nvlist_free(nvprops);
2403 			return (EINVAL);
2404 		}
2405 
2406 		error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2407 		if (error) {
2408 			nvlist_free(nvprops);
2409 			return (error);
2410 		}
2411 
2412 		error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2413 		dmu_objset_rele(clone, FTAG);
2414 		if (error) {
2415 			nvlist_free(nvprops);
2416 			return (error);
2417 		}
2418 	} else {
2419 		boolean_t is_insensitive = B_FALSE;
2420 
2421 		if (cbfunc == NULL) {
2422 			nvlist_free(nvprops);
2423 			return (EINVAL);
2424 		}
2425 
2426 		if (type == DMU_OST_ZVOL) {
2427 			uint64_t volsize, volblocksize;
2428 
2429 			if (nvprops == NULL ||
2430 			    nvlist_lookup_uint64(nvprops,
2431 			    zfs_prop_to_name(ZFS_PROP_VOLSIZE),
2432 			    &volsize) != 0) {
2433 				nvlist_free(nvprops);
2434 				return (EINVAL);
2435 			}
2436 
2437 			if ((error = nvlist_lookup_uint64(nvprops,
2438 			    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2439 			    &volblocksize)) != 0 && error != ENOENT) {
2440 				nvlist_free(nvprops);
2441 				return (EINVAL);
2442 			}
2443 
2444 			if (error != 0)
2445 				volblocksize = zfs_prop_default_numeric(
2446 				    ZFS_PROP_VOLBLOCKSIZE);
2447 
2448 			if ((error = zvol_check_volblocksize(
2449 			    volblocksize)) != 0 ||
2450 			    (error = zvol_check_volsize(volsize,
2451 			    volblocksize)) != 0) {
2452 				nvlist_free(nvprops);
2453 				return (error);
2454 			}
2455 		} else if (type == DMU_OST_ZFS) {
2456 			int error;
2457 
2458 			/*
2459 			 * We have to have normalization and
2460 			 * case-folding flags correct when we do the
2461 			 * file system creation, so go figure them out
2462 			 * now.
2463 			 */
2464 			VERIFY(nvlist_alloc(&zct.zct_zplprops,
2465 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2466 			error = zfs_fill_zplprops(zc->zc_name, nvprops,
2467 			    zct.zct_zplprops, &is_insensitive);
2468 			if (error != 0) {
2469 				nvlist_free(nvprops);
2470 				nvlist_free(zct.zct_zplprops);
2471 				return (error);
2472 			}
2473 		}
2474 		error = dmu_objset_create(zc->zc_name, type,
2475 		    is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
2476 		nvlist_free(zct.zct_zplprops);
2477 	}
2478 
2479 	/*
2480 	 * It would be nice to do this atomically.
2481 	 */
2482 	if (error == 0) {
2483 		if ((error = zfs_set_prop_nvlist(zc->zc_name, nvprops)) != 0)
2484 			(void) dmu_objset_destroy(zc->zc_name, B_FALSE);
2485 	}
2486 	nvlist_free(nvprops);
2487 	return (error);
2488 }
2489 
2490 /*
2491  * inputs:
2492  * zc_name	name of filesystem
2493  * zc_value	short name of snapshot
2494  * zc_cookie	recursive flag
2495  * zc_nvlist_src[_size] property list
2496  *
2497  * outputs:	none
2498  */
2499 static int
2500 zfs_ioc_snapshot(zfs_cmd_t *zc)
2501 {
2502 	nvlist_t *nvprops = NULL;
2503 	int error;
2504 	boolean_t recursive = zc->zc_cookie;
2505 
2506 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2507 		return (EINVAL);
2508 
2509 	if (zc->zc_nvlist_src != NULL &&
2510 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2511 	    zc->zc_iflags, &nvprops)) != 0)
2512 		return (error);
2513 
2514 	error = zfs_check_userprops(zc->zc_name, nvprops);
2515 	if (error)
2516 		goto out;
2517 
2518 	if (nvprops != NULL && nvlist_next_nvpair(nvprops, NULL) != NULL &&
2519 	    zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
2520 		error = ENOTSUP;
2521 		goto out;
2522 	}
2523 
2524 	error = dmu_objset_snapshot(zc->zc_name, zc->zc_value,
2525 	    nvprops, recursive);
2526 
2527 out:
2528 	nvlist_free(nvprops);
2529 	return (error);
2530 }
2531 
2532 int
2533 zfs_unmount_snap(char *name, void *arg)
2534 {
2535 	vfs_t *vfsp = NULL;
2536 
2537 	if (arg) {
2538 		char *snapname = arg;
2539 		int len = strlen(name) + strlen(snapname) + 2;
2540 		char *buf = kmem_alloc(len, KM_SLEEP);
2541 
2542 		(void) strcpy(buf, name);
2543 		(void) strcat(buf, "@");
2544 		(void) strcat(buf, snapname);
2545 		vfsp = zfs_get_vfs(buf);
2546 		kmem_free(buf, len);
2547 	} else if (strchr(name, '@')) {
2548 		vfsp = zfs_get_vfs(name);
2549 	}
2550 
2551 	if (vfsp) {
2552 		/*
2553 		 * Always force the unmount for snapshots.
2554 		 */
2555 		int flag = MS_FORCE;
2556 		int err;
2557 
2558 		if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
2559 			VFS_RELE(vfsp);
2560 			return (err);
2561 		}
2562 		VFS_RELE(vfsp);
2563 		if ((err = dounmount(vfsp, flag, kcred)) != 0)
2564 			return (err);
2565 	}
2566 	return (0);
2567 }
2568 
2569 /*
2570  * inputs:
2571  * zc_name		name of filesystem
2572  * zc_value		short name of snapshot
2573  * zc_defer_destroy	mark for deferred destroy
2574  *
2575  * outputs:	none
2576  */
2577 static int
2578 zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
2579 {
2580 	int err;
2581 
2582 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
2583 		return (EINVAL);
2584 	err = dmu_objset_find(zc->zc_name,
2585 	    zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
2586 	if (err)
2587 		return (err);
2588 	return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
2589 	    zc->zc_defer_destroy));
2590 }
2591 
2592 /*
2593  * inputs:
2594  * zc_name		name of dataset to destroy
2595  * zc_objset_type	type of objset
2596  * zc_defer_destroy	mark for deferred destroy
2597  *
2598  * outputs:		none
2599  */
2600 static int
2601 zfs_ioc_destroy(zfs_cmd_t *zc)
2602 {
2603 	if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
2604 		int err = zfs_unmount_snap(zc->zc_name, NULL);
2605 		if (err)
2606 			return (err);
2607 	}
2608 
2609 	return (dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy));
2610 }
2611 
2612 /*
2613  * inputs:
2614  * zc_name	name of dataset to rollback (to most recent snapshot)
2615  *
2616  * outputs:	none
2617  */
2618 static int
2619 zfs_ioc_rollback(zfs_cmd_t *zc)
2620 {
2621 	dsl_dataset_t *ds, *clone;
2622 	int error;
2623 	zfsvfs_t *zfsvfs;
2624 	char *clone_name;
2625 
2626 	error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
2627 	if (error)
2628 		return (error);
2629 
2630 	/* must not be a snapshot */
2631 	if (dsl_dataset_is_snapshot(ds)) {
2632 		dsl_dataset_rele(ds, FTAG);
2633 		return (EINVAL);
2634 	}
2635 
2636 	/* must have a most recent snapshot */
2637 	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
2638 		dsl_dataset_rele(ds, FTAG);
2639 		return (EINVAL);
2640 	}
2641 
2642 	/*
2643 	 * Create clone of most recent snapshot.
2644 	 */
2645 	clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
2646 	error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
2647 	if (error)
2648 		goto out;
2649 
2650 	error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
2651 	if (error)
2652 		goto out;
2653 
2654 	/*
2655 	 * Do clone swap.
2656 	 */
2657 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
2658 		error = zfs_suspend_fs(zfsvfs);
2659 		if (error == 0) {
2660 			int resume_err;
2661 
2662 			if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
2663 				error = dsl_dataset_clone_swap(clone, ds,
2664 				    B_TRUE);
2665 				dsl_dataset_disown(ds, FTAG);
2666 				ds = NULL;
2667 			} else {
2668 				error = EBUSY;
2669 			}
2670 			resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
2671 			error = error ? error : resume_err;
2672 		}
2673 		VFS_RELE(zfsvfs->z_vfs);
2674 	} else {
2675 		if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
2676 			error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
2677 			dsl_dataset_disown(ds, FTAG);
2678 			ds = NULL;
2679 		} else {
2680 			error = EBUSY;
2681 		}
2682 	}
2683 
2684 	/*
2685 	 * Destroy clone (which also closes it).
2686 	 */
2687 	(void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
2688 
2689 out:
2690 	strfree(clone_name);
2691 	if (ds)
2692 		dsl_dataset_rele(ds, FTAG);
2693 	return (error);
2694 }
2695 
2696 /*
2697  * inputs:
2698  * zc_name	old name of dataset
2699  * zc_value	new name of dataset
2700  * zc_cookie	recursive flag (only valid for snapshots)
2701  *
2702  * outputs:	none
2703  */
2704 static int
2705 zfs_ioc_rename(zfs_cmd_t *zc)
2706 {
2707 	boolean_t recursive = zc->zc_cookie & 1;
2708 
2709 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2710 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
2711 	    strchr(zc->zc_value, '%'))
2712 		return (EINVAL);
2713 
2714 	/*
2715 	 * Unmount snapshot unless we're doing a recursive rename,
2716 	 * in which case the dataset code figures out which snapshots
2717 	 * to unmount.
2718 	 */
2719 	if (!recursive && strchr(zc->zc_name, '@') != NULL &&
2720 	    zc->zc_objset_type == DMU_OST_ZFS) {
2721 		int err = zfs_unmount_snap(zc->zc_name, NULL);
2722 		if (err)
2723 			return (err);
2724 	}
2725 	return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
2726 }
2727 
2728 static void
2729 clear_props(char *dataset, nvlist_t *props, nvlist_t *newprops)
2730 {
2731 	zfs_cmd_t *zc;
2732 	nvpair_t *prop;
2733 
2734 	if (props == NULL)
2735 		return;
2736 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
2737 	(void) strcpy(zc->zc_name, dataset);
2738 	for (prop = nvlist_next_nvpair(props, NULL); prop;
2739 	    prop = nvlist_next_nvpair(props, prop)) {
2740 		if (newprops != NULL &&
2741 		    nvlist_exists(newprops, nvpair_name(prop)))
2742 			continue;
2743 		(void) strcpy(zc->zc_value, nvpair_name(prop));
2744 		if (zfs_secpolicy_inherit(zc, CRED()) == 0)
2745 			(void) zfs_ioc_inherit_prop(zc);
2746 	}
2747 	kmem_free(zc, sizeof (zfs_cmd_t));
2748 }
2749 
2750 /*
2751  * inputs:
2752  * zc_name		name of containing filesystem
2753  * zc_nvlist_src{_size}	nvlist of properties to apply
2754  * zc_value		name of snapshot to create
2755  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
2756  * zc_cookie		file descriptor to recv from
2757  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
2758  * zc_guid		force flag
2759  *
2760  * outputs:
2761  * zc_cookie		number of bytes read
2762  */
2763 static int
2764 zfs_ioc_recv(zfs_cmd_t *zc)
2765 {
2766 	file_t *fp;
2767 	objset_t *os;
2768 	dmu_recv_cookie_t drc;
2769 	boolean_t force = (boolean_t)zc->zc_guid;
2770 	int error, fd;
2771 	offset_t off;
2772 	nvlist_t *props = NULL;
2773 	nvlist_t *origprops = NULL;
2774 	objset_t *origin = NULL;
2775 	char *tosnap;
2776 	char tofs[ZFS_MAXNAMELEN];
2777 
2778 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
2779 	    strchr(zc->zc_value, '@') == NULL ||
2780 	    strchr(zc->zc_value, '%'))
2781 		return (EINVAL);
2782 
2783 	(void) strcpy(tofs, zc->zc_value);
2784 	tosnap = strchr(tofs, '@');
2785 	*tosnap = '\0';
2786 	tosnap++;
2787 
2788 	if (zc->zc_nvlist_src != NULL &&
2789 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2790 	    zc->zc_iflags, &props)) != 0)
2791 		return (error);
2792 
2793 	fd = zc->zc_cookie;
2794 	fp = getf(fd);
2795 	if (fp == NULL) {
2796 		nvlist_free(props);
2797 		return (EBADF);
2798 	}
2799 
2800 	if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
2801 		/*
2802 		 * If new properties are supplied, they are to completely
2803 		 * replace the existing ones, so stash away the existing ones.
2804 		 */
2805 		(void) dsl_prop_get_all(os, &origprops, B_TRUE);
2806 
2807 		dmu_objset_rele(os, FTAG);
2808 	}
2809 
2810 	if (zc->zc_string[0]) {
2811 		error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
2812 		if (error)
2813 			goto out;
2814 	}
2815 
2816 	error = dmu_recv_begin(tofs, tosnap, &zc->zc_begin_record,
2817 	    force, origin, &drc);
2818 	if (origin)
2819 		dmu_objset_rele(origin, FTAG);
2820 	if (error)
2821 		goto out;
2822 
2823 	/*
2824 	 * Reset properties.  We do this before we receive the stream
2825 	 * so that the properties are applied to the new data.
2826 	 */
2827 	if (props) {
2828 		clear_props(tofs, origprops, props);
2829 		/*
2830 		 * XXX - Note, this is all-or-nothing; should be best-effort.
2831 		 */
2832 		(void) zfs_set_prop_nvlist(tofs, props);
2833 	}
2834 
2835 	off = fp->f_offset;
2836 	error = dmu_recv_stream(&drc, fp->f_vnode, &off);
2837 
2838 	if (error == 0) {
2839 		zfsvfs_t *zfsvfs = NULL;
2840 
2841 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
2842 			/* online recv */
2843 			int end_err;
2844 
2845 			error = zfs_suspend_fs(zfsvfs);
2846 			/*
2847 			 * If the suspend fails, then the recv_end will
2848 			 * likely also fail, and clean up after itself.
2849 			 */
2850 			end_err = dmu_recv_end(&drc);
2851 			if (error == 0) {
2852 				int resume_err =
2853 				    zfs_resume_fs(zfsvfs, tofs);
2854 				error = error ? error : resume_err;
2855 			}
2856 			error = error ? error : end_err;
2857 			VFS_RELE(zfsvfs->z_vfs);
2858 		} else {
2859 			error = dmu_recv_end(&drc);
2860 		}
2861 	}
2862 
2863 	zc->zc_cookie = off - fp->f_offset;
2864 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
2865 		fp->f_offset = off;
2866 
2867 	/*
2868 	 * On error, restore the original props.
2869 	 */
2870 	if (error && props) {
2871 		clear_props(tofs, props, NULL);
2872 		(void) zfs_set_prop_nvlist(tofs, origprops);
2873 	}
2874 out:
2875 	nvlist_free(props);
2876 	nvlist_free(origprops);
2877 	releasef(fd);
2878 	return (error);
2879 }
2880 
2881 /*
2882  * inputs:
2883  * zc_name	name of snapshot to send
2884  * zc_value	short name of incremental fromsnap (may be empty)
2885  * zc_cookie	file descriptor to send stream to
2886  * zc_obj	fromorigin flag (mutually exclusive with zc_value)
2887  *
2888  * outputs: none
2889  */
2890 static int
2891 zfs_ioc_send(zfs_cmd_t *zc)
2892 {
2893 	objset_t *fromsnap = NULL;
2894 	objset_t *tosnap;
2895 	file_t *fp;
2896 	int error;
2897 	offset_t off;
2898 
2899 	error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
2900 	if (error)
2901 		return (error);
2902 
2903 	if (zc->zc_value[0] != '\0') {
2904 		char *buf;
2905 		char *cp;
2906 
2907 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2908 		(void) strncpy(buf, zc->zc_name, MAXPATHLEN);
2909 		cp = strchr(buf, '@');
2910 		if (cp)
2911 			*(cp+1) = 0;
2912 		(void) strncat(buf, zc->zc_value, MAXPATHLEN);
2913 		error = dmu_objset_hold(buf, FTAG, &fromsnap);
2914 		kmem_free(buf, MAXPATHLEN);
2915 		if (error) {
2916 			dmu_objset_rele(tosnap, FTAG);
2917 			return (error);
2918 		}
2919 	}
2920 
2921 	fp = getf(zc->zc_cookie);
2922 	if (fp == NULL) {
2923 		dmu_objset_rele(tosnap, FTAG);
2924 		if (fromsnap)
2925 			dmu_objset_rele(fromsnap, FTAG);
2926 		return (EBADF);
2927 	}
2928 
2929 	off = fp->f_offset;
2930 	error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
2931 
2932 	if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
2933 		fp->f_offset = off;
2934 	releasef(zc->zc_cookie);
2935 	if (fromsnap)
2936 		dmu_objset_rele(fromsnap, FTAG);
2937 	dmu_objset_rele(tosnap, FTAG);
2938 	return (error);
2939 }
2940 
2941 static int
2942 zfs_ioc_inject_fault(zfs_cmd_t *zc)
2943 {
2944 	int id, error;
2945 
2946 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
2947 	    &zc->zc_inject_record);
2948 
2949 	if (error == 0)
2950 		zc->zc_guid = (uint64_t)id;
2951 
2952 	return (error);
2953 }
2954 
2955 static int
2956 zfs_ioc_clear_fault(zfs_cmd_t *zc)
2957 {
2958 	return (zio_clear_fault((int)zc->zc_guid));
2959 }
2960 
2961 static int
2962 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
2963 {
2964 	int id = (int)zc->zc_guid;
2965 	int error;
2966 
2967 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
2968 	    &zc->zc_inject_record);
2969 
2970 	zc->zc_guid = id;
2971 
2972 	return (error);
2973 }
2974 
2975 static int
2976 zfs_ioc_error_log(zfs_cmd_t *zc)
2977 {
2978 	spa_t *spa;
2979 	int error;
2980 	size_t count = (size_t)zc->zc_nvlist_dst_size;
2981 
2982 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2983 		return (error);
2984 
2985 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
2986 	    &count);
2987 	if (error == 0)
2988 		zc->zc_nvlist_dst_size = count;
2989 	else
2990 		zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
2991 
2992 	spa_close(spa, FTAG);
2993 
2994 	return (error);
2995 }
2996 
2997 static int
2998 zfs_ioc_clear(zfs_cmd_t *zc)
2999 {
3000 	spa_t *spa;
3001 	vdev_t *vd;
3002 	int error;
3003 
3004 	/*
3005 	 * On zpool clear we also fix up missing slogs
3006 	 */
3007 	mutex_enter(&spa_namespace_lock);
3008 	spa = spa_lookup(zc->zc_name);
3009 	if (spa == NULL) {
3010 		mutex_exit(&spa_namespace_lock);
3011 		return (EIO);
3012 	}
3013 	if (spa->spa_log_state == SPA_LOG_MISSING) {
3014 		/* we need to let spa_open/spa_load clear the chains */
3015 		spa->spa_log_state = SPA_LOG_CLEAR;
3016 	}
3017 	mutex_exit(&spa_namespace_lock);
3018 
3019 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
3020 		return (error);
3021 
3022 	spa_vdev_state_enter(spa);
3023 
3024 	if (zc->zc_guid == 0) {
3025 		vd = NULL;
3026 	} else {
3027 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
3028 		if (vd == NULL) {
3029 			(void) spa_vdev_state_exit(spa, NULL, ENODEV);
3030 			spa_close(spa, FTAG);
3031 			return (ENODEV);
3032 		}
3033 	}
3034 
3035 	vdev_clear(spa, vd);
3036 
3037 	(void) spa_vdev_state_exit(spa, NULL, 0);
3038 
3039 	/*
3040 	 * Resume any suspended I/Os.
3041 	 */
3042 	if (zio_resume(spa) != 0)
3043 		error = EIO;
3044 
3045 	spa_close(spa, FTAG);
3046 
3047 	return (error);
3048 }
3049 
3050 /*
3051  * inputs:
3052  * zc_name	name of filesystem
3053  * zc_value	name of origin snapshot
3054  *
3055  * outputs:	none
3056  */
3057 static int
3058 zfs_ioc_promote(zfs_cmd_t *zc)
3059 {
3060 	char *cp;
3061 
3062 	/*
3063 	 * We don't need to unmount *all* the origin fs's snapshots, but
3064 	 * it's easier.
3065 	 */
3066 	cp = strchr(zc->zc_value, '@');
3067 	if (cp)
3068 		*cp = '\0';
3069 	(void) dmu_objset_find(zc->zc_value,
3070 	    zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
3071 	return (dsl_dataset_promote(zc->zc_name));
3072 }
3073 
3074 /*
3075  * Retrieve a single {user|group}{used|quota}@... property.
3076  *
3077  * inputs:
3078  * zc_name	name of filesystem
3079  * zc_objset_type zfs_userquota_prop_t
3080  * zc_value	domain name (eg. "S-1-234-567-89")
3081  * zc_guid	RID/UID/GID
3082  *
3083  * outputs:
3084  * zc_cookie	property value
3085  */
3086 static int
3087 zfs_ioc_userspace_one(zfs_cmd_t *zc)
3088 {
3089 	zfsvfs_t *zfsvfs;
3090 	int error;
3091 
3092 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
3093 		return (EINVAL);
3094 
3095 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
3096 	if (error)
3097 		return (error);
3098 
3099 	error = zfs_userspace_one(zfsvfs,
3100 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
3101 	zfsvfs_rele(zfsvfs, FTAG);
3102 
3103 	return (error);
3104 }
3105 
3106 /*
3107  * inputs:
3108  * zc_name		name of filesystem
3109  * zc_cookie		zap cursor
3110  * zc_objset_type	zfs_userquota_prop_t
3111  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
3112  *
3113  * outputs:
3114  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
3115  * zc_cookie	zap cursor
3116  */
3117 static int
3118 zfs_ioc_userspace_many(zfs_cmd_t *zc)
3119 {
3120 	zfsvfs_t *zfsvfs;
3121 	int error;
3122 
3123 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs);
3124 	if (error)
3125 		return (error);
3126 
3127 	int bufsize = zc->zc_nvlist_dst_size;
3128 	void *buf = kmem_alloc(bufsize, KM_SLEEP);
3129 
3130 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
3131 	    buf, &zc->zc_nvlist_dst_size);
3132 
3133 	if (error == 0) {
3134 		error = xcopyout(buf,
3135 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
3136 		    zc->zc_nvlist_dst_size);
3137 	}
3138 	kmem_free(buf, bufsize);
3139 	zfsvfs_rele(zfsvfs, FTAG);
3140 
3141 	return (error);
3142 }
3143 
3144 /*
3145  * inputs:
3146  * zc_name		name of filesystem
3147  *
3148  * outputs:
3149  * none
3150  */
3151 static int
3152 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
3153 {
3154 	objset_t *os;
3155 	int error;
3156 	zfsvfs_t *zfsvfs;
3157 
3158 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3159 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
3160 			/*
3161 			 * If userused is not enabled, it may be because the
3162 			 * objset needs to be closed & reopened (to grow the
3163 			 * objset_phys_t).  Suspend/resume the fs will do that.
3164 			 */
3165 			error = zfs_suspend_fs(zfsvfs);
3166 			if (error == 0)
3167 				error = zfs_resume_fs(zfsvfs, zc->zc_name);
3168 		}
3169 		if (error == 0)
3170 			error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
3171 		VFS_RELE(zfsvfs->z_vfs);
3172 	} else {
3173 		/* XXX kind of reading contents without owning */
3174 		error = dmu_objset_hold(zc->zc_name, FTAG, &os);
3175 		if (error)
3176 			return (error);
3177 
3178 		error = dmu_objset_userspace_upgrade(os);
3179 		dmu_objset_rele(os, FTAG);
3180 	}
3181 
3182 	return (error);
3183 }
3184 
3185 /*
3186  * We don't want to have a hard dependency
3187  * against some special symbols in sharefs
3188  * nfs, and smbsrv.  Determine them if needed when
3189  * the first file system is shared.
3190  * Neither sharefs, nfs or smbsrv are unloadable modules.
3191  */
3192 int (*znfsexport_fs)(void *arg);
3193 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
3194 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
3195 
3196 int zfs_nfsshare_inited;
3197 int zfs_smbshare_inited;
3198 
3199 ddi_modhandle_t nfs_mod;
3200 ddi_modhandle_t sharefs_mod;
3201 ddi_modhandle_t smbsrv_mod;
3202 kmutex_t zfs_share_lock;
3203 
3204 static int
3205 zfs_init_sharefs()
3206 {
3207 	int error;
3208 
3209 	ASSERT(MUTEX_HELD(&zfs_share_lock));
3210 	/* Both NFS and SMB shares also require sharetab support. */
3211 	if (sharefs_mod == NULL && ((sharefs_mod =
3212 	    ddi_modopen("fs/sharefs",
3213 	    KRTLD_MODE_FIRST, &error)) == NULL)) {
3214 		return (ENOSYS);
3215 	}
3216 	if (zshare_fs == NULL && ((zshare_fs =
3217 	    (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
3218 	    ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
3219 		return (ENOSYS);
3220 	}
3221 	return (0);
3222 }
3223 
3224 static int
3225 zfs_ioc_share(zfs_cmd_t *zc)
3226 {
3227 	int error;
3228 	int opcode;
3229 
3230 	switch (zc->zc_share.z_sharetype) {
3231 	case ZFS_SHARE_NFS:
3232 	case ZFS_UNSHARE_NFS:
3233 		if (zfs_nfsshare_inited == 0) {
3234 			mutex_enter(&zfs_share_lock);
3235 			if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
3236 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
3237 				mutex_exit(&zfs_share_lock);
3238 				return (ENOSYS);
3239 			}
3240 			if (znfsexport_fs == NULL &&
3241 			    ((znfsexport_fs = (int (*)(void *))
3242 			    ddi_modsym(nfs_mod,
3243 			    "nfs_export", &error)) == NULL)) {
3244 				mutex_exit(&zfs_share_lock);
3245 				return (ENOSYS);
3246 			}
3247 			error = zfs_init_sharefs();
3248 			if (error) {
3249 				mutex_exit(&zfs_share_lock);
3250 				return (ENOSYS);
3251 			}
3252 			zfs_nfsshare_inited = 1;
3253 			mutex_exit(&zfs_share_lock);
3254 		}
3255 		break;
3256 	case ZFS_SHARE_SMB:
3257 	case ZFS_UNSHARE_SMB:
3258 		if (zfs_smbshare_inited == 0) {
3259 			mutex_enter(&zfs_share_lock);
3260 			if (smbsrv_mod == NULL && ((smbsrv_mod =
3261 			    ddi_modopen("drv/smbsrv",
3262 			    KRTLD_MODE_FIRST, &error)) == NULL)) {
3263 				mutex_exit(&zfs_share_lock);
3264 				return (ENOSYS);
3265 			}
3266 			if (zsmbexport_fs == NULL && ((zsmbexport_fs =
3267 			    (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
3268 			    "smb_server_share", &error)) == NULL)) {
3269 				mutex_exit(&zfs_share_lock);
3270 				return (ENOSYS);
3271 			}
3272 			error = zfs_init_sharefs();
3273 			if (error) {
3274 				mutex_exit(&zfs_share_lock);
3275 				return (ENOSYS);
3276 			}
3277 			zfs_smbshare_inited = 1;
3278 			mutex_exit(&zfs_share_lock);
3279 		}
3280 		break;
3281 	default:
3282 		return (EINVAL);
3283 	}
3284 
3285 	switch (zc->zc_share.z_sharetype) {
3286 	case ZFS_SHARE_NFS:
3287 	case ZFS_UNSHARE_NFS:
3288 		if (error =
3289 		    znfsexport_fs((void *)
3290 		    (uintptr_t)zc->zc_share.z_exportdata))
3291 			return (error);
3292 		break;
3293 	case ZFS_SHARE_SMB:
3294 	case ZFS_UNSHARE_SMB:
3295 		if (error = zsmbexport_fs((void *)
3296 		    (uintptr_t)zc->zc_share.z_exportdata,
3297 		    zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
3298 		    B_TRUE: B_FALSE)) {
3299 			return (error);
3300 		}
3301 		break;
3302 	}
3303 
3304 	opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
3305 	    zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
3306 	    SHAREFS_ADD : SHAREFS_REMOVE;
3307 
3308 	/*
3309 	 * Add or remove share from sharetab
3310 	 */
3311 	error = zshare_fs(opcode,
3312 	    (void *)(uintptr_t)zc->zc_share.z_sharedata,
3313 	    zc->zc_share.z_sharemax);
3314 
3315 	return (error);
3316 
3317 }
3318 
3319 ace_t full_access[] = {
3320 	{(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
3321 };
3322 
3323 /*
3324  * Remove all ACL files in shares dir
3325  */
3326 static int
3327 zfs_smb_acl_purge(znode_t *dzp)
3328 {
3329 	zap_cursor_t	zc;
3330 	zap_attribute_t	zap;
3331 	zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
3332 	int error;
3333 
3334 	for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
3335 	    (error = zap_cursor_retrieve(&zc, &zap)) == 0;
3336 	    zap_cursor_advance(&zc)) {
3337 		if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
3338 		    NULL, 0)) != 0)
3339 			break;
3340 	}
3341 	zap_cursor_fini(&zc);
3342 	return (error);
3343 }
3344 
3345 static int
3346 zfs_ioc_smb_acl(zfs_cmd_t *zc)
3347 {
3348 	vnode_t *vp;
3349 	znode_t *dzp;
3350 	vnode_t *resourcevp = NULL;
3351 	znode_t *sharedir;
3352 	zfsvfs_t *zfsvfs;
3353 	nvlist_t *nvlist;
3354 	char *src, *target;
3355 	vattr_t vattr;
3356 	vsecattr_t vsec;
3357 	int error = 0;
3358 
3359 	if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
3360 	    NO_FOLLOW, NULL, &vp)) != 0)
3361 		return (error);
3362 
3363 	/* Now make sure mntpnt and dataset are ZFS */
3364 
3365 	if (vp->v_vfsp->vfs_fstype != zfsfstype ||
3366 	    (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
3367 	    zc->zc_name) != 0)) {
3368 		VN_RELE(vp);
3369 		return (EINVAL);
3370 	}
3371 
3372 	dzp = VTOZ(vp);
3373 	zfsvfs = dzp->z_zfsvfs;
3374 	ZFS_ENTER(zfsvfs);
3375 
3376 	/*
3377 	 * Create share dir if its missing.
3378 	 */
3379 	mutex_enter(&zfsvfs->z_lock);
3380 	if (zfsvfs->z_shares_dir == 0) {
3381 		dmu_tx_t *tx;
3382 
3383 		tx = dmu_tx_create(zfsvfs->z_os);
3384 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
3385 		    ZFS_SHARES_DIR);
3386 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
3387 		error = dmu_tx_assign(tx, TXG_WAIT);
3388 		if (error) {
3389 			dmu_tx_abort(tx);
3390 		} else {
3391 			error = zfs_create_share_dir(zfsvfs, tx);
3392 			dmu_tx_commit(tx);
3393 		}
3394 		if (error) {
3395 			mutex_exit(&zfsvfs->z_lock);
3396 			VN_RELE(vp);
3397 			ZFS_EXIT(zfsvfs);
3398 			return (error);
3399 		}
3400 	}
3401 	mutex_exit(&zfsvfs->z_lock);
3402 
3403 	ASSERT(zfsvfs->z_shares_dir);
3404 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
3405 		VN_RELE(vp);
3406 		ZFS_EXIT(zfsvfs);
3407 		return (error);
3408 	}
3409 
3410 	switch (zc->zc_cookie) {
3411 	case ZFS_SMB_ACL_ADD:
3412 		vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
3413 		vattr.va_type = VREG;
3414 		vattr.va_mode = S_IFREG|0777;
3415 		vattr.va_uid = 0;
3416 		vattr.va_gid = 0;
3417 
3418 		vsec.vsa_mask = VSA_ACE;
3419 		vsec.vsa_aclentp = &full_access;
3420 		vsec.vsa_aclentsz = sizeof (full_access);
3421 		vsec.vsa_aclcnt = 1;
3422 
3423 		error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
3424 		    &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
3425 		if (resourcevp)
3426 			VN_RELE(resourcevp);
3427 		break;
3428 
3429 	case ZFS_SMB_ACL_REMOVE:
3430 		error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
3431 		    NULL, 0);
3432 		break;
3433 
3434 	case ZFS_SMB_ACL_RENAME:
3435 		if ((error = get_nvlist(zc->zc_nvlist_src,
3436 		    zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
3437 			VN_RELE(vp);
3438 			ZFS_EXIT(zfsvfs);
3439 			return (error);
3440 		}
3441 		if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
3442 		    nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
3443 		    &target)) {
3444 			VN_RELE(vp);
3445 			VN_RELE(ZTOV(sharedir));
3446 			ZFS_EXIT(zfsvfs);
3447 			return (error);
3448 		}
3449 		error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
3450 		    kcred, NULL, 0);
3451 		nvlist_free(nvlist);
3452 		break;
3453 
3454 	case ZFS_SMB_ACL_PURGE:
3455 		error = zfs_smb_acl_purge(sharedir);
3456 		break;
3457 
3458 	default:
3459 		error = EINVAL;
3460 		break;
3461 	}
3462 
3463 	VN_RELE(vp);
3464 	VN_RELE(ZTOV(sharedir));
3465 
3466 	ZFS_EXIT(zfsvfs);
3467 
3468 	return (error);
3469 }
3470 
3471 /*
3472  * inputs:
3473  * zc_name	name of filesystem
3474  * zc_value	short name of snap
3475  * zc_string	user-supplied tag for this reference
3476  * zc_cookie	recursive flag
3477  * zc_temphold	set if hold is temporary
3478  *
3479  * outputs:		none
3480  */
3481 static int
3482 zfs_ioc_hold(zfs_cmd_t *zc)
3483 {
3484 	boolean_t recursive = zc->zc_cookie;
3485 
3486 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3487 		return (EINVAL);
3488 
3489 	return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
3490 	    zc->zc_string, recursive, zc->zc_temphold));
3491 }
3492 
3493 /*
3494  * inputs:
3495  * zc_name	name of dataset from which we're releasing a user reference
3496  * zc_value	short name of snap
3497  * zc_string	user-supplied tag for this reference
3498  * zc_cookie	recursive flag
3499  *
3500  * outputs:		none
3501  */
3502 static int
3503 zfs_ioc_release(zfs_cmd_t *zc)
3504 {
3505 	boolean_t recursive = zc->zc_cookie;
3506 
3507 	if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3508 		return (EINVAL);
3509 
3510 	return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
3511 	    zc->zc_string, recursive));
3512 }
3513 
3514 /*
3515  * inputs:
3516  * zc_name		name of filesystem
3517  *
3518  * outputs:
3519  * zc_nvlist_src{_size}	nvlist of snapshot holds
3520  */
3521 static int
3522 zfs_ioc_get_holds(zfs_cmd_t *zc)
3523 {
3524 	nvlist_t *nvp;
3525 	int error;
3526 
3527 	if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
3528 		error = put_nvlist(zc, nvp);
3529 		nvlist_free(nvp);
3530 	}
3531 
3532 	return (error);
3533 }
3534 
3535 /*
3536  * pool create, destroy, and export don't log the history as part of
3537  * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
3538  * do the logging of those commands.
3539  */
3540 static zfs_ioc_vec_t zfs_ioc_vec[] = {
3541 	{ zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
3542 	    B_FALSE },
3543 	{ zfs_ioc_pool_destroy,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
3544 	    B_FALSE },
3545 	{ zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
3546 	    B_FALSE },
3547 	{ zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
3548 	    B_FALSE },
3549 	{ zfs_ioc_pool_configs,	zfs_secpolicy_none, NO_NAME, B_FALSE,
3550 	    B_FALSE },
3551 	{ zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
3552 	    B_FALSE },
3553 	{ zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
3554 	    B_FALSE },
3555 	{ zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME, B_TRUE,
3556 	    B_TRUE },
3557 	{ zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
3558 	    B_FALSE },
3559 	{ zfs_ioc_pool_upgrade,	zfs_secpolicy_config, POOL_NAME, B_TRUE,
3560 	    B_TRUE },
3561 	{ zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
3562 	    B_FALSE },
3563 	{ zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
3564 	    B_TRUE },
3565 	{ zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
3566 	    B_TRUE },
3567 	{ zfs_ioc_vdev_set_state, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
3568 	    B_FALSE },
3569 	{ zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
3570 	    B_TRUE },
3571 	{ zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
3572 	    B_TRUE },
3573 	{ zfs_ioc_vdev_setpath,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
3574 	    B_TRUE },
3575 	{ zfs_ioc_vdev_setfru,	zfs_secpolicy_config, POOL_NAME, B_FALSE,
3576 	    B_TRUE },
3577 	{ zfs_ioc_objset_stats,	zfs_secpolicy_read, DATASET_NAME, B_FALSE,
3578 	    B_FALSE },
3579 	{ zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
3580 	    B_FALSE },
3581 	{ zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
3582 	    B_FALSE },
3583 	{ zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
3584 	    B_FALSE },
3585 	{ zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE, B_TRUE },
3586 	{ zfs_ioc_create_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE,
3587 	    B_FALSE },
3588 	{ zfs_ioc_remove_minor,	zfs_secpolicy_minor, DATASET_NAME, B_FALSE,
3589 	    B_FALSE },
3590 	{ zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE, B_TRUE },
3591 	{ zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
3592 	    B_TRUE},
3593 	{ zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
3594 	    B_TRUE },
3595 	{ zfs_ioc_rename, zfs_secpolicy_rename,	DATASET_NAME, B_TRUE, B_TRUE },
3596 	{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE, B_TRUE },
3597 	{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE, B_FALSE },
3598 	{ zfs_ioc_inject_fault,	zfs_secpolicy_inject, NO_NAME, B_FALSE,
3599 	    B_FALSE },
3600 	{ zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
3601 	    B_FALSE },
3602 	{ zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
3603 	    B_FALSE },
3604 	{ zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
3605 	    B_FALSE },
3606 	{ zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE, B_FALSE },
3607 	{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
3608 	    B_TRUE },
3609 	{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy,	DATASET_NAME, B_TRUE,
3610 	    B_TRUE },
3611 	{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
3612 	    B_TRUE },
3613 	{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_config, POOL_NAME, B_FALSE,
3614 	    B_FALSE },
3615 	{ zfs_ioc_obj_to_path, zfs_secpolicy_config, DATASET_NAME, B_FALSE,
3616 	    B_TRUE },
3617 	{ zfs_ioc_pool_set_props, zfs_secpolicy_config,	POOL_NAME, B_TRUE,
3618 	    B_TRUE },
3619 	{ zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
3620 	    B_FALSE },
3621 	{ zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
3622 	    B_TRUE },
3623 	{ zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
3624 	    B_FALSE },
3625 	{ zfs_ioc_iscsi_perm_check, zfs_secpolicy_iscsi, DATASET_NAME, B_FALSE,
3626 	    B_FALSE },
3627 	{ zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE, B_FALSE },
3628 	{ zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
3629 	    B_TRUE },
3630 	{ zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
3631 	    B_FALSE },
3632 	{ zfs_ioc_userspace_one, zfs_secpolicy_userspace_one,
3633 	    DATASET_NAME, B_FALSE, B_FALSE },
3634 	{ zfs_ioc_userspace_many, zfs_secpolicy_userspace_many,
3635 	    DATASET_NAME, B_FALSE, B_FALSE },
3636 	{ zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
3637 	    DATASET_NAME, B_FALSE, B_TRUE },
3638 	{ zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE, B_TRUE },
3639 	{ zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
3640 	    B_TRUE },
3641 	{ zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
3642 	    B_TRUE }
3643 };
3644 
3645 int
3646 pool_status_check(const char *name, zfs_ioc_namecheck_t type)
3647 {
3648 	spa_t *spa;
3649 	int error;
3650 
3651 	ASSERT(type == POOL_NAME || type == DATASET_NAME);
3652 
3653 	error = spa_open(name, &spa, FTAG);
3654 	if (error == 0) {
3655 		if (spa_suspended(spa))
3656 			error = EAGAIN;
3657 		spa_close(spa, FTAG);
3658 	}
3659 	return (error);
3660 }
3661 
3662 static int
3663 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
3664 {
3665 	zfs_cmd_t *zc;
3666 	uint_t vec;
3667 	int error, rc;
3668 
3669 	if (getminor(dev) != 0)
3670 		return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
3671 
3672 	vec = cmd - ZFS_IOC;
3673 	ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
3674 
3675 	if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
3676 		return (EINVAL);
3677 
3678 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
3679 
3680 	error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
3681 
3682 	if (error == 0)
3683 		error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
3684 
3685 	/*
3686 	 * Ensure that all pool/dataset names are valid before we pass down to
3687 	 * the lower layers.
3688 	 */
3689 	if (error == 0) {
3690 		zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
3691 		zc->zc_iflags = flag & FKIOCTL;
3692 		switch (zfs_ioc_vec[vec].zvec_namecheck) {
3693 		case POOL_NAME:
3694 			if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
3695 				error = EINVAL;
3696 			if (zfs_ioc_vec[vec].zvec_pool_check)
3697 				error = pool_status_check(zc->zc_name,
3698 				    zfs_ioc_vec[vec].zvec_namecheck);
3699 			break;
3700 
3701 		case DATASET_NAME:
3702 			if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
3703 				error = EINVAL;
3704 			if (zfs_ioc_vec[vec].zvec_pool_check)
3705 				error = pool_status_check(zc->zc_name,
3706 				    zfs_ioc_vec[vec].zvec_namecheck);
3707 			break;
3708 
3709 		case NO_NAME:
3710 			break;
3711 		}
3712 	}
3713 
3714 	if (error == 0)
3715 		error = zfs_ioc_vec[vec].zvec_func(zc);
3716 
3717 	rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
3718 	if (error == 0) {
3719 		error = rc;
3720 		if (zfs_ioc_vec[vec].zvec_his_log)
3721 			zfs_log_history(zc);
3722 	}
3723 
3724 	kmem_free(zc, sizeof (zfs_cmd_t));
3725 	return (error);
3726 }
3727 
3728 static int
3729 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
3730 {
3731 	if (cmd != DDI_ATTACH)
3732 		return (DDI_FAILURE);
3733 
3734 	if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
3735 	    DDI_PSEUDO, 0) == DDI_FAILURE)
3736 		return (DDI_FAILURE);
3737 
3738 	zfs_dip = dip;
3739 
3740 	ddi_report_dev(dip);
3741 
3742 	return (DDI_SUCCESS);
3743 }
3744 
3745 static int
3746 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3747 {
3748 	if (spa_busy() || zfs_busy() || zvol_busy())
3749 		return (DDI_FAILURE);
3750 
3751 	if (cmd != DDI_DETACH)
3752 		return (DDI_FAILURE);
3753 
3754 	zfs_dip = NULL;
3755 
3756 	ddi_prop_remove_all(dip);
3757 	ddi_remove_minor_node(dip, NULL);
3758 
3759 	return (DDI_SUCCESS);
3760 }
3761 
3762 /*ARGSUSED*/
3763 static int
3764 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
3765 {
3766 	switch (infocmd) {
3767 	case DDI_INFO_DEVT2DEVINFO:
3768 		*result = zfs_dip;
3769 		return (DDI_SUCCESS);
3770 
3771 	case DDI_INFO_DEVT2INSTANCE:
3772 		*result = (void *)0;
3773 		return (DDI_SUCCESS);
3774 	}
3775 
3776 	return (DDI_FAILURE);
3777 }
3778 
3779 /*
3780  * OK, so this is a little weird.
3781  *
3782  * /dev/zfs is the control node, i.e. minor 0.
3783  * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
3784  *
3785  * /dev/zfs has basically nothing to do except serve up ioctls,
3786  * so most of the standard driver entry points are in zvol.c.
3787  */
3788 static struct cb_ops zfs_cb_ops = {
3789 	zvol_open,	/* open */
3790 	zvol_close,	/* close */
3791 	zvol_strategy,	/* strategy */
3792 	nodev,		/* print */
3793 	zvol_dump,	/* dump */
3794 	zvol_read,	/* read */
3795 	zvol_write,	/* write */
3796 	zfsdev_ioctl,	/* ioctl */
3797 	nodev,		/* devmap */
3798 	nodev,		/* mmap */
3799 	nodev,		/* segmap */
3800 	nochpoll,	/* poll */
3801 	ddi_prop_op,	/* prop_op */
3802 	NULL,		/* streamtab */
3803 	D_NEW | D_MP | D_64BIT,		/* Driver compatibility flag */
3804 	CB_REV,		/* version */
3805 	nodev,		/* async read */
3806 	nodev,		/* async write */
3807 };
3808 
3809 static struct dev_ops zfs_dev_ops = {
3810 	DEVO_REV,	/* version */
3811 	0,		/* refcnt */
3812 	zfs_info,	/* info */
3813 	nulldev,	/* identify */
3814 	nulldev,	/* probe */
3815 	zfs_attach,	/* attach */
3816 	zfs_detach,	/* detach */
3817 	nodev,		/* reset */
3818 	&zfs_cb_ops,	/* driver operations */
3819 	NULL,		/* no bus operations */
3820 	NULL,		/* power */
3821 	ddi_quiesce_not_needed,	/* quiesce */
3822 };
3823 
3824 static struct modldrv zfs_modldrv = {
3825 	&mod_driverops,
3826 	"ZFS storage pool",
3827 	&zfs_dev_ops
3828 };
3829 
3830 static struct modlinkage modlinkage = {
3831 	MODREV_1,
3832 	(void *)&zfs_modlfs,
3833 	(void *)&zfs_modldrv,
3834 	NULL
3835 };
3836 
3837 
3838 uint_t zfs_fsyncer_key;
3839 extern uint_t rrw_tsd_key;
3840 
3841 int
3842 _init(void)
3843 {
3844 	int error;
3845 
3846 	spa_init(FREAD | FWRITE);
3847 	zfs_init();
3848 	zvol_init();
3849 
3850 	if ((error = mod_install(&modlinkage)) != 0) {
3851 		zvol_fini();
3852 		zfs_fini();
3853 		spa_fini();
3854 		return (error);
3855 	}
3856 
3857 	tsd_create(&zfs_fsyncer_key, NULL);
3858 	tsd_create(&rrw_tsd_key, NULL);
3859 
3860 	error = ldi_ident_from_mod(&modlinkage, &zfs_li);
3861 	ASSERT(error == 0);
3862 	mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
3863 
3864 	return (0);
3865 }
3866 
3867 int
3868 _fini(void)
3869 {
3870 	int error;
3871 
3872 	if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
3873 		return (EBUSY);
3874 
3875 	if ((error = mod_remove(&modlinkage)) != 0)
3876 		return (error);
3877 
3878 	zvol_fini();
3879 	zfs_fini();
3880 	spa_fini();
3881 	if (zfs_nfsshare_inited)
3882 		(void) ddi_modclose(nfs_mod);
3883 	if (zfs_smbshare_inited)
3884 		(void) ddi_modclose(smbsrv_mod);
3885 	if (zfs_nfsshare_inited || zfs_smbshare_inited)
3886 		(void) ddi_modclose(sharefs_mod);
3887 
3888 	tsd_destroy(&zfs_fsyncer_key);
3889 	ldi_ident_release(zfs_li);
3890 	zfs_li = NULL;
3891 	mutex_destroy(&zfs_share_lock);
3892 
3893 	return (error);
3894 }
3895 
3896 int
3897 _info(struct modinfo *modinfop)
3898 {
3899 	return (mod_info(&modlinkage, modinfop));
3900 }
3901