xref: /titanic_50/usr/src/lib/libzfs/common/libzfs_dataset.c (revision 1d03c31e0733adea0edef54f0d5d2ea9639ecd2a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <assert.h>
30 #include <ctype.h>
31 #include <errno.h>
32 #include <libdevinfo.h>
33 #include <libintl.h>
34 #include <math.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <unistd.h>
39 #include <zone.h>
40 #include <fcntl.h>
41 #include <sys/mntent.h>
42 #include <sys/mnttab.h>
43 #include <sys/mount.h>
44 #include <sys/avl.h>
45 #include <priv.h>
46 #include <pwd.h>
47 #include <grp.h>
48 #include <stddef.h>
49 #include <ucred.h>
50 
51 #include <sys/spa.h>
52 #include <sys/zio.h>
53 #include <sys/zap.h>
54 #include <libzfs.h>
55 
56 #include "zfs_namecheck.h"
57 #include "zfs_prop.h"
58 #include "libzfs_impl.h"
59 #include "zfs_deleg.h"
60 
61 static int create_parents(libzfs_handle_t *, char *, int);
62 static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
63 
64 /*
65  * Given a single type (not a mask of types), return the type in a human
66  * readable form.
67  */
68 const char *
69 zfs_type_to_name(zfs_type_t type)
70 {
71 	switch (type) {
72 	case ZFS_TYPE_FILESYSTEM:
73 		return (dgettext(TEXT_DOMAIN, "filesystem"));
74 	case ZFS_TYPE_SNAPSHOT:
75 		return (dgettext(TEXT_DOMAIN, "snapshot"));
76 	case ZFS_TYPE_VOLUME:
77 		return (dgettext(TEXT_DOMAIN, "volume"));
78 	}
79 
80 	return (NULL);
81 }
82 
83 /*
84  * Given a path and mask of ZFS types, return a string describing this dataset.
85  * This is used when we fail to open a dataset and we cannot get an exact type.
86  * We guess what the type would have been based on the path and the mask of
87  * acceptable types.
88  */
89 static const char *
90 path_to_str(const char *path, int types)
91 {
92 	/*
93 	 * When given a single type, always report the exact type.
94 	 */
95 	if (types == ZFS_TYPE_SNAPSHOT)
96 		return (dgettext(TEXT_DOMAIN, "snapshot"));
97 	if (types == ZFS_TYPE_FILESYSTEM)
98 		return (dgettext(TEXT_DOMAIN, "filesystem"));
99 	if (types == ZFS_TYPE_VOLUME)
100 		return (dgettext(TEXT_DOMAIN, "volume"));
101 
102 	/*
103 	 * The user is requesting more than one type of dataset.  If this is the
104 	 * case, consult the path itself.  If we're looking for a snapshot, and
105 	 * a '@' is found, then report it as "snapshot".  Otherwise, remove the
106 	 * snapshot attribute and try again.
107 	 */
108 	if (types & ZFS_TYPE_SNAPSHOT) {
109 		if (strchr(path, '@') != NULL)
110 			return (dgettext(TEXT_DOMAIN, "snapshot"));
111 		return (path_to_str(path, types & ~ZFS_TYPE_SNAPSHOT));
112 	}
113 
114 
115 	/*
116 	 * The user has requested either filesystems or volumes.
117 	 * We have no way of knowing a priori what type this would be, so always
118 	 * report it as "filesystem" or "volume", our two primitive types.
119 	 */
120 	if (types & ZFS_TYPE_FILESYSTEM)
121 		return (dgettext(TEXT_DOMAIN, "filesystem"));
122 
123 	assert(types & ZFS_TYPE_VOLUME);
124 	return (dgettext(TEXT_DOMAIN, "volume"));
125 }
126 
127 /*
128  * Validate a ZFS path.  This is used even before trying to open the dataset, to
129  * provide a more meaningful error message.  We place a more useful message in
130  * 'buf' detailing exactly why the name was not valid.
131  */
132 static int
133 zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type)
134 {
135 	namecheck_err_t why;
136 	char what;
137 
138 	if (dataset_namecheck(path, &why, &what) != 0) {
139 		if (hdl != NULL) {
140 			switch (why) {
141 			case NAME_ERR_TOOLONG:
142 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
143 				    "name is too long"));
144 				break;
145 
146 			case NAME_ERR_LEADING_SLASH:
147 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
148 				    "leading slash in name"));
149 				break;
150 
151 			case NAME_ERR_EMPTY_COMPONENT:
152 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
153 				    "empty component in name"));
154 				break;
155 
156 			case NAME_ERR_TRAILING_SLASH:
157 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
158 				    "trailing slash in name"));
159 				break;
160 
161 			case NAME_ERR_INVALCHAR:
162 				zfs_error_aux(hdl,
163 				    dgettext(TEXT_DOMAIN, "invalid character "
164 				    "'%c' in name"), what);
165 				break;
166 
167 			case NAME_ERR_MULTIPLE_AT:
168 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
169 				    "multiple '@' delimiters in name"));
170 				break;
171 
172 			case NAME_ERR_NOLETTER:
173 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
174 				    "pool doesn't begin with a letter"));
175 				break;
176 
177 			case NAME_ERR_RESERVED:
178 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
179 				    "name is reserved"));
180 				break;
181 
182 			case NAME_ERR_DISKLIKE:
183 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
184 				    "reserved disk name"));
185 				break;
186 			}
187 		}
188 
189 		return (0);
190 	}
191 
192 	if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) {
193 		if (hdl != NULL)
194 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
195 			    "snapshot delimiter '@' in filesystem name"));
196 		return (0);
197 	}
198 
199 	if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) {
200 		if (hdl != NULL)
201 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
202 			    "missing '@' delimiter in snapshot name"));
203 		return (0);
204 	}
205 
206 	return (-1);
207 }
208 
209 int
210 zfs_name_valid(const char *name, zfs_type_t type)
211 {
212 	return (zfs_validate_name(NULL, name, type));
213 }
214 
215 /*
216  * This function takes the raw DSL properties, and filters out the user-defined
217  * properties into a separate nvlist.
218  */
219 static nvlist_t *
220 process_user_props(zfs_handle_t *zhp, nvlist_t *props)
221 {
222 	libzfs_handle_t *hdl = zhp->zfs_hdl;
223 	nvpair_t *elem;
224 	nvlist_t *propval;
225 	nvlist_t *nvl;
226 
227 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
228 		(void) no_memory(hdl);
229 		return (NULL);
230 	}
231 
232 	elem = NULL;
233 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
234 		if (!zfs_prop_user(nvpair_name(elem)))
235 			continue;
236 
237 		verify(nvpair_value_nvlist(elem, &propval) == 0);
238 		if (nvlist_add_nvlist(nvl, nvpair_name(elem), propval) != 0) {
239 			nvlist_free(nvl);
240 			(void) no_memory(hdl);
241 			return (NULL);
242 		}
243 	}
244 
245 	return (nvl);
246 }
247 
248 /*
249  * Utility function to gather stats (objset and zpl) for the given object.
250  */
251 static int
252 get_stats(zfs_handle_t *zhp)
253 {
254 	zfs_cmd_t zc = { 0 };
255 	libzfs_handle_t *hdl = zhp->zfs_hdl;
256 	nvlist_t *allprops, *userprops;
257 
258 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
259 
260 	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
261 		return (-1);
262 
263 	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
264 		if (errno == ENOMEM) {
265 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
266 				zcmd_free_nvlists(&zc);
267 				return (-1);
268 			}
269 		} else {
270 			zcmd_free_nvlists(&zc);
271 			return (-1);
272 		}
273 	}
274 
275 	zhp->zfs_dmustats = zc.zc_objset_stats; /* structure assignment */
276 
277 	(void) strlcpy(zhp->zfs_root, zc.zc_value, sizeof (zhp->zfs_root));
278 
279 	if (zcmd_read_dst_nvlist(hdl, &zc, &allprops) != 0) {
280 		zcmd_free_nvlists(&zc);
281 		return (-1);
282 	}
283 
284 	zcmd_free_nvlists(&zc);
285 
286 	if ((userprops = process_user_props(zhp, allprops)) == NULL) {
287 		nvlist_free(allprops);
288 		return (-1);
289 	}
290 
291 	nvlist_free(zhp->zfs_props);
292 	nvlist_free(zhp->zfs_user_props);
293 
294 	zhp->zfs_props = allprops;
295 	zhp->zfs_user_props = userprops;
296 
297 	return (0);
298 }
299 
300 /*
301  * Refresh the properties currently stored in the handle.
302  */
303 void
304 zfs_refresh_properties(zfs_handle_t *zhp)
305 {
306 	(void) get_stats(zhp);
307 }
308 
309 /*
310  * Makes a handle from the given dataset name.  Used by zfs_open() and
311  * zfs_iter_* to create child handles on the fly.
312  */
313 zfs_handle_t *
314 make_dataset_handle(libzfs_handle_t *hdl, const char *path)
315 {
316 	zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
317 	char *logstr;
318 
319 	if (zhp == NULL)
320 		return (NULL);
321 
322 	zhp->zfs_hdl = hdl;
323 
324 	/*
325 	 * Preserve history log string.
326 	 * any changes performed here will be
327 	 * logged as an internal event.
328 	 */
329 	logstr = zhp->zfs_hdl->libzfs_log_str;
330 	zhp->zfs_hdl->libzfs_log_str = NULL;
331 top:
332 	(void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
333 
334 	if (get_stats(zhp) != 0) {
335 		zhp->zfs_hdl->libzfs_log_str = logstr;
336 		free(zhp);
337 		return (NULL);
338 	}
339 
340 	if (zhp->zfs_dmustats.dds_inconsistent) {
341 		zfs_cmd_t zc = { 0 };
342 
343 		/*
344 		 * If it is dds_inconsistent, then we've caught it in
345 		 * the middle of a 'zfs receive' or 'zfs destroy', and
346 		 * it is inconsistent from the ZPL's point of view, so
347 		 * can't be mounted.  However, it could also be that we
348 		 * have crashed in the middle of one of those
349 		 * operations, in which case we need to get rid of the
350 		 * inconsistent state.  We do that by either rolling
351 		 * back to the previous snapshot (which will fail if
352 		 * there is none), or destroying the filesystem.  Note
353 		 * that if we are still in the middle of an active
354 		 * 'receive' or 'destroy', then the rollback and destroy
355 		 * will fail with EBUSY and we will drive on as usual.
356 		 */
357 
358 		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
359 
360 		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
361 			(void) zvol_remove_link(hdl, zhp->zfs_name);
362 			zc.zc_objset_type = DMU_OST_ZVOL;
363 		} else {
364 			zc.zc_objset_type = DMU_OST_ZFS;
365 		}
366 
367 		/* If we can successfully roll it back, reget the stats */
368 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
369 			goto top;
370 		/*
371 		 * If we can sucessfully destroy it, pretend that it
372 		 * never existed.
373 		 */
374 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
375 			zhp->zfs_hdl->libzfs_log_str = logstr;
376 			free(zhp);
377 			errno = ENOENT;
378 			return (NULL);
379 		}
380 	}
381 
382 	/*
383 	 * We've managed to open the dataset and gather statistics.  Determine
384 	 * the high-level type.
385 	 */
386 	if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
387 		zhp->zfs_head_type = ZFS_TYPE_VOLUME;
388 	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
389 		zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
390 	else
391 		abort();
392 
393 	if (zhp->zfs_dmustats.dds_is_snapshot)
394 		zhp->zfs_type = ZFS_TYPE_SNAPSHOT;
395 	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
396 		zhp->zfs_type = ZFS_TYPE_VOLUME;
397 	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
398 		zhp->zfs_type = ZFS_TYPE_FILESYSTEM;
399 	else
400 		abort();	/* we should never see any other types */
401 
402 	zhp->zfs_hdl->libzfs_log_str = logstr;
403 	return (zhp);
404 }
405 
406 /*
407  * Opens the given snapshot, filesystem, or volume.   The 'types'
408  * argument is a mask of acceptable types.  The function will print an
409  * appropriate error message and return NULL if it can't be opened.
410  */
411 zfs_handle_t *
412 zfs_open(libzfs_handle_t *hdl, const char *path, int types)
413 {
414 	zfs_handle_t *zhp;
415 	char errbuf[1024];
416 
417 	(void) snprintf(errbuf, sizeof (errbuf),
418 	    dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
419 
420 	/*
421 	 * Validate the name before we even try to open it.
422 	 */
423 	if (!zfs_validate_name(hdl, path, ZFS_TYPE_DATASET)) {
424 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
425 		    "invalid dataset name"));
426 		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
427 		return (NULL);
428 	}
429 
430 	/*
431 	 * Try to get stats for the dataset, which will tell us if it exists.
432 	 */
433 	errno = 0;
434 	if ((zhp = make_dataset_handle(hdl, path)) == NULL) {
435 		(void) zfs_standard_error(hdl, errno, errbuf);
436 		return (NULL);
437 	}
438 
439 	if (!(types & zhp->zfs_type)) {
440 		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
441 		zfs_close(zhp);
442 		return (NULL);
443 	}
444 
445 	return (zhp);
446 }
447 
448 /*
449  * Release a ZFS handle.  Nothing to do but free the associated memory.
450  */
451 void
452 zfs_close(zfs_handle_t *zhp)
453 {
454 	if (zhp->zfs_mntopts)
455 		free(zhp->zfs_mntopts);
456 	nvlist_free(zhp->zfs_props);
457 	nvlist_free(zhp->zfs_user_props);
458 	free(zhp);
459 }
460 
461 
/*
 * Given an nvlist of properties to set, validates that they are correct, and
 * parses any numeric properties (index, boolean, etc) if they are specified as
 * strings.
 *
 *	hdl	library handle on which errors are recorded
 *	type	type of the dataset the properties are destined for;
 *		snapshots are rejected outright
 *	nvl	the caller's list of properties to validate (only read here)
 *	zoned	non-zero when the dataset's 'zoned' property is on; consulted
 *		for the mountpoint and sharenfs checks
 *	zhp	handle of the existing dataset being modified, or NULL (the
 *		volume-specific checks below treat non-NULL as "existing
 *		dataset")
 *	errbuf	message prefix passed to zfs_error() on failure
 *
 * Returns a newly allocated nvlist holding the validated properties, which
 * the caller owns, or NULL after recording an error on 'hdl'.
 */
static nvlist_t *
zfs_validate_properties(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
    uint64_t zoned, zfs_handle_t *zhp, const char *errbuf)
{
	nvpair_t *elem;
	uint64_t intval;
	char *strval;
	zfs_prop_t prop;
	nvlist_t *ret;

	if (type == ZFS_TYPE_SNAPSHOT) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "snapshot properties cannot be modified"));
		(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
		return (NULL);
	}

	if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
		(void) no_memory(hdl);
		return (NULL);
	}

	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
		const char *propname = nvpair_name(elem);

		/*
		 * Make sure this property is valid and applies to this type.
		 */
		if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
			/* Not a native property: it must be a user property. */
			if (!zfs_prop_user(propname)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "invalid property '%s'"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			/*
			 * If this is a user property, make sure it's a
			 * string, and that it's less than ZAP_MAXNAMELEN.
			 */
			if (nvpair_type(elem) != DATA_TYPE_STRING) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' must be a string"), propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "property name '%s' is too long"),
				    propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			/* User properties are copied through unchanged. */
			(void) nvpair_value_string(elem, &strval);
			if (nvlist_add_string(ret, propname, strval) != 0) {
				(void) no_memory(hdl);
				goto error;
			}
			continue;
		}

		if (!zfs_prop_valid_for_type(prop, type)) {
			zfs_error_aux(hdl,
			    dgettext(TEXT_DOMAIN, "'%s' does not "
			    "apply to datasets of this type"), propname);
			(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
			goto error;
		}

		/*
		 * Readonly properties cannot be set.  The single exception is
		 * volblocksize when no existing handle was supplied.
		 */
		if (zfs_prop_readonly(prop) &&
		    (prop != ZFS_PROP_VOLBLOCKSIZE || zhp != NULL)) {
			zfs_error_aux(hdl,
			    dgettext(TEXT_DOMAIN, "'%s' is readonly"),
			    propname);
			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
			goto error;
		}

		/*
		 * NOTE(review): zprop_parse_value() presumably fills in
		 * 'strval' or 'intval' and adds the parsed value to 'ret'
		 * (the volsize lookup in 'ret' after the loop relies on
		 * that) -- confirm against its definition.
		 */
		if (zprop_parse_value(hdl, elem, prop, type, ret,
		    &strval, &intval, errbuf) != 0)
			goto error;

		/*
		 * Perform some additional checks for specific properties.
		 */
		switch (prop) {
		case ZFS_PROP_VERSION:
		{
			int version;

			/* Nothing to compare against without a handle. */
			if (zhp == NULL)
				break;
			version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
			if (intval < version) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "Can not downgrade; already at version %u"),
				    version);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;
		}

		case ZFS_PROP_RECORDSIZE:
		case ZFS_PROP_VOLBLOCKSIZE:
			/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
			if (intval < SPA_MINBLOCKSIZE ||
			    intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' must be power of 2 from %u "
				    "to %uk"), propname,
				    (uint_t)SPA_MINBLOCKSIZE,
				    (uint_t)SPA_MAXBLOCKSIZE >> 10);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
			break;

		case ZFS_PROP_SHAREISCSI:
			/* Only these three literal values are accepted. */
			if (strcmp(strval, "off") != 0 &&
			    strcmp(strval, "on") != 0 &&
			    strcmp(strval, "type=disk") != 0) {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' must be 'on', 'off', or 'type=disk'"),
				    propname);
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}

			break;

		case ZFS_PROP_MOUNTPOINT:
		{
			namecheck_err_t why;

			/*
			 * 'none' and 'legacy' need no path validation and
			 * also skip the zone checks shared with sharenfs.
			 */
			if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 ||
			    strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0)
				break;

			if (mountpoint_namecheck(strval, &why)) {
				switch (why) {
				case NAME_ERR_LEADING_SLASH:
					zfs_error_aux(hdl,
					    dgettext(TEXT_DOMAIN,
					    "'%s' must be an absolute path, "
					    "'none', or 'legacy'"), propname);
					break;
				case NAME_ERR_TOOLONG:
					zfs_error_aux(hdl,
					    dgettext(TEXT_DOMAIN,
					    "component of '%s' is too long"),
					    propname);
					break;
				}
				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
				goto error;
			}
		}

			/*FALLTHRU*/

		case ZFS_PROP_SHARENFS:
			/*
			 * For the mountpoint and sharenfs properties, check if
			 * it can be set in a global/non-global zone based on
			 * the zoned property value:
			 *
			 *		global zone	    non-global zone
			 * --------------------------------------------------
			 * zoned=on	mountpoint (no)	    mountpoint (yes)
			 *		sharenfs (no)	    sharenfs (no)
			 *
			 * zoned=off	mountpoint (yes)	N/A
			 *		sharenfs (yes)
			 */
			if (zoned) {
				if (getzoneid() == GLOBAL_ZONEID) {
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "'%s' cannot be set on "
					    "dataset in a non-global zone"),
					    propname);
					(void) zfs_error(hdl, EZFS_ZONED,
					    errbuf);
					goto error;
				} else if (prop == ZFS_PROP_SHARENFS) {
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "'%s' cannot be set in "
					    "a non-global zone"), propname);
					(void) zfs_error(hdl, EZFS_ZONED,
					    errbuf);
					goto error;
				}
			} else if (getzoneid() != GLOBAL_ZONEID) {
				/*
				 * If zoned property is 'off', this must be in
				 * a global zone. If not, something is wrong.
				 */
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "'%s' cannot be set while dataset "
				    "'zoned' property is set"), propname);
				(void) zfs_error(hdl, EZFS_ZONED, errbuf);
				goto error;
			}

			/*
			 * At this point, it is legitimate to set the
			 * property. Now we want to make sure that the
			 * property value is valid if it is sharenfs.
			 */
			if (prop == ZFS_PROP_SHARENFS &&
			    strcmp(strval, "on") != 0 &&
			    strcmp(strval, "off") != 0) {

				/*
				 * Must be an NFS option string so
				 * init the libshare in order to
				 * enable the parser and then parse
				 * the options. We use the control API
				 * since we don't care about the
				 * current configuration and don't
				 * want the overhead of loading it
				 * until we actually do something.
				 */

				if (zfs_init_libshare(hdl,
				    SA_INIT_CONTROL_API) != SA_OK) {
					/*
					 * An error occurred so we can't do
					 * anything
					 */
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "'%s' cannot be set: problem "
					    "in share initialization"),
					    propname);
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}

				if (zfs_parse_options(strval, "nfs") != SA_OK) {
					/*
					 * There was an error in parsing so
					 * deal with it by issuing an error
					 * message and leaving after
					 * uninitializing the libshare
					 * interface.
					 */
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "'%s' cannot be set to invalid "
					    "options"), propname);
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					zfs_uninit_libshare(hdl);
					goto error;
				}
				zfs_uninit_libshare(hdl);
			}

			break;
		}

		/*
		 * For changes to existing volumes, we have some additional
		 * checks to enforce.
		 */
		if (type == ZFS_TYPE_VOLUME && zhp != NULL) {
			uint64_t volsize = zfs_prop_get_int(zhp,
			    ZFS_PROP_VOLSIZE);
			uint64_t blocksize = zfs_prop_get_int(zhp,
			    ZFS_PROP_VOLBLOCKSIZE);
			char buf[64];

			switch (prop) {
			case ZFS_PROP_RESERVATION:
				/* Compared against the volume's current size. */
				if (intval > volsize) {
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "'%s' is greater than current "
					    "volume size"), propname);
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}
				break;

			case ZFS_PROP_VOLSIZE:
				/*
				 * The new size must be a non-zero multiple of
				 * the volume's current block size.
				 */
				if (intval % blocksize != 0) {
					zfs_nicenum(blocksize, buf,
					    sizeof (buf));
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "'%s' must be a multiple of "
					    "volume block size (%s)"),
					    propname, buf);
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}

				if (intval == 0) {
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "'%s' cannot be zero"),
					    propname);
					(void) zfs_error(hdl, EZFS_BADPROP,
					    errbuf);
					goto error;
				}
				break;
			}
		}
	}

	/*
	 * If this is an existing volume, and someone is setting the volsize,
	 * make sure that it matches the reservation, or add it if necessary.
	 * Concretely: when the old reservation equalled the old volsize and
	 * the caller did not supply a reservation, one equal to the new
	 * volsize is added to the result.
	 */
	if (zhp != NULL && type == ZFS_TYPE_VOLUME &&
	    nvlist_lookup_uint64(ret, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
	    &intval) == 0) {
		uint64_t old_volsize = zfs_prop_get_int(zhp,
		    ZFS_PROP_VOLSIZE);
		uint64_t old_reservation = zfs_prop_get_int(zhp,
		    ZFS_PROP_RESERVATION);
		uint64_t new_reservation;

		if (old_volsize == old_reservation &&
		    nvlist_lookup_uint64(ret,
		    zfs_prop_to_name(ZFS_PROP_RESERVATION),
		    &new_reservation) != 0) {
			if (nvlist_add_uint64(ret,
			    zfs_prop_to_name(ZFS_PROP_RESERVATION),
			    intval) != 0) {
				(void) no_memory(hdl);
				goto error;
			}
		}
	}

	return (ret);

error:
	/* Every failure after allocation releases the partial result. */
	nvlist_free(ret);
	return (NULL);
}
812 
813 static int
814 zfs_get_perm_who(const char *who, zfs_deleg_who_type_t *who_type,
815     uint64_t *ret_who)
816 {
817 	struct passwd *pwd;
818 	struct group *grp;
819 	uid_t id;
820 
821 	if (*who_type == ZFS_DELEG_EVERYONE || *who_type == ZFS_DELEG_CREATE ||
822 	    *who_type == ZFS_DELEG_NAMED_SET) {
823 		*ret_who = -1;
824 		return (0);
825 	}
826 	if (who == NULL && !(*who_type == ZFS_DELEG_EVERYONE))
827 		return (EZFS_BADWHO);
828 
829 	if (*who_type == ZFS_DELEG_WHO_UNKNOWN &&
830 	    strcmp(who, "everyone") == 0) {
831 		*ret_who = -1;
832 		*who_type = ZFS_DELEG_EVERYONE;
833 		return (0);
834 	}
835 
836 	pwd = getpwnam(who);
837 	grp = getgrnam(who);
838 
839 	if ((*who_type == ZFS_DELEG_USER) && pwd) {
840 		*ret_who = pwd->pw_uid;
841 	} else if ((*who_type == ZFS_DELEG_GROUP) && grp) {
842 		*ret_who = grp->gr_gid;
843 	} else if (pwd) {
844 		*ret_who = pwd->pw_uid;
845 		*who_type = ZFS_DELEG_USER;
846 	} else if (grp) {
847 		*ret_who = grp->gr_gid;
848 		*who_type = ZFS_DELEG_GROUP;
849 	} else {
850 		char *end;
851 
852 		id = strtol(who, &end, 10);
853 		if (errno != 0 || *end != '\0') {
854 			return (EZFS_BADWHO);
855 		} else {
856 			*ret_who = id;
857 			if (*who_type == ZFS_DELEG_WHO_UNKNOWN)
858 				*who_type = ZFS_DELEG_USER;
859 		}
860 	}
861 
862 	return (0);
863 }
864 
865 static void
866 zfs_perms_add_to_nvlist(nvlist_t *who_nvp, char *name, nvlist_t *perms_nvp)
867 {
868 	if (perms_nvp != NULL) {
869 		verify(nvlist_add_nvlist(who_nvp,
870 		    name, perms_nvp) == 0);
871 	} else {
872 		verify(nvlist_add_boolean(who_nvp, name) == 0);
873 	}
874 }
875 
876 static void
877 helper(zfs_deleg_who_type_t who_type, uint64_t whoid, char *whostr,
878     zfs_deleg_inherit_t inherit, nvlist_t *who_nvp, nvlist_t *perms_nvp,
879     nvlist_t *sets_nvp)
880 {
881 	boolean_t do_perms, do_sets;
882 	char name[ZFS_MAX_DELEG_NAME];
883 
884 	do_perms = (nvlist_next_nvpair(perms_nvp, NULL) != NULL);
885 	do_sets = (nvlist_next_nvpair(sets_nvp, NULL) != NULL);
886 
887 	if (!do_perms && !do_sets)
888 		do_perms = do_sets = B_TRUE;
889 
890 	if (do_perms) {
891 		zfs_deleg_whokey(name, who_type, inherit,
892 		    (who_type == ZFS_DELEG_NAMED_SET) ?
893 		    whostr : (void *)&whoid);
894 		zfs_perms_add_to_nvlist(who_nvp, name, perms_nvp);
895 	}
896 	if (do_sets) {
897 		zfs_deleg_whokey(name, toupper(who_type), inherit,
898 		    (who_type == ZFS_DELEG_NAMED_SET) ?
899 		    whostr : (void *)&whoid);
900 		zfs_perms_add_to_nvlist(who_nvp, name, sets_nvp);
901 	}
902 }
903 
904 static void
905 zfs_perms_add_who_nvlist(nvlist_t *who_nvp, uint64_t whoid, void *whostr,
906     nvlist_t *perms_nvp, nvlist_t *sets_nvp,
907     zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit)
908 {
909 	if (who_type == ZFS_DELEG_NAMED_SET || who_type == ZFS_DELEG_CREATE) {
910 		helper(who_type, whoid, whostr, 0,
911 		    who_nvp, perms_nvp, sets_nvp);
912 	} else {
913 		if (inherit & ZFS_DELEG_PERM_LOCAL) {
914 			helper(who_type, whoid, whostr, ZFS_DELEG_LOCAL,
915 			    who_nvp, perms_nvp, sets_nvp);
916 		}
917 		if (inherit & ZFS_DELEG_PERM_DESCENDENT) {
918 			helper(who_type, whoid, whostr, ZFS_DELEG_DESCENDENT,
919 			    who_nvp, perms_nvp, sets_nvp);
920 		}
921 	}
922 }
923 
924 /*
925  * Construct nvlist to pass down to kernel for setting/removing permissions.
926  *
927  * The nvlist is constructed as a series of nvpairs with an optional embedded
928  * nvlist of permissions to remove or set.  The topmost nvpairs are the actual
929  * base attribute named stored in the dsl.
930  * Arguments:
931  *
932  * whostr:   is a comma separated list of users, groups, or a single set name.
933  *           whostr may be null for everyone or create perms.
934  * who_type: is the type of entry in whostr.  Typically this will be
935  *           ZFS_DELEG_WHO_UNKNOWN.
936  * perms:    comman separated list of permissions.  May be null if user
937  *           is requested to remove permissions by who.
938  * inherit:  Specifies the inheritance of the permissions.  Will be either
939  *           ZFS_DELEG_PERM_LOCAL and/or  ZFS_DELEG_PERM_DESCENDENT.
940  * nvp       The constructed nvlist to pass to zfs_perm_set().
941  *           The output nvp will look something like this.
942  *              ul$1234 -> {create ; destroy }
943  *              Ul$1234 -> { @myset }
944  *              s-$@myset - { snapshot; checksum; compression }
945  */
946 int
947 zfs_build_perms(zfs_handle_t *zhp, char *whostr, char *perms,
948     zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit, nvlist_t **nvp)
949 {
950 	nvlist_t *who_nvp;
951 	nvlist_t *perms_nvp = NULL;
952 	nvlist_t *sets_nvp = NULL;
953 	char errbuf[1024];
954 	char *who_tok, *perm;
955 	int error;
956 
957 	*nvp = NULL;
958 
959 	if (perms) {
960 		if ((error = nvlist_alloc(&perms_nvp,
961 		    NV_UNIQUE_NAME, 0)) != 0) {
962 			return (1);
963 		}
964 		if ((error = nvlist_alloc(&sets_nvp,
965 		    NV_UNIQUE_NAME, 0)) != 0) {
966 			nvlist_free(perms_nvp);
967 			return (1);
968 		}
969 	}
970 
971 	if ((error = nvlist_alloc(&who_nvp, NV_UNIQUE_NAME, 0)) != 0) {
972 		if (perms_nvp)
973 			nvlist_free(perms_nvp);
974 		if (sets_nvp)
975 			nvlist_free(sets_nvp);
976 		return (1);
977 	}
978 
979 	if (who_type == ZFS_DELEG_NAMED_SET) {
980 		namecheck_err_t why;
981 		char what;
982 
983 		if ((error = permset_namecheck(whostr, &why, &what)) != 0) {
984 			nvlist_free(who_nvp);
985 			if (perms_nvp)
986 				nvlist_free(perms_nvp);
987 			if (sets_nvp)
988 				nvlist_free(sets_nvp);
989 
990 			switch (why) {
991 			case NAME_ERR_NO_AT:
992 				zfs_error_aux(zhp->zfs_hdl,
993 				    dgettext(TEXT_DOMAIN,
994 				    "set definition must begin with an '@' "
995 				    "character"));
996 			}
997 			return (zfs_error(zhp->zfs_hdl,
998 			    EZFS_BADPERMSET, whostr));
999 		}
1000 	}
1001 
1002 	/*
1003 	 * Build up nvlist(s) of permissions.  Two nvlists are maintained.
1004 	 * The first nvlist perms_nvp will have normal permissions and the
1005 	 * other sets_nvp will have only permssion set names in it.
1006 	 */
1007 	for (perm = strtok(perms, ","); perm; perm = strtok(NULL, ",")) {
1008 		const char *perm_canonical = zfs_deleg_canonicalize_perm(perm);
1009 
1010 		if (perm_canonical) {
1011 			verify(nvlist_add_boolean(perms_nvp,
1012 			    perm_canonical) == 0);
1013 		} else if (perm[0] == '@') {
1014 			verify(nvlist_add_boolean(sets_nvp, perm) == 0);
1015 		} else {
1016 			nvlist_free(who_nvp);
1017 			nvlist_free(perms_nvp);
1018 			nvlist_free(sets_nvp);
1019 			return (zfs_error(zhp->zfs_hdl, EZFS_BADPERM, perm));
1020 		}
1021 	}
1022 
1023 	if (whostr && who_type != ZFS_DELEG_CREATE) {
1024 		who_tok = strtok(whostr, ",");
1025 		if (who_tok == NULL) {
1026 			nvlist_free(who_nvp);
1027 			if (perms_nvp)
1028 				nvlist_free(perms_nvp);
1029 			if (sets_nvp)
1030 				nvlist_free(sets_nvp);
1031 			(void) snprintf(errbuf, sizeof (errbuf),
1032 			    dgettext(TEXT_DOMAIN, "Who string is NULL"),
1033 			    whostr);
1034 			return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
1035 		}
1036 	}
1037 
1038 	/*
1039 	 * Now create the nvlist(s)
1040 	 */
1041 	do {
1042 		uint64_t who_id;
1043 
1044 		error = zfs_get_perm_who(who_tok, &who_type,
1045 		    &who_id);
1046 		if (error) {
1047 			nvlist_free(who_nvp);
1048 			if (perms_nvp)
1049 				nvlist_free(perms_nvp);
1050 			if (sets_nvp)
1051 				nvlist_free(sets_nvp);
1052 			(void) snprintf(errbuf, sizeof (errbuf),
1053 			    dgettext(TEXT_DOMAIN,
1054 			    "Unable to determine uid/gid for "
1055 			    "%s "), who_tok);
1056 			return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
1057 		}
1058 
1059 		/*
1060 		 * add entries for both local and descendent when required
1061 		 */
1062 		zfs_perms_add_who_nvlist(who_nvp, who_id, who_tok,
1063 		    perms_nvp, sets_nvp, who_type, inherit);
1064 
1065 	} while (who_tok = strtok(NULL, ","));
1066 	*nvp = who_nvp;
1067 	return (0);
1068 }
1069 
1070 static int
1071 zfs_perm_set_common(zfs_handle_t *zhp, nvlist_t *nvp, boolean_t unset)
1072 {
1073 	zfs_cmd_t zc = { 0 };
1074 	int error;
1075 	char errbuf[1024];
1076 
1077 	(void) snprintf(errbuf, sizeof (errbuf),
1078 	    dgettext(TEXT_DOMAIN, "Cannot update 'allows' for '%s'"),
1079 	    zhp->zfs_name);
1080 
1081 	if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, nvp))
1082 		return (-1);
1083 
1084 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1085 	zc.zc_perm_action = unset;
1086 
1087 	error = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SET_FSACL, &zc);
1088 	if (error && errno == ENOTSUP) {
1089 		(void) snprintf(errbuf, sizeof (errbuf),
1090 		    gettext("Pool must be upgraded to use 'allow/unallow'"));
1091 		zcmd_free_nvlists(&zc);
1092 		return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION, errbuf));
1093 	} else if (error) {
1094 		return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf));
1095 	}
1096 	zcmd_free_nvlists(&zc);
1097 
1098 	return (error);
1099 }
1100 
1101 int
1102 zfs_perm_set(zfs_handle_t *zhp, nvlist_t *nvp)
1103 {
1104 	return (zfs_perm_set_common(zhp, nvp, B_FALSE));
1105 }
1106 
1107 int
1108 zfs_perm_remove(zfs_handle_t *zhp, nvlist_t *perms)
1109 {
1110 	return (zfs_perm_set_common(zhp, perms, B_TRUE));
1111 }
1112 
1113 static int
1114 perm_compare(const void *arg1, const void *arg2)
1115 {
1116 	const zfs_perm_node_t *node1 = arg1;
1117 	const zfs_perm_node_t *node2 = arg2;
1118 	int ret;
1119 
1120 	ret = strcmp(node1->z_pname, node2->z_pname);
1121 
1122 	if (ret > 0)
1123 		return (1);
1124 	if (ret < 0)
1125 		return (-1);
1126 	else
1127 		return (0);
1128 }
1129 
1130 static void
1131 zfs_destroy_perm_tree(avl_tree_t *tree)
1132 {
1133 	zfs_perm_node_t *permnode;
1134 	void *cookie;
1135 
1136 	cookie = NULL;
1137 	while ((permnode = avl_destroy_nodes(tree,  &cookie)) != NULL) {
1138 		avl_remove(tree, permnode);
1139 		free(permnode);
1140 	}
1141 }
1142 
1143 static void
1144 zfs_destroy_tree(avl_tree_t *tree)
1145 {
1146 	zfs_allow_node_t *allownode;
1147 	void *cookie;
1148 
1149 	cookie = NULL;
1150 	while ((allownode = avl_destroy_nodes(tree, &cookie)) != NULL) {
1151 		zfs_destroy_perm_tree(&allownode->z_localdescend);
1152 		zfs_destroy_perm_tree(&allownode->z_local);
1153 		zfs_destroy_perm_tree(&allownode->z_descend);
1154 		avl_remove(tree, allownode);
1155 		free(allownode);
1156 	}
1157 }
1158 
1159 void
1160 zfs_free_allows(zfs_allow_t *allow)
1161 {
1162 	zfs_allow_t *allownext;
1163 	zfs_allow_t *freeallow;
1164 
1165 	allownext = allow;
1166 	while (allownext) {
1167 		zfs_destroy_tree(&allownext->z_sets);
1168 		zfs_destroy_tree(&allownext->z_crperms);
1169 		zfs_destroy_tree(&allownext->z_user);
1170 		zfs_destroy_tree(&allownext->z_group);
1171 		zfs_destroy_tree(&allownext->z_everyone);
1172 		freeallow = allownext;
1173 		allownext = allownext->z_next;
1174 		free(freeallow);
1175 	}
1176 }
1177 
1178 static zfs_allow_t *
1179 zfs_alloc_perm_tree(zfs_handle_t *zhp, zfs_allow_t *prev, char *setpoint)
1180 {
1181 	zfs_allow_t *ptree;
1182 
1183 	if ((ptree = zfs_alloc(zhp->zfs_hdl,
1184 	    sizeof (zfs_allow_t))) == NULL) {
1185 		return (NULL);
1186 	}
1187 
1188 	(void) strlcpy(ptree->z_setpoint, setpoint, sizeof (ptree->z_setpoint));
1189 	avl_create(&ptree->z_sets,
1190 	    perm_compare, sizeof (zfs_allow_node_t),
1191 	    offsetof(zfs_allow_node_t, z_node));
1192 	avl_create(&ptree->z_crperms,
1193 	    perm_compare, sizeof (zfs_allow_node_t),
1194 	    offsetof(zfs_allow_node_t, z_node));
1195 	avl_create(&ptree->z_user,
1196 	    perm_compare, sizeof (zfs_allow_node_t),
1197 	    offsetof(zfs_allow_node_t, z_node));
1198 	avl_create(&ptree->z_group,
1199 	    perm_compare, sizeof (zfs_allow_node_t),
1200 	    offsetof(zfs_allow_node_t, z_node));
1201 	avl_create(&ptree->z_everyone,
1202 	    perm_compare, sizeof (zfs_allow_node_t),
1203 	    offsetof(zfs_allow_node_t, z_node));
1204 
1205 	if (prev)
1206 		prev->z_next = ptree;
1207 	ptree->z_next = NULL;
1208 	return (ptree);
1209 }
1210 
1211 /*
1212  * Add permissions to the appropriate AVL permission tree.
1213  * The appropriate tree may not be the requested tree.
1214  * For example if ld indicates a local permission, but
1215  * same permission also exists as a descendent permission
1216  * then the permission will be removed from the descendent
1217  * tree and add the the local+descendent tree.
1218  */
1219 static int
1220 zfs_coalesce_perm(zfs_handle_t *zhp, zfs_allow_node_t *allownode,
1221     char *perm, char ld)
1222 {
1223 	zfs_perm_node_t pnode, *permnode, *permnode2;
1224 	zfs_perm_node_t *newnode;
1225 	avl_index_t where, where2;
1226 	avl_tree_t *tree, *altree;
1227 
1228 	(void) strlcpy(pnode.z_pname, perm, sizeof (pnode.z_pname));
1229 
1230 	if (ld == ZFS_DELEG_NA) {
1231 		tree =  &allownode->z_localdescend;
1232 		altree = &allownode->z_descend;
1233 	} else if (ld == ZFS_DELEG_LOCAL) {
1234 		tree = &allownode->z_local;
1235 		altree = &allownode->z_descend;
1236 	} else {
1237 		tree = &allownode->z_descend;
1238 		altree = &allownode->z_local;
1239 	}
1240 	permnode = avl_find(tree, &pnode, &where);
1241 	permnode2 = avl_find(altree, &pnode, &where2);
1242 
1243 	if (permnode2) {
1244 		avl_remove(altree, permnode2);
1245 		free(permnode2);
1246 		if (permnode == NULL) {
1247 			tree =  &allownode->z_localdescend;
1248 		}
1249 	}
1250 
1251 	/*
1252 	 * Now insert new permission in either requested location
1253 	 * local/descendent or into ld when perm will exist in both.
1254 	 */
1255 	if (permnode == NULL) {
1256 		if ((newnode = zfs_alloc(zhp->zfs_hdl,
1257 		    sizeof (zfs_perm_node_t))) == NULL) {
1258 			return (-1);
1259 		}
1260 		*newnode = pnode;
1261 		avl_add(tree, newnode);
1262 	}
1263 	return (0);
1264 }
1265 
1266 /*
1267  * Uggh, this is going to be a bit complicated.
1268  * we have an nvlist coming out of the kernel that
1269  * will indicate where the permission is set and then
1270  * it will contain allow of the various "who's", and what
1271  * their permissions are.  To further complicate this
1272  * we will then have to coalesce the local,descendent
1273  * and local+descendent permissions where appropriate.
1274  * The kernel only knows about a permission as being local
1275  * or descendent, but not both.
1276  *
1277  * In order to make this easier for zfs_main to deal with
1278  * a series of AVL trees will be used to maintain
1279  * all of this, primarily for sorting purposes as well
1280  * as the ability to quickly locate a specific entry.
1281  *
1282  * What we end up with are tree's for sets, create perms,
1283  * user, groups and everyone.  With each of those trees
1284  * we have subtrees for local, descendent and local+descendent
1285  * permissions.
1286  */
1287 int
1288 zfs_perm_get(zfs_handle_t *zhp, zfs_allow_t **zfs_perms)
1289 {
1290 	zfs_cmd_t zc = { 0 };
1291 	int error;
1292 	nvlist_t *nvlist;
1293 	nvlist_t *permnv, *sourcenv;
1294 	nvpair_t *who_pair, *source_pair;
1295 	nvpair_t *perm_pair;
1296 	char errbuf[1024];
1297 	zfs_allow_t *zallowp, *newallowp;
1298 	char  ld;
1299 	char *nvpname;
1300 	uid_t	uid;
1301 	gid_t	gid;
1302 	avl_tree_t *tree;
1303 	avl_index_t where;
1304 
1305 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1306 
1307 	if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
1308 		return (-1);
1309 
1310 	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) {
1311 		if (errno == ENOMEM) {
1312 			if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, &zc) != 0) {
1313 				zcmd_free_nvlists(&zc);
1314 				return (-1);
1315 			}
1316 		} else if (errno == ENOTSUP) {
1317 			zcmd_free_nvlists(&zc);
1318 			(void) snprintf(errbuf, sizeof (errbuf),
1319 			    gettext("Pool must be upgraded to use 'allow'"));
1320 			return (zfs_error(zhp->zfs_hdl,
1321 			    EZFS_BADVERSION, errbuf));
1322 		} else {
1323 			zcmd_free_nvlists(&zc);
1324 			return (-1);
1325 		}
1326 	}
1327 
1328 	if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &nvlist) != 0) {
1329 		zcmd_free_nvlists(&zc);
1330 		return (-1);
1331 	}
1332 
1333 	zcmd_free_nvlists(&zc);
1334 
1335 	source_pair = nvlist_next_nvpair(nvlist, NULL);
1336 
1337 	if (source_pair == NULL) {
1338 		*zfs_perms = NULL;
1339 		return (0);
1340 	}
1341 
1342 	*zfs_perms = zfs_alloc_perm_tree(zhp, NULL, nvpair_name(source_pair));
1343 	if (*zfs_perms == NULL) {
1344 		return (0);
1345 	}
1346 
1347 	zallowp = *zfs_perms;
1348 
1349 	for (;;) {
1350 		struct passwd *pwd;
1351 		struct group *grp;
1352 		zfs_allow_node_t *allownode;
1353 		zfs_allow_node_t  findallownode;
1354 		zfs_allow_node_t *newallownode;
1355 
1356 		(void) strlcpy(zallowp->z_setpoint,
1357 		    nvpair_name(source_pair),
1358 		    sizeof (zallowp->z_setpoint));
1359 
1360 		if ((error = nvpair_value_nvlist(source_pair, &sourcenv)) != 0)
1361 			goto abort;
1362 
1363 		/*
1364 		 * Make sure nvlist is composed correctly
1365 		 */
1366 		if (zfs_deleg_verify_nvlist(sourcenv)) {
1367 			goto abort;
1368 		}
1369 
1370 		who_pair = nvlist_next_nvpair(sourcenv, NULL);
1371 		if (who_pair == NULL) {
1372 			goto abort;
1373 		}
1374 
1375 		do {
1376 			error = nvpair_value_nvlist(who_pair, &permnv);
1377 			if (error) {
1378 				goto abort;
1379 			}
1380 
1381 			/*
1382 			 * First build up the key to use
1383 			 * for looking up in the various
1384 			 * who trees.
1385 			 */
1386 			ld = nvpair_name(who_pair)[1];
1387 			nvpname = nvpair_name(who_pair);
1388 			switch (nvpair_name(who_pair)[0]) {
1389 			case ZFS_DELEG_USER:
1390 			case ZFS_DELEG_USER_SETS:
1391 				tree = &zallowp->z_user;
1392 				uid = atol(&nvpname[3]);
1393 				pwd = getpwuid(uid);
1394 				(void) snprintf(findallownode.z_key,
1395 				    sizeof (findallownode.z_key), "user %s",
1396 				    (pwd) ? pwd->pw_name :
1397 				    &nvpair_name(who_pair)[3]);
1398 				break;
1399 			case ZFS_DELEG_GROUP:
1400 			case ZFS_DELEG_GROUP_SETS:
1401 				tree = &zallowp->z_group;
1402 				gid = atol(&nvpname[3]);
1403 				grp = getgrgid(gid);
1404 				(void) snprintf(findallownode.z_key,
1405 				    sizeof (findallownode.z_key), "group %s",
1406 				    (grp) ? grp->gr_name :
1407 				    &nvpair_name(who_pair)[3]);
1408 				break;
1409 			case ZFS_DELEG_CREATE:
1410 			case ZFS_DELEG_CREATE_SETS:
1411 				tree = &zallowp->z_crperms;
1412 				(void) strlcpy(findallownode.z_key, "",
1413 				    sizeof (findallownode.z_key));
1414 				break;
1415 			case ZFS_DELEG_EVERYONE:
1416 			case ZFS_DELEG_EVERYONE_SETS:
1417 				(void) snprintf(findallownode.z_key,
1418 				    sizeof (findallownode.z_key), "everyone");
1419 				tree = &zallowp->z_everyone;
1420 				break;
1421 			case ZFS_DELEG_NAMED_SET:
1422 			case ZFS_DELEG_NAMED_SET_SETS:
1423 				(void) snprintf(findallownode.z_key,
1424 				    sizeof (findallownode.z_key), "%s",
1425 				    &nvpair_name(who_pair)[3]);
1426 				tree = &zallowp->z_sets;
1427 				break;
1428 			}
1429 
1430 			/*
1431 			 * Place who in tree
1432 			 */
1433 			allownode = avl_find(tree, &findallownode, &where);
1434 			if (allownode == NULL) {
1435 				if ((newallownode = zfs_alloc(zhp->zfs_hdl,
1436 				    sizeof (zfs_allow_node_t))) == NULL) {
1437 					goto abort;
1438 				}
1439 				avl_create(&newallownode->z_localdescend,
1440 				    perm_compare,
1441 				    sizeof (zfs_perm_node_t),
1442 				    offsetof(zfs_perm_node_t, z_node));
1443 				avl_create(&newallownode->z_local,
1444 				    perm_compare,
1445 				    sizeof (zfs_perm_node_t),
1446 				    offsetof(zfs_perm_node_t, z_node));
1447 				avl_create(&newallownode->z_descend,
1448 				    perm_compare,
1449 				    sizeof (zfs_perm_node_t),
1450 				    offsetof(zfs_perm_node_t, z_node));
1451 				(void) strlcpy(newallownode->z_key,
1452 				    findallownode.z_key,
1453 				    sizeof (findallownode.z_key));
1454 				avl_insert(tree, newallownode, where);
1455 				allownode = newallownode;
1456 			}
1457 
1458 			/*
1459 			 * Now iterate over the permissions and
1460 			 * place them in the appropriate local,
1461 			 * descendent or local+descendent tree.
1462 			 *
1463 			 * The permissions are added to the tree
1464 			 * via zfs_coalesce_perm().
1465 			 */
1466 			perm_pair = nvlist_next_nvpair(permnv, NULL);
1467 			if (perm_pair == NULL)
1468 				goto abort;
1469 			do {
1470 				if (zfs_coalesce_perm(zhp, allownode,
1471 				    nvpair_name(perm_pair), ld) != 0)
1472 					goto abort;
1473 			} while (perm_pair = nvlist_next_nvpair(permnv,
1474 			    perm_pair));
1475 		} while (who_pair = nvlist_next_nvpair(sourcenv, who_pair));
1476 
1477 		source_pair = nvlist_next_nvpair(nvlist, source_pair);
1478 		if (source_pair == NULL)
1479 			break;
1480 
1481 		/*
1482 		 * allocate another node from the link list of
1483 		 * zfs_allow_t structures
1484 		 */
1485 		newallowp = zfs_alloc_perm_tree(zhp, zallowp,
1486 		    nvpair_name(source_pair));
1487 		if (newallowp == NULL) {
1488 			goto abort;
1489 		}
1490 		zallowp = newallowp;
1491 	}
1492 	nvlist_free(nvlist);
1493 	return (0);
1494 abort:
1495 	zfs_free_allows(*zfs_perms);
1496 	nvlist_free(nvlist);
1497 	return (-1);
1498 }
1499 
1500 /*
1501  * Given a property name and value, set the property for the given dataset.
1502  */
1503 int
1504 zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
1505 {
1506 	zfs_cmd_t zc = { 0 };
1507 	int ret = -1;
1508 	prop_changelist_t *cl = NULL;
1509 	char errbuf[1024];
1510 	libzfs_handle_t *hdl = zhp->zfs_hdl;
1511 	nvlist_t *nvl = NULL, *realprops;
1512 	zfs_prop_t prop;
1513 
1514 	(void) snprintf(errbuf, sizeof (errbuf),
1515 	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
1516 	    zhp->zfs_name);
1517 
1518 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
1519 	    nvlist_add_string(nvl, propname, propval) != 0) {
1520 		(void) no_memory(hdl);
1521 		goto error;
1522 	}
1523 
1524 	if ((realprops = zfs_validate_properties(hdl, zhp->zfs_type, nvl,
1525 	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL)
1526 		goto error;
1527 
1528 	nvlist_free(nvl);
1529 	nvl = realprops;
1530 
1531 	prop = zfs_name_to_prop(propname);
1532 
1533 	if ((cl = changelist_gather(zhp, prop, 0)) == NULL)
1534 		goto error;
1535 
1536 	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
1537 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1538 		    "child dataset with inherited mountpoint is used "
1539 		    "in a non-global zone"));
1540 		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
1541 		goto error;
1542 	}
1543 
1544 	if ((ret = changelist_prefix(cl)) != 0)
1545 		goto error;
1546 
1547 	/*
1548 	 * Execute the corresponding ioctl() to set this property.
1549 	 */
1550 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1551 
1552 	if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0)
1553 		goto error;
1554 
1555 	ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
1556 
1557 	if (ret != 0) {
1558 		switch (errno) {
1559 
1560 		case ENOSPC:
1561 			/*
1562 			 * For quotas and reservations, ENOSPC indicates
1563 			 * something different; setting a quota or reservation
1564 			 * doesn't use any disk space.
1565 			 */
1566 			switch (prop) {
1567 			case ZFS_PROP_QUOTA:
1568 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1569 				    "size is less than current used or "
1570 				    "reserved space"));
1571 				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
1572 				break;
1573 
1574 			case ZFS_PROP_RESERVATION:
1575 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1576 				    "size is greater than available space"));
1577 				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
1578 				break;
1579 
1580 			default:
1581 				(void) zfs_standard_error(hdl, errno, errbuf);
1582 				break;
1583 			}
1584 			break;
1585 
1586 		case EBUSY:
1587 			if (prop == ZFS_PROP_VOLBLOCKSIZE)
1588 				(void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
1589 			else
1590 				(void) zfs_standard_error(hdl, EBUSY, errbuf);
1591 			break;
1592 
1593 		case EROFS:
1594 			(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
1595 			break;
1596 
1597 		case ENOTSUP:
1598 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1599 			    "pool must be upgraded to set this "
1600 			    "property or value"));
1601 			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
1602 			break;
1603 
1604 		case EOVERFLOW:
1605 			/*
1606 			 * This platform can't address a volume this big.
1607 			 */
1608 #ifdef _ILP32
1609 			if (prop == ZFS_PROP_VOLSIZE) {
1610 				(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
1611 				break;
1612 			}
1613 #endif
1614 			/* FALLTHROUGH */
1615 		default:
1616 			(void) zfs_standard_error(hdl, errno, errbuf);
1617 		}
1618 	} else {
1619 		/*
1620 		 * Refresh the statistics so the new property value
1621 		 * is reflected.
1622 		 */
1623 		if ((ret = changelist_postfix(cl)) == 0)
1624 			(void) get_stats(zhp);
1625 	}
1626 
1627 error:
1628 	nvlist_free(nvl);
1629 	zcmd_free_nvlists(&zc);
1630 	if (cl)
1631 		changelist_free(cl);
1632 	return (ret);
1633 }
1634 
1635 /*
1636  * Given a property, inherit the value from the parent dataset.
1637  */
1638 int
1639 zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
1640 {
1641 	zfs_cmd_t zc = { 0 };
1642 	int ret;
1643 	prop_changelist_t *cl;
1644 	libzfs_handle_t *hdl = zhp->zfs_hdl;
1645 	char errbuf[1024];
1646 	zfs_prop_t prop;
1647 
1648 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1649 	    "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
1650 
1651 	if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
1652 		/*
1653 		 * For user properties, the amount of work we have to do is very
1654 		 * small, so just do it here.
1655 		 */
1656 		if (!zfs_prop_user(propname)) {
1657 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1658 			    "invalid property"));
1659 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1660 		}
1661 
1662 		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1663 		(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
1664 
1665 		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0)
1666 			return (zfs_standard_error(hdl, errno, errbuf));
1667 
1668 		return (0);
1669 	}
1670 
1671 	/*
1672 	 * Verify that this property is inheritable.
1673 	 */
1674 	if (zfs_prop_readonly(prop))
1675 		return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
1676 
1677 	if (!zfs_prop_inheritable(prop))
1678 		return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
1679 
1680 	/*
1681 	 * Check to see if the value applies to this type
1682 	 */
1683 	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
1684 		return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
1685 
1686 	/*
1687 	 * Normalize the name, to get rid of shorthand abbrevations.
1688 	 */
1689 	propname = zfs_prop_to_name(prop);
1690 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1691 	(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
1692 
1693 	if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID &&
1694 	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
1695 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1696 		    "dataset is used in a non-global zone"));
1697 		return (zfs_error(hdl, EZFS_ZONED, errbuf));
1698 	}
1699 
1700 	/*
1701 	 * Determine datasets which will be affected by this change, if any.
1702 	 */
1703 	if ((cl = changelist_gather(zhp, prop, 0)) == NULL)
1704 		return (-1);
1705 
1706 	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
1707 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1708 		    "child dataset with inherited mountpoint is used "
1709 		    "in a non-global zone"));
1710 		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
1711 		goto error;
1712 	}
1713 
1714 	if ((ret = changelist_prefix(cl)) != 0)
1715 		goto error;
1716 
1717 	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) {
1718 		return (zfs_standard_error(hdl, errno, errbuf));
1719 	} else {
1720 
1721 		if ((ret = changelist_postfix(cl)) != 0)
1722 			goto error;
1723 
1724 		/*
1725 		 * Refresh the statistics so the new property is reflected.
1726 		 */
1727 		(void) get_stats(zhp);
1728 	}
1729 
1730 error:
1731 	changelist_free(cl);
1732 	return (ret);
1733 }
1734 
1735 /*
1736  * True DSL properties are stored in an nvlist.  The following two functions
1737  * extract them appropriately.
1738  */
1739 static uint64_t
1740 getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
1741 {
1742 	nvlist_t *nv;
1743 	uint64_t value;
1744 
1745 	*source = NULL;
1746 	if (nvlist_lookup_nvlist(zhp->zfs_props,
1747 	    zfs_prop_to_name(prop), &nv) == 0) {
1748 		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
1749 		(void) nvlist_lookup_string(nv, ZPROP_SOURCE, source);
1750 	} else {
1751 		value = zfs_prop_default_numeric(prop);
1752 		*source = "";
1753 	}
1754 
1755 	return (value);
1756 }
1757 
1758 static char *
1759 getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
1760 {
1761 	nvlist_t *nv;
1762 	char *value;
1763 
1764 	*source = NULL;
1765 	if (nvlist_lookup_nvlist(zhp->zfs_props,
1766 	    zfs_prop_to_name(prop), &nv) == 0) {
1767 		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
1768 		(void) nvlist_lookup_string(nv, ZPROP_SOURCE, source);
1769 	} else {
1770 		if ((value = (char *)zfs_prop_default_string(prop)) == NULL)
1771 			value = "";
1772 		*source = "";
1773 	}
1774 
1775 	return (value);
1776 }
1777 
/*
 * Internal function for getting a numeric property.  Both zfs_prop_get() and
 * zfs_prop_get_int() are built using this interface.
 *
 * Certain properties can be overridden using 'mount -o'.  In this case, scan
 * the contents of the /etc/mnttab entry, searching for the appropriate options.
 * If they differ from the on-disk values, report the current values and mark
 * the source "temporary".
 *
 * Arguments:
 *	zhp	dataset handle; its cached mount options (zfs_mntopts and
 *		zfs_mntcheck) may be filled in as a side effect
 *	prop	property to fetch
 *	src	optional; set to ZPROP_SRC_TEMPORARY when a mount option
 *		overrides the stored value, otherwise left untouched
 *	source	set to NULL on entry, then to the raw source string for
 *		properties that have one
 *	val	receives the numeric value on success
 *
 * Returns 0 on success.  Returns -1 if the mount options cannot be
 * duplicated, and the result of zfs_error() if the version ioctl fails or
 * the property is not numeric.
 */
static int
get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
    char **source, uint64_t *val)
{
	zfs_cmd_t zc = { 0 };
	struct mnttab mnt;
	char *mntopt_on = NULL;
	char *mntopt_off = NULL;

	*source = NULL;

	/*
	 * For properties that have a mount-option equivalent, remember the
	 * option names that turn the property on and off.  Every other
	 * property leaves both pointers NULL.
	 */
	switch (prop) {
	case ZFS_PROP_ATIME:
		mntopt_on = MNTOPT_ATIME;
		mntopt_off = MNTOPT_NOATIME;
		break;

	case ZFS_PROP_DEVICES:
		mntopt_on = MNTOPT_DEVICES;
		mntopt_off = MNTOPT_NODEVICES;
		break;

	case ZFS_PROP_EXEC:
		mntopt_on = MNTOPT_EXEC;
		mntopt_off = MNTOPT_NOEXEC;
		break;

	case ZFS_PROP_READONLY:
		mntopt_on = MNTOPT_RO;
		mntopt_off = MNTOPT_RW;
		break;

	case ZFS_PROP_SETUID:
		mntopt_on = MNTOPT_SETUID;
		mntopt_off = MNTOPT_NOSETUID;
		break;

	case ZFS_PROP_XATTR:
		mntopt_on = MNTOPT_XATTR;
		mntopt_off = MNTOPT_NOXATTR;
		break;
	}

	/*
	 * Because looking up the mount options is potentially expensive
	 * (iterating over all of /etc/mnttab), we defer its calculation until
	 * we're looking up a property which requires its presence.
	 */
	if (!zhp->zfs_mntcheck &&
	    (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) {
		struct mnttab entry, search = { 0 };
		FILE *mnttab = zhp->zfs_hdl->libzfs_mnttab;

		/* Search for a ZFS entry whose special device is our name. */
		search.mnt_special = (char *)zhp->zfs_name;
		search.mnt_fstype = MNTTYPE_ZFS;
		rewind(mnttab);

		if (getmntany(mnttab, &entry, &search) == 0) {
			zhp->zfs_mntopts = zfs_strdup(zhp->zfs_hdl,
			    entry.mnt_mntopts);
			if (zhp->zfs_mntopts == NULL)
				return (-1);
		}

		/* Remember that the lookup was done, whether or not it hit. */
		zhp->zfs_mntcheck = B_TRUE;
	}

	/*
	 * Only mnt_mntopts is initialized in 'mnt'; it is the option string
	 * handed to hasmntopt() below.
	 */
	if (zhp->zfs_mntopts == NULL)
		mnt.mnt_mntopts = "";
	else
		mnt.mnt_mntopts = zhp->zfs_mntopts;

	switch (prop) {
	case ZFS_PROP_ATIME:
	case ZFS_PROP_DEVICES:
	case ZFS_PROP_EXEC:
	case ZFS_PROP_READONLY:
	case ZFS_PROP_SETUID:
	case ZFS_PROP_XATTR:
		*val = getprop_uint64(zhp, prop, source);

		/*
		 * A mount option that contradicts the stored value wins and
		 * is reported as a temporary setting.
		 */
		if (hasmntopt(&mnt, mntopt_on) && !*val) {
			*val = B_TRUE;
			if (src)
				*src = ZPROP_SRC_TEMPORARY;
		} else if (hasmntopt(&mnt, mntopt_off) && *val) {
			*val = B_FALSE;
			if (src)
				*src = ZPROP_SRC_TEMPORARY;
		}
		break;

	case ZFS_PROP_CANMOUNT:
		/* A value of 0 is attributed to this dataset itself. */
		*val = getprop_uint64(zhp, prop, source);
		if (*val == 0)
			*source = zhp->zfs_name;
		else
			*source = "";	/* default */
		break;

	case ZFS_PROP_QUOTA:
	case ZFS_PROP_RESERVATION:
		/* A non-zero value is attributed to this dataset itself. */
		*val = getprop_uint64(zhp, prop, source);
		if (*val == 0)
			*source = "";	/* default */
		else
			*source = zhp->zfs_name;
		break;

	case ZFS_PROP_MOUNTED:
		/* Mounted means a matching mnttab entry was found above. */
		*val = (zhp->zfs_mntopts != NULL);
		break;

	case ZFS_PROP_NUMCLONES:
		*val = zhp->zfs_dmustats.dds_num_clones;
		break;

	case ZFS_PROP_VERSION:
		/* The version is fetched by ioctl; 0 in zc_cookie is an error. */
		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_VERSION, &zc) ||
		    (zc.zc_cookie == 0)) {
			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
			    "unable to get version property"));
			return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION,
			    dgettext(TEXT_DOMAIN, "internal error")));
		}
		*val = zc.zc_cookie;
		break;

	default:
		/* Anything else must be a plain number or index property. */
		switch (zfs_prop_get_type(prop)) {
		case PROP_TYPE_NUMBER:
		case PROP_TYPE_INDEX:
			*val = getprop_uint64(zhp, prop, source);
			break;

		case PROP_TYPE_STRING:
		default:
			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
			    "cannot get non-numeric property"));
			return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP,
			    dgettext(TEXT_DOMAIN, "internal error")));
		}
	}

	return (0);
}
1934 
1935 /*
1936  * Calculate the source type, given the raw source string.
1937  */
1938 static void
1939 get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source,
1940     char *statbuf, size_t statlen)
1941 {
1942 	if (statbuf == NULL || *srctype == ZPROP_SRC_TEMPORARY)
1943 		return;
1944 
1945 	if (source == NULL) {
1946 		*srctype = ZPROP_SRC_NONE;
1947 	} else if (source[0] == '\0') {
1948 		*srctype = ZPROP_SRC_DEFAULT;
1949 	} else {
1950 		if (strcmp(source, zhp->zfs_name) == 0) {
1951 			*srctype = ZPROP_SRC_LOCAL;
1952 		} else {
1953 			(void) strlcpy(statbuf, source, statlen);
1954 			*srctype = ZPROP_SRC_INHERITED;
1955 		}
1956 	}
1957 
1958 }
1959 
1960 /*
1961  * Retrieve a property from the given object.  If 'literal' is specified, then
1962  * numbers are left as exact values.  Otherwise, numbers are converted to a
1963  * human-readable form.
1964  *
1965  * Returns 0 on success, or -1 on error.
1966  */
1967 int
1968 zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
1969     zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal)
1970 {
1971 	char *source = NULL;
1972 	uint64_t val;
1973 	char *str;
1974 	const char *root;
1975 	const char *strval;
1976 
1977 	/*
1978 	 * Check to see if this property applies to our object
1979 	 */
1980 	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
1981 		return (-1);
1982 
1983 	if (src)
1984 		*src = ZPROP_SRC_NONE;
1985 
1986 	switch (prop) {
1987 	case ZFS_PROP_CREATION:
1988 		/*
1989 		 * 'creation' is a time_t stored in the statistics.  We convert
1990 		 * this into a string unless 'literal' is specified.
1991 		 */
1992 		{
1993 			val = getprop_uint64(zhp, prop, &source);
1994 			time_t time = (time_t)val;
1995 			struct tm t;
1996 
1997 			if (literal ||
1998 			    localtime_r(&time, &t) == NULL ||
1999 			    strftime(propbuf, proplen, "%a %b %e %k:%M %Y",
2000 			    &t) == 0)
2001 				(void) snprintf(propbuf, proplen, "%llu", val);
2002 		}
2003 		break;
2004 
2005 	case ZFS_PROP_MOUNTPOINT:
2006 		/*
2007 		 * Getting the precise mountpoint can be tricky.
2008 		 *
2009 		 *  - for 'none' or 'legacy', return those values.
2010 		 *  - for default mountpoints, construct it as /zfs/<dataset>
2011 		 *  - for inherited mountpoints, we want to take everything
2012 		 *    after our ancestor and append it to the inherited value.
2013 		 *
2014 		 * If the pool has an alternate root, we want to prepend that
2015 		 * root to any values we return.
2016 		 */
2017 		root = zhp->zfs_root;
2018 		str = getprop_string(zhp, prop, &source);
2019 
2020 		if (str[0] == '\0') {
2021 			(void) snprintf(propbuf, proplen, "%s/zfs/%s",
2022 			    root, zhp->zfs_name);
2023 		} else if (str[0] == '/') {
2024 			const char *relpath = zhp->zfs_name + strlen(source);
2025 
2026 			if (relpath[0] == '/')
2027 				relpath++;
2028 			if (str[1] == '\0')
2029 				str++;
2030 
2031 			if (relpath[0] == '\0')
2032 				(void) snprintf(propbuf, proplen, "%s%s",
2033 				    root, str);
2034 			else
2035 				(void) snprintf(propbuf, proplen, "%s%s%s%s",
2036 				    root, str, relpath[0] == '@' ? "" : "/",
2037 				    relpath);
2038 		} else {
2039 			/* 'legacy' or 'none' */
2040 			(void) strlcpy(propbuf, str, proplen);
2041 		}
2042 
2043 		break;
2044 
2045 	case ZFS_PROP_ORIGIN:
2046 		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
2047 		    proplen);
2048 		/*
2049 		 * If there is no parent at all, return failure to indicate that
2050 		 * it doesn't apply to this dataset.
2051 		 */
2052 		if (propbuf[0] == '\0')
2053 			return (-1);
2054 		break;
2055 
2056 	case ZFS_PROP_QUOTA:
2057 	case ZFS_PROP_RESERVATION:
2058 		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
2059 			return (-1);
2060 
2061 		/*
2062 		 * If quota or reservation is 0, we translate this into 'none'
2063 		 * (unless literal is set), and indicate that it's the default
2064 		 * value.  Otherwise, we print the number nicely and indicate
2065 		 * that its set locally.
2066 		 */
2067 		if (val == 0) {
2068 			if (literal)
2069 				(void) strlcpy(propbuf, "0", proplen);
2070 			else
2071 				(void) strlcpy(propbuf, "none", proplen);
2072 		} else {
2073 			if (literal)
2074 				(void) snprintf(propbuf, proplen, "%llu",
2075 				    (u_longlong_t)val);
2076 			else
2077 				zfs_nicenum(val, propbuf, proplen);
2078 		}
2079 		break;
2080 
2081 	case ZFS_PROP_COMPRESSRATIO:
2082 		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
2083 			return (-1);
2084 		(void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t)
2085 		    val / 100, (longlong_t)val % 100);
2086 		break;
2087 
2088 	case ZFS_PROP_TYPE:
2089 		switch (zhp->zfs_type) {
2090 		case ZFS_TYPE_FILESYSTEM:
2091 			str = "filesystem";
2092 			break;
2093 		case ZFS_TYPE_VOLUME:
2094 			str = "volume";
2095 			break;
2096 		case ZFS_TYPE_SNAPSHOT:
2097 			str = "snapshot";
2098 			break;
2099 		default:
2100 			abort();
2101 		}
2102 		(void) snprintf(propbuf, proplen, "%s", str);
2103 		break;
2104 
2105 	case ZFS_PROP_MOUNTED:
2106 		/*
2107 		 * The 'mounted' property is a pseudo-property that described
2108 		 * whether the filesystem is currently mounted.  Even though
2109 		 * it's a boolean value, the typical values of "on" and "off"
2110 		 * don't make sense, so we translate to "yes" and "no".
2111 		 */
2112 		if (get_numeric_property(zhp, ZFS_PROP_MOUNTED,
2113 		    src, &source, &val) != 0)
2114 			return (-1);
2115 		if (val)
2116 			(void) strlcpy(propbuf, "yes", proplen);
2117 		else
2118 			(void) strlcpy(propbuf, "no", proplen);
2119 		break;
2120 
2121 	case ZFS_PROP_NAME:
2122 		/*
2123 		 * The 'name' property is a pseudo-property derived from the
2124 		 * dataset name.  It is presented as a real property to simplify
2125 		 * consumers.
2126 		 */
2127 		(void) strlcpy(propbuf, zhp->zfs_name, proplen);
2128 		break;
2129 
2130 	default:
2131 		switch (zfs_prop_get_type(prop)) {
2132 		case PROP_TYPE_NUMBER:
2133 			if (get_numeric_property(zhp, prop, src,
2134 			    &source, &val) != 0)
2135 				return (-1);
2136 			if (literal)
2137 				(void) snprintf(propbuf, proplen, "%llu",
2138 				    (u_longlong_t)val);
2139 			else
2140 				zfs_nicenum(val, propbuf, proplen);
2141 			break;
2142 
2143 		case PROP_TYPE_STRING:
2144 			(void) strlcpy(propbuf,
2145 			    getprop_string(zhp, prop, &source), proplen);
2146 			break;
2147 
2148 		case PROP_TYPE_INDEX:
2149 			if (get_numeric_property(zhp, prop, src,
2150 			    &source, &val) != 0)
2151 				return (-1);
2152 			if (zfs_prop_index_to_string(prop, val, &strval) != 0)
2153 				return (-1);
2154 			(void) strlcpy(propbuf, strval, proplen);
2155 			break;
2156 
2157 		default:
2158 			abort();
2159 		}
2160 	}
2161 
2162 	get_source(zhp, src, source, statbuf, statlen);
2163 
2164 	return (0);
2165 }
2166 
2167 /*
2168  * Utility function to get the given numeric property.  Does no validation that
2169  * the given property is the appropriate type; should only be used with
2170  * hard-coded property types.
2171  */
2172 uint64_t
2173 zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
2174 {
2175 	char *source;
2176 	zprop_source_t sourcetype = ZPROP_SRC_NONE;
2177 	uint64_t val;
2178 
2179 	(void) get_numeric_property(zhp, prop, &sourcetype, &source, &val);
2180 
2181 	return (val);
2182 }
2183 
2184 /*
2185  * Similar to zfs_prop_get(), but returns the value as an integer.
2186  */
2187 int
2188 zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
2189     zprop_source_t *src, char *statbuf, size_t statlen)
2190 {
2191 	char *source;
2192 
2193 	/*
2194 	 * Check to see if this property applies to our object
2195 	 */
2196 	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) {
2197 		return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE,
2198 		    dgettext(TEXT_DOMAIN, "cannot get property '%s'"),
2199 		    zfs_prop_to_name(prop)));
2200 	}
2201 
2202 	if (src)
2203 		*src = ZPROP_SRC_NONE;
2204 
2205 	if (get_numeric_property(zhp, prop, src, &source, value) != 0)
2206 		return (-1);
2207 
2208 	get_source(zhp, src, source, statbuf, statlen);
2209 
2210 	return (0);
2211 }
2212 
2213 /*
2214  * Returns the name of the given zfs handle.
2215  */
2216 const char *
2217 zfs_get_name(const zfs_handle_t *zhp)
2218 {
2219 	return (zhp->zfs_name);
2220 }
2221 
2222 /*
2223  * Returns the type of the given zfs handle.
2224  */
2225 zfs_type_t
2226 zfs_get_type(const zfs_handle_t *zhp)
2227 {
2228 	return (zhp->zfs_type);
2229 }
2230 
2231 /*
2232  * Iterate over all child filesystems
2233  */
2234 int
2235 zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
2236 {
2237 	zfs_cmd_t zc = { 0 };
2238 	zfs_handle_t *nzhp;
2239 	int ret;
2240 
2241 	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2242 	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
2243 	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
2244 		/*
2245 		 * Ignore private dataset names.
2246 		 */
2247 		if (dataset_name_hidden(zc.zc_name))
2248 			continue;
2249 
2250 		/*
2251 		 * Silently ignore errors, as the only plausible explanation is
2252 		 * that the pool has since been removed.
2253 		 */
2254 		if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
2255 		    zc.zc_name)) == NULL)
2256 			continue;
2257 
2258 		if ((ret = func(nzhp, data)) != 0)
2259 			return (ret);
2260 	}
2261 
2262 	/*
2263 	 * An errno value of ESRCH indicates normal completion.  If ENOENT is
2264 	 * returned, then the underlying dataset has been removed since we
2265 	 * obtained the handle.
2266 	 */
2267 	if (errno != ESRCH && errno != ENOENT)
2268 		return (zfs_standard_error(zhp->zfs_hdl, errno,
2269 		    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
2270 
2271 	return (0);
2272 }
2273 
2274 /*
2275  * Iterate over all snapshots
2276  */
2277 int
2278 zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
2279 {
2280 	zfs_cmd_t zc = { 0 };
2281 	zfs_handle_t *nzhp;
2282 	int ret;
2283 
2284 	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2285 	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2286 	    &zc) == 0;
2287 	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
2288 
2289 		if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
2290 		    zc.zc_name)) == NULL)
2291 			continue;
2292 
2293 		if ((ret = func(nzhp, data)) != 0)
2294 			return (ret);
2295 	}
2296 
2297 	/*
2298 	 * An errno value of ESRCH indicates normal completion.  If ENOENT is
2299 	 * returned, then the underlying dataset has been removed since we
2300 	 * obtained the handle.  Silently ignore this case, and return success.
2301 	 */
2302 	if (errno != ESRCH && errno != ENOENT)
2303 		return (zfs_standard_error(zhp->zfs_hdl, errno,
2304 		    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
2305 
2306 	return (0);
2307 }
2308 
2309 /*
2310  * Iterate over all children, snapshots and filesystems
2311  */
2312 int
2313 zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data)
2314 {
2315 	int ret;
2316 
2317 	if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0)
2318 		return (ret);
2319 
2320 	return (zfs_iter_snapshots(zhp, func, data));
2321 }
2322 
/*
 * Given a complete name, copy just the portion that refers to the parent
 * (everything before the last '/') into 'buf'.
 *
 * Returns 0 on success.  Returns -1, leaving 'buf' untouched, if 'path'
 * contains no '/' (i.e. it names a pool) or if the parent name plus its
 * terminating NUL does not fit in 'buflen' bytes.  'path' and 'buf' may
 * refer to the same buffer.
 */
static int
parent_name(const char *path, char *buf, size_t buflen)
{
	const char *loc;
	size_t len;

	if ((loc = strrchr(path, '/')) == NULL)
		return (-1);

	/* Never write past the end of the caller's buffer. */
	len = (size_t)(loc - path);
	if (len >= buflen)
		return (-1);

	/* memmove rather than strncpy: source and destination may overlap. */
	(void) memmove(buf, path, len);
	buf[len] = '\0';

	return (0);
}
2340 
2341 /*
2342  * If accept_ancestor is false, then check to make sure that the given path has
2343  * a parent, and that it exists.  If accept_ancestor is true, then find the
2344  * closest existing ancestor for the given path.  In prefixlen return the
2345  * length of already existing prefix of the given path.  We also fetch the
2346  * 'zoned' property, which is used to validate property settings when creating
2347  * new datasets.
2348  */
2349 static int
2350 check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
2351     boolean_t accept_ancestor, int *prefixlen)
2352 {
2353 	zfs_cmd_t zc = { 0 };
2354 	char parent[ZFS_MAXNAMELEN];
2355 	char *slash;
2356 	zfs_handle_t *zhp;
2357 	char errbuf[1024];
2358 
2359 	(void) snprintf(errbuf, sizeof (errbuf), "cannot create '%s'",
2360 	    path);
2361 
2362 	/* get parent, and check to see if this is just a pool */
2363 	if (parent_name(path, parent, sizeof (parent)) != 0) {
2364 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2365 		    "missing dataset name"));
2366 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2367 	}
2368 
2369 	/* check to see if the pool exists */
2370 	if ((slash = strchr(parent, '/')) == NULL)
2371 		slash = parent + strlen(parent);
2372 	(void) strncpy(zc.zc_name, parent, slash - parent);
2373 	zc.zc_name[slash - parent] = '\0';
2374 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
2375 	    errno == ENOENT) {
2376 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2377 		    "no such pool '%s'"), zc.zc_name);
2378 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
2379 	}
2380 
2381 	/* check to see if the parent dataset exists */
2382 	while ((zhp = make_dataset_handle(hdl, parent)) == NULL) {
2383 		if (errno == ENOENT && accept_ancestor) {
2384 			/*
2385 			 * Go deeper to find an ancestor, give up on top level.
2386 			 */
2387 			if (parent_name(parent, parent, sizeof (parent)) != 0) {
2388 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2389 				    "no such pool '%s'"), zc.zc_name);
2390 				return (zfs_error(hdl, EZFS_NOENT, errbuf));
2391 			}
2392 		} else if (errno == ENOENT) {
2393 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2394 			    "parent does not exist"));
2395 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2396 		} else
2397 			return (zfs_standard_error(hdl, errno, errbuf));
2398 	}
2399 
2400 	*zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
2401 	/* we are in a non-global zone, but parent is in the global zone */
2402 	if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) {
2403 		(void) zfs_standard_error(hdl, EPERM, errbuf);
2404 		zfs_close(zhp);
2405 		return (-1);
2406 	}
2407 
2408 	/* make sure parent is a filesystem */
2409 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
2410 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2411 		    "parent is not a filesystem"));
2412 		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
2413 		zfs_close(zhp);
2414 		return (-1);
2415 	}
2416 
2417 	zfs_close(zhp);
2418 	if (prefixlen != NULL)
2419 		*prefixlen = strlen(parent);
2420 	return (0);
2421 }
2422 
2423 /*
2424  * Finds whether the dataset of the given type(s) exists.
2425  */
2426 boolean_t
2427 zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types)
2428 {
2429 	zfs_handle_t *zhp;
2430 
2431 	if (!zfs_validate_name(hdl, path, types))
2432 		return (B_FALSE);
2433 
2434 	/*
2435 	 * Try to get stats for the dataset, which will tell us if it exists.
2436 	 */
2437 	if ((zhp = make_dataset_handle(hdl, path)) != NULL) {
2438 		int ds_type = zhp->zfs_type;
2439 
2440 		zfs_close(zhp);
2441 		if (types & ds_type)
2442 			return (B_TRUE);
2443 	}
2444 	return (B_FALSE);
2445 }
2446 
2447 /*
2448  * Creates non-existing ancestors of the given path.
2449  */
2450 int
2451 zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
2452 {
2453 	int prefix;
2454 	uint64_t zoned;
2455 	char *path_copy;
2456 	int rc;
2457 
2458 	if (check_parents(hdl, path, &zoned, B_TRUE, &prefix) != 0)
2459 		return (-1);
2460 
2461 	if ((path_copy = strdup(path)) != NULL) {
2462 		rc = create_parents(hdl, path_copy, prefix);
2463 		free(path_copy);
2464 	}
2465 	if (path_copy == NULL || rc != 0)
2466 		return (-1);
2467 
2468 	return (0);
2469 }
2470 
2471 /*
2472  * Create a new filesystem or volume.
2473  */
2474 int
2475 zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
2476     nvlist_t *props)
2477 {
2478 	zfs_cmd_t zc = { 0 };
2479 	int ret;
2480 	uint64_t size = 0;
2481 	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
2482 	char errbuf[1024];
2483 	uint64_t zoned;
2484 
2485 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2486 	    "cannot create '%s'"), path);
2487 
2488 	/* validate the path, taking care to note the extended error message */
2489 	if (!zfs_validate_name(hdl, path, type))
2490 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2491 
2492 	/* validate parents exist */
2493 	if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0)
2494 		return (-1);
2495 
2496 	/*
2497 	 * The failure modes when creating a dataset of a different type over
2498 	 * one that already exists is a little strange.  In particular, if you
2499 	 * try to create a dataset on top of an existing dataset, the ioctl()
2500 	 * will return ENOENT, not EEXIST.  To prevent this from happening, we
2501 	 * first try to see if the dataset exists.
2502 	 */
2503 	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
2504 	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2505 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2506 		    "dataset already exists"));
2507 		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2508 	}
2509 
2510 	if (type == ZFS_TYPE_VOLUME)
2511 		zc.zc_objset_type = DMU_OST_ZVOL;
2512 	else
2513 		zc.zc_objset_type = DMU_OST_ZFS;
2514 
2515 	if (props && (props = zfs_validate_properties(hdl, type, props,
2516 	    zoned, NULL, errbuf)) == 0)
2517 		return (-1);
2518 
2519 	if (type == ZFS_TYPE_VOLUME) {
2520 		/*
2521 		 * If we are creating a volume, the size and block size must
2522 		 * satisfy a few restraints.  First, the blocksize must be a
2523 		 * valid block size between SPA_{MIN,MAX}BLOCKSIZE.  Second, the
2524 		 * volsize must be a multiple of the block size, and cannot be
2525 		 * zero.
2526 		 */
2527 		if (props == NULL || nvlist_lookup_uint64(props,
2528 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) {
2529 			nvlist_free(props);
2530 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2531 			    "missing volume size"));
2532 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2533 		}
2534 
2535 		if ((ret = nvlist_lookup_uint64(props,
2536 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2537 		    &blocksize)) != 0) {
2538 			if (ret == ENOENT) {
2539 				blocksize = zfs_prop_default_numeric(
2540 				    ZFS_PROP_VOLBLOCKSIZE);
2541 			} else {
2542 				nvlist_free(props);
2543 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2544 				    "missing volume block size"));
2545 				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2546 			}
2547 		}
2548 
2549 		if (size == 0) {
2550 			nvlist_free(props);
2551 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2552 			    "volume size cannot be zero"));
2553 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2554 		}
2555 
2556 		if (size % blocksize != 0) {
2557 			nvlist_free(props);
2558 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2559 			    "volume size must be a multiple of volume block "
2560 			    "size"));
2561 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2562 		}
2563 	}
2564 
2565 	if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0)
2566 		return (-1);
2567 	nvlist_free(props);
2568 
2569 	/* create the dataset */
2570 	ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
2571 
2572 	if (ret == 0 && type == ZFS_TYPE_VOLUME) {
2573 		ret = zvol_create_link(hdl, path);
2574 		if (ret) {
2575 			(void) zfs_standard_error(hdl, errno,
2576 			    dgettext(TEXT_DOMAIN,
2577 			    "Volume successfully created, but device links "
2578 			    "were not created"));
2579 			zcmd_free_nvlists(&zc);
2580 			return (-1);
2581 		}
2582 	}
2583 
2584 	zcmd_free_nvlists(&zc);
2585 
2586 	/* check for failure */
2587 	if (ret != 0) {
2588 		char parent[ZFS_MAXNAMELEN];
2589 		(void) parent_name(path, parent, sizeof (parent));
2590 
2591 		switch (errno) {
2592 		case ENOENT:
2593 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2594 			    "no such parent '%s'"), parent);
2595 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2596 
2597 		case EINVAL:
2598 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2599 			    "parent '%s' is not a filesystem"), parent);
2600 			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2601 
2602 		case EDOM:
2603 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2604 			    "volume block size must be power of 2 from "
2605 			    "%u to %uk"),
2606 			    (uint_t)SPA_MINBLOCKSIZE,
2607 			    (uint_t)SPA_MAXBLOCKSIZE >> 10);
2608 
2609 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2610 
2611 		case ENOTSUP:
2612 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2613 			    "pool must be upgraded to set this "
2614 			    "property or value"));
2615 			return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
2616 
2617 #ifdef _ILP32
2618 		case EOVERFLOW:
2619 			/*
2620 			 * This platform can't address a volume this big.
2621 			 */
2622 			if (type == ZFS_TYPE_VOLUME)
2623 				return (zfs_error(hdl, EZFS_VOLTOOBIG,
2624 				    errbuf));
2625 #endif
2626 			/* FALLTHROUGH */
2627 		default:
2628 			return (zfs_standard_error(hdl, errno, errbuf));
2629 		}
2630 	}
2631 
2632 	return (0);
2633 }
2634 
2635 /*
2636  * Destroys the given dataset.  The caller must make sure that the filesystem
2637  * isn't mounted, and that there are no active dependents.
2638  */
2639 int
2640 zfs_destroy(zfs_handle_t *zhp)
2641 {
2642 	zfs_cmd_t zc = { 0 };
2643 
2644 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2645 
2646 	if (ZFS_IS_VOLUME(zhp)) {
2647 		/*
2648 		 * If user doesn't have permissions to unshare volume, then
2649 		 * abort the request.  This would only happen for a
2650 		 * non-privileged user.
2651 		 */
2652 		if (zfs_unshare_iscsi(zhp) != 0) {
2653 			return (-1);
2654 		}
2655 
2656 		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
2657 			return (-1);
2658 
2659 		zc.zc_objset_type = DMU_OST_ZVOL;
2660 	} else {
2661 		zc.zc_objset_type = DMU_OST_ZFS;
2662 	}
2663 
2664 	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY, &zc) != 0) {
2665 		return (zfs_standard_error_fmt(zhp->zfs_hdl, errno,
2666 		    dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
2667 		    zhp->zfs_name));
2668 	}
2669 
2670 	remove_mountpoint(zhp);
2671 
2672 	return (0);
2673 }
2674 
/*
 * State shared between zfs_destroy_snaps() and zfs_remove_link_cb() while
 * walking a dataset and its descendants.
 */
struct destroydata {
	char *snapname;		/* snapshot name, i.e. the part after '@' */
	boolean_t gotone;	/* set once a dataset has that snapshot */
	boolean_t closezhp;	/* callback closes the handle it was given */
};
2680 
2681 static int
2682 zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
2683 {
2684 	struct destroydata *dd = arg;
2685 	zfs_handle_t *szhp;
2686 	char name[ZFS_MAXNAMELEN];
2687 	boolean_t closezhp = dd->closezhp;
2688 	int rv;
2689 
2690 	(void) strlcpy(name, zhp->zfs_name, sizeof (name));
2691 	(void) strlcat(name, "@", sizeof (name));
2692 	(void) strlcat(name, dd->snapname, sizeof (name));
2693 
2694 	szhp = make_dataset_handle(zhp->zfs_hdl, name);
2695 	if (szhp) {
2696 		dd->gotone = B_TRUE;
2697 		zfs_close(szhp);
2698 	}
2699 
2700 	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
2701 		(void) zvol_remove_link(zhp->zfs_hdl, name);
2702 		/*
2703 		 * NB: this is simply a best-effort.  We don't want to
2704 		 * return an error, because then we wouldn't visit all
2705 		 * the volumes.
2706 		 */
2707 	}
2708 
2709 	dd->closezhp = B_TRUE;
2710 	rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg);
2711 	if (closezhp)
2712 		zfs_close(zhp);
2713 	return (rv);
2714 }
2715 
2716 /*
2717  * Destroys all snapshots with the given name in zhp & descendants.
2718  */
2719 int
2720 zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
2721 {
2722 	zfs_cmd_t zc = { 0 };
2723 	int ret;
2724 	struct destroydata dd = { 0 };
2725 
2726 	dd.snapname = snapname;
2727 	(void) zfs_remove_link_cb(zhp, &dd);
2728 
2729 	if (!dd.gotone) {
2730 		return (zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
2731 		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
2732 		    zhp->zfs_name, snapname));
2733 	}
2734 
2735 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2736 	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
2737 
2738 	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS, &zc);
2739 	if (ret != 0) {
2740 		char errbuf[1024];
2741 
2742 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2743 		    "cannot destroy '%s@%s'"), zc.zc_name, snapname);
2744 
2745 		switch (errno) {
2746 		case EEXIST:
2747 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2748 			    "snapshot is cloned"));
2749 			return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf));
2750 
2751 		default:
2752 			return (zfs_standard_error(zhp->zfs_hdl, errno,
2753 			    errbuf));
2754 		}
2755 	}
2756 
2757 	return (0);
2758 }
2759 
2760 /*
2761  * Clones the given dataset.  The target must be of the same type as the source.
2762  */
2763 int
2764 zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
2765 {
2766 	zfs_cmd_t zc = { 0 };
2767 	char parent[ZFS_MAXNAMELEN];
2768 	int ret;
2769 	char errbuf[1024];
2770 	libzfs_handle_t *hdl = zhp->zfs_hdl;
2771 	zfs_type_t type;
2772 	uint64_t zoned;
2773 
2774 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
2775 
2776 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2777 	    "cannot create '%s'"), target);
2778 
2779 	/* validate the target name */
2780 	if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM))
2781 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2782 
2783 	/* validate parents exist */
2784 	if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0)
2785 		return (-1);
2786 
2787 	(void) parent_name(target, parent, sizeof (parent));
2788 
2789 	/* do the clone */
2790 	if (ZFS_IS_VOLUME(zhp)) {
2791 		zc.zc_objset_type = DMU_OST_ZVOL;
2792 		type = ZFS_TYPE_VOLUME;
2793 	} else {
2794 		zc.zc_objset_type = DMU_OST_ZFS;
2795 		type = ZFS_TYPE_FILESYSTEM;
2796 	}
2797 
2798 	if (props) {
2799 		if ((props = zfs_validate_properties(hdl, type, props,
2800 		    zoned, zhp, errbuf)) == NULL)
2801 			return (-1);
2802 
2803 		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
2804 			nvlist_free(props);
2805 			return (-1);
2806 		}
2807 
2808 		nvlist_free(props);
2809 	}
2810 
2811 	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
2812 	(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value));
2813 	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc);
2814 
2815 	zcmd_free_nvlists(&zc);
2816 
2817 	if (ret != 0) {
2818 		switch (errno) {
2819 
2820 		case ENOENT:
2821 			/*
2822 			 * The parent doesn't exist.  We should have caught this
2823 			 * above, but there may a race condition that has since
2824 			 * destroyed the parent.
2825 			 *
2826 			 * At this point, we don't know whether it's the source
2827 			 * that doesn't exist anymore, or whether the target
2828 			 * dataset doesn't exist.
2829 			 */
2830 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2831 			    "no such parent '%s'"), parent);
2832 			return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
2833 
2834 		case EXDEV:
2835 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2836 			    "source and target pools differ"));
2837 			return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET,
2838 			    errbuf));
2839 
2840 		default:
2841 			return (zfs_standard_error(zhp->zfs_hdl, errno,
2842 			    errbuf));
2843 		}
2844 	} else if (ZFS_IS_VOLUME(zhp)) {
2845 		ret = zvol_create_link(zhp->zfs_hdl, target);
2846 	}
2847 
2848 	return (ret);
2849 }
2850 
/*
 * State passed by zfs_promote() to its snapshot-iteration callbacks.
 */
typedef struct promote_data {
	char cb_mountpoint[MAXPATHLEN];	/* mountpoint property of the origin */
	const char *cb_target;		/* name of the dataset being promoted */
	const char *cb_errbuf;		/* error prefix used by the callbacks */
	uint64_t cb_pivot_txg;		/* createtxg of the origin snapshot */
} promote_data_t;
2857 
2858 static int
2859 promote_snap_cb(zfs_handle_t *zhp, void *data)
2860 {
2861 	promote_data_t *pd = data;
2862 	zfs_handle_t *szhp;
2863 	char snapname[MAXPATHLEN];
2864 	int rv = 0;
2865 
2866 	/* We don't care about snapshots after the pivot point */
2867 	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) {
2868 		zfs_close(zhp);
2869 		return (0);
2870 	}
2871 
2872 	/* Remove the device link if it's a zvol. */
2873 	if (ZFS_IS_VOLUME(zhp))
2874 		(void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
2875 
2876 	/* Check for conflicting names */
2877 	(void) strlcpy(snapname, pd->cb_target, sizeof (snapname));
2878 	(void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname));
2879 	szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
2880 	if (szhp != NULL) {
2881 		zfs_close(szhp);
2882 		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2883 		    "snapshot name '%s' from origin \n"
2884 		    "conflicts with '%s' from target"),
2885 		    zhp->zfs_name, snapname);
2886 		rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf);
2887 	}
2888 	zfs_close(zhp);
2889 	return (rv);
2890 }
2891 
2892 static int
2893 promote_snap_done_cb(zfs_handle_t *zhp, void *data)
2894 {
2895 	promote_data_t *pd = data;
2896 
2897 	/* We don't care about snapshots after the pivot point */
2898 	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
2899 		/* Create the device link if it's a zvol. */
2900 		if (ZFS_IS_VOLUME(zhp))
2901 			(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
2902 	}
2903 
2904 	zfs_close(zhp);
2905 	return (0);
2906 }
2907 
2908 /*
2909  * Promotes the given clone fs to be the clone parent.
2910  */
2911 int
2912 zfs_promote(zfs_handle_t *zhp)
2913 {
2914 	libzfs_handle_t *hdl = zhp->zfs_hdl;
2915 	zfs_cmd_t zc = { 0 };
2916 	char parent[MAXPATHLEN];
2917 	char *cp;
2918 	int ret;
2919 	zfs_handle_t *pzhp;
2920 	promote_data_t pd;
2921 	char errbuf[1024];
2922 
2923 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2924 	    "cannot promote '%s'"), zhp->zfs_name);
2925 
2926 	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
2927 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2928 		    "snapshots can not be promoted"));
2929 		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2930 	}
2931 
2932 	(void) strlcpy(parent, zhp->zfs_dmustats.dds_clone_of, sizeof (parent));
2933 	if (parent[0] == '\0') {
2934 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2935 		    "not a cloned filesystem"));
2936 		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2937 	}
2938 	cp = strchr(parent, '@');
2939 	*cp = '\0';
2940 
2941 	/* Walk the snapshots we will be moving */
2942 	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT);
2943 	if (pzhp == NULL)
2944 		return (-1);
2945 	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
2946 	zfs_close(pzhp);
2947 	pd.cb_target = zhp->zfs_name;
2948 	pd.cb_errbuf = errbuf;
2949 	pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET);
2950 	if (pzhp == NULL)
2951 		return (-1);
2952 	(void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
2953 	    sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
2954 	ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
2955 	if (ret != 0) {
2956 		zfs_close(pzhp);
2957 		return (-1);
2958 	}
2959 
2960 	/* issue the ioctl */
2961 	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_clone_of,
2962 	    sizeof (zc.zc_value));
2963 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2964 	ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2965 
2966 	if (ret != 0) {
2967 		int save_errno = errno;
2968 
2969 		(void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
2970 		zfs_close(pzhp);
2971 
2972 		switch (save_errno) {
2973 		case EEXIST:
2974 			/*
2975 			 * There is a conflicting snapshot name.  We
2976 			 * should have caught this above, but they could
2977 			 * have renamed something in the mean time.
2978 			 */
2979 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2980 			    "conflicting snapshot name from parent '%s'"),
2981 			    parent);
2982 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2983 
2984 		default:
2985 			return (zfs_standard_error(hdl, save_errno, errbuf));
2986 		}
2987 	} else {
2988 		(void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
2989 	}
2990 
2991 	zfs_close(pzhp);
2992 	return (ret);
2993 }
2994 
/*
 * Argument block for zfs_create_link_cb().
 */
struct createdata {
	const char *cd_snapname;	/* snapshot name, the part after '@' */
	int cd_ifexists;	/* handed to zvol_create_link_common() */
};
2999 
3000 static int
3001 zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
3002 {
3003 	struct createdata *cd = arg;
3004 	int ret;
3005 
3006 	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
3007 		char name[MAXPATHLEN];
3008 
3009 		(void) strlcpy(name, zhp->zfs_name, sizeof (name));
3010 		(void) strlcat(name, "@", sizeof (name));
3011 		(void) strlcat(name, cd->cd_snapname, sizeof (name));
3012 		(void) zvol_create_link_common(zhp->zfs_hdl, name,
3013 		    cd->cd_ifexists);
3014 		/*
3015 		 * NB: this is simply a best-effort.  We don't want to
3016 		 * return an error, because then we wouldn't visit all
3017 		 * the volumes.
3018 		 */
3019 	}
3020 
3021 	ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
3022 
3023 	zfs_close(zhp);
3024 
3025 	return (ret);
3026 }
3027 
3028 /*
3029  * Takes a snapshot of the given dataset.
3030  */
3031 int
3032 zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive)
3033 {
3034 	const char *delim;
3035 	char *parent;
3036 	zfs_handle_t *zhp;
3037 	zfs_cmd_t zc = { 0 };
3038 	int ret;
3039 	char errbuf[1024];
3040 
3041 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3042 	    "cannot snapshot '%s'"), path);
3043 
3044 	/* validate the target name */
3045 	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT))
3046 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3047 
3048 	/* make sure the parent exists and is of the appropriate type */
3049 	delim = strchr(path, '@');
3050 	if ((parent = zfs_alloc(hdl, delim - path + 1)) == NULL)
3051 		return (-1);
3052 	(void) strncpy(parent, path, delim - path);
3053 	parent[delim - path] = '\0';
3054 
3055 	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
3056 	    ZFS_TYPE_VOLUME)) == NULL) {
3057 		free(parent);
3058 		return (-1);
3059 	}
3060 
3061 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3062 	(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value));
3063 	if (ZFS_IS_VOLUME(zhp))
3064 		zc.zc_objset_type = DMU_OST_ZVOL;
3065 	else
3066 		zc.zc_objset_type = DMU_OST_ZFS;
3067 	zc.zc_cookie = recursive;
3068 	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc);
3069 
3070 	/*
3071 	 * if it was recursive, the one that actually failed will be in
3072 	 * zc.zc_name.
3073 	 */
3074 	if (ret != 0)
3075 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3076 		    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
3077 
3078 	if (ret == 0 && recursive) {
3079 		struct createdata cd;
3080 
3081 		cd.cd_snapname = delim + 1;
3082 		cd.cd_ifexists = B_FALSE;
3083 		(void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
3084 	}
3085 	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
3086 		ret = zvol_create_link(zhp->zfs_hdl, path);
3087 		if (ret != 0) {
3088 			(void) zfs_standard_error(hdl, errno,
3089 			    dgettext(TEXT_DOMAIN,
3090 			    "Volume successfully snapshotted, but device links "
3091 			    "were not created"));
3092 			free(parent);
3093 			zfs_close(zhp);
3094 			return (-1);
3095 		}
3096 	}
3097 
3098 	if (ret != 0)
3099 		(void) zfs_standard_error(hdl, errno, errbuf);
3100 
3101 	free(parent);
3102 	zfs_close(zhp);
3103 
3104 	return (ret);
3105 }
3106 
3107 /*
3108  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
3109  * NULL) to the file descriptor specified by outfd.
3110  */
3111 int
3112 zfs_send(zfs_handle_t *zhp, const char *fromsnap, int outfd)
3113 {
3114 	zfs_cmd_t zc = { 0 };
3115 	char errbuf[1024];
3116 	libzfs_handle_t *hdl = zhp->zfs_hdl;
3117 
3118 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
3119 
3120 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3121 	if (fromsnap)
3122 		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_name));
3123 	zc.zc_cookie = outfd;
3124 
3125 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc) != 0) {
3126 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3127 		    "cannot send '%s'"), zhp->zfs_name);
3128 
3129 		switch (errno) {
3130 
3131 		case EXDEV:
3132 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3133 			    "not an earlier snapshot from the same fs"));
3134 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
3135 
3136 		case EDQUOT:
3137 		case EFBIG:
3138 		case EIO:
3139 		case ENOLINK:
3140 		case ENOSPC:
3141 		case ENOSTR:
3142 		case ENXIO:
3143 		case EPIPE:
3144 		case ERANGE:
3145 		case EFAULT:
3146 		case EROFS:
3147 			zfs_error_aux(hdl, strerror(errno));
3148 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
3149 
3150 		default:
3151 			return (zfs_standard_error(hdl, errno, errbuf));
3152 		}
3153 	}
3154 
3155 	return (0);
3156 }
3157 
3158 /*
3159  * Create ancestors of 'target', but not target itself, and not
3160  * ancestors whose names are shorter than prefixlen.  Die if
3161  * prefixlen-ancestor does not exist.
3162  */
3163 static int
3164 create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
3165 {
3166 	zfs_handle_t *h;
3167 	char *cp;
3168 
3169 	/* make sure prefix exists */
3170 	cp = strchr(target + prefixlen, '/');
3171 	if (cp == NULL) {
3172 		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
3173 	} else {
3174 		*cp = '\0';
3175 		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
3176 		*cp = '/';
3177 	}
3178 	if (h == NULL)
3179 		return (-1);
3180 	zfs_close(h);
3181 
3182 	/*
3183 	 * Attempt to create, mount, and share any ancestor filesystems,
3184 	 * up to the prefixlen-long one.
3185 	 */
3186 	for (cp = target + prefixlen + 1;
3187 	    cp = strchr(cp, '/'); *cp = '/', cp++) {
3188 		const char *opname;
3189 		char *logstr;
3190 
3191 		*cp = '\0';
3192 
3193 		h = make_dataset_handle(hdl, target);
3194 		if (h) {
3195 			/* it already exists, nothing to do here */
3196 			zfs_close(h);
3197 			continue;
3198 		}
3199 
3200 		opname = dgettext(TEXT_DOMAIN, "create");
3201 		logstr = hdl->libzfs_log_str;
3202 		hdl->libzfs_log_str = NULL;
3203 		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
3204 		    NULL) != 0) {
3205 			hdl->libzfs_log_str = logstr;
3206 			goto ancestorerr;
3207 		}
3208 
3209 		hdl->libzfs_log_str = logstr;
3210 		opname = dgettext(TEXT_DOMAIN, "open");
3211 		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
3212 		if (h == NULL)
3213 			goto ancestorerr;
3214 
3215 		opname = dgettext(TEXT_DOMAIN, "mount");
3216 		if (zfs_mount(h, NULL, 0) != 0)
3217 			goto ancestorerr;
3218 
3219 		opname = dgettext(TEXT_DOMAIN, "share");
3220 		if (zfs_share(h) != 0)
3221 			goto ancestorerr;
3222 
3223 		zfs_close(h);
3224 
3225 		continue;
3226 ancestorerr:
3227 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3228 		    "failed to %s ancestor '%s'"), opname, target);
3229 		return (-1);
3230 	}
3231 
3232 	return (0);
3233 }
3234 
3235 /*
3236  * Restores a backup of tosnap from the file descriptor specified by infd.
3237  */
3238 int
3239 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix,
3240     int verbose, int dryrun, boolean_t force, int infd)
3241 {
3242 	zfs_cmd_t zc = { 0 };
3243 	time_t begin_time;
3244 	int ioctl_err, err, bytes, size, choplen;
3245 	char *cp;
3246 	dmu_replay_record_t drr;
3247 	struct drr_begin *drrb = &zc.zc_begin_record;
3248 	char errbuf[1024];
3249 	prop_changelist_t *clp;
3250 	char chopprefix[ZFS_MAXNAMELEN];
3251 
3252 	begin_time = time(NULL);
3253 
3254 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3255 	    "cannot receive"));
3256 
3257 	/* read in the BEGIN record */
3258 	cp = (char *)&drr;
3259 	bytes = 0;
3260 	do {
3261 		size = read(infd, cp, sizeof (drr) - bytes);
3262 		cp += size;
3263 		bytes += size;
3264 	} while (size > 0);
3265 
3266 	if (size < 0 || bytes != sizeof (drr)) {
3267 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3268 		    "stream (failed to read first record)"));
3269 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3270 	}
3271 
3272 	zc.zc_begin_record = drr.drr_u.drr_begin;
3273 
3274 	if (drrb->drr_magic != DMU_BACKUP_MAGIC &&
3275 	    drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) {
3276 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3277 		    "stream (bad magic number)"));
3278 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3279 	}
3280 
3281 	if (drrb->drr_version != DMU_BACKUP_VERSION &&
3282 	    drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) {
3283 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version "
3284 		    "0x%llx is supported (stream is version 0x%llx)"),
3285 		    DMU_BACKUP_VERSION, drrb->drr_version);
3286 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3287 	}
3288 
3289 	if (strchr(drr.drr_u.drr_begin.drr_toname, '@') == NULL) {
3290 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3291 		    "stream (bad snapshot name)"));
3292 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3293 	}
3294 	/*
3295 	 * Determine how much of the snapshot name stored in the stream
3296 	 * we are going to tack on to the name they specified on the
3297 	 * command line, and how much we are going to chop off.
3298 	 *
3299 	 * If they specified a snapshot, chop the entire name stored in
3300 	 * the stream.
3301 	 */
3302 	(void) strcpy(chopprefix, drr.drr_u.drr_begin.drr_toname);
3303 	if (isprefix) {
3304 		/*
3305 		 * They specified a fs with -d, we want to tack on
3306 		 * everything but the pool name stored in the stream
3307 		 */
3308 		if (strchr(tosnap, '@')) {
3309 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3310 			    "argument - snapshot not allowed with -d"));
3311 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3312 		}
3313 		cp = strchr(chopprefix, '/');
3314 		if (cp == NULL)
3315 			cp = strchr(chopprefix, '@');
3316 		*cp = '\0';
3317 	} else if (strchr(tosnap, '@') == NULL) {
3318 		/*
3319 		 * If they specified a filesystem without -d, we want to
3320 		 * tack on everything after the fs specified in the
3321 		 * first name from the stream.
3322 		 */
3323 		cp = strchr(chopprefix, '@');
3324 		*cp = '\0';
3325 	}
3326 	choplen = strlen(chopprefix);
3327 
3328 	/*
3329 	 * Determine name of destination snapshot, store in zc_value.
3330 	 */
3331 	(void) strcpy(zc.zc_value, tosnap);
3332 	(void) strncat(zc.zc_value, drr.drr_u.drr_begin.drr_toname+choplen,
3333 	    sizeof (zc.zc_value));
3334 	if (!zfs_validate_name(hdl, zc.zc_value, ZFS_TYPE_SNAPSHOT))
3335 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3336 
3337 	(void) strcpy(zc.zc_name, zc.zc_value);
3338 	if (drrb->drr_fromguid) {
3339 		/* incremental backup stream */
3340 		zfs_handle_t *h;
3341 
3342 		/* do the recvbackup ioctl to the containing fs */
3343 		*strchr(zc.zc_name, '@') = '\0';
3344 
3345 		/* make sure destination fs exists */
3346 		h = zfs_open(hdl, zc.zc_name,
3347 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3348 		if (h == NULL)
3349 			return (-1);
3350 		if (!dryrun) {
3351 			/*
3352 			 * We need to unmount all the dependents of the dataset
3353 			 * and the dataset itself. If it's a volume
3354 			 * then remove device link.
3355 			 */
3356 			if (h->zfs_type == ZFS_TYPE_FILESYSTEM) {
3357 				clp = changelist_gather(h, ZFS_PROP_NAME, 0);
3358 				if (clp == NULL)
3359 					return (-1);
3360 				if (changelist_prefix(clp) != 0) {
3361 					changelist_free(clp);
3362 					return (-1);
3363 				}
3364 			} else {
3365 				if (zvol_remove_link(hdl, h->zfs_name) != 0) {
3366 					zfs_close(h);
3367 					return (-1);
3368 				}
3369 
3370 			}
3371 		}
3372 		zfs_close(h);
3373 	} else {
3374 		/* full backup stream */
3375 
3376 		/* Make sure destination fs does not exist */
3377 		*strchr(zc.zc_name, '@') = '\0';
3378 		if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3379 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3380 			    "destination '%s' exists"), zc.zc_name);
3381 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3382 		}
3383 
3384 		if (strchr(zc.zc_name, '/') == NULL) {
3385 			/*
3386 			 * they're trying to do a recv into a
3387 			 * nonexistant topmost filesystem.
3388 			 */
3389 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3390 			    "destination does not exist"), zc.zc_name);
3391 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3392 		}
3393 
3394 		/* Do the recvbackup ioctl to the fs's parent. */
3395 		*strrchr(zc.zc_name, '/') = '\0';
3396 
3397 		if (isprefix && (err = create_parents(hdl,
3398 		    zc.zc_value, strlen(tosnap))) != 0) {
3399 			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
3400 		}
3401 
3402 	}
3403 
3404 	zc.zc_cookie = infd;
3405 	zc.zc_guid = force;
3406 	if (verbose) {
3407 		(void) printf("%s %s stream of %s into %s\n",
3408 		    dryrun ? "would receive" : "receiving",
3409 		    drrb->drr_fromguid ? "incremental" : "full",
3410 		    drr.drr_u.drr_begin.drr_toname,
3411 		    zc.zc_value);
3412 		(void) fflush(stdout);
3413 	}
3414 	if (dryrun)
3415 		return (0);
3416 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECVBACKUP, &zc);
3417 	if (ioctl_err != 0) {
3418 		switch (errno) {
3419 		case ENODEV:
3420 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3421 			    "most recent snapshot does not match incremental "
3422 			    "source"));
3423 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3424 			break;
3425 		case ETXTBSY:
3426 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3427 			    "destination has been modified since most recent "
3428 			    "snapshot"));
3429 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3430 			break;
3431 		case EEXIST:
3432 			if (drrb->drr_fromguid == 0) {
3433 				/* it's the containing fs that exists */
3434 				cp = strchr(zc.zc_value, '@');
3435 				*cp = '\0';
3436 			}
3437 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3438 			    "destination already exists"));
3439 			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
3440 			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3441 			    zc.zc_value);
3442 			break;
3443 		case EINVAL:
3444 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3445 			break;
3446 		case ECKSUM:
3447 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3448 			    "invalid stream (checksum mismatch)"));
3449 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3450 			break;
3451 		default:
3452 			(void) zfs_standard_error(hdl, errno, errbuf);
3453 		}
3454 	}
3455 
3456 	/*
3457 	 * Mount or recreate the /dev links for the target filesystem
3458 	 * (if created, or if we tore them down to do an incremental
3459 	 * restore), and the /dev links for the new snapshot (if
3460 	 * created). Also mount any children of the target filesystem
3461 	 * if we did an incremental receive.
3462 	 */
3463 	cp = strchr(zc.zc_value, '@');
3464 	if (cp && (ioctl_err == 0 || drrb->drr_fromguid)) {
3465 		zfs_handle_t *h;
3466 
3467 		*cp = '\0';
3468 		h = zfs_open(hdl, zc.zc_value,
3469 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3470 		*cp = '@';
3471 		if (h) {
3472 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
3473 				err = zvol_create_link(hdl, h->zfs_name);
3474 				if (err == 0 && ioctl_err == 0)
3475 					err = zvol_create_link(hdl,
3476 					    zc.zc_value);
3477 			} else {
3478 				if (drrb->drr_fromguid) {
3479 					err = changelist_postfix(clp);
3480 					changelist_free(clp);
3481 				} else {
3482 					err = zfs_mount(h, NULL, 0);
3483 				}
3484 			}
3485 		zfs_close(h);
3486 		}
3487 	}
3488 
3489 	if (err || ioctl_err)
3490 		return (-1);
3491 
3492 	if (verbose) {
3493 		char buf1[64];
3494 		char buf2[64];
3495 		uint64_t bytes = zc.zc_cookie;
3496 		time_t delta = time(NULL) - begin_time;
3497 		if (delta == 0)
3498 			delta = 1;
3499 		zfs_nicenum(bytes, buf1, sizeof (buf1));
3500 		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3501 
3502 		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3503 		    buf1, delta, buf2);
3504 	}
3505 
3506 	return (0);
3507 }
3508 
/*
 * Destroy any more recent snapshots.  We invoke this callback on any dependents
 * of the snapshot first.  If the 'cb_dependent' member is non-zero, then this
 * is a dependent and we should just destroy it without checking the transaction
 * group.
 *
 * This structure is the state zfs_rollback() hands to the rollback_destroy()
 * callback.
 */
typedef struct rollback_data {
	const char	*cb_target;		/* the snapshot */
	/* creation txg of the target snapshot (ZFS_PROP_CREATETXG) */
	uint64_t	cb_create;		/* creation time reference */
	prop_changelist_t *cb_clp;		/* changelist pointer */
	/* set to 1 by the callback when a destroy or dependent walk fails */
	int		cb_error;
	/* B_TRUE only while the callback is walking a snapshot's dependents */
	boolean_t	cb_dependent;
} rollback_data_t;
3522 
3523 static int
3524 rollback_destroy(zfs_handle_t *zhp, void *data)
3525 {
3526 	rollback_data_t *cbp = data;
3527 
3528 	if (!cbp->cb_dependent) {
3529 		if (strcmp(zhp->zfs_name, cbp->cb_target) != 0 &&
3530 		    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
3531 		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
3532 		    cbp->cb_create) {
3533 			char *logstr;
3534 
3535 			cbp->cb_dependent = B_TRUE;
3536 			if (zfs_iter_dependents(zhp, B_FALSE, rollback_destroy,
3537 			    cbp) != 0)
3538 				cbp->cb_error = 1;
3539 			cbp->cb_dependent = B_FALSE;
3540 
3541 			logstr = zhp->zfs_hdl->libzfs_log_str;
3542 			zhp->zfs_hdl->libzfs_log_str = NULL;
3543 			if (zfs_destroy(zhp) != 0)
3544 				cbp->cb_error = 1;
3545 			else
3546 				changelist_remove(zhp, cbp->cb_clp);
3547 			zhp->zfs_hdl->libzfs_log_str = logstr;
3548 		}
3549 	} else {
3550 		if (zfs_destroy(zhp) != 0)
3551 			cbp->cb_error = 1;
3552 		else
3553 			changelist_remove(zhp, cbp->cb_clp);
3554 	}
3555 
3556 	zfs_close(zhp);
3557 	return (0);
3558 }
3559 
3560 /*
3561  * Rollback the dataset to its latest snapshot.
3562  */
3563 static int
3564 do_rollback(zfs_handle_t *zhp)
3565 {
3566 	int ret;
3567 	zfs_cmd_t zc = { 0 };
3568 
3569 	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM ||
3570 	    zhp->zfs_type == ZFS_TYPE_VOLUME);
3571 
3572 	if (zhp->zfs_type == ZFS_TYPE_VOLUME &&
3573 	    zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
3574 		return (-1);
3575 
3576 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3577 
3578 	if (ZFS_IS_VOLUME(zhp))
3579 		zc.zc_objset_type = DMU_OST_ZVOL;
3580 	else
3581 		zc.zc_objset_type = DMU_OST_ZFS;
3582 
3583 	/*
3584 	 * We rely on the consumer to verify that there are no newer snapshots
3585 	 * for the given dataset.  Given these constraints, we can simply pass
3586 	 * the name on to the ioctl() call.  There is still an unlikely race
3587 	 * condition where the user has taken a snapshot since we verified that
3588 	 * this was the most recent.
3589 	 */
3590 	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_ROLLBACK, &zc)) != 0) {
3591 		(void) zfs_standard_error_fmt(zhp->zfs_hdl, errno,
3592 		    dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
3593 		    zhp->zfs_name);
3594 	} else if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
3595 		ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
3596 	}
3597 
3598 	return (ret);
3599 }
3600 
3601 /*
3602  * Given a dataset, rollback to a specific snapshot, discarding any
3603  * data changes since then and making it the active dataset.
3604  *
3605  * Any snapshots more recent than the target are destroyed, along with
3606  * their dependents.
3607  */
3608 int
3609 zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, int flag)
3610 {
3611 	int ret;
3612 	rollback_data_t cb = { 0 };
3613 	prop_changelist_t *clp;
3614 
3615 	/*
3616 	 * Unmount all dependendents of the dataset and the dataset itself.
3617 	 * The list we need to gather is the same as for doing rename
3618 	 */
3619 	clp = changelist_gather(zhp, ZFS_PROP_NAME, flag ? MS_FORCE: 0);
3620 	if (clp == NULL)
3621 		return (-1);
3622 
3623 	if ((ret = changelist_prefix(clp)) != 0)
3624 		goto out;
3625 
3626 	/*
3627 	 * Destroy all recent snapshots and its dependends.
3628 	 */
3629 	cb.cb_target = snap->zfs_name;
3630 	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
3631 	cb.cb_clp = clp;
3632 	(void) zfs_iter_children(zhp, rollback_destroy, &cb);
3633 
3634 	if ((ret = cb.cb_error) != 0) {
3635 		(void) changelist_postfix(clp);
3636 		goto out;
3637 	}
3638 
3639 	/*
3640 	 * Now that we have verified that the snapshot is the latest,
3641 	 * rollback to the given snapshot.
3642 	 */
3643 	ret = do_rollback(zhp);
3644 
3645 	if (ret != 0) {
3646 		(void) changelist_postfix(clp);
3647 		goto out;
3648 	}
3649 
3650 	/*
3651 	 * We only want to re-mount the filesystem if it was mounted in the
3652 	 * first place.
3653 	 */
3654 	ret = changelist_postfix(clp);
3655 
3656 out:
3657 	changelist_free(clp);
3658 	return (ret);
3659 }
3660 
3661 /*
3662  * Iterate over all dependents for a given dataset.  This includes both
3663  * hierarchical dependents (children) and data dependents (snapshots and
3664  * clones).  The bulk of the processing occurs in get_dependents() in
3665  * libzfs_graph.c.
3666  */
3667 int
3668 zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion,
3669     zfs_iter_f func, void *data)
3670 {
3671 	char **dependents;
3672 	size_t count;
3673 	int i;
3674 	zfs_handle_t *child;
3675 	int ret = 0;
3676 
3677 	if (get_dependents(zhp->zfs_hdl, allowrecursion, zhp->zfs_name,
3678 	    &dependents, &count) != 0)
3679 		return (-1);
3680 
3681 	for (i = 0; i < count; i++) {
3682 		if ((child = make_dataset_handle(zhp->zfs_hdl,
3683 		    dependents[i])) == NULL)
3684 			continue;
3685 
3686 		if ((ret = func(child, data)) != 0)
3687 			break;
3688 	}
3689 
3690 	for (i = 0; i < count; i++)
3691 		free(dependents[i]);
3692 	free(dependents);
3693 
3694 	return (ret);
3695 }
3696 
3697 /*
3698  * Renames the given dataset.
3699  */
3700 int
3701 zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
3702 {
3703 	int ret;
3704 	zfs_cmd_t zc = { 0 };
3705 	char *delim;
3706 	prop_changelist_t *cl = NULL;
3707 	zfs_handle_t *zhrp = NULL;
3708 	char *parentname = NULL;
3709 	char parent[ZFS_MAXNAMELEN];
3710 	libzfs_handle_t *hdl = zhp->zfs_hdl;
3711 	char errbuf[1024];
3712 
3713 	/* if we have the same exact name, just return success */
3714 	if (strcmp(zhp->zfs_name, target) == 0)
3715 		return (0);
3716 
3717 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3718 	    "cannot rename to '%s'"), target);
3719 
3720 	/*
3721 	 * Make sure the target name is valid
3722 	 */
3723 	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
3724 		if ((strchr(target, '@') == NULL) ||
3725 		    *target == '@') {
3726 			/*
3727 			 * Snapshot target name is abbreviated,
3728 			 * reconstruct full dataset name
3729 			 */
3730 			(void) strlcpy(parent, zhp->zfs_name,
3731 			    sizeof (parent));
3732 			delim = strchr(parent, '@');
3733 			if (strchr(target, '@') == NULL)
3734 				*(++delim) = '\0';
3735 			else
3736 				*delim = '\0';
3737 			(void) strlcat(parent, target, sizeof (parent));
3738 			target = parent;
3739 		} else {
3740 			/*
3741 			 * Make sure we're renaming within the same dataset.
3742 			 */
3743 			delim = strchr(target, '@');
3744 			if (strncmp(zhp->zfs_name, target, delim - target)
3745 			    != 0 || zhp->zfs_name[delim - target] != '@') {
3746 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3747 				    "snapshots must be part of same "
3748 				    "dataset"));
3749 				return (zfs_error(hdl, EZFS_CROSSTARGET,
3750 				    errbuf));
3751 			}
3752 		}
3753 		if (!zfs_validate_name(hdl, target, zhp->zfs_type))
3754 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3755 	} else {
3756 		if (recursive) {
3757 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3758 			    "recursive rename must be a snapshot"));
3759 			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
3760 		}
3761 
3762 		if (!zfs_validate_name(hdl, target, zhp->zfs_type))
3763 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3764 		uint64_t unused;
3765 
3766 		/* validate parents */
3767 		if (check_parents(hdl, target, &unused, B_FALSE, NULL) != 0)
3768 			return (-1);
3769 
3770 		(void) parent_name(target, parent, sizeof (parent));
3771 
3772 		/* make sure we're in the same pool */
3773 		verify((delim = strchr(target, '/')) != NULL);
3774 		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
3775 		    zhp->zfs_name[delim - target] != '/') {
3776 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3777 			    "datasets must be within same pool"));
3778 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
3779 		}
3780 
3781 		/* new name cannot be a child of the current dataset name */
3782 		if (strncmp(parent, zhp->zfs_name,
3783 		    strlen(zhp->zfs_name)) == 0) {
3784 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3785 			    "New dataset name cannot be a descendent of "
3786 			    "current dataset name"));
3787 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3788 		}
3789 	}
3790 
3791 	(void) snprintf(errbuf, sizeof (errbuf),
3792 	    dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name);
3793 
3794 	if (getzoneid() == GLOBAL_ZONEID &&
3795 	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
3796 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3797 		    "dataset is used in a non-global zone"));
3798 		return (zfs_error(hdl, EZFS_ZONED, errbuf));
3799 	}
3800 
3801 	if (recursive) {
3802 		struct destroydata dd;
3803 
3804 		parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
3805 		if (parentname == NULL) {
3806 			ret = -1;
3807 			goto error;
3808 		}
3809 		delim = strchr(parentname, '@');
3810 		*delim = '\0';
3811 		zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET);
3812 		if (zhrp == NULL) {
3813 			ret = -1;
3814 			goto error;
3815 		}
3816 
3817 		dd.snapname = delim + 1;
3818 		dd.gotone = B_FALSE;
3819 		dd.closezhp = B_TRUE;
3820 
3821 		/* We remove any zvol links prior to renaming them */
3822 		ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd);
3823 		if (ret) {
3824 			goto error;
3825 		}
3826 	} else {
3827 		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0)) == NULL)
3828 			return (-1);
3829 
3830 		if (changelist_haszonedchild(cl)) {
3831 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3832 			    "child dataset with inherited mountpoint is used "
3833 			    "in a non-global zone"));
3834 			(void) zfs_error(hdl, EZFS_ZONED, errbuf);
3835 			goto error;
3836 		}
3837 
3838 		if ((ret = changelist_prefix(cl)) != 0)
3839 			goto error;
3840 	}
3841 
3842 	if (ZFS_IS_VOLUME(zhp))
3843 		zc.zc_objset_type = DMU_OST_ZVOL;
3844 	else
3845 		zc.zc_objset_type = DMU_OST_ZFS;
3846 
3847 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3848 	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
3849 
3850 	zc.zc_cookie = recursive;
3851 
3852 	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) {
3853 		/*
3854 		 * if it was recursive, the one that actually failed will
3855 		 * be in zc.zc_name
3856 		 */
3857 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3858 		    "cannot rename to '%s'"), zc.zc_name);
3859 
3860 		if (recursive && errno == EEXIST) {
3861 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3862 			    "a child dataset already has a snapshot "
3863 			    "with the new name"));
3864 			(void) zfs_error(hdl, EZFS_EXISTS, errbuf);
3865 		} else {
3866 			(void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
3867 		}
3868 
3869 		/*
3870 		 * On failure, we still want to remount any filesystems that
3871 		 * were previously mounted, so we don't alter the system state.
3872 		 */
3873 		if (recursive) {
3874 			struct createdata cd;
3875 
3876 			/* only create links for datasets that had existed */
3877 			cd.cd_snapname = delim + 1;
3878 			cd.cd_ifexists = B_TRUE;
3879 			(void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
3880 			    &cd);
3881 		} else {
3882 			(void) changelist_postfix(cl);
3883 		}
3884 	} else {
3885 		if (recursive) {
3886 			struct createdata cd;
3887 
3888 			/* only create links for datasets that had existed */
3889 			cd.cd_snapname = strchr(target, '@') + 1;
3890 			cd.cd_ifexists = B_TRUE;
3891 			ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
3892 			    &cd);
3893 		} else {
3894 			changelist_rename(cl, zfs_get_name(zhp), target);
3895 			ret = changelist_postfix(cl);
3896 		}
3897 	}
3898 
3899 error:
3900 	if (parentname) {
3901 		free(parentname);
3902 	}
3903 	if (zhrp) {
3904 		zfs_close(zhrp);
3905 	}
3906 	if (cl) {
3907 		changelist_free(cl);
3908 	}
3909 	return (ret);
3910 }
3911 
3912 /*
3913  * Given a zvol dataset, issue the ioctl to create the appropriate minor node,
3914  * poke devfsadm to create the /dev link, and then wait for the link to appear.
3915  */
3916 int
3917 zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
3918 {
3919 	return (zvol_create_link_common(hdl, dataset, B_FALSE));
3920 }
3921 
3922 static int
3923 zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
3924 {
3925 	zfs_cmd_t zc = { 0 };
3926 	di_devlink_handle_t dhdl;
3927 	priv_set_t *priv_effective;
3928 	int privileged;
3929 
3930 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
3931 
3932 	/*
3933 	 * Issue the appropriate ioctl.
3934 	 */
3935 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
3936 		switch (errno) {
3937 		case EEXIST:
3938 			/*
3939 			 * Silently ignore the case where the link already
3940 			 * exists.  This allows 'zfs volinit' to be run multiple
3941 			 * times without errors.
3942 			 */
3943 			return (0);
3944 
3945 		case ENOENT:
3946 			/*
3947 			 * Dataset does not exist in the kernel.  If we
3948 			 * don't care (see zfs_rename), then ignore the
3949 			 * error quietly.
3950 			 */
3951 			if (ifexists) {
3952 				return (0);
3953 			}
3954 
3955 			/* FALLTHROUGH */
3956 
3957 		default:
3958 			return (zfs_standard_error_fmt(hdl, errno,
3959 			    dgettext(TEXT_DOMAIN, "cannot create device links "
3960 			    "for '%s'"), dataset));
3961 		}
3962 	}
3963 
3964 	/*
3965 	 * If privileged call devfsadm and wait for the links to
3966 	 * magically appear.
3967 	 * Otherwise, print out an informational message.
3968 	 */
3969 
3970 	priv_effective = priv_allocset();
3971 	(void) getppriv(PRIV_EFFECTIVE, priv_effective);
3972 	privileged = (priv_isfullset(priv_effective) == B_TRUE);
3973 	priv_freeset(priv_effective);
3974 
3975 	if (privileged) {
3976 		if ((dhdl = di_devlink_init(ZFS_DRIVER,
3977 		    DI_MAKE_LINK)) == NULL) {
3978 			zfs_error_aux(hdl, strerror(errno));
3979 			(void) zfs_standard_error_fmt(hdl, EZFS_DEVLINKS,
3980 			    dgettext(TEXT_DOMAIN, "cannot create device links "
3981 			    "for '%s'"), dataset);
3982 			(void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
3983 			return (-1);
3984 		} else {
3985 			(void) di_devlink_fini(&dhdl);
3986 		}
3987 	} else {
3988 		char pathname[MAXPATHLEN];
3989 		struct stat64 statbuf;
3990 		int i;
3991 
3992 #define	MAX_WAIT	10
3993 
3994 		/*
3995 		 * This is the poor mans way of waiting for the link
3996 		 * to show up.  If after 10 seconds we still don't
3997 		 * have it, then print out a message.
3998 		 */
3999 		(void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s",
4000 		    dataset);
4001 
4002 		for (i = 0; i != MAX_WAIT; i++) {
4003 			if (stat64(pathname, &statbuf) == 0)
4004 				break;
4005 			(void) sleep(1);
4006 		}
4007 		if (i == MAX_WAIT)
4008 			(void) printf(gettext("%s may not be immediately "
4009 			    "available\n"), pathname);
4010 	}
4011 
4012 	return (0);
4013 }
4014 
4015 /*
4016  * Remove a minor node for the given zvol and the associated /dev links.
4017  */
4018 int
4019 zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
4020 {
4021 	zfs_cmd_t zc = { 0 };
4022 
4023 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
4024 
4025 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
4026 		switch (errno) {
4027 		case ENXIO:
4028 			/*
4029 			 * Silently ignore the case where the link no longer
4030 			 * exists, so that 'zfs volfini' can be run multiple
4031 			 * times without errors.
4032 			 */
4033 			return (0);
4034 
4035 		default:
4036 			return (zfs_standard_error_fmt(hdl, errno,
4037 			    dgettext(TEXT_DOMAIN, "cannot remove device "
4038 			    "links for '%s'"), dataset));
4039 		}
4040 	}
4041 
4042 	return (0);
4043 }
4044 
4045 nvlist_t *
4046 zfs_get_user_props(zfs_handle_t *zhp)
4047 {
4048 	return (zhp->zfs_user_props);
4049 }
4050 
4051 /*
4052  * This function is used by 'zfs list' to determine the exact set of columns to
4053  * display, and their maximum widths.  This does two main things:
4054  *
4055  *      - If this is a list of all properties, then expand the list to include
4056  *        all native properties, and set a flag so that for each dataset we look
4057  *        for new unique user properties and add them to the list.
4058  *
4059  *      - For non fixed-width properties, keep track of the maximum width seen
4060  *        so that we can size the column appropriately.
4061  */
4062 int
4063 zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
4064 {
4065 	libzfs_handle_t *hdl = zhp->zfs_hdl;
4066 	zprop_list_t *entry;
4067 	zprop_list_t **last, **start;
4068 	nvlist_t *userprops, *propval;
4069 	nvpair_t *elem;
4070 	char *strval;
4071 	char buf[ZFS_MAXPROPLEN];
4072 
4073 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0)
4074 		return (-1);
4075 
4076 	userprops = zfs_get_user_props(zhp);
4077 
4078 	entry = *plp;
4079 	if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) {
4080 		/*
4081 		 * Go through and add any user properties as necessary.  We
4082 		 * start by incrementing our list pointer to the first
4083 		 * non-native property.
4084 		 */
4085 		start = plp;
4086 		while (*start != NULL) {
4087 			if ((*start)->pl_prop == ZPROP_INVAL)
4088 				break;
4089 			start = &(*start)->pl_next;
4090 		}
4091 
4092 		elem = NULL;
4093 		while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) {
4094 			/*
4095 			 * See if we've already found this property in our list.
4096 			 */
4097 			for (last = start; *last != NULL;
4098 			    last = &(*last)->pl_next) {
4099 				if (strcmp((*last)->pl_user_prop,
4100 				    nvpair_name(elem)) == 0)
4101 					break;
4102 			}
4103 
4104 			if (*last == NULL) {
4105 				if ((entry = zfs_alloc(hdl,
4106 				    sizeof (zprop_list_t))) == NULL ||
4107 				    ((entry->pl_user_prop = zfs_strdup(hdl,
4108 				    nvpair_name(elem)))) == NULL) {
4109 					free(entry);
4110 					return (-1);
4111 				}
4112 
4113 				entry->pl_prop = ZPROP_INVAL;
4114 				entry->pl_width = strlen(nvpair_name(elem));
4115 				entry->pl_all = B_TRUE;
4116 				*last = entry;
4117 			}
4118 		}
4119 	}
4120 
4121 	/*
4122 	 * Now go through and check the width of any non-fixed columns
4123 	 */
4124 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
4125 		if (entry->pl_fixed)
4126 			continue;
4127 
4128 		if (entry->pl_prop != ZPROP_INVAL) {
4129 			if (zfs_prop_get(zhp, entry->pl_prop,
4130 			    buf, sizeof (buf), NULL, NULL, 0, B_FALSE) == 0) {
4131 				if (strlen(buf) > entry->pl_width)
4132 					entry->pl_width = strlen(buf);
4133 			}
4134 		} else if (nvlist_lookup_nvlist(userprops,
4135 		    entry->pl_user_prop, &propval)  == 0) {
4136 			verify(nvlist_lookup_string(propval,
4137 			    ZPROP_VALUE, &strval) == 0);
4138 			if (strlen(strval) > entry->pl_width)
4139 				entry->pl_width = strlen(strval);
4140 		}
4141 	}
4142 
4143 	return (0);
4144 }
4145 
4146 int
4147 zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred)
4148 {
4149 	zfs_cmd_t zc = { 0 };
4150 	nvlist_t *nvp;
4151 	gid_t gid;
4152 	uid_t uid;
4153 	const gid_t *groups;
4154 	int group_cnt;
4155 	int error;
4156 
4157 	if (nvlist_alloc(&nvp, NV_UNIQUE_NAME, 0) != 0)
4158 		return (no_memory(hdl));
4159 
4160 	uid = ucred_geteuid(cred);
4161 	gid = ucred_getegid(cred);
4162 	group_cnt = ucred_getgroups(cred, &groups);
4163 
4164 	if (uid == (uid_t)-1 || gid == (uid_t)-1 || group_cnt == (uid_t)-1)
4165 		return (1);
4166 
4167 	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_UID, uid) != 0) {
4168 		nvlist_free(nvp);
4169 		return (1);
4170 	}
4171 
4172 	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_GID, gid) != 0) {
4173 		nvlist_free(nvp);
4174 		return (1);
4175 	}
4176 
4177 	if (nvlist_add_uint32_array(nvp,
4178 	    ZFS_DELEG_PERM_GROUPS, (uint32_t *)groups, group_cnt) != 0) {
4179 		nvlist_free(nvp);
4180 		return (1);
4181 	}
4182 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
4183 
4184 	if (zcmd_write_src_nvlist(hdl, &zc, nvp))
4185 		return (-1);
4186 
4187 	error = ioctl(hdl->libzfs_fd, ZFS_IOC_ISCSI_PERM_CHECK, &zc);
4188 	nvlist_free(nvp);
4189 	return (error);
4190 }
4191 
4192 int
4193 zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
4194     void *export, void *sharetab, int sharemax, boolean_t share_on)
4195 {
4196 	zfs_cmd_t zc = { 0 };
4197 	int error;
4198 
4199 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
4200 	(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
4201 	zc.zc_share.z_sharedata = (uint64_t)(uintptr_t)sharetab;
4202 	zc.zc_share.z_exportdata = (uint64_t)(uintptr_t)export;
4203 	zc.zc_share.z_sharetype = share_on;
4204 	zc.zc_share.z_sharemax = sharemax;
4205 
4206 	error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc);
4207 	return (error);
4208 }
4209