xref: /illumos-gate/usr/src/lib/libzfs/common/libzfs_dataset.c (revision da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <assert.h>
30 #include <ctype.h>
31 #include <errno.h>
32 #include <libdevinfo.h>
33 #include <libintl.h>
34 #include <math.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <unistd.h>
39 #include <zone.h>
40 #include <fcntl.h>
41 #include <sys/mntent.h>
42 #include <sys/mnttab.h>
43 #include <sys/mount.h>
44 #include <sys/avl.h>
45 #include <priv.h>
46 #include <pwd.h>
47 #include <grp.h>
48 #include <stddef.h>
49 #include <ucred.h>
50 
51 #include <sys/spa.h>
52 #include <sys/zio.h>
53 #include <sys/zap.h>
54 #include <sys/zfs_i18n.h>
55 #include <libzfs.h>
56 
57 #include "zfs_namecheck.h"
58 #include "zfs_prop.h"
59 #include "libzfs_impl.h"
60 #include "zfs_deleg.h"
61 
62 static int create_parents(libzfs_handle_t *, char *, int);
63 static int zvol_create_link_common(libzfs_handle_t *, const char *, int);
64 
65 /*
66  * Given a single type (not a mask of types), return the type in a human
67  * readable form.
68  */
69 const char *
70 zfs_type_to_name(zfs_type_t type)
71 {
72 	switch (type) {
73 	case ZFS_TYPE_FILESYSTEM:
74 		return (dgettext(TEXT_DOMAIN, "filesystem"));
75 	case ZFS_TYPE_SNAPSHOT:
76 		return (dgettext(TEXT_DOMAIN, "snapshot"));
77 	case ZFS_TYPE_VOLUME:
78 		return (dgettext(TEXT_DOMAIN, "volume"));
79 	}
80 
81 	return (NULL);
82 }
83 
84 /*
85  * Given a path and mask of ZFS types, return a string describing this dataset.
86  * This is used when we fail to open a dataset and we cannot get an exact type.
87  * We guess what the type would have been based on the path and the mask of
88  * acceptable types.
89  */
90 static const char *
91 path_to_str(const char *path, int types)
92 {
93 	/*
94 	 * When given a single type, always report the exact type.
95 	 */
96 	if (types == ZFS_TYPE_SNAPSHOT)
97 		return (dgettext(TEXT_DOMAIN, "snapshot"));
98 	if (types == ZFS_TYPE_FILESYSTEM)
99 		return (dgettext(TEXT_DOMAIN, "filesystem"));
100 	if (types == ZFS_TYPE_VOLUME)
101 		return (dgettext(TEXT_DOMAIN, "volume"));
102 
103 	/*
104 	 * The user is requesting more than one type of dataset.  If this is the
105 	 * case, consult the path itself.  If we're looking for a snapshot, and
106 	 * a '@' is found, then report it as "snapshot".  Otherwise, remove the
107 	 * snapshot attribute and try again.
108 	 */
109 	if (types & ZFS_TYPE_SNAPSHOT) {
110 		if (strchr(path, '@') != NULL)
111 			return (dgettext(TEXT_DOMAIN, "snapshot"));
112 		return (path_to_str(path, types & ~ZFS_TYPE_SNAPSHOT));
113 	}
114 
115 
116 	/*
117 	 * The user has requested either filesystems or volumes.
118 	 * We have no way of knowing a priori what type this would be, so always
119 	 * report it as "filesystem" or "volume", our two primitive types.
120 	 */
121 	if (types & ZFS_TYPE_FILESYSTEM)
122 		return (dgettext(TEXT_DOMAIN, "filesystem"));
123 
124 	assert(types & ZFS_TYPE_VOLUME);
125 	return (dgettext(TEXT_DOMAIN, "volume"));
126 }
127 
128 /*
129  * Validate a ZFS path.  This is used even before trying to open the dataset, to
130  * provide a more meaningful error message.  We place a more useful message in
131  * 'buf' detailing exactly why the name was not valid.
132  */
133 static int
134 zfs_validate_name(libzfs_handle_t *hdl, const char *path, int type,
135     boolean_t modifying)
136 {
137 	namecheck_err_t why;
138 	char what;
139 
140 	if (dataset_namecheck(path, &why, &what) != 0) {
141 		if (hdl != NULL) {
142 			switch (why) {
143 			case NAME_ERR_TOOLONG:
144 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
145 				    "name is too long"));
146 				break;
147 
148 			case NAME_ERR_LEADING_SLASH:
149 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
150 				    "leading slash in name"));
151 				break;
152 
153 			case NAME_ERR_EMPTY_COMPONENT:
154 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
155 				    "empty component in name"));
156 				break;
157 
158 			case NAME_ERR_TRAILING_SLASH:
159 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
160 				    "trailing slash in name"));
161 				break;
162 
163 			case NAME_ERR_INVALCHAR:
164 				zfs_error_aux(hdl,
165 				    dgettext(TEXT_DOMAIN, "invalid character "
166 				    "'%c' in name"), what);
167 				break;
168 
169 			case NAME_ERR_MULTIPLE_AT:
170 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
171 				    "multiple '@' delimiters in name"));
172 				break;
173 
174 			case NAME_ERR_NOLETTER:
175 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
176 				    "pool doesn't begin with a letter"));
177 				break;
178 
179 			case NAME_ERR_RESERVED:
180 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
181 				    "name is reserved"));
182 				break;
183 
184 			case NAME_ERR_DISKLIKE:
185 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
186 				    "reserved disk name"));
187 				break;
188 			}
189 		}
190 
191 		return (0);
192 	}
193 
194 	if (!(type & ZFS_TYPE_SNAPSHOT) && strchr(path, '@') != NULL) {
195 		if (hdl != NULL)
196 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
197 			    "snapshot delimiter '@' in filesystem name"));
198 		return (0);
199 	}
200 
201 	if (type == ZFS_TYPE_SNAPSHOT && strchr(path, '@') == NULL) {
202 		if (hdl != NULL)
203 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
204 			    "missing '@' delimiter in snapshot name"));
205 		return (0);
206 	}
207 
208 	if (modifying && strchr(path, '%') != NULL) {
209 		if (hdl != NULL)
210 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
211 			    "invalid character %c in name"), '%');
212 		return (0);
213 	}
214 
215 	return (-1);
216 }
217 
218 int
219 zfs_name_valid(const char *name, zfs_type_t type)
220 {
221 	return (zfs_validate_name(NULL, name, type, B_FALSE));
222 }
223 
224 /*
225  * This function takes the raw DSL properties, and filters out the user-defined
226  * properties into a separate nvlist.
227  */
228 static nvlist_t *
229 process_user_props(zfs_handle_t *zhp, nvlist_t *props)
230 {
231 	libzfs_handle_t *hdl = zhp->zfs_hdl;
232 	nvpair_t *elem;
233 	nvlist_t *propval;
234 	nvlist_t *nvl;
235 
236 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
237 		(void) no_memory(hdl);
238 		return (NULL);
239 	}
240 
241 	elem = NULL;
242 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
243 		if (!zfs_prop_user(nvpair_name(elem)))
244 			continue;
245 
246 		verify(nvpair_value_nvlist(elem, &propval) == 0);
247 		if (nvlist_add_nvlist(nvl, nvpair_name(elem), propval) != 0) {
248 			nvlist_free(nvl);
249 			(void) no_memory(hdl);
250 			return (NULL);
251 		}
252 	}
253 
254 	return (nvl);
255 }
256 
257 /*
258  * Utility function to gather stats (objset and zpl) for the given object.
259  */
260 static int
261 get_stats(zfs_handle_t *zhp)
262 {
263 	zfs_cmd_t zc = { 0 };
264 	libzfs_handle_t *hdl = zhp->zfs_hdl;
265 	nvlist_t *allprops, *userprops;
266 
267 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
268 
269 	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
270 		return (-1);
271 
272 	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
273 		if (errno == ENOMEM) {
274 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
275 				zcmd_free_nvlists(&zc);
276 				return (-1);
277 			}
278 		} else {
279 			zcmd_free_nvlists(&zc);
280 			return (-1);
281 		}
282 	}
283 
284 	zhp->zfs_dmustats = zc.zc_objset_stats; /* structure assignment */
285 
286 	(void) strlcpy(zhp->zfs_root, zc.zc_value, sizeof (zhp->zfs_root));
287 
288 	if (zcmd_read_dst_nvlist(hdl, &zc, &allprops) != 0) {
289 		zcmd_free_nvlists(&zc);
290 		return (-1);
291 	}
292 
293 	zcmd_free_nvlists(&zc);
294 
295 	if ((userprops = process_user_props(zhp, allprops)) == NULL) {
296 		nvlist_free(allprops);
297 		return (-1);
298 	}
299 
300 	nvlist_free(zhp->zfs_props);
301 	nvlist_free(zhp->zfs_user_props);
302 
303 	zhp->zfs_props = allprops;
304 	zhp->zfs_user_props = userprops;
305 
306 	return (0);
307 }
308 
309 /*
310  * Refresh the properties currently stored in the handle.
311  */
312 void
313 zfs_refresh_properties(zfs_handle_t *zhp)
314 {
315 	(void) get_stats(zhp);
316 }
317 
318 /*
319  * Makes a handle from the given dataset name.  Used by zfs_open() and
320  * zfs_iter_* to create child handles on the fly.
321  */
322 zfs_handle_t *
323 make_dataset_handle(libzfs_handle_t *hdl, const char *path)
324 {
325 	zfs_handle_t *zhp = calloc(sizeof (zfs_handle_t), 1);
326 	char *logstr;
327 
328 	if (zhp == NULL)
329 		return (NULL);
330 
331 	zhp->zfs_hdl = hdl;
332 
333 	/*
334 	 * Preserve history log string.
335 	 * any changes performed here will be
336 	 * logged as an internal event.
337 	 */
338 	logstr = zhp->zfs_hdl->libzfs_log_str;
339 	zhp->zfs_hdl->libzfs_log_str = NULL;
340 top:
341 	(void) strlcpy(zhp->zfs_name, path, sizeof (zhp->zfs_name));
342 
343 	if (get_stats(zhp) != 0) {
344 		zhp->zfs_hdl->libzfs_log_str = logstr;
345 		free(zhp);
346 		return (NULL);
347 	}
348 
349 	if (zhp->zfs_dmustats.dds_inconsistent) {
350 		zfs_cmd_t zc = { 0 };
351 
352 		/*
353 		 * If it is dds_inconsistent, then we've caught it in
354 		 * the middle of a 'zfs receive' or 'zfs destroy', and
355 		 * it is inconsistent from the ZPL's point of view, so
356 		 * can't be mounted.  However, it could also be that we
357 		 * have crashed in the middle of one of those
358 		 * operations, in which case we need to get rid of the
359 		 * inconsistent state.  We do that by either rolling
360 		 * back to the previous snapshot (which will fail if
361 		 * there is none), or destroying the filesystem.  Note
362 		 * that if we are still in the middle of an active
363 		 * 'receive' or 'destroy', then the rollback and destroy
364 		 * will fail with EBUSY and we will drive on as usual.
365 		 */
366 
367 		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
368 
369 		if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL) {
370 			(void) zvol_remove_link(hdl, zhp->zfs_name);
371 			zc.zc_objset_type = DMU_OST_ZVOL;
372 		} else {
373 			zc.zc_objset_type = DMU_OST_ZFS;
374 		}
375 
376 		/* If we can successfully roll it back, reget the stats */
377 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_ROLLBACK, &zc) == 0)
378 			goto top;
379 		/*
380 		 * If we can successfully destroy it, pretend that it
381 		 * never existed.
382 		 */
383 		if (ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc) == 0) {
384 			zhp->zfs_hdl->libzfs_log_str = logstr;
385 			free(zhp);
386 			errno = ENOENT;
387 			return (NULL);
388 		}
389 	}
390 
391 	/*
392 	 * We've managed to open the dataset and gather statistics.  Determine
393 	 * the high-level type.
394 	 */
395 	if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
396 		zhp->zfs_head_type = ZFS_TYPE_VOLUME;
397 	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
398 		zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
399 	else
400 		abort();
401 
402 	if (zhp->zfs_dmustats.dds_is_snapshot)
403 		zhp->zfs_type = ZFS_TYPE_SNAPSHOT;
404 	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZVOL)
405 		zhp->zfs_type = ZFS_TYPE_VOLUME;
406 	else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
407 		zhp->zfs_type = ZFS_TYPE_FILESYSTEM;
408 	else
409 		abort();	/* we should never see any other types */
410 
411 	zhp->zfs_hdl->libzfs_log_str = logstr;
412 	return (zhp);
413 }
414 
415 /*
416  * Opens the given snapshot, filesystem, or volume.   The 'types'
417  * argument is a mask of acceptable types.  The function will print an
418  * appropriate error message and return NULL if it can't be opened.
419  */
420 zfs_handle_t *
421 zfs_open(libzfs_handle_t *hdl, const char *path, int types)
422 {
423 	zfs_handle_t *zhp;
424 	char errbuf[1024];
425 
426 	(void) snprintf(errbuf, sizeof (errbuf),
427 	    dgettext(TEXT_DOMAIN, "cannot open '%s'"), path);
428 
429 	/*
430 	 * Validate the name before we even try to open it.
431 	 */
432 	if (!zfs_validate_name(hdl, path, ZFS_TYPE_DATASET, B_FALSE)) {
433 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
434 		    "invalid dataset name"));
435 		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
436 		return (NULL);
437 	}
438 
439 	/*
440 	 * Try to get stats for the dataset, which will tell us if it exists.
441 	 */
442 	errno = 0;
443 	if ((zhp = make_dataset_handle(hdl, path)) == NULL) {
444 		(void) zfs_standard_error(hdl, errno, errbuf);
445 		return (NULL);
446 	}
447 
448 	if (!(types & zhp->zfs_type)) {
449 		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
450 		zfs_close(zhp);
451 		return (NULL);
452 	}
453 
454 	return (zhp);
455 }
456 
457 /*
458  * Release a ZFS handle.  Nothing to do but free the associated memory.
459  */
460 void
461 zfs_close(zfs_handle_t *zhp)
462 {
463 	if (zhp->zfs_mntopts)
464 		free(zhp->zfs_mntopts);
465 	nvlist_free(zhp->zfs_props);
466 	nvlist_free(zhp->zfs_user_props);
467 	free(zhp);
468 }
469 
470 
471 /*
472  * Given an nvlist of properties to set, validates that they are correct, and
473  * parses any numeric properties (index, boolean, etc) if they are specified as
474  * strings.
475  */
476 static nvlist_t *
477 zfs_validate_properties(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
478     uint64_t zoned, zfs_handle_t *zhp, const char *errbuf)
479 {
480 	nvpair_t *elem;
481 	uint64_t intval;
482 	char *strval;
483 	zfs_prop_t prop;
484 	nvlist_t *ret;
485 	int chosen_normal = -1;
486 	int chosen_utf = -1;
487 
488 	if (type == ZFS_TYPE_SNAPSHOT) {
489 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
490 		    "snapshot properties cannot be modified"));
491 		(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
492 		return (NULL);
493 	}
494 
495 	if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
496 		(void) no_memory(hdl);
497 		return (NULL);
498 	}
499 
500 	elem = NULL;
501 	while ((elem = nvlist_next_nvpair(nvl, elem)) != NULL) {
502 		const char *propname = nvpair_name(elem);
503 
504 		/*
505 		 * Make sure this property is valid and applies to this type.
506 		 */
507 		if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
508 			if (!zfs_prop_user(propname)) {
509 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
510 				    "invalid property '%s'"), propname);
511 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
512 				goto error;
513 			}
514 
515 			/*
516 			 * If this is a user property, make sure it's a
517 			 * string, and that it's less than ZAP_MAXNAMELEN.
518 			 */
519 			if (nvpair_type(elem) != DATA_TYPE_STRING) {
520 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
521 				    "'%s' must be a string"), propname);
522 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
523 				goto error;
524 			}
525 
526 			if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
527 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
528 				    "property name '%s' is too long"),
529 				    propname);
530 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
531 				goto error;
532 			}
533 
534 			(void) nvpair_value_string(elem, &strval);
535 			if (nvlist_add_string(ret, propname, strval) != 0) {
536 				(void) no_memory(hdl);
537 				goto error;
538 			}
539 			continue;
540 		}
541 
542 		if (!zfs_prop_valid_for_type(prop, type)) {
543 			zfs_error_aux(hdl,
544 			    dgettext(TEXT_DOMAIN, "'%s' does not "
545 			    "apply to datasets of this type"), propname);
546 			(void) zfs_error(hdl, EZFS_PROPTYPE, errbuf);
547 			goto error;
548 		}
549 
550 		if (zfs_prop_readonly(prop) &&
551 		    (!zfs_prop_setonce(prop) || zhp != NULL)) {
552 			zfs_error_aux(hdl,
553 			    dgettext(TEXT_DOMAIN, "'%s' is readonly"),
554 			    propname);
555 			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
556 			goto error;
557 		}
558 
559 		if (zprop_parse_value(hdl, elem, prop, type, ret,
560 		    &strval, &intval, errbuf) != 0)
561 			goto error;
562 
563 		/*
564 		 * Perform some additional checks for specific properties.
565 		 */
566 		switch (prop) {
567 		case ZFS_PROP_VERSION:
568 		{
569 			int version;
570 
571 			if (zhp == NULL)
572 				break;
573 			version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
574 			if (intval < version) {
575 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
576 				    "Can not downgrade; already at version %u"),
577 				    version);
578 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
579 				goto error;
580 			}
581 			break;
582 		}
583 
584 		case ZFS_PROP_RECORDSIZE:
585 		case ZFS_PROP_VOLBLOCKSIZE:
586 			/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
587 			if (intval < SPA_MINBLOCKSIZE ||
588 			    intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) {
589 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
590 				    "'%s' must be power of 2 from %u "
591 				    "to %uk"), propname,
592 				    (uint_t)SPA_MINBLOCKSIZE,
593 				    (uint_t)SPA_MAXBLOCKSIZE >> 10);
594 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
595 				goto error;
596 			}
597 			break;
598 
599 		case ZFS_PROP_SHAREISCSI:
600 			if (strcmp(strval, "off") != 0 &&
601 			    strcmp(strval, "on") != 0 &&
602 			    strcmp(strval, "type=disk") != 0) {
603 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
604 				    "'%s' must be 'on', 'off', or 'type=disk'"),
605 				    propname);
606 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
607 				goto error;
608 			}
609 
610 			break;
611 
612 		case ZFS_PROP_MOUNTPOINT:
613 		{
614 			namecheck_err_t why;
615 
616 			if (strcmp(strval, ZFS_MOUNTPOINT_NONE) == 0 ||
617 			    strcmp(strval, ZFS_MOUNTPOINT_LEGACY) == 0)
618 				break;
619 
620 			if (mountpoint_namecheck(strval, &why)) {
621 				switch (why) {
622 				case NAME_ERR_LEADING_SLASH:
623 					zfs_error_aux(hdl,
624 					    dgettext(TEXT_DOMAIN,
625 					    "'%s' must be an absolute path, "
626 					    "'none', or 'legacy'"), propname);
627 					break;
628 				case NAME_ERR_TOOLONG:
629 					zfs_error_aux(hdl,
630 					    dgettext(TEXT_DOMAIN,
631 					    "component of '%s' is too long"),
632 					    propname);
633 					break;
634 				}
635 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
636 				goto error;
637 			}
638 		}
639 
640 			/*FALLTHRU*/
641 
642 		case ZFS_PROP_SHARESMB:
643 		case ZFS_PROP_SHARENFS:
644 			/*
645 			 * For the mountpoint and sharenfs or sharesmb
646 			 * properties, check if it can be set in a
647 			 * global/non-global zone based on
648 			 * the zoned property value:
649 			 *
650 			 *		global zone	    non-global zone
651 			 * --------------------------------------------------
652 			 * zoned=on	mountpoint (no)	    mountpoint (yes)
653 			 *		sharenfs (no)	    sharenfs (no)
654 			 *		sharesmb (no)	    sharesmb (no)
655 			 *
656 			 * zoned=off	mountpoint (yes)	N/A
657 			 *		sharenfs (yes)
658 			 *		sharesmb (yes)
659 			 */
660 			if (zoned) {
661 				if (getzoneid() == GLOBAL_ZONEID) {
662 					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
663 					    "'%s' cannot be set on "
664 					    "dataset in a non-global zone"),
665 					    propname);
666 					(void) zfs_error(hdl, EZFS_ZONED,
667 					    errbuf);
668 					goto error;
669 				} else if (prop == ZFS_PROP_SHARENFS ||
670 				    prop == ZFS_PROP_SHARESMB) {
671 					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
672 					    "'%s' cannot be set in "
673 					    "a non-global zone"), propname);
674 					(void) zfs_error(hdl, EZFS_ZONED,
675 					    errbuf);
676 					goto error;
677 				}
678 			} else if (getzoneid() != GLOBAL_ZONEID) {
679 				/*
680 				 * If zoned property is 'off', this must be in
681 				 * a globle zone. If not, something is wrong.
682 				 */
683 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
684 				    "'%s' cannot be set while dataset "
685 				    "'zoned' property is set"), propname);
686 				(void) zfs_error(hdl, EZFS_ZONED, errbuf);
687 				goto error;
688 			}
689 
690 			/*
691 			 * At this point, it is legitimate to set the
692 			 * property. Now we want to make sure that the
693 			 * property value is valid if it is sharenfs.
694 			 */
695 			if ((prop == ZFS_PROP_SHARENFS ||
696 			    prop == ZFS_PROP_SHARESMB) &&
697 			    strcmp(strval, "on") != 0 &&
698 			    strcmp(strval, "off") != 0) {
699 				zfs_share_proto_t proto;
700 
701 				if (prop == ZFS_PROP_SHARESMB)
702 					proto = PROTO_SMB;
703 				else
704 					proto = PROTO_NFS;
705 
706 				/*
707 				 * Must be an valid sharing protocol
708 				 * option string so init the libshare
709 				 * in order to enable the parser and
710 				 * then parse the options. We use the
711 				 * control API since we don't care about
712 				 * the current configuration and don't
713 				 * want the overhead of loading it
714 				 * until we actually do something.
715 				 */
716 
717 				if (zfs_init_libshare(hdl,
718 				    SA_INIT_CONTROL_API) != SA_OK) {
719 					/*
720 					 * An error occurred so we can't do
721 					 * anything
722 					 */
723 					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
724 					    "'%s' cannot be set: problem "
725 					    "in share initialization"),
726 					    propname);
727 					(void) zfs_error(hdl, EZFS_BADPROP,
728 					    errbuf);
729 					goto error;
730 				}
731 
732 				if (zfs_parse_options(strval, proto) != SA_OK) {
733 					/*
734 					 * There was an error in parsing so
735 					 * deal with it by issuing an error
736 					 * message and leaving after
737 					 * uninitializing the the libshare
738 					 * interface.
739 					 */
740 					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
741 					    "'%s' cannot be set to invalid "
742 					    "options"), propname);
743 					(void) zfs_error(hdl, EZFS_BADPROP,
744 					    errbuf);
745 					zfs_uninit_libshare(hdl);
746 					goto error;
747 				}
748 				zfs_uninit_libshare(hdl);
749 			}
750 
751 			break;
752 		case ZFS_PROP_UTF8ONLY:
753 			chosen_utf = (int)intval;
754 			break;
755 		case ZFS_PROP_NORMALIZE:
756 			chosen_normal = (int)intval;
757 			break;
758 		}
759 
760 		/*
761 		 * For changes to existing volumes, we have some additional
762 		 * checks to enforce.
763 		 */
764 		if (type == ZFS_TYPE_VOLUME && zhp != NULL) {
765 			uint64_t volsize = zfs_prop_get_int(zhp,
766 			    ZFS_PROP_VOLSIZE);
767 			uint64_t blocksize = zfs_prop_get_int(zhp,
768 			    ZFS_PROP_VOLBLOCKSIZE);
769 			char buf[64];
770 
771 			switch (prop) {
772 			case ZFS_PROP_RESERVATION:
773 				if (intval > volsize) {
774 					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
775 					    "'%s' is greater than current "
776 					    "volume size"), propname);
777 					(void) zfs_error(hdl, EZFS_BADPROP,
778 					    errbuf);
779 					goto error;
780 				}
781 				break;
782 
783 			case ZFS_PROP_VOLSIZE:
784 				if (intval % blocksize != 0) {
785 					zfs_nicenum(blocksize, buf,
786 					    sizeof (buf));
787 					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
788 					    "'%s' must be a multiple of "
789 					    "volume block size (%s)"),
790 					    propname, buf);
791 					(void) zfs_error(hdl, EZFS_BADPROP,
792 					    errbuf);
793 					goto error;
794 				}
795 
796 				if (intval == 0) {
797 					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
798 					    "'%s' cannot be zero"),
799 					    propname);
800 					(void) zfs_error(hdl, EZFS_BADPROP,
801 					    errbuf);
802 					goto error;
803 				}
804 				break;
805 			}
806 		}
807 	}
808 
809 	/*
810 	 * If normalization was chosen, but no UTF8 choice was made,
811 	 * enforce rejection of non-UTF8 names.
812 	 *
813 	 * If normalization was chosen, but rejecting non-UTF8 names
814 	 * was explicitly not chosen, it is an error.
815 	 */
816 	if (chosen_normal > ZFS_NORMALIZE_NONE && chosen_utf < 0) {
817 		if (nvlist_add_uint64(ret,
818 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), 1) != 0) {
819 			(void) no_memory(hdl);
820 			goto error;
821 		}
822 	} else if (chosen_normal > ZFS_NORMALIZE_NONE && chosen_utf == 0) {
823 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
824 		    "'%s' must be set 'on' if normalization chosen"),
825 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
826 		(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
827 		goto error;
828 	}
829 
830 	/*
831 	 * If this is an existing volume, and someone is setting the volsize,
832 	 * make sure that it matches the reservation, or add it if necessary.
833 	 */
834 	if (zhp != NULL && type == ZFS_TYPE_VOLUME &&
835 	    nvlist_lookup_uint64(ret, zfs_prop_to_name(ZFS_PROP_VOLSIZE),
836 	    &intval) == 0) {
837 		uint64_t old_volsize = zfs_prop_get_int(zhp,
838 		    ZFS_PROP_VOLSIZE);
839 		uint64_t old_reservation = zfs_prop_get_int(zhp,
840 		    ZFS_PROP_RESERVATION);
841 		uint64_t new_reservation;
842 
843 		if (old_volsize == old_reservation &&
844 		    nvlist_lookup_uint64(ret,
845 		    zfs_prop_to_name(ZFS_PROP_RESERVATION),
846 		    &new_reservation) != 0) {
847 			if (nvlist_add_uint64(ret,
848 			    zfs_prop_to_name(ZFS_PROP_RESERVATION),
849 			    intval) != 0) {
850 				(void) no_memory(hdl);
851 				goto error;
852 			}
853 		}
854 	}
855 
856 	return (ret);
857 
858 error:
859 	nvlist_free(ret);
860 	return (NULL);
861 }
862 
863 static int
864 zfs_get_perm_who(const char *who, zfs_deleg_who_type_t *who_type,
865     uint64_t *ret_who)
866 {
867 	struct passwd *pwd;
868 	struct group *grp;
869 	uid_t id;
870 
871 	if (*who_type == ZFS_DELEG_EVERYONE || *who_type == ZFS_DELEG_CREATE ||
872 	    *who_type == ZFS_DELEG_NAMED_SET) {
873 		*ret_who = -1;
874 		return (0);
875 	}
876 	if (who == NULL && !(*who_type == ZFS_DELEG_EVERYONE))
877 		return (EZFS_BADWHO);
878 
879 	if (*who_type == ZFS_DELEG_WHO_UNKNOWN &&
880 	    strcmp(who, "everyone") == 0) {
881 		*ret_who = -1;
882 		*who_type = ZFS_DELEG_EVERYONE;
883 		return (0);
884 	}
885 
886 	pwd = getpwnam(who);
887 	grp = getgrnam(who);
888 
889 	if ((*who_type == ZFS_DELEG_USER) && pwd) {
890 		*ret_who = pwd->pw_uid;
891 	} else if ((*who_type == ZFS_DELEG_GROUP) && grp) {
892 		*ret_who = grp->gr_gid;
893 	} else if (pwd) {
894 		*ret_who = pwd->pw_uid;
895 		*who_type = ZFS_DELEG_USER;
896 	} else if (grp) {
897 		*ret_who = grp->gr_gid;
898 		*who_type = ZFS_DELEG_GROUP;
899 	} else {
900 		char *end;
901 
902 		id = strtol(who, &end, 10);
903 		if (errno != 0 || *end != '\0') {
904 			return (EZFS_BADWHO);
905 		} else {
906 			*ret_who = id;
907 			if (*who_type == ZFS_DELEG_WHO_UNKNOWN)
908 				*who_type = ZFS_DELEG_USER;
909 		}
910 	}
911 
912 	return (0);
913 }
914 
915 static void
916 zfs_perms_add_to_nvlist(nvlist_t *who_nvp, char *name, nvlist_t *perms_nvp)
917 {
918 	if (perms_nvp != NULL) {
919 		verify(nvlist_add_nvlist(who_nvp,
920 		    name, perms_nvp) == 0);
921 	} else {
922 		verify(nvlist_add_boolean(who_nvp, name) == 0);
923 	}
924 }
925 
926 static void
927 helper(zfs_deleg_who_type_t who_type, uint64_t whoid, char *whostr,
928     zfs_deleg_inherit_t inherit, nvlist_t *who_nvp, nvlist_t *perms_nvp,
929     nvlist_t *sets_nvp)
930 {
931 	boolean_t do_perms, do_sets;
932 	char name[ZFS_MAX_DELEG_NAME];
933 
934 	do_perms = (nvlist_next_nvpair(perms_nvp, NULL) != NULL);
935 	do_sets = (nvlist_next_nvpair(sets_nvp, NULL) != NULL);
936 
937 	if (!do_perms && !do_sets)
938 		do_perms = do_sets = B_TRUE;
939 
940 	if (do_perms) {
941 		zfs_deleg_whokey(name, who_type, inherit,
942 		    (who_type == ZFS_DELEG_NAMED_SET) ?
943 		    whostr : (void *)&whoid);
944 		zfs_perms_add_to_nvlist(who_nvp, name, perms_nvp);
945 	}
946 	if (do_sets) {
947 		zfs_deleg_whokey(name, toupper(who_type), inherit,
948 		    (who_type == ZFS_DELEG_NAMED_SET) ?
949 		    whostr : (void *)&whoid);
950 		zfs_perms_add_to_nvlist(who_nvp, name, sets_nvp);
951 	}
952 }
953 
954 static void
955 zfs_perms_add_who_nvlist(nvlist_t *who_nvp, uint64_t whoid, void *whostr,
956     nvlist_t *perms_nvp, nvlist_t *sets_nvp,
957     zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit)
958 {
959 	if (who_type == ZFS_DELEG_NAMED_SET || who_type == ZFS_DELEG_CREATE) {
960 		helper(who_type, whoid, whostr, 0,
961 		    who_nvp, perms_nvp, sets_nvp);
962 	} else {
963 		if (inherit & ZFS_DELEG_PERM_LOCAL) {
964 			helper(who_type, whoid, whostr, ZFS_DELEG_LOCAL,
965 			    who_nvp, perms_nvp, sets_nvp);
966 		}
967 		if (inherit & ZFS_DELEG_PERM_DESCENDENT) {
968 			helper(who_type, whoid, whostr, ZFS_DELEG_DESCENDENT,
969 			    who_nvp, perms_nvp, sets_nvp);
970 		}
971 	}
972 }
973 
974 /*
975  * Construct nvlist to pass down to kernel for setting/removing permissions.
976  *
977  * The nvlist is constructed as a series of nvpairs with an optional embedded
978  * nvlist of permissions to remove or set.  The topmost nvpairs are the actual
979  * base attribute named stored in the dsl.
980  * Arguments:
981  *
982  * whostr:   is a comma separated list of users, groups, or a single set name.
983  *           whostr may be null for everyone or create perms.
984  * who_type: is the type of entry in whostr.  Typically this will be
985  *           ZFS_DELEG_WHO_UNKNOWN.
986  * perms:    common separated list of permissions.  May be null if user
987  *           is requested to remove permissions by who.
988  * inherit:  Specifies the inheritance of the permissions.  Will be either
989  *           ZFS_DELEG_PERM_LOCAL and/or  ZFS_DELEG_PERM_DESCENDENT.
990  * nvp       The constructed nvlist to pass to zfs_perm_set().
991  *           The output nvp will look something like this.
992  *              ul$1234 -> {create ; destroy }
993  *              Ul$1234 -> { @myset }
994  *              s-$@myset - { snapshot; checksum; compression }
995  */
996 int
997 zfs_build_perms(zfs_handle_t *zhp, char *whostr, char *perms,
998     zfs_deleg_who_type_t who_type, zfs_deleg_inherit_t inherit, nvlist_t **nvp)
999 {
1000 	nvlist_t *who_nvp;
1001 	nvlist_t *perms_nvp = NULL;
1002 	nvlist_t *sets_nvp = NULL;
1003 	char errbuf[1024];
1004 	char *who_tok, *perm;
1005 	int error;
1006 
1007 	*nvp = NULL;
1008 
1009 	if (perms) {
1010 		if ((error = nvlist_alloc(&perms_nvp,
1011 		    NV_UNIQUE_NAME, 0)) != 0) {
1012 			return (1);
1013 		}
1014 		if ((error = nvlist_alloc(&sets_nvp,
1015 		    NV_UNIQUE_NAME, 0)) != 0) {
1016 			nvlist_free(perms_nvp);
1017 			return (1);
1018 		}
1019 	}
1020 
1021 	if ((error = nvlist_alloc(&who_nvp, NV_UNIQUE_NAME, 0)) != 0) {
1022 		if (perms_nvp)
1023 			nvlist_free(perms_nvp);
1024 		if (sets_nvp)
1025 			nvlist_free(sets_nvp);
1026 		return (1);
1027 	}
1028 
1029 	if (who_type == ZFS_DELEG_NAMED_SET) {
1030 		namecheck_err_t why;
1031 		char what;
1032 
1033 		if ((error = permset_namecheck(whostr, &why, &what)) != 0) {
1034 			nvlist_free(who_nvp);
1035 			if (perms_nvp)
1036 				nvlist_free(perms_nvp);
1037 			if (sets_nvp)
1038 				nvlist_free(sets_nvp);
1039 
1040 			switch (why) {
1041 			case NAME_ERR_NO_AT:
1042 				zfs_error_aux(zhp->zfs_hdl,
1043 				    dgettext(TEXT_DOMAIN,
1044 				    "set definition must begin with an '@' "
1045 				    "character"));
1046 			}
1047 			return (zfs_error(zhp->zfs_hdl,
1048 			    EZFS_BADPERMSET, whostr));
1049 		}
1050 	}
1051 
1052 	/*
1053 	 * Build up nvlist(s) of permissions.  Two nvlists are maintained.
1054 	 * The first nvlist perms_nvp will have normal permissions and the
1055 	 * other sets_nvp will have only permssion set names in it.
1056 	 */
1057 	for (perm = strtok(perms, ","); perm; perm = strtok(NULL, ",")) {
1058 		const char *perm_canonical = zfs_deleg_canonicalize_perm(perm);
1059 
1060 		if (perm_canonical) {
1061 			verify(nvlist_add_boolean(perms_nvp,
1062 			    perm_canonical) == 0);
1063 		} else if (perm[0] == '@') {
1064 			verify(nvlist_add_boolean(sets_nvp, perm) == 0);
1065 		} else {
1066 			nvlist_free(who_nvp);
1067 			nvlist_free(perms_nvp);
1068 			nvlist_free(sets_nvp);
1069 			return (zfs_error(zhp->zfs_hdl, EZFS_BADPERM, perm));
1070 		}
1071 	}
1072 
1073 	if (whostr && who_type != ZFS_DELEG_CREATE) {
1074 		who_tok = strtok(whostr, ",");
1075 		if (who_tok == NULL) {
1076 			nvlist_free(who_nvp);
1077 			if (perms_nvp)
1078 				nvlist_free(perms_nvp);
1079 			if (sets_nvp)
1080 				nvlist_free(sets_nvp);
1081 			(void) snprintf(errbuf, sizeof (errbuf),
1082 			    dgettext(TEXT_DOMAIN, "Who string is NULL"),
1083 			    whostr);
1084 			return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
1085 		}
1086 	}
1087 
1088 	/*
1089 	 * Now create the nvlist(s)
1090 	 */
1091 	do {
1092 		uint64_t who_id;
1093 
1094 		error = zfs_get_perm_who(who_tok, &who_type,
1095 		    &who_id);
1096 		if (error) {
1097 			nvlist_free(who_nvp);
1098 			if (perms_nvp)
1099 				nvlist_free(perms_nvp);
1100 			if (sets_nvp)
1101 				nvlist_free(sets_nvp);
1102 			(void) snprintf(errbuf, sizeof (errbuf),
1103 			    dgettext(TEXT_DOMAIN,
1104 			    "Unable to determine uid/gid for "
1105 			    "%s "), who_tok);
1106 			return (zfs_error(zhp->zfs_hdl, EZFS_BADWHO, errbuf));
1107 		}
1108 
1109 		/*
1110 		 * add entries for both local and descendent when required
1111 		 */
1112 		zfs_perms_add_who_nvlist(who_nvp, who_id, who_tok,
1113 		    perms_nvp, sets_nvp, who_type, inherit);
1114 
1115 	} while (who_tok = strtok(NULL, ","));
1116 	*nvp = who_nvp;
1117 	return (0);
1118 }
1119 
1120 static int
1121 zfs_perm_set_common(zfs_handle_t *zhp, nvlist_t *nvp, boolean_t unset)
1122 {
1123 	zfs_cmd_t zc = { 0 };
1124 	int error;
1125 	char errbuf[1024];
1126 
1127 	(void) snprintf(errbuf, sizeof (errbuf),
1128 	    dgettext(TEXT_DOMAIN, "Cannot update 'allows' for '%s'"),
1129 	    zhp->zfs_name);
1130 
1131 	if (zcmd_write_src_nvlist(zhp->zfs_hdl, &zc, nvp))
1132 		return (-1);
1133 
1134 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1135 	zc.zc_perm_action = unset;
1136 
1137 	error = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SET_FSACL, &zc);
1138 	if (error && errno == ENOTSUP) {
1139 		(void) snprintf(errbuf, sizeof (errbuf),
1140 		    gettext("Pool must be upgraded to use 'allow/unallow'"));
1141 		zcmd_free_nvlists(&zc);
1142 		return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION, errbuf));
1143 	} else if (error) {
1144 		return (zfs_standard_error(zhp->zfs_hdl, errno, errbuf));
1145 	}
1146 	zcmd_free_nvlists(&zc);
1147 
1148 	return (error);
1149 }
1150 
1151 int
1152 zfs_perm_set(zfs_handle_t *zhp, nvlist_t *nvp)
1153 {
1154 	return (zfs_perm_set_common(zhp, nvp, B_FALSE));
1155 }
1156 
1157 int
1158 zfs_perm_remove(zfs_handle_t *zhp, nvlist_t *perms)
1159 {
1160 	return (zfs_perm_set_common(zhp, perms, B_TRUE));
1161 }
1162 
1163 static int
1164 perm_compare(const void *arg1, const void *arg2)
1165 {
1166 	const zfs_perm_node_t *node1 = arg1;
1167 	const zfs_perm_node_t *node2 = arg2;
1168 	int ret;
1169 
1170 	ret = strcmp(node1->z_pname, node2->z_pname);
1171 
1172 	if (ret > 0)
1173 		return (1);
1174 	if (ret < 0)
1175 		return (-1);
1176 	else
1177 		return (0);
1178 }
1179 
1180 static void
1181 zfs_destroy_perm_tree(avl_tree_t *tree)
1182 {
1183 	zfs_perm_node_t *permnode;
1184 	void *cookie;
1185 
1186 	cookie = NULL;
1187 	while ((permnode = avl_destroy_nodes(tree,  &cookie)) != NULL) {
1188 		avl_remove(tree, permnode);
1189 		free(permnode);
1190 	}
1191 }
1192 
1193 static void
1194 zfs_destroy_tree(avl_tree_t *tree)
1195 {
1196 	zfs_allow_node_t *allownode;
1197 	void *cookie;
1198 
1199 	cookie = NULL;
1200 	while ((allownode = avl_destroy_nodes(tree, &cookie)) != NULL) {
1201 		zfs_destroy_perm_tree(&allownode->z_localdescend);
1202 		zfs_destroy_perm_tree(&allownode->z_local);
1203 		zfs_destroy_perm_tree(&allownode->z_descend);
1204 		avl_remove(tree, allownode);
1205 		free(allownode);
1206 	}
1207 }
1208 
1209 void
1210 zfs_free_allows(zfs_allow_t *allow)
1211 {
1212 	zfs_allow_t *allownext;
1213 	zfs_allow_t *freeallow;
1214 
1215 	allownext = allow;
1216 	while (allownext) {
1217 		zfs_destroy_tree(&allownext->z_sets);
1218 		zfs_destroy_tree(&allownext->z_crperms);
1219 		zfs_destroy_tree(&allownext->z_user);
1220 		zfs_destroy_tree(&allownext->z_group);
1221 		zfs_destroy_tree(&allownext->z_everyone);
1222 		freeallow = allownext;
1223 		allownext = allownext->z_next;
1224 		free(freeallow);
1225 	}
1226 }
1227 
1228 static zfs_allow_t *
1229 zfs_alloc_perm_tree(zfs_handle_t *zhp, zfs_allow_t *prev, char *setpoint)
1230 {
1231 	zfs_allow_t *ptree;
1232 
1233 	if ((ptree = zfs_alloc(zhp->zfs_hdl,
1234 	    sizeof (zfs_allow_t))) == NULL) {
1235 		return (NULL);
1236 	}
1237 
1238 	(void) strlcpy(ptree->z_setpoint, setpoint, sizeof (ptree->z_setpoint));
1239 	avl_create(&ptree->z_sets,
1240 	    perm_compare, sizeof (zfs_allow_node_t),
1241 	    offsetof(zfs_allow_node_t, z_node));
1242 	avl_create(&ptree->z_crperms,
1243 	    perm_compare, sizeof (zfs_allow_node_t),
1244 	    offsetof(zfs_allow_node_t, z_node));
1245 	avl_create(&ptree->z_user,
1246 	    perm_compare, sizeof (zfs_allow_node_t),
1247 	    offsetof(zfs_allow_node_t, z_node));
1248 	avl_create(&ptree->z_group,
1249 	    perm_compare, sizeof (zfs_allow_node_t),
1250 	    offsetof(zfs_allow_node_t, z_node));
1251 	avl_create(&ptree->z_everyone,
1252 	    perm_compare, sizeof (zfs_allow_node_t),
1253 	    offsetof(zfs_allow_node_t, z_node));
1254 
1255 	if (prev)
1256 		prev->z_next = ptree;
1257 	ptree->z_next = NULL;
1258 	return (ptree);
1259 }
1260 
1261 /*
1262  * Add permissions to the appropriate AVL permission tree.
1263  * The appropriate tree may not be the requested tree.
1264  * For example if ld indicates a local permission, but
1265  * same permission also exists as a descendent permission
1266  * then the permission will be removed from the descendent
1267  * tree and add the the local+descendent tree.
1268  */
1269 static int
1270 zfs_coalesce_perm(zfs_handle_t *zhp, zfs_allow_node_t *allownode,
1271     char *perm, char ld)
1272 {
1273 	zfs_perm_node_t pnode, *permnode, *permnode2;
1274 	zfs_perm_node_t *newnode;
1275 	avl_index_t where, where2;
1276 	avl_tree_t *tree, *altree;
1277 
1278 	(void) strlcpy(pnode.z_pname, perm, sizeof (pnode.z_pname));
1279 
1280 	if (ld == ZFS_DELEG_NA) {
1281 		tree =  &allownode->z_localdescend;
1282 		altree = &allownode->z_descend;
1283 	} else if (ld == ZFS_DELEG_LOCAL) {
1284 		tree = &allownode->z_local;
1285 		altree = &allownode->z_descend;
1286 	} else {
1287 		tree = &allownode->z_descend;
1288 		altree = &allownode->z_local;
1289 	}
1290 	permnode = avl_find(tree, &pnode, &where);
1291 	permnode2 = avl_find(altree, &pnode, &where2);
1292 
1293 	if (permnode2) {
1294 		avl_remove(altree, permnode2);
1295 		free(permnode2);
1296 		if (permnode == NULL) {
1297 			tree =  &allownode->z_localdescend;
1298 		}
1299 	}
1300 
1301 	/*
1302 	 * Now insert new permission in either requested location
1303 	 * local/descendent or into ld when perm will exist in both.
1304 	 */
1305 	if (permnode == NULL) {
1306 		if ((newnode = zfs_alloc(zhp->zfs_hdl,
1307 		    sizeof (zfs_perm_node_t))) == NULL) {
1308 			return (-1);
1309 		}
1310 		*newnode = pnode;
1311 		avl_add(tree, newnode);
1312 	}
1313 	return (0);
1314 }
1315 
1316 /*
1317  * Uggh, this is going to be a bit complicated.
1318  * we have an nvlist coming out of the kernel that
1319  * will indicate where the permission is set and then
1320  * it will contain allow of the various "who's", and what
1321  * their permissions are.  To further complicate this
1322  * we will then have to coalesce the local,descendent
1323  * and local+descendent permissions where appropriate.
1324  * The kernel only knows about a permission as being local
1325  * or descendent, but not both.
1326  *
1327  * In order to make this easier for zfs_main to deal with
1328  * a series of AVL trees will be used to maintain
1329  * all of this, primarily for sorting purposes as well
1330  * as the ability to quickly locate a specific entry.
1331  *
1332  * What we end up with are tree's for sets, create perms,
1333  * user, groups and everyone.  With each of those trees
1334  * we have subtrees for local, descendent and local+descendent
1335  * permissions.
1336  */
1337 int
1338 zfs_perm_get(zfs_handle_t *zhp, zfs_allow_t **zfs_perms)
1339 {
1340 	zfs_cmd_t zc = { 0 };
1341 	int error;
1342 	nvlist_t *nvlist;
1343 	nvlist_t *permnv, *sourcenv;
1344 	nvpair_t *who_pair, *source_pair;
1345 	nvpair_t *perm_pair;
1346 	char errbuf[1024];
1347 	zfs_allow_t *zallowp, *newallowp;
1348 	char  ld;
1349 	char *nvpname;
1350 	uid_t	uid;
1351 	gid_t	gid;
1352 	avl_tree_t *tree;
1353 	avl_index_t where;
1354 
1355 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1356 
1357 	if (zcmd_alloc_dst_nvlist(zhp->zfs_hdl, &zc, 0) != 0)
1358 		return (-1);
1359 
1360 	while (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_GET_FSACL, &zc) != 0) {
1361 		if (errno == ENOMEM) {
1362 			if (zcmd_expand_dst_nvlist(zhp->zfs_hdl, &zc) != 0) {
1363 				zcmd_free_nvlists(&zc);
1364 				return (-1);
1365 			}
1366 		} else if (errno == ENOTSUP) {
1367 			zcmd_free_nvlists(&zc);
1368 			(void) snprintf(errbuf, sizeof (errbuf),
1369 			    gettext("Pool must be upgraded to use 'allow'"));
1370 			return (zfs_error(zhp->zfs_hdl,
1371 			    EZFS_BADVERSION, errbuf));
1372 		} else {
1373 			zcmd_free_nvlists(&zc);
1374 			return (-1);
1375 		}
1376 	}
1377 
1378 	if (zcmd_read_dst_nvlist(zhp->zfs_hdl, &zc, &nvlist) != 0) {
1379 		zcmd_free_nvlists(&zc);
1380 		return (-1);
1381 	}
1382 
1383 	zcmd_free_nvlists(&zc);
1384 
1385 	source_pair = nvlist_next_nvpair(nvlist, NULL);
1386 
1387 	if (source_pair == NULL) {
1388 		*zfs_perms = NULL;
1389 		return (0);
1390 	}
1391 
1392 	*zfs_perms = zfs_alloc_perm_tree(zhp, NULL, nvpair_name(source_pair));
1393 	if (*zfs_perms == NULL) {
1394 		return (0);
1395 	}
1396 
1397 	zallowp = *zfs_perms;
1398 
1399 	for (;;) {
1400 		struct passwd *pwd;
1401 		struct group *grp;
1402 		zfs_allow_node_t *allownode;
1403 		zfs_allow_node_t  findallownode;
1404 		zfs_allow_node_t *newallownode;
1405 
1406 		(void) strlcpy(zallowp->z_setpoint,
1407 		    nvpair_name(source_pair),
1408 		    sizeof (zallowp->z_setpoint));
1409 
1410 		if ((error = nvpair_value_nvlist(source_pair, &sourcenv)) != 0)
1411 			goto abort;
1412 
1413 		/*
1414 		 * Make sure nvlist is composed correctly
1415 		 */
1416 		if (zfs_deleg_verify_nvlist(sourcenv)) {
1417 			goto abort;
1418 		}
1419 
1420 		who_pair = nvlist_next_nvpair(sourcenv, NULL);
1421 		if (who_pair == NULL) {
1422 			goto abort;
1423 		}
1424 
1425 		do {
1426 			error = nvpair_value_nvlist(who_pair, &permnv);
1427 			if (error) {
1428 				goto abort;
1429 			}
1430 
1431 			/*
1432 			 * First build up the key to use
1433 			 * for looking up in the various
1434 			 * who trees.
1435 			 */
1436 			ld = nvpair_name(who_pair)[1];
1437 			nvpname = nvpair_name(who_pair);
1438 			switch (nvpair_name(who_pair)[0]) {
1439 			case ZFS_DELEG_USER:
1440 			case ZFS_DELEG_USER_SETS:
1441 				tree = &zallowp->z_user;
1442 				uid = atol(&nvpname[3]);
1443 				pwd = getpwuid(uid);
1444 				(void) snprintf(findallownode.z_key,
1445 				    sizeof (findallownode.z_key), "user %s",
1446 				    (pwd) ? pwd->pw_name :
1447 				    &nvpair_name(who_pair)[3]);
1448 				break;
1449 			case ZFS_DELEG_GROUP:
1450 			case ZFS_DELEG_GROUP_SETS:
1451 				tree = &zallowp->z_group;
1452 				gid = atol(&nvpname[3]);
1453 				grp = getgrgid(gid);
1454 				(void) snprintf(findallownode.z_key,
1455 				    sizeof (findallownode.z_key), "group %s",
1456 				    (grp) ? grp->gr_name :
1457 				    &nvpair_name(who_pair)[3]);
1458 				break;
1459 			case ZFS_DELEG_CREATE:
1460 			case ZFS_DELEG_CREATE_SETS:
1461 				tree = &zallowp->z_crperms;
1462 				(void) strlcpy(findallownode.z_key, "",
1463 				    sizeof (findallownode.z_key));
1464 				break;
1465 			case ZFS_DELEG_EVERYONE:
1466 			case ZFS_DELEG_EVERYONE_SETS:
1467 				(void) snprintf(findallownode.z_key,
1468 				    sizeof (findallownode.z_key), "everyone");
1469 				tree = &zallowp->z_everyone;
1470 				break;
1471 			case ZFS_DELEG_NAMED_SET:
1472 			case ZFS_DELEG_NAMED_SET_SETS:
1473 				(void) snprintf(findallownode.z_key,
1474 				    sizeof (findallownode.z_key), "%s",
1475 				    &nvpair_name(who_pair)[3]);
1476 				tree = &zallowp->z_sets;
1477 				break;
1478 			}
1479 
1480 			/*
1481 			 * Place who in tree
1482 			 */
1483 			allownode = avl_find(tree, &findallownode, &where);
1484 			if (allownode == NULL) {
1485 				if ((newallownode = zfs_alloc(zhp->zfs_hdl,
1486 				    sizeof (zfs_allow_node_t))) == NULL) {
1487 					goto abort;
1488 				}
1489 				avl_create(&newallownode->z_localdescend,
1490 				    perm_compare,
1491 				    sizeof (zfs_perm_node_t),
1492 				    offsetof(zfs_perm_node_t, z_node));
1493 				avl_create(&newallownode->z_local,
1494 				    perm_compare,
1495 				    sizeof (zfs_perm_node_t),
1496 				    offsetof(zfs_perm_node_t, z_node));
1497 				avl_create(&newallownode->z_descend,
1498 				    perm_compare,
1499 				    sizeof (zfs_perm_node_t),
1500 				    offsetof(zfs_perm_node_t, z_node));
1501 				(void) strlcpy(newallownode->z_key,
1502 				    findallownode.z_key,
1503 				    sizeof (findallownode.z_key));
1504 				avl_insert(tree, newallownode, where);
1505 				allownode = newallownode;
1506 			}
1507 
1508 			/*
1509 			 * Now iterate over the permissions and
1510 			 * place them in the appropriate local,
1511 			 * descendent or local+descendent tree.
1512 			 *
1513 			 * The permissions are added to the tree
1514 			 * via zfs_coalesce_perm().
1515 			 */
1516 			perm_pair = nvlist_next_nvpair(permnv, NULL);
1517 			if (perm_pair == NULL)
1518 				goto abort;
1519 			do {
1520 				if (zfs_coalesce_perm(zhp, allownode,
1521 				    nvpair_name(perm_pair), ld) != 0)
1522 					goto abort;
1523 			} while (perm_pair = nvlist_next_nvpair(permnv,
1524 			    perm_pair));
1525 		} while (who_pair = nvlist_next_nvpair(sourcenv, who_pair));
1526 
1527 		source_pair = nvlist_next_nvpair(nvlist, source_pair);
1528 		if (source_pair == NULL)
1529 			break;
1530 
1531 		/*
1532 		 * allocate another node from the link list of
1533 		 * zfs_allow_t structures
1534 		 */
1535 		newallowp = zfs_alloc_perm_tree(zhp, zallowp,
1536 		    nvpair_name(source_pair));
1537 		if (newallowp == NULL) {
1538 			goto abort;
1539 		}
1540 		zallowp = newallowp;
1541 	}
1542 	nvlist_free(nvlist);
1543 	return (0);
1544 abort:
1545 	zfs_free_allows(*zfs_perms);
1546 	nvlist_free(nvlist);
1547 	return (-1);
1548 }
1549 
1550 /*
1551  * Given a property name and value, set the property for the given dataset.
1552  */
1553 int
1554 zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
1555 {
1556 	zfs_cmd_t zc = { 0 };
1557 	int ret = -1;
1558 	prop_changelist_t *cl = NULL;
1559 	char errbuf[1024];
1560 	libzfs_handle_t *hdl = zhp->zfs_hdl;
1561 	nvlist_t *nvl = NULL, *realprops;
1562 	zfs_prop_t prop;
1563 
1564 	(void) snprintf(errbuf, sizeof (errbuf),
1565 	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
1566 	    zhp->zfs_name);
1567 
1568 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0 ||
1569 	    nvlist_add_string(nvl, propname, propval) != 0) {
1570 		(void) no_memory(hdl);
1571 		goto error;
1572 	}
1573 
1574 	if ((realprops = zfs_validate_properties(hdl, zhp->zfs_type, nvl,
1575 	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED), zhp, errbuf)) == NULL)
1576 		goto error;
1577 
1578 	nvlist_free(nvl);
1579 	nvl = realprops;
1580 
1581 	prop = zfs_name_to_prop(propname);
1582 
1583 	if ((cl = changelist_gather(zhp, prop, 0)) == NULL)
1584 		goto error;
1585 
1586 	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
1587 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1588 		    "child dataset with inherited mountpoint is used "
1589 		    "in a non-global zone"));
1590 		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
1591 		goto error;
1592 	}
1593 
1594 	if ((ret = changelist_prefix(cl)) != 0)
1595 		goto error;
1596 
1597 	/*
1598 	 * Execute the corresponding ioctl() to set this property.
1599 	 */
1600 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1601 
1602 	if (zcmd_write_src_nvlist(hdl, &zc, nvl) != 0)
1603 		goto error;
1604 
1605 	ret = zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
1606 
1607 	if (ret != 0) {
1608 		switch (errno) {
1609 
1610 		case ENOSPC:
1611 			/*
1612 			 * For quotas and reservations, ENOSPC indicates
1613 			 * something different; setting a quota or reservation
1614 			 * doesn't use any disk space.
1615 			 */
1616 			switch (prop) {
1617 			case ZFS_PROP_QUOTA:
1618 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1619 				    "size is less than current used or "
1620 				    "reserved space"));
1621 				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
1622 				break;
1623 
1624 			case ZFS_PROP_RESERVATION:
1625 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1626 				    "size is greater than available space"));
1627 				(void) zfs_error(hdl, EZFS_PROPSPACE, errbuf);
1628 				break;
1629 
1630 			default:
1631 				(void) zfs_standard_error(hdl, errno, errbuf);
1632 				break;
1633 			}
1634 			break;
1635 
1636 		case EBUSY:
1637 			if (prop == ZFS_PROP_VOLBLOCKSIZE)
1638 				(void) zfs_error(hdl, EZFS_VOLHASDATA, errbuf);
1639 			else
1640 				(void) zfs_standard_error(hdl, EBUSY, errbuf);
1641 			break;
1642 
1643 		case EROFS:
1644 			(void) zfs_error(hdl, EZFS_DSREADONLY, errbuf);
1645 			break;
1646 
1647 		case ENOTSUP:
1648 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1649 			    "pool must be upgraded to set this "
1650 			    "property or value"));
1651 			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
1652 			break;
1653 
1654 		case EOVERFLOW:
1655 			/*
1656 			 * This platform can't address a volume this big.
1657 			 */
1658 #ifdef _ILP32
1659 			if (prop == ZFS_PROP_VOLSIZE) {
1660 				(void) zfs_error(hdl, EZFS_VOLTOOBIG, errbuf);
1661 				break;
1662 			}
1663 #endif
1664 			/* FALLTHROUGH */
1665 		default:
1666 			(void) zfs_standard_error(hdl, errno, errbuf);
1667 		}
1668 	} else {
1669 		/*
1670 		 * Refresh the statistics so the new property value
1671 		 * is reflected.
1672 		 */
1673 		if ((ret = changelist_postfix(cl)) == 0)
1674 			(void) get_stats(zhp);
1675 	}
1676 
1677 error:
1678 	nvlist_free(nvl);
1679 	zcmd_free_nvlists(&zc);
1680 	if (cl)
1681 		changelist_free(cl);
1682 	return (ret);
1683 }
1684 
1685 /*
1686  * Given a property, inherit the value from the parent dataset.
1687  */
1688 int
1689 zfs_prop_inherit(zfs_handle_t *zhp, const char *propname)
1690 {
1691 	zfs_cmd_t zc = { 0 };
1692 	int ret;
1693 	prop_changelist_t *cl;
1694 	libzfs_handle_t *hdl = zhp->zfs_hdl;
1695 	char errbuf[1024];
1696 	zfs_prop_t prop;
1697 
1698 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1699 	    "cannot inherit %s for '%s'"), propname, zhp->zfs_name);
1700 
1701 	if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
1702 		/*
1703 		 * For user properties, the amount of work we have to do is very
1704 		 * small, so just do it here.
1705 		 */
1706 		if (!zfs_prop_user(propname)) {
1707 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1708 			    "invalid property"));
1709 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1710 		}
1711 
1712 		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1713 		(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
1714 
1715 		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc) != 0)
1716 			return (zfs_standard_error(hdl, errno, errbuf));
1717 
1718 		return (0);
1719 	}
1720 
1721 	/*
1722 	 * Verify that this property is inheritable.
1723 	 */
1724 	if (zfs_prop_readonly(prop))
1725 		return (zfs_error(hdl, EZFS_PROPREADONLY, errbuf));
1726 
1727 	if (!zfs_prop_inheritable(prop))
1728 		return (zfs_error(hdl, EZFS_PROPNONINHERIT, errbuf));
1729 
1730 	/*
1731 	 * Check to see if the value applies to this type
1732 	 */
1733 	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
1734 		return (zfs_error(hdl, EZFS_PROPTYPE, errbuf));
1735 
1736 	/*
1737 	 * Normalize the name, to get rid of shorthand abbrevations.
1738 	 */
1739 	propname = zfs_prop_to_name(prop);
1740 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1741 	(void) strlcpy(zc.zc_value, propname, sizeof (zc.zc_value));
1742 
1743 	if (prop == ZFS_PROP_MOUNTPOINT && getzoneid() == GLOBAL_ZONEID &&
1744 	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
1745 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1746 		    "dataset is used in a non-global zone"));
1747 		return (zfs_error(hdl, EZFS_ZONED, errbuf));
1748 	}
1749 
1750 	/*
1751 	 * Determine datasets which will be affected by this change, if any.
1752 	 */
1753 	if ((cl = changelist_gather(zhp, prop, 0)) == NULL)
1754 		return (-1);
1755 
1756 	if (prop == ZFS_PROP_MOUNTPOINT && changelist_haszonedchild(cl)) {
1757 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1758 		    "child dataset with inherited mountpoint is used "
1759 		    "in a non-global zone"));
1760 		ret = zfs_error(hdl, EZFS_ZONED, errbuf);
1761 		goto error;
1762 	}
1763 
1764 	if ((ret = changelist_prefix(cl)) != 0)
1765 		goto error;
1766 
1767 	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_INHERIT_PROP, &zc)) != 0) {
1768 		return (zfs_standard_error(hdl, errno, errbuf));
1769 	} else {
1770 
1771 		if ((ret = changelist_postfix(cl)) != 0)
1772 			goto error;
1773 
1774 		/*
1775 		 * Refresh the statistics so the new property is reflected.
1776 		 */
1777 		(void) get_stats(zhp);
1778 	}
1779 
1780 error:
1781 	changelist_free(cl);
1782 	return (ret);
1783 }
1784 
1785 /*
1786  * True DSL properties are stored in an nvlist.  The following two functions
1787  * extract them appropriately.
1788  */
1789 static uint64_t
1790 getprop_uint64(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
1791 {
1792 	nvlist_t *nv;
1793 	uint64_t value;
1794 
1795 	*source = NULL;
1796 	if (nvlist_lookup_nvlist(zhp->zfs_props,
1797 	    zfs_prop_to_name(prop), &nv) == 0) {
1798 		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
1799 		(void) nvlist_lookup_string(nv, ZPROP_SOURCE, source);
1800 	} else {
1801 		value = zfs_prop_default_numeric(prop);
1802 		*source = "";
1803 	}
1804 
1805 	return (value);
1806 }
1807 
1808 static char *
1809 getprop_string(zfs_handle_t *zhp, zfs_prop_t prop, char **source)
1810 {
1811 	nvlist_t *nv;
1812 	char *value;
1813 
1814 	*source = NULL;
1815 	if (nvlist_lookup_nvlist(zhp->zfs_props,
1816 	    zfs_prop_to_name(prop), &nv) == 0) {
1817 		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
1818 		(void) nvlist_lookup_string(nv, ZPROP_SOURCE, source);
1819 	} else {
1820 		if ((value = (char *)zfs_prop_default_string(prop)) == NULL)
1821 			value = "";
1822 		*source = "";
1823 	}
1824 
1825 	return (value);
1826 }
1827 
1828 /*
1829  * Internal function for getting a numeric property.  Both zfs_prop_get() and
1830  * zfs_prop_get_int() are built using this interface.
1831  *
1832  * Certain properties can be overridden using 'mount -o'.  In this case, scan
1833  * the contents of the /etc/mnttab entry, searching for the appropriate options.
1834  * If they differ from the on-disk values, report the current values and mark
1835  * the source "temporary".
1836  */
1837 static int
1838 get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
1839     char **source, uint64_t *val)
1840 {
1841 	zfs_cmd_t zc = { 0 };
1842 	struct mnttab mnt;
1843 	char *mntopt_on = NULL;
1844 	char *mntopt_off = NULL;
1845 
1846 	*source = NULL;
1847 
1848 	switch (prop) {
1849 	case ZFS_PROP_ATIME:
1850 		mntopt_on = MNTOPT_ATIME;
1851 		mntopt_off = MNTOPT_NOATIME;
1852 		break;
1853 
1854 	case ZFS_PROP_DEVICES:
1855 		mntopt_on = MNTOPT_DEVICES;
1856 		mntopt_off = MNTOPT_NODEVICES;
1857 		break;
1858 
1859 	case ZFS_PROP_EXEC:
1860 		mntopt_on = MNTOPT_EXEC;
1861 		mntopt_off = MNTOPT_NOEXEC;
1862 		break;
1863 
1864 	case ZFS_PROP_READONLY:
1865 		mntopt_on = MNTOPT_RO;
1866 		mntopt_off = MNTOPT_RW;
1867 		break;
1868 
1869 	case ZFS_PROP_SETUID:
1870 		mntopt_on = MNTOPT_SETUID;
1871 		mntopt_off = MNTOPT_NOSETUID;
1872 		break;
1873 
1874 	case ZFS_PROP_XATTR:
1875 		mntopt_on = MNTOPT_XATTR;
1876 		mntopt_off = MNTOPT_NOXATTR;
1877 		break;
1878 
1879 	case ZFS_PROP_NBMAND:
1880 		mntopt_on = MNTOPT_NBMAND;
1881 		mntopt_off = MNTOPT_NONBMAND;
1882 		break;
1883 	}
1884 
1885 	/*
1886 	 * Because looking up the mount options is potentially expensive
1887 	 * (iterating over all of /etc/mnttab), we defer its calculation until
1888 	 * we're looking up a property which requires its presence.
1889 	 */
1890 	if (!zhp->zfs_mntcheck &&
1891 	    (mntopt_on != NULL || prop == ZFS_PROP_MOUNTED)) {
1892 		struct mnttab entry, search = { 0 };
1893 		FILE *mnttab = zhp->zfs_hdl->libzfs_mnttab;
1894 
1895 		search.mnt_special = (char *)zhp->zfs_name;
1896 		search.mnt_fstype = MNTTYPE_ZFS;
1897 		rewind(mnttab);
1898 
1899 		if (getmntany(mnttab, &entry, &search) == 0) {
1900 			zhp->zfs_mntopts = zfs_strdup(zhp->zfs_hdl,
1901 			    entry.mnt_mntopts);
1902 			if (zhp->zfs_mntopts == NULL)
1903 				return (-1);
1904 		}
1905 
1906 		zhp->zfs_mntcheck = B_TRUE;
1907 	}
1908 
1909 	if (zhp->zfs_mntopts == NULL)
1910 		mnt.mnt_mntopts = "";
1911 	else
1912 		mnt.mnt_mntopts = zhp->zfs_mntopts;
1913 
1914 	switch (prop) {
1915 	case ZFS_PROP_ATIME:
1916 	case ZFS_PROP_DEVICES:
1917 	case ZFS_PROP_EXEC:
1918 	case ZFS_PROP_READONLY:
1919 	case ZFS_PROP_SETUID:
1920 	case ZFS_PROP_XATTR:
1921 	case ZFS_PROP_NBMAND:
1922 		*val = getprop_uint64(zhp, prop, source);
1923 
1924 		if (hasmntopt(&mnt, mntopt_on) && !*val) {
1925 			*val = B_TRUE;
1926 			if (src)
1927 				*src = ZPROP_SRC_TEMPORARY;
1928 		} else if (hasmntopt(&mnt, mntopt_off) && *val) {
1929 			*val = B_FALSE;
1930 			if (src)
1931 				*src = ZPROP_SRC_TEMPORARY;
1932 		}
1933 		break;
1934 
1935 	case ZFS_PROP_CANMOUNT:
1936 		*val = getprop_uint64(zhp, prop, source);
1937 		if (*val == 0)
1938 			*source = zhp->zfs_name;
1939 		else
1940 			*source = "";	/* default */
1941 		break;
1942 
1943 	case ZFS_PROP_QUOTA:
1944 	case ZFS_PROP_RESERVATION:
1945 		*val = getprop_uint64(zhp, prop, source);
1946 		if (*val == 0)
1947 			*source = "";	/* default */
1948 		else
1949 			*source = zhp->zfs_name;
1950 		break;
1951 
1952 	case ZFS_PROP_MOUNTED:
1953 		*val = (zhp->zfs_mntopts != NULL);
1954 		break;
1955 
1956 	case ZFS_PROP_NUMCLONES:
1957 		*val = zhp->zfs_dmustats.dds_num_clones;
1958 		break;
1959 
1960 	case ZFS_PROP_VERSION:
1961 		(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
1962 		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_VERSION, &zc) ||
1963 		    (zc.zc_cookie == 0)) {
1964 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1965 			    "unable to get version property"));
1966 			return (zfs_error(zhp->zfs_hdl, EZFS_BADVERSION,
1967 			    dgettext(TEXT_DOMAIN, "internal error")));
1968 		}
1969 		*val = zc.zc_cookie;
1970 		break;
1971 
1972 	default:
1973 		switch (zfs_prop_get_type(prop)) {
1974 		case PROP_TYPE_NUMBER:
1975 		case PROP_TYPE_INDEX:
1976 			*val = getprop_uint64(zhp, prop, source);
1977 			break;
1978 
1979 		case PROP_TYPE_STRING:
1980 		default:
1981 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1982 			    "cannot get non-numeric property"));
1983 			return (zfs_error(zhp->zfs_hdl, EZFS_BADPROP,
1984 			    dgettext(TEXT_DOMAIN, "internal error")));
1985 		}
1986 	}
1987 
1988 	return (0);
1989 }
1990 
1991 /*
1992  * Calculate the source type, given the raw source string.
1993  */
1994 static void
1995 get_source(zfs_handle_t *zhp, zprop_source_t *srctype, char *source,
1996     char *statbuf, size_t statlen)
1997 {
1998 	if (statbuf == NULL || *srctype == ZPROP_SRC_TEMPORARY)
1999 		return;
2000 
2001 	if (source == NULL) {
2002 		*srctype = ZPROP_SRC_NONE;
2003 	} else if (source[0] == '\0') {
2004 		*srctype = ZPROP_SRC_DEFAULT;
2005 	} else {
2006 		if (strcmp(source, zhp->zfs_name) == 0) {
2007 			*srctype = ZPROP_SRC_LOCAL;
2008 		} else {
2009 			(void) strlcpy(statbuf, source, statlen);
2010 			*srctype = ZPROP_SRC_INHERITED;
2011 		}
2012 	}
2013 
2014 }
2015 
2016 /*
2017  * Retrieve a property from the given object.  If 'literal' is specified, then
2018  * numbers are left as exact values.  Otherwise, numbers are converted to a
2019  * human-readable form.
2020  *
2021  * Returns 0 on success, or -1 on error.
2022  */
2023 int
2024 zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
2025     zprop_source_t *src, char *statbuf, size_t statlen, boolean_t literal)
2026 {
2027 	char *source = NULL;
2028 	uint64_t val;
2029 	char *str;
2030 	const char *root;
2031 	const char *strval;
2032 
2033 	/*
2034 	 * Check to see if this property applies to our object
2035 	 */
2036 	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type))
2037 		return (-1);
2038 
2039 	if (src)
2040 		*src = ZPROP_SRC_NONE;
2041 
2042 	switch (prop) {
2043 	case ZFS_PROP_CREATION:
2044 		/*
2045 		 * 'creation' is a time_t stored in the statistics.  We convert
2046 		 * this into a string unless 'literal' is specified.
2047 		 */
2048 		{
2049 			val = getprop_uint64(zhp, prop, &source);
2050 			time_t time = (time_t)val;
2051 			struct tm t;
2052 
2053 			if (literal ||
2054 			    localtime_r(&time, &t) == NULL ||
2055 			    strftime(propbuf, proplen, "%a %b %e %k:%M %Y",
2056 			    &t) == 0)
2057 				(void) snprintf(propbuf, proplen, "%llu", val);
2058 		}
2059 		break;
2060 
2061 	case ZFS_PROP_MOUNTPOINT:
2062 		/*
2063 		 * Getting the precise mountpoint can be tricky.
2064 		 *
2065 		 *  - for 'none' or 'legacy', return those values.
2066 		 *  - for default mountpoints, construct it as /zfs/<dataset>
2067 		 *  - for inherited mountpoints, we want to take everything
2068 		 *    after our ancestor and append it to the inherited value.
2069 		 *
2070 		 * If the pool has an alternate root, we want to prepend that
2071 		 * root to any values we return.
2072 		 */
2073 		root = zhp->zfs_root;
2074 		str = getprop_string(zhp, prop, &source);
2075 
2076 		if (str[0] == '\0') {
2077 			(void) snprintf(propbuf, proplen, "%s/zfs/%s",
2078 			    root, zhp->zfs_name);
2079 		} else if (str[0] == '/') {
2080 			const char *relpath = zhp->zfs_name + strlen(source);
2081 
2082 			if (relpath[0] == '/')
2083 				relpath++;
2084 			if (str[1] == '\0')
2085 				str++;
2086 
2087 			if (relpath[0] == '\0')
2088 				(void) snprintf(propbuf, proplen, "%s%s",
2089 				    root, str);
2090 			else
2091 				(void) snprintf(propbuf, proplen, "%s%s%s%s",
2092 				    root, str, relpath[0] == '@' ? "" : "/",
2093 				    relpath);
2094 		} else {
2095 			/* 'legacy' or 'none' */
2096 			(void) strlcpy(propbuf, str, proplen);
2097 		}
2098 
2099 		break;
2100 
2101 	case ZFS_PROP_ORIGIN:
2102 		(void) strlcpy(propbuf, getprop_string(zhp, prop, &source),
2103 		    proplen);
2104 		/*
2105 		 * If there is no parent at all, return failure to indicate that
2106 		 * it doesn't apply to this dataset.
2107 		 */
2108 		if (propbuf[0] == '\0')
2109 			return (-1);
2110 		break;
2111 
2112 	case ZFS_PROP_QUOTA:
2113 	case ZFS_PROP_RESERVATION:
2114 		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
2115 			return (-1);
2116 
2117 		/*
2118 		 * If quota or reservation is 0, we translate this into 'none'
2119 		 * (unless literal is set), and indicate that it's the default
2120 		 * value.  Otherwise, we print the number nicely and indicate
2121 		 * that its set locally.
2122 		 */
2123 		if (val == 0) {
2124 			if (literal)
2125 				(void) strlcpy(propbuf, "0", proplen);
2126 			else
2127 				(void) strlcpy(propbuf, "none", proplen);
2128 		} else {
2129 			if (literal)
2130 				(void) snprintf(propbuf, proplen, "%llu",
2131 				    (u_longlong_t)val);
2132 			else
2133 				zfs_nicenum(val, propbuf, proplen);
2134 		}
2135 		break;
2136 
2137 	case ZFS_PROP_COMPRESSRATIO:
2138 		if (get_numeric_property(zhp, prop, src, &source, &val) != 0)
2139 			return (-1);
2140 		(void) snprintf(propbuf, proplen, "%lld.%02lldx", (longlong_t)
2141 		    val / 100, (longlong_t)val % 100);
2142 		break;
2143 
2144 	case ZFS_PROP_TYPE:
2145 		switch (zhp->zfs_type) {
2146 		case ZFS_TYPE_FILESYSTEM:
2147 			str = "filesystem";
2148 			break;
2149 		case ZFS_TYPE_VOLUME:
2150 			str = "volume";
2151 			break;
2152 		case ZFS_TYPE_SNAPSHOT:
2153 			str = "snapshot";
2154 			break;
2155 		default:
2156 			abort();
2157 		}
2158 		(void) snprintf(propbuf, proplen, "%s", str);
2159 		break;
2160 
2161 	case ZFS_PROP_MOUNTED:
2162 		/*
2163 		 * The 'mounted' property is a pseudo-property that described
2164 		 * whether the filesystem is currently mounted.  Even though
2165 		 * it's a boolean value, the typical values of "on" and "off"
2166 		 * don't make sense, so we translate to "yes" and "no".
2167 		 */
2168 		if (get_numeric_property(zhp, ZFS_PROP_MOUNTED,
2169 		    src, &source, &val) != 0)
2170 			return (-1);
2171 		if (val)
2172 			(void) strlcpy(propbuf, "yes", proplen);
2173 		else
2174 			(void) strlcpy(propbuf, "no", proplen);
2175 		break;
2176 
2177 	case ZFS_PROP_NAME:
2178 		/*
2179 		 * The 'name' property is a pseudo-property derived from the
2180 		 * dataset name.  It is presented as a real property to simplify
2181 		 * consumers.
2182 		 */
2183 		(void) strlcpy(propbuf, zhp->zfs_name, proplen);
2184 		break;
2185 
2186 	default:
2187 		switch (zfs_prop_get_type(prop)) {
2188 		case PROP_TYPE_NUMBER:
2189 			if (get_numeric_property(zhp, prop, src,
2190 			    &source, &val) != 0)
2191 				return (-1);
2192 			if (literal)
2193 				(void) snprintf(propbuf, proplen, "%llu",
2194 				    (u_longlong_t)val);
2195 			else
2196 				zfs_nicenum(val, propbuf, proplen);
2197 			break;
2198 
2199 		case PROP_TYPE_STRING:
2200 			(void) strlcpy(propbuf,
2201 			    getprop_string(zhp, prop, &source), proplen);
2202 			break;
2203 
2204 		case PROP_TYPE_INDEX:
2205 			if (get_numeric_property(zhp, prop, src,
2206 			    &source, &val) != 0)
2207 				return (-1);
2208 			if (zfs_prop_index_to_string(prop, val, &strval) != 0)
2209 				return (-1);
2210 			(void) strlcpy(propbuf, strval, proplen);
2211 			break;
2212 
2213 		default:
2214 			abort();
2215 		}
2216 	}
2217 
2218 	get_source(zhp, src, source, statbuf, statlen);
2219 
2220 	return (0);
2221 }
2222 
2223 /*
2224  * Utility function to get the given numeric property.  Does no validation that
2225  * the given property is the appropriate type; should only be used with
2226  * hard-coded property types.
2227  */
2228 uint64_t
2229 zfs_prop_get_int(zfs_handle_t *zhp, zfs_prop_t prop)
2230 {
2231 	char *source;
2232 	zprop_source_t sourcetype = ZPROP_SRC_NONE;
2233 	uint64_t val;
2234 
2235 	(void) get_numeric_property(zhp, prop, &sourcetype, &source, &val);
2236 
2237 	return (val);
2238 }
2239 
2240 /*
2241  * Similar to zfs_prop_get(), but returns the value as an integer.
2242  */
2243 int
2244 zfs_prop_get_numeric(zfs_handle_t *zhp, zfs_prop_t prop, uint64_t *value,
2245     zprop_source_t *src, char *statbuf, size_t statlen)
2246 {
2247 	char *source;
2248 
2249 	/*
2250 	 * Check to see if this property applies to our object
2251 	 */
2252 	if (!zfs_prop_valid_for_type(prop, zhp->zfs_type)) {
2253 		return (zfs_error_fmt(zhp->zfs_hdl, EZFS_PROPTYPE,
2254 		    dgettext(TEXT_DOMAIN, "cannot get property '%s'"),
2255 		    zfs_prop_to_name(prop)));
2256 	}
2257 
2258 	if (src)
2259 		*src = ZPROP_SRC_NONE;
2260 
2261 	if (get_numeric_property(zhp, prop, src, &source, value) != 0)
2262 		return (-1);
2263 
2264 	get_source(zhp, src, source, statbuf, statlen);
2265 
2266 	return (0);
2267 }
2268 
2269 /*
2270  * Returns the name of the given zfs handle.
2271  */
2272 const char *
2273 zfs_get_name(const zfs_handle_t *zhp)
2274 {
2275 	return (zhp->zfs_name);
2276 }
2277 
2278 /*
2279  * Returns the type of the given zfs handle.
2280  */
2281 zfs_type_t
2282 zfs_get_type(const zfs_handle_t *zhp)
2283 {
2284 	return (zhp->zfs_type);
2285 }
2286 
2287 /*
2288  * Iterate over all child filesystems
2289  */
2290 int
2291 zfs_iter_filesystems(zfs_handle_t *zhp, zfs_iter_f func, void *data)
2292 {
2293 	zfs_cmd_t zc = { 0 };
2294 	zfs_handle_t *nzhp;
2295 	int ret;
2296 
2297 	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2298 	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_DATASET_LIST_NEXT, &zc) == 0;
2299 	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
2300 		/*
2301 		 * Ignore private dataset names.
2302 		 */
2303 		if (dataset_name_hidden(zc.zc_name))
2304 			continue;
2305 
2306 		/*
2307 		 * Silently ignore errors, as the only plausible explanation is
2308 		 * that the pool has since been removed.
2309 		 */
2310 		if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
2311 		    zc.zc_name)) == NULL)
2312 			continue;
2313 
2314 		if ((ret = func(nzhp, data)) != 0)
2315 			return (ret);
2316 	}
2317 
2318 	/*
2319 	 * An errno value of ESRCH indicates normal completion.  If ENOENT is
2320 	 * returned, then the underlying dataset has been removed since we
2321 	 * obtained the handle.
2322 	 */
2323 	if (errno != ESRCH && errno != ENOENT)
2324 		return (zfs_standard_error(zhp->zfs_hdl, errno,
2325 		    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
2326 
2327 	return (0);
2328 }
2329 
2330 /*
2331  * Iterate over all snapshots
2332  */
2333 int
2334 zfs_iter_snapshots(zfs_handle_t *zhp, zfs_iter_f func, void *data)
2335 {
2336 	zfs_cmd_t zc = { 0 };
2337 	zfs_handle_t *nzhp;
2338 	int ret;
2339 
2340 	for ((void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2341 	    ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2342 	    &zc) == 0;
2343 	    (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name))) {
2344 
2345 		if ((nzhp = make_dataset_handle(zhp->zfs_hdl,
2346 		    zc.zc_name)) == NULL)
2347 			continue;
2348 
2349 		if ((ret = func(nzhp, data)) != 0)
2350 			return (ret);
2351 	}
2352 
2353 	/*
2354 	 * An errno value of ESRCH indicates normal completion.  If ENOENT is
2355 	 * returned, then the underlying dataset has been removed since we
2356 	 * obtained the handle.  Silently ignore this case, and return success.
2357 	 */
2358 	if (errno != ESRCH && errno != ENOENT)
2359 		return (zfs_standard_error(zhp->zfs_hdl, errno,
2360 		    dgettext(TEXT_DOMAIN, "cannot iterate filesystems")));
2361 
2362 	return (0);
2363 }
2364 
2365 /*
2366  * Iterate over all children, snapshots and filesystems
2367  */
2368 int
2369 zfs_iter_children(zfs_handle_t *zhp, zfs_iter_f func, void *data)
2370 {
2371 	int ret;
2372 
2373 	if ((ret = zfs_iter_filesystems(zhp, func, data)) != 0)
2374 		return (ret);
2375 
2376 	return (zfs_iter_snapshots(zhp, func, data));
2377 }
2378 
/*
 * Given a complete name, store in 'buf' just the portion that refers to the
 * parent, i.e. everything before the last '/'.
 *
 * Returns 0 on success.  Returns -1 if:
 *  - 'path' contains no '/' (it names a pool); 'buf' is left untouched.
 *  - 'buflen' is 0; 'buf' is left untouched.
 *  - the parent name does not fit in 'buflen' bytes; 'buf' then holds a
 *    NUL-terminated, truncated prefix so that it is still a valid string.
 *
 * 'path' and 'buf' may refer to the same buffer: check_parents() shortens a
 * name in place this way, which is why memmove() is used for the copy.
 */
static int
parent_name(const char *path, char *buf, size_t buflen)
{
	const char *slash;
	size_t len;

	if (buflen == 0)
		return (-1);

	if ((slash = strrchr(path, '/')) == NULL)
		return (-1);

	len = (size_t)(slash - path);
	if (len >= buflen) {
		/*
		 * Never write past the end of the caller's buffer; report
		 * the failure instead of silently returning a wrong parent.
		 */
		(void) memmove(buf, path, buflen - 1);
		buf[buflen - 1] = '\0';
		return (-1);
	}

	(void) memmove(buf, path, len);
	buf[len] = '\0';

	return (0);
}
2396 
2397 /*
2398  * If accept_ancestor is false, then check to make sure that the given path has
2399  * a parent, and that it exists.  If accept_ancestor is true, then find the
2400  * closest existing ancestor for the given path.  In prefixlen return the
2401  * length of already existing prefix of the given path.  We also fetch the
2402  * 'zoned' property, which is used to validate property settings when creating
2403  * new datasets.
2404  */
2405 static int
2406 check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
2407     boolean_t accept_ancestor, int *prefixlen)
2408 {
2409 	zfs_cmd_t zc = { 0 };
2410 	char parent[ZFS_MAXNAMELEN];
2411 	char *slash;
2412 	zfs_handle_t *zhp;
2413 	char errbuf[1024];
2414 
2415 	(void) snprintf(errbuf, sizeof (errbuf), "cannot create '%s'",
2416 	    path);
2417 
2418 	/* get parent, and check to see if this is just a pool */
2419 	if (parent_name(path, parent, sizeof (parent)) != 0) {
2420 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2421 		    "missing dataset name"));
2422 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2423 	}
2424 
2425 	/* check to see if the pool exists */
2426 	if ((slash = strchr(parent, '/')) == NULL)
2427 		slash = parent + strlen(parent);
2428 	(void) strncpy(zc.zc_name, parent, slash - parent);
2429 	zc.zc_name[slash - parent] = '\0';
2430 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 &&
2431 	    errno == ENOENT) {
2432 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2433 		    "no such pool '%s'"), zc.zc_name);
2434 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
2435 	}
2436 
2437 	/* check to see if the parent dataset exists */
2438 	while ((zhp = make_dataset_handle(hdl, parent)) == NULL) {
2439 		if (errno == ENOENT && accept_ancestor) {
2440 			/*
2441 			 * Go deeper to find an ancestor, give up on top level.
2442 			 */
2443 			if (parent_name(parent, parent, sizeof (parent)) != 0) {
2444 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2445 				    "no such pool '%s'"), zc.zc_name);
2446 				return (zfs_error(hdl, EZFS_NOENT, errbuf));
2447 			}
2448 		} else if (errno == ENOENT) {
2449 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2450 			    "parent does not exist"));
2451 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2452 		} else
2453 			return (zfs_standard_error(hdl, errno, errbuf));
2454 	}
2455 
2456 	*zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
2457 	/* we are in a non-global zone, but parent is in the global zone */
2458 	if (getzoneid() != GLOBAL_ZONEID && !(*zoned)) {
2459 		(void) zfs_standard_error(hdl, EPERM, errbuf);
2460 		zfs_close(zhp);
2461 		return (-1);
2462 	}
2463 
2464 	/* make sure parent is a filesystem */
2465 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
2466 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2467 		    "parent is not a filesystem"));
2468 		(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
2469 		zfs_close(zhp);
2470 		return (-1);
2471 	}
2472 
2473 	zfs_close(zhp);
2474 	if (prefixlen != NULL)
2475 		*prefixlen = strlen(parent);
2476 	return (0);
2477 }
2478 
2479 /*
2480  * Finds whether the dataset of the given type(s) exists.
2481  */
2482 boolean_t
2483 zfs_dataset_exists(libzfs_handle_t *hdl, const char *path, zfs_type_t types)
2484 {
2485 	zfs_handle_t *zhp;
2486 
2487 	if (!zfs_validate_name(hdl, path, types, B_FALSE))
2488 		return (B_FALSE);
2489 
2490 	/*
2491 	 * Try to get stats for the dataset, which will tell us if it exists.
2492 	 */
2493 	if ((zhp = make_dataset_handle(hdl, path)) != NULL) {
2494 		int ds_type = zhp->zfs_type;
2495 
2496 		zfs_close(zhp);
2497 		if (types & ds_type)
2498 			return (B_TRUE);
2499 	}
2500 	return (B_FALSE);
2501 }
2502 
2503 /*
2504  * Creates non-existing ancestors of the given path.
2505  */
2506 int
2507 zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
2508 {
2509 	int prefix;
2510 	uint64_t zoned;
2511 	char *path_copy;
2512 	int rc;
2513 
2514 	if (check_parents(hdl, path, &zoned, B_TRUE, &prefix) != 0)
2515 		return (-1);
2516 
2517 	if ((path_copy = strdup(path)) != NULL) {
2518 		rc = create_parents(hdl, path_copy, prefix);
2519 		free(path_copy);
2520 	}
2521 	if (path_copy == NULL || rc != 0)
2522 		return (-1);
2523 
2524 	return (0);
2525 }
2526 
2527 /*
2528  * Create a new filesystem or volume.
2529  */
2530 int
2531 zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
2532     nvlist_t *props)
2533 {
2534 	zfs_cmd_t zc = { 0 };
2535 	int ret;
2536 	uint64_t size = 0;
2537 	uint64_t blocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
2538 	char errbuf[1024];
2539 	uint64_t zoned;
2540 
2541 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2542 	    "cannot create '%s'"), path);
2543 
2544 	/* validate the path, taking care to note the extended error message */
2545 	if (!zfs_validate_name(hdl, path, type, B_TRUE))
2546 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2547 
2548 	/* validate parents exist */
2549 	if (check_parents(hdl, path, &zoned, B_FALSE, NULL) != 0)
2550 		return (-1);
2551 
2552 	/*
2553 	 * The failure modes when creating a dataset of a different type over
2554 	 * one that already exists is a little strange.  In particular, if you
2555 	 * try to create a dataset on top of an existing dataset, the ioctl()
2556 	 * will return ENOENT, not EEXIST.  To prevent this from happening, we
2557 	 * first try to see if the dataset exists.
2558 	 */
2559 	(void) strlcpy(zc.zc_name, path, sizeof (zc.zc_name));
2560 	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2561 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2562 		    "dataset already exists"));
2563 		return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2564 	}
2565 
2566 	if (type == ZFS_TYPE_VOLUME)
2567 		zc.zc_objset_type = DMU_OST_ZVOL;
2568 	else
2569 		zc.zc_objset_type = DMU_OST_ZFS;
2570 
2571 	if (props && (props = zfs_validate_properties(hdl, type, props,
2572 	    zoned, NULL, errbuf)) == 0)
2573 		return (-1);
2574 
2575 	if (type == ZFS_TYPE_VOLUME) {
2576 		/*
2577 		 * If we are creating a volume, the size and block size must
2578 		 * satisfy a few restraints.  First, the blocksize must be a
2579 		 * valid block size between SPA_{MIN,MAX}BLOCKSIZE.  Second, the
2580 		 * volsize must be a multiple of the block size, and cannot be
2581 		 * zero.
2582 		 */
2583 		if (props == NULL || nvlist_lookup_uint64(props,
2584 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &size) != 0) {
2585 			nvlist_free(props);
2586 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2587 			    "missing volume size"));
2588 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2589 		}
2590 
2591 		if ((ret = nvlist_lookup_uint64(props,
2592 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
2593 		    &blocksize)) != 0) {
2594 			if (ret == ENOENT) {
2595 				blocksize = zfs_prop_default_numeric(
2596 				    ZFS_PROP_VOLBLOCKSIZE);
2597 			} else {
2598 				nvlist_free(props);
2599 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2600 				    "missing volume block size"));
2601 				return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2602 			}
2603 		}
2604 
2605 		if (size == 0) {
2606 			nvlist_free(props);
2607 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2608 			    "volume size cannot be zero"));
2609 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2610 		}
2611 
2612 		if (size % blocksize != 0) {
2613 			nvlist_free(props);
2614 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2615 			    "volume size must be a multiple of volume block "
2616 			    "size"));
2617 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2618 		}
2619 	}
2620 
2621 	if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0)
2622 		return (-1);
2623 	nvlist_free(props);
2624 
2625 	/* create the dataset */
2626 	ret = zfs_ioctl(hdl, ZFS_IOC_CREATE, &zc);
2627 
2628 	if (ret == 0 && type == ZFS_TYPE_VOLUME) {
2629 		ret = zvol_create_link(hdl, path);
2630 		if (ret) {
2631 			(void) zfs_standard_error(hdl, errno,
2632 			    dgettext(TEXT_DOMAIN,
2633 			    "Volume successfully created, but device links "
2634 			    "were not created"));
2635 			zcmd_free_nvlists(&zc);
2636 			return (-1);
2637 		}
2638 	}
2639 
2640 	zcmd_free_nvlists(&zc);
2641 
2642 	/* check for failure */
2643 	if (ret != 0) {
2644 		char parent[ZFS_MAXNAMELEN];
2645 		(void) parent_name(path, parent, sizeof (parent));
2646 
2647 		switch (errno) {
2648 		case ENOENT:
2649 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2650 			    "no such parent '%s'"), parent);
2651 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2652 
2653 		case EINVAL:
2654 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2655 			    "parent '%s' is not a filesystem"), parent);
2656 			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2657 
2658 		case EDOM:
2659 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2660 			    "volume block size must be power of 2 from "
2661 			    "%u to %uk"),
2662 			    (uint_t)SPA_MINBLOCKSIZE,
2663 			    (uint_t)SPA_MAXBLOCKSIZE >> 10);
2664 
2665 			return (zfs_error(hdl, EZFS_BADPROP, errbuf));
2666 
2667 		case ENOTSUP:
2668 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2669 			    "pool must be upgraded to set this "
2670 			    "property or value"));
2671 			return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
2672 
2673 #ifdef _ILP32
2674 		case EOVERFLOW:
2675 			/*
2676 			 * This platform can't address a volume this big.
2677 			 */
2678 			if (type == ZFS_TYPE_VOLUME)
2679 				return (zfs_error(hdl, EZFS_VOLTOOBIG,
2680 				    errbuf));
2681 #endif
2682 			/* FALLTHROUGH */
2683 		default:
2684 			return (zfs_standard_error(hdl, errno, errbuf));
2685 		}
2686 	}
2687 
2688 	return (0);
2689 }
2690 
2691 /*
2692  * Destroys the given dataset.  The caller must make sure that the filesystem
2693  * isn't mounted, and that there are no active dependents.
2694  */
2695 int
2696 zfs_destroy(zfs_handle_t *zhp)
2697 {
2698 	zfs_cmd_t zc = { 0 };
2699 
2700 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2701 
2702 	if (ZFS_IS_VOLUME(zhp)) {
2703 		/*
2704 		 * If user doesn't have permissions to unshare volume, then
2705 		 * abort the request.  This would only happen for a
2706 		 * non-privileged user.
2707 		 */
2708 		if (zfs_unshare_iscsi(zhp) != 0) {
2709 			return (-1);
2710 		}
2711 
2712 		if (zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
2713 			return (-1);
2714 
2715 		zc.zc_objset_type = DMU_OST_ZVOL;
2716 	} else {
2717 		zc.zc_objset_type = DMU_OST_ZFS;
2718 	}
2719 
2720 	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY, &zc) != 0) {
2721 		return (zfs_standard_error_fmt(zhp->zfs_hdl, errno,
2722 		    dgettext(TEXT_DOMAIN, "cannot destroy '%s'"),
2723 		    zhp->zfs_name));
2724 	}
2725 
2726 	remove_mountpoint(zhp);
2727 
2728 	return (0);
2729 }
2730 
2731 struct destroydata {
2732 	char *snapname;
2733 	boolean_t gotone;
2734 	boolean_t closezhp;
2735 };
2736 
2737 static int
2738 zfs_remove_link_cb(zfs_handle_t *zhp, void *arg)
2739 {
2740 	struct destroydata *dd = arg;
2741 	zfs_handle_t *szhp;
2742 	char name[ZFS_MAXNAMELEN];
2743 	boolean_t closezhp = dd->closezhp;
2744 	int rv;
2745 
2746 	(void) strlcpy(name, zhp->zfs_name, sizeof (name));
2747 	(void) strlcat(name, "@", sizeof (name));
2748 	(void) strlcat(name, dd->snapname, sizeof (name));
2749 
2750 	szhp = make_dataset_handle(zhp->zfs_hdl, name);
2751 	if (szhp) {
2752 		dd->gotone = B_TRUE;
2753 		zfs_close(szhp);
2754 	}
2755 
2756 	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
2757 		(void) zvol_remove_link(zhp->zfs_hdl, name);
2758 		/*
2759 		 * NB: this is simply a best-effort.  We don't want to
2760 		 * return an error, because then we wouldn't visit all
2761 		 * the volumes.
2762 		 */
2763 	}
2764 
2765 	dd->closezhp = B_TRUE;
2766 	rv = zfs_iter_filesystems(zhp, zfs_remove_link_cb, arg);
2767 	if (closezhp)
2768 		zfs_close(zhp);
2769 	return (rv);
2770 }
2771 
2772 /*
2773  * Destroys all snapshots with the given name in zhp & descendants.
2774  */
2775 int
2776 zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname)
2777 {
2778 	zfs_cmd_t zc = { 0 };
2779 	int ret;
2780 	struct destroydata dd = { 0 };
2781 
2782 	dd.snapname = snapname;
2783 	(void) zfs_remove_link_cb(zhp, &dd);
2784 
2785 	if (!dd.gotone) {
2786 		return (zfs_standard_error_fmt(zhp->zfs_hdl, ENOENT,
2787 		    dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
2788 		    zhp->zfs_name, snapname));
2789 	}
2790 
2791 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
2792 	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
2793 
2794 	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_DESTROY_SNAPS, &zc);
2795 	if (ret != 0) {
2796 		char errbuf[1024];
2797 
2798 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2799 		    "cannot destroy '%s@%s'"), zc.zc_name, snapname);
2800 
2801 		switch (errno) {
2802 		case EEXIST:
2803 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2804 			    "snapshot is cloned"));
2805 			return (zfs_error(zhp->zfs_hdl, EZFS_EXISTS, errbuf));
2806 
2807 		default:
2808 			return (zfs_standard_error(zhp->zfs_hdl, errno,
2809 			    errbuf));
2810 		}
2811 	}
2812 
2813 	return (0);
2814 }
2815 
2816 /*
2817  * Clones the given dataset.  The target must be of the same type as the source.
2818  */
2819 int
2820 zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
2821 {
2822 	zfs_cmd_t zc = { 0 };
2823 	char parent[ZFS_MAXNAMELEN];
2824 	int ret;
2825 	char errbuf[1024];
2826 	libzfs_handle_t *hdl = zhp->zfs_hdl;
2827 	zfs_type_t type;
2828 	uint64_t zoned;
2829 
2830 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
2831 
2832 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2833 	    "cannot create '%s'"), target);
2834 
2835 	/* validate the target name */
2836 	if (!zfs_validate_name(hdl, target, ZFS_TYPE_FILESYSTEM, B_TRUE))
2837 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2838 
2839 	/* validate parents exist */
2840 	if (check_parents(hdl, target, &zoned, B_FALSE, NULL) != 0)
2841 		return (-1);
2842 
2843 	(void) parent_name(target, parent, sizeof (parent));
2844 
2845 	/* do the clone */
2846 	if (ZFS_IS_VOLUME(zhp)) {
2847 		zc.zc_objset_type = DMU_OST_ZVOL;
2848 		type = ZFS_TYPE_VOLUME;
2849 	} else {
2850 		zc.zc_objset_type = DMU_OST_ZFS;
2851 		type = ZFS_TYPE_FILESYSTEM;
2852 	}
2853 
2854 	if (props) {
2855 		if ((props = zfs_validate_properties(hdl, type, props,
2856 		    zoned, zhp, errbuf)) == NULL)
2857 			return (-1);
2858 
2859 		if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
2860 			nvlist_free(props);
2861 			return (-1);
2862 		}
2863 
2864 		nvlist_free(props);
2865 	}
2866 
2867 	(void) strlcpy(zc.zc_name, target, sizeof (zc.zc_name));
2868 	(void) strlcpy(zc.zc_value, zhp->zfs_name, sizeof (zc.zc_value));
2869 	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_CREATE, &zc);
2870 
2871 	zcmd_free_nvlists(&zc);
2872 
2873 	if (ret != 0) {
2874 		switch (errno) {
2875 
2876 		case ENOENT:
2877 			/*
2878 			 * The parent doesn't exist.  We should have caught this
2879 			 * above, but there may a race condition that has since
2880 			 * destroyed the parent.
2881 			 *
2882 			 * At this point, we don't know whether it's the source
2883 			 * that doesn't exist anymore, or whether the target
2884 			 * dataset doesn't exist.
2885 			 */
2886 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2887 			    "no such parent '%s'"), parent);
2888 			return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
2889 
2890 		case EXDEV:
2891 			zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2892 			    "source and target pools differ"));
2893 			return (zfs_error(zhp->zfs_hdl, EZFS_CROSSTARGET,
2894 			    errbuf));
2895 
2896 		default:
2897 			return (zfs_standard_error(zhp->zfs_hdl, errno,
2898 			    errbuf));
2899 		}
2900 	} else if (ZFS_IS_VOLUME(zhp)) {
2901 		ret = zvol_create_link(zhp->zfs_hdl, target);
2902 	}
2903 
2904 	return (ret);
2905 }
2906 
2907 typedef struct promote_data {
2908 	char cb_mountpoint[MAXPATHLEN];
2909 	const char *cb_target;
2910 	const char *cb_errbuf;
2911 	uint64_t cb_pivot_txg;
2912 } promote_data_t;
2913 
2914 static int
2915 promote_snap_cb(zfs_handle_t *zhp, void *data)
2916 {
2917 	promote_data_t *pd = data;
2918 	zfs_handle_t *szhp;
2919 	char snapname[MAXPATHLEN];
2920 	int rv = 0;
2921 
2922 	/* We don't care about snapshots after the pivot point */
2923 	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) > pd->cb_pivot_txg) {
2924 		zfs_close(zhp);
2925 		return (0);
2926 	}
2927 
2928 	/* Remove the device link if it's a zvol. */
2929 	if (ZFS_IS_VOLUME(zhp))
2930 		(void) zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name);
2931 
2932 	/* Check for conflicting names */
2933 	(void) strlcpy(snapname, pd->cb_target, sizeof (snapname));
2934 	(void) strlcat(snapname, strchr(zhp->zfs_name, '@'), sizeof (snapname));
2935 	szhp = make_dataset_handle(zhp->zfs_hdl, snapname);
2936 	if (szhp != NULL) {
2937 		zfs_close(szhp);
2938 		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2939 		    "snapshot name '%s' from origin \n"
2940 		    "conflicts with '%s' from target"),
2941 		    zhp->zfs_name, snapname);
2942 		rv = zfs_error(zhp->zfs_hdl, EZFS_EXISTS, pd->cb_errbuf);
2943 	}
2944 	zfs_close(zhp);
2945 	return (rv);
2946 }
2947 
2948 static int
2949 promote_snap_done_cb(zfs_handle_t *zhp, void *data)
2950 {
2951 	promote_data_t *pd = data;
2952 
2953 	/* We don't care about snapshots after the pivot point */
2954 	if (zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) <= pd->cb_pivot_txg) {
2955 		/* Create the device link if it's a zvol. */
2956 		if (ZFS_IS_VOLUME(zhp))
2957 			(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
2958 	}
2959 
2960 	zfs_close(zhp);
2961 	return (0);
2962 }
2963 
2964 /*
2965  * Promotes the given clone fs to be the clone parent.
2966  */
2967 int
2968 zfs_promote(zfs_handle_t *zhp)
2969 {
2970 	libzfs_handle_t *hdl = zhp->zfs_hdl;
2971 	zfs_cmd_t zc = { 0 };
2972 	char parent[MAXPATHLEN];
2973 	char *cp;
2974 	int ret;
2975 	zfs_handle_t *pzhp;
2976 	promote_data_t pd;
2977 	char errbuf[1024];
2978 
2979 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2980 	    "cannot promote '%s'"), zhp->zfs_name);
2981 
2982 	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
2983 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2984 		    "snapshots can not be promoted"));
2985 		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2986 	}
2987 
2988 	(void) strlcpy(parent, zhp->zfs_dmustats.dds_clone_of, sizeof (parent));
2989 	if (parent[0] == '\0') {
2990 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2991 		    "not a cloned filesystem"));
2992 		return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2993 	}
2994 	cp = strchr(parent, '@');
2995 	*cp = '\0';
2996 
2997 	/* Walk the snapshots we will be moving */
2998 	pzhp = zfs_open(hdl, zhp->zfs_dmustats.dds_clone_of, ZFS_TYPE_SNAPSHOT);
2999 	if (pzhp == NULL)
3000 		return (-1);
3001 	pd.cb_pivot_txg = zfs_prop_get_int(pzhp, ZFS_PROP_CREATETXG);
3002 	zfs_close(pzhp);
3003 	pd.cb_target = zhp->zfs_name;
3004 	pd.cb_errbuf = errbuf;
3005 	pzhp = zfs_open(hdl, parent, ZFS_TYPE_DATASET);
3006 	if (pzhp == NULL)
3007 		return (-1);
3008 	(void) zfs_prop_get(pzhp, ZFS_PROP_MOUNTPOINT, pd.cb_mountpoint,
3009 	    sizeof (pd.cb_mountpoint), NULL, NULL, 0, FALSE);
3010 	ret = zfs_iter_snapshots(pzhp, promote_snap_cb, &pd);
3011 	if (ret != 0) {
3012 		zfs_close(pzhp);
3013 		return (-1);
3014 	}
3015 
3016 	/* issue the ioctl */
3017 	(void) strlcpy(zc.zc_value, zhp->zfs_dmustats.dds_clone_of,
3018 	    sizeof (zc.zc_value));
3019 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3020 	ret = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3021 
3022 	if (ret != 0) {
3023 		int save_errno = errno;
3024 
3025 		(void) zfs_iter_snapshots(pzhp, promote_snap_done_cb, &pd);
3026 		zfs_close(pzhp);
3027 
3028 		switch (save_errno) {
3029 		case EEXIST:
3030 			/*
3031 			 * There is a conflicting snapshot name.  We
3032 			 * should have caught this above, but they could
3033 			 * have renamed something in the mean time.
3034 			 */
3035 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3036 			    "conflicting snapshot name from parent '%s'"),
3037 			    parent);
3038 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3039 
3040 		default:
3041 			return (zfs_standard_error(hdl, save_errno, errbuf));
3042 		}
3043 	} else {
3044 		(void) zfs_iter_snapshots(zhp, promote_snap_done_cb, &pd);
3045 	}
3046 
3047 	zfs_close(pzhp);
3048 	return (ret);
3049 }
3050 
/*
 * Arguments carried through the zfs_create_link_cb() recursion.
 */
struct createdata {
	/* snapshot name (the part after '@') appended to each volume name */
	const char *cd_snapname;
	/* passed through unchanged to zvol_create_link_common() */
	int cd_ifexists;
};
3055 
3056 static int
3057 zfs_create_link_cb(zfs_handle_t *zhp, void *arg)
3058 {
3059 	struct createdata *cd = arg;
3060 	int ret;
3061 
3062 	if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
3063 		char name[MAXPATHLEN];
3064 
3065 		(void) strlcpy(name, zhp->zfs_name, sizeof (name));
3066 		(void) strlcat(name, "@", sizeof (name));
3067 		(void) strlcat(name, cd->cd_snapname, sizeof (name));
3068 		(void) zvol_create_link_common(zhp->zfs_hdl, name,
3069 		    cd->cd_ifexists);
3070 		/*
3071 		 * NB: this is simply a best-effort.  We don't want to
3072 		 * return an error, because then we wouldn't visit all
3073 		 * the volumes.
3074 		 */
3075 	}
3076 
3077 	ret = zfs_iter_filesystems(zhp, zfs_create_link_cb, cd);
3078 
3079 	zfs_close(zhp);
3080 
3081 	return (ret);
3082 }
3083 
3084 /*
3085  * Takes a snapshot of the given dataset.
3086  */
3087 int
3088 zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive)
3089 {
3090 	const char *delim;
3091 	char *parent;
3092 	zfs_handle_t *zhp;
3093 	zfs_cmd_t zc = { 0 };
3094 	int ret;
3095 	char errbuf[1024];
3096 
3097 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3098 	    "cannot snapshot '%s'"), path);
3099 
3100 	/* validate the target name */
3101 	if (!zfs_validate_name(hdl, path, ZFS_TYPE_SNAPSHOT, B_TRUE))
3102 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3103 
3104 	/* make sure the parent exists and is of the appropriate type */
3105 	delim = strchr(path, '@');
3106 	if ((parent = zfs_alloc(hdl, delim - path + 1)) == NULL)
3107 		return (-1);
3108 	(void) strncpy(parent, path, delim - path);
3109 	parent[delim - path] = '\0';
3110 
3111 	if ((zhp = zfs_open(hdl, parent, ZFS_TYPE_FILESYSTEM |
3112 	    ZFS_TYPE_VOLUME)) == NULL) {
3113 		free(parent);
3114 		return (-1);
3115 	}
3116 
3117 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3118 	(void) strlcpy(zc.zc_value, delim+1, sizeof (zc.zc_value));
3119 	if (ZFS_IS_VOLUME(zhp))
3120 		zc.zc_objset_type = DMU_OST_ZVOL;
3121 	else
3122 		zc.zc_objset_type = DMU_OST_ZFS;
3123 	zc.zc_cookie = recursive;
3124 	ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SNAPSHOT, &zc);
3125 
3126 	/*
3127 	 * if it was recursive, the one that actually failed will be in
3128 	 * zc.zc_name.
3129 	 */
3130 	if (ret != 0)
3131 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3132 		    "cannot create snapshot '%s@%s'"), zc.zc_name, zc.zc_value);
3133 
3134 	if (ret == 0 && recursive) {
3135 		struct createdata cd;
3136 
3137 		cd.cd_snapname = delim + 1;
3138 		cd.cd_ifexists = B_FALSE;
3139 		(void) zfs_iter_filesystems(zhp, zfs_create_link_cb, &cd);
3140 	}
3141 	if (ret == 0 && zhp->zfs_type == ZFS_TYPE_VOLUME) {
3142 		ret = zvol_create_link(zhp->zfs_hdl, path);
3143 		if (ret != 0) {
3144 			(void) zfs_standard_error(hdl, errno,
3145 			    dgettext(TEXT_DOMAIN,
3146 			    "Volume successfully snapshotted, but device links "
3147 			    "were not created"));
3148 			free(parent);
3149 			zfs_close(zhp);
3150 			return (-1);
3151 		}
3152 	}
3153 
3154 	if (ret != 0)
3155 		(void) zfs_standard_error(hdl, errno, errbuf);
3156 
3157 	free(parent);
3158 	zfs_close(zhp);
3159 
3160 	return (ret);
3161 }
3162 
3163 /*
3164  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
3165  * NULL) to the file descriptor specified by outfd.
3166  */
3167 int
3168 zfs_send(zfs_handle_t *zhp, const char *fromsnap, int outfd)
3169 {
3170 	zfs_cmd_t zc = { 0 };
3171 	char errbuf[1024];
3172 	libzfs_handle_t *hdl = zhp->zfs_hdl;
3173 
3174 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
3175 
3176 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3177 	if (fromsnap)
3178 		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_name));
3179 	zc.zc_cookie = outfd;
3180 
3181 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SENDBACKUP, &zc) != 0) {
3182 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3183 		    "cannot send '%s'"), zhp->zfs_name);
3184 
3185 		switch (errno) {
3186 
3187 		case EXDEV:
3188 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3189 			    "not an earlier snapshot from the same fs"));
3190 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
3191 
3192 		case EDQUOT:
3193 		case EFBIG:
3194 		case EIO:
3195 		case ENOLINK:
3196 		case ENOSPC:
3197 		case ENOSTR:
3198 		case ENXIO:
3199 		case EPIPE:
3200 		case ERANGE:
3201 		case EFAULT:
3202 		case EROFS:
3203 			zfs_error_aux(hdl, strerror(errno));
3204 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
3205 
3206 		default:
3207 			return (zfs_standard_error(hdl, errno, errbuf));
3208 		}
3209 	}
3210 
3211 	return (0);
3212 }
3213 
3214 /*
3215  * Create ancestors of 'target', but not target itself, and not
3216  * ancestors whose names are shorter than prefixlen.  Die if
3217  * prefixlen-ancestor does not exist.
3218  */
3219 static int
3220 create_parents(libzfs_handle_t *hdl, char *target, int prefixlen)
3221 {
3222 	zfs_handle_t *h;
3223 	char *cp;
3224 
3225 	/* make sure prefix exists */
3226 	cp = strchr(target + prefixlen, '/');
3227 	if (cp == NULL) {
3228 		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
3229 	} else {
3230 		*cp = '\0';
3231 		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
3232 		*cp = '/';
3233 	}
3234 	if (h == NULL)
3235 		return (-1);
3236 	zfs_close(h);
3237 
3238 	/*
3239 	 * Attempt to create, mount, and share any ancestor filesystems,
3240 	 * up to the prefixlen-long one.
3241 	 */
3242 	for (cp = target + prefixlen + 1;
3243 	    cp = strchr(cp, '/'); *cp = '/', cp++) {
3244 		const char *opname;
3245 		char *logstr;
3246 
3247 		*cp = '\0';
3248 
3249 		h = make_dataset_handle(hdl, target);
3250 		if (h) {
3251 			/* it already exists, nothing to do here */
3252 			zfs_close(h);
3253 			continue;
3254 		}
3255 
3256 		opname = dgettext(TEXT_DOMAIN, "create");
3257 		logstr = hdl->libzfs_log_str;
3258 		hdl->libzfs_log_str = NULL;
3259 		if (zfs_create(hdl, target, ZFS_TYPE_FILESYSTEM,
3260 		    NULL) != 0) {
3261 			hdl->libzfs_log_str = logstr;
3262 			goto ancestorerr;
3263 		}
3264 
3265 		hdl->libzfs_log_str = logstr;
3266 		opname = dgettext(TEXT_DOMAIN, "open");
3267 		h = zfs_open(hdl, target, ZFS_TYPE_FILESYSTEM);
3268 		if (h == NULL)
3269 			goto ancestorerr;
3270 
3271 		opname = dgettext(TEXT_DOMAIN, "mount");
3272 		if (zfs_mount(h, NULL, 0) != 0)
3273 			goto ancestorerr;
3274 
3275 		opname = dgettext(TEXT_DOMAIN, "share");
3276 		if (zfs_share(h) != 0)
3277 			goto ancestorerr;
3278 
3279 		zfs_close(h);
3280 
3281 		continue;
3282 ancestorerr:
3283 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3284 		    "failed to %s ancestor '%s'"), opname, target);
3285 		return (-1);
3286 	}
3287 
3288 	return (0);
3289 }
3290 
3291 /*
3292  * Restores a backup of tosnap from the file descriptor specified by infd.
3293  */
3294 int
3295 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, int isprefix,
3296     int verbose, int dryrun, boolean_t force, int infd)
3297 {
3298 	zfs_cmd_t zc = { 0 };
3299 	time_t begin_time;
3300 	int ioctl_err, err, bytes, size, choplen;
3301 	char *cp;
3302 	dmu_replay_record_t drr;
3303 	struct drr_begin *drrb = &zc.zc_begin_record;
3304 	char errbuf[1024];
3305 	char chopprefix[ZFS_MAXNAMELEN];
3306 
3307 	begin_time = time(NULL);
3308 
3309 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3310 	    "cannot receive"));
3311 
3312 	/* read in the BEGIN record */
3313 	cp = (char *)&drr;
3314 	bytes = 0;
3315 	do {
3316 		size = read(infd, cp, sizeof (drr) - bytes);
3317 		cp += size;
3318 		bytes += size;
3319 	} while (size > 0);
3320 
3321 	if (size < 0 || bytes != sizeof (drr)) {
3322 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3323 		    "stream (failed to read first record)"));
3324 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3325 	}
3326 
3327 	zc.zc_begin_record = drr.drr_u.drr_begin;
3328 
3329 	if (drrb->drr_magic != DMU_BACKUP_MAGIC &&
3330 	    drrb->drr_magic != BSWAP_64(DMU_BACKUP_MAGIC)) {
3331 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3332 		    "stream (bad magic number)"));
3333 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3334 	}
3335 
3336 	if (drrb->drr_version != DMU_BACKUP_VERSION &&
3337 	    drrb->drr_version != BSWAP_64(DMU_BACKUP_VERSION)) {
3338 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only version "
3339 		    "0x%llx is supported (stream is version 0x%llx)"),
3340 		    DMU_BACKUP_VERSION, drrb->drr_version);
3341 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3342 	}
3343 
3344 	if (strchr(drr.drr_u.drr_begin.drr_toname, '@') == NULL) {
3345 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3346 		    "stream (bad snapshot name)"));
3347 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3348 	}
3349 	/*
3350 	 * Determine how much of the snapshot name stored in the stream
3351 	 * we are going to tack on to the name they specified on the
3352 	 * command line, and how much we are going to chop off.
3353 	 *
3354 	 * If they specified a snapshot, chop the entire name stored in
3355 	 * the stream.
3356 	 */
3357 	(void) strcpy(chopprefix, drr.drr_u.drr_begin.drr_toname);
3358 	if (isprefix) {
3359 		/*
3360 		 * They specified a fs with -d, we want to tack on
3361 		 * everything but the pool name stored in the stream
3362 		 */
3363 		if (strchr(tosnap, '@')) {
3364 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3365 			    "argument - snapshot not allowed with -d"));
3366 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3367 		}
3368 		cp = strchr(chopprefix, '/');
3369 		if (cp == NULL)
3370 			cp = strchr(chopprefix, '@');
3371 		*cp = '\0';
3372 	} else if (strchr(tosnap, '@') == NULL) {
3373 		/*
3374 		 * If they specified a filesystem without -d, we want to
3375 		 * tack on everything after the fs specified in the
3376 		 * first name from the stream.
3377 		 */
3378 		cp = strchr(chopprefix, '@');
3379 		*cp = '\0';
3380 	}
3381 	choplen = strlen(chopprefix);
3382 
3383 	/*
3384 	 * Determine name of destination snapshot, store in zc_value.
3385 	 */
3386 	(void) strcpy(zc.zc_value, tosnap);
3387 	(void) strncat(zc.zc_value, drr.drr_u.drr_begin.drr_toname+choplen,
3388 	    sizeof (zc.zc_value));
3389 	if (!zfs_validate_name(hdl, zc.zc_value, ZFS_TYPE_SNAPSHOT, B_TRUE))
3390 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3391 
3392 	(void) strcpy(zc.zc_name, zc.zc_value);
3393 	if (drrb->drr_fromguid) {
3394 		/* incremental backup stream */
3395 		zfs_handle_t *h;
3396 
3397 		/* do the recvbackup ioctl to the containing fs */
3398 		*strchr(zc.zc_name, '@') = '\0';
3399 
3400 		/* make sure destination fs exists */
3401 		h = zfs_open(hdl, zc.zc_name,
3402 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3403 		if (h == NULL)
3404 			return (-1);
3405 		if (!dryrun && h->zfs_type == ZFS_TYPE_VOLUME) {
3406 			if (zvol_remove_link(hdl, h->zfs_name) != 0) {
3407 				zfs_close(h);
3408 				return (-1);
3409 			}
3410 		}
3411 		zfs_close(h);
3412 	} else {
3413 		/* full backup stream */
3414 
3415 		/* Make sure destination fs does not exist */
3416 		*strchr(zc.zc_name, '@') = '\0';
3417 		if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
3418 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3419 			    "destination '%s' exists"), zc.zc_name);
3420 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3421 		}
3422 
3423 		if (strchr(zc.zc_name, '/') == NULL) {
3424 			/*
3425 			 * they're trying to do a recv into a
3426 			 * nonexistant topmost filesystem.
3427 			 */
3428 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3429 			    "destination does not exist"), zc.zc_name);
3430 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
3431 		}
3432 
3433 		/* Do the recvbackup ioctl to the fs's parent. */
3434 		*strrchr(zc.zc_name, '/') = '\0';
3435 
3436 		if (isprefix && (err = create_parents(hdl,
3437 		    zc.zc_value, strlen(tosnap))) != 0) {
3438 			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
3439 		}
3440 
3441 	}
3442 
3443 	zc.zc_cookie = infd;
3444 	zc.zc_guid = force;
3445 	if (verbose) {
3446 		(void) printf("%s %s stream of %s into %s\n",
3447 		    dryrun ? "would receive" : "receiving",
3448 		    drrb->drr_fromguid ? "incremental" : "full",
3449 		    drr.drr_u.drr_begin.drr_toname,
3450 		    zc.zc_value);
3451 		(void) fflush(stdout);
3452 	}
3453 	if (dryrun)
3454 		return (0);
3455 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECVBACKUP, &zc);
3456 	if (ioctl_err != 0) {
3457 		switch (errno) {
3458 		case ENODEV:
3459 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3460 			    "most recent snapshot does not match incremental "
3461 			    "source"));
3462 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3463 			break;
3464 		case ETXTBSY:
3465 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3466 			    "destination has been modified since most recent "
3467 			    "snapshot"));
3468 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
3469 			break;
3470 		case EEXIST:
3471 			if (drrb->drr_fromguid == 0) {
3472 				/* it's the containing fs that exists */
3473 				cp = strchr(zc.zc_value, '@');
3474 				*cp = '\0';
3475 			}
3476 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3477 			    "destination already exists"));
3478 			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
3479 			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
3480 			    zc.zc_value);
3481 			break;
3482 		case EINVAL:
3483 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3484 			break;
3485 		case ECKSUM:
3486 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3487 			    "invalid stream (checksum mismatch)"));
3488 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3489 			break;
3490 		default:
3491 			(void) zfs_standard_error(hdl, errno, errbuf);
3492 		}
3493 	}
3494 
3495 	/*
3496 	 * Mount or recreate the /dev links for the target filesystem
3497 	 * (if created, or if we tore them down to do an incremental
3498 	 * restore), and the /dev links for the new snapshot (if
3499 	 * created). Also mount any children of the target filesystem
3500 	 * if we did an incremental receive.
3501 	 */
3502 	cp = strchr(zc.zc_value, '@');
3503 	if (cp && (ioctl_err == 0 || drrb->drr_fromguid)) {
3504 		zfs_handle_t *h;
3505 
3506 		*cp = '\0';
3507 		h = zfs_open(hdl, zc.zc_value,
3508 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3509 		*cp = '@';
3510 		if (h) {
3511 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
3512 				err = zvol_create_link(hdl, h->zfs_name);
3513 				if (err == 0 && ioctl_err == 0)
3514 					err = zvol_create_link(hdl,
3515 					    zc.zc_value);
3516 			} else if (!drrb->drr_fromguid) {
3517 				err = zfs_mount(h, NULL, 0);
3518 			}
3519 		zfs_close(h);
3520 		}
3521 	}
3522 
3523 	if (err || ioctl_err)
3524 		return (-1);
3525 
3526 	if (verbose) {
3527 		char buf1[64];
3528 		char buf2[64];
3529 		uint64_t bytes = zc.zc_cookie;
3530 		time_t delta = time(NULL) - begin_time;
3531 		if (delta == 0)
3532 			delta = 1;
3533 		zfs_nicenum(bytes, buf1, sizeof (buf1));
3534 		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3535 
3536 		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3537 		    buf1, delta, buf2);
3538 	}
3539 
3540 	return (0);
3541 }
3542 
3543 /*
3544  * Destroy any more recent snapshots.  We invoke this callback on any dependents
3545  * of the snapshot first.  If the 'cb_dependent' member is non-zero, then this
3546  * is a dependent and we should just destroy it without checking the transaction
3547  * group.
3548  */
3549 typedef struct rollback_data {
3550 	const char	*cb_target;		/* the snapshot */
3551 	uint64_t	cb_create;		/* creation time reference */
3552 	prop_changelist_t *cb_clp;		/* changelist pointer */
3553 	int		cb_error;
3554 	boolean_t	cb_dependent;
3555 } rollback_data_t;
3556 
3557 static int
3558 rollback_destroy(zfs_handle_t *zhp, void *data)
3559 {
3560 	rollback_data_t *cbp = data;
3561 
3562 	if (!cbp->cb_dependent) {
3563 		if (strcmp(zhp->zfs_name, cbp->cb_target) != 0 &&
3564 		    zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
3565 		    zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG) >
3566 		    cbp->cb_create) {
3567 			char *logstr;
3568 
3569 			cbp->cb_dependent = B_TRUE;
3570 			if (zfs_iter_dependents(zhp, B_FALSE, rollback_destroy,
3571 			    cbp) != 0)
3572 				cbp->cb_error = 1;
3573 			cbp->cb_dependent = B_FALSE;
3574 
3575 			logstr = zhp->zfs_hdl->libzfs_log_str;
3576 			zhp->zfs_hdl->libzfs_log_str = NULL;
3577 			if (zfs_destroy(zhp) != 0)
3578 				cbp->cb_error = 1;
3579 			else
3580 				changelist_remove(zhp, cbp->cb_clp);
3581 			zhp->zfs_hdl->libzfs_log_str = logstr;
3582 		}
3583 	} else {
3584 		if (zfs_destroy(zhp) != 0)
3585 			cbp->cb_error = 1;
3586 		else
3587 			changelist_remove(zhp, cbp->cb_clp);
3588 	}
3589 
3590 	zfs_close(zhp);
3591 	return (0);
3592 }
3593 
3594 /*
3595  * Rollback the dataset to its latest snapshot.
3596  */
3597 static int
3598 do_rollback(zfs_handle_t *zhp)
3599 {
3600 	int ret;
3601 	zfs_cmd_t zc = { 0 };
3602 
3603 	assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM ||
3604 	    zhp->zfs_type == ZFS_TYPE_VOLUME);
3605 
3606 	if (zhp->zfs_type == ZFS_TYPE_VOLUME &&
3607 	    zvol_remove_link(zhp->zfs_hdl, zhp->zfs_name) != 0)
3608 		return (-1);
3609 
3610 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3611 
3612 	if (ZFS_IS_VOLUME(zhp))
3613 		zc.zc_objset_type = DMU_OST_ZVOL;
3614 	else
3615 		zc.zc_objset_type = DMU_OST_ZFS;
3616 
3617 	/*
3618 	 * We rely on the consumer to verify that there are no newer snapshots
3619 	 * for the given dataset.  Given these constraints, we can simply pass
3620 	 * the name on to the ioctl() call.  There is still an unlikely race
3621 	 * condition where the user has taken a snapshot since we verified that
3622 	 * this was the most recent.
3623 	 */
3624 	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_ROLLBACK, &zc)) != 0) {
3625 		(void) zfs_standard_error_fmt(zhp->zfs_hdl, errno,
3626 		    dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
3627 		    zhp->zfs_name);
3628 	} else if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
3629 		ret = zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
3630 	}
3631 
3632 	return (ret);
3633 }
3634 
3635 /*
3636  * Given a dataset, rollback to a specific snapshot, discarding any
3637  * data changes since then and making it the active dataset.
3638  *
3639  * Any snapshots more recent than the target are destroyed, along with
3640  * their dependents.
3641  */
3642 int
3643 zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, int flag)
3644 {
3645 	int ret;
3646 	rollback_data_t cb = { 0 };
3647 	prop_changelist_t *clp;
3648 
3649 	/*
3650 	 * Unmount all dependendents of the dataset and the dataset itself.
3651 	 * The list we need to gather is the same as for doing rename
3652 	 */
3653 	clp = changelist_gather(zhp, ZFS_PROP_NAME, flag ? MS_FORCE: 0);
3654 	if (clp == NULL)
3655 		return (-1);
3656 
3657 	if ((ret = changelist_prefix(clp)) != 0)
3658 		goto out;
3659 
3660 	/*
3661 	 * Destroy all recent snapshots and its dependends.
3662 	 */
3663 	cb.cb_target = snap->zfs_name;
3664 	cb.cb_create = zfs_prop_get_int(snap, ZFS_PROP_CREATETXG);
3665 	cb.cb_clp = clp;
3666 	(void) zfs_iter_children(zhp, rollback_destroy, &cb);
3667 
3668 	if ((ret = cb.cb_error) != 0) {
3669 		(void) changelist_postfix(clp);
3670 		goto out;
3671 	}
3672 
3673 	/*
3674 	 * Now that we have verified that the snapshot is the latest,
3675 	 * rollback to the given snapshot.
3676 	 */
3677 	ret = do_rollback(zhp);
3678 
3679 	if (ret != 0) {
3680 		(void) changelist_postfix(clp);
3681 		goto out;
3682 	}
3683 
3684 	/*
3685 	 * We only want to re-mount the filesystem if it was mounted in the
3686 	 * first place.
3687 	 */
3688 	ret = changelist_postfix(clp);
3689 
3690 out:
3691 	changelist_free(clp);
3692 	return (ret);
3693 }
3694 
3695 /*
3696  * Iterate over all dependents for a given dataset.  This includes both
3697  * hierarchical dependents (children) and data dependents (snapshots and
3698  * clones).  The bulk of the processing occurs in get_dependents() in
3699  * libzfs_graph.c.
3700  */
3701 int
3702 zfs_iter_dependents(zfs_handle_t *zhp, boolean_t allowrecursion,
3703     zfs_iter_f func, void *data)
3704 {
3705 	char **dependents;
3706 	size_t count;
3707 	int i;
3708 	zfs_handle_t *child;
3709 	int ret = 0;
3710 
3711 	if (get_dependents(zhp->zfs_hdl, allowrecursion, zhp->zfs_name,
3712 	    &dependents, &count) != 0)
3713 		return (-1);
3714 
3715 	for (i = 0; i < count; i++) {
3716 		if ((child = make_dataset_handle(zhp->zfs_hdl,
3717 		    dependents[i])) == NULL)
3718 			continue;
3719 
3720 		if ((ret = func(child, data)) != 0)
3721 			break;
3722 	}
3723 
3724 	for (i = 0; i < count; i++)
3725 		free(dependents[i]);
3726 	free(dependents);
3727 
3728 	return (ret);
3729 }
3730 
3731 /*
3732  * Renames the given dataset.
3733  */
3734 int
3735 zfs_rename(zfs_handle_t *zhp, const char *target, boolean_t recursive)
3736 {
3737 	int ret;
3738 	zfs_cmd_t zc = { 0 };
3739 	char *delim;
3740 	prop_changelist_t *cl = NULL;
3741 	zfs_handle_t *zhrp = NULL;
3742 	char *parentname = NULL;
3743 	char parent[ZFS_MAXNAMELEN];
3744 	libzfs_handle_t *hdl = zhp->zfs_hdl;
3745 	char errbuf[1024];
3746 
3747 	/* if we have the same exact name, just return success */
3748 	if (strcmp(zhp->zfs_name, target) == 0)
3749 		return (0);
3750 
3751 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3752 	    "cannot rename to '%s'"), target);
3753 
3754 	/*
3755 	 * Make sure the target name is valid
3756 	 */
3757 	if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT) {
3758 		if ((strchr(target, '@') == NULL) ||
3759 		    *target == '@') {
3760 			/*
3761 			 * Snapshot target name is abbreviated,
3762 			 * reconstruct full dataset name
3763 			 */
3764 			(void) strlcpy(parent, zhp->zfs_name,
3765 			    sizeof (parent));
3766 			delim = strchr(parent, '@');
3767 			if (strchr(target, '@') == NULL)
3768 				*(++delim) = '\0';
3769 			else
3770 				*delim = '\0';
3771 			(void) strlcat(parent, target, sizeof (parent));
3772 			target = parent;
3773 		} else {
3774 			/*
3775 			 * Make sure we're renaming within the same dataset.
3776 			 */
3777 			delim = strchr(target, '@');
3778 			if (strncmp(zhp->zfs_name, target, delim - target)
3779 			    != 0 || zhp->zfs_name[delim - target] != '@') {
3780 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3781 				    "snapshots must be part of same "
3782 				    "dataset"));
3783 				return (zfs_error(hdl, EZFS_CROSSTARGET,
3784 				    errbuf));
3785 			}
3786 		}
3787 		if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
3788 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3789 	} else {
3790 		if (recursive) {
3791 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3792 			    "recursive rename must be a snapshot"));
3793 			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
3794 		}
3795 
3796 		if (!zfs_validate_name(hdl, target, zhp->zfs_type, B_TRUE))
3797 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3798 		uint64_t unused;
3799 
3800 		/* validate parents */
3801 		if (check_parents(hdl, target, &unused, B_FALSE, NULL) != 0)
3802 			return (-1);
3803 
3804 		(void) parent_name(target, parent, sizeof (parent));
3805 
3806 		/* make sure we're in the same pool */
3807 		verify((delim = strchr(target, '/')) != NULL);
3808 		if (strncmp(zhp->zfs_name, target, delim - target) != 0 ||
3809 		    zhp->zfs_name[delim - target] != '/') {
3810 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3811 			    "datasets must be within same pool"));
3812 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
3813 		}
3814 
3815 		/* new name cannot be a child of the current dataset name */
3816 		if (strncmp(parent, zhp->zfs_name,
3817 		    strlen(zhp->zfs_name)) == 0) {
3818 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3819 			    "New dataset name cannot be a descendent of "
3820 			    "current dataset name"));
3821 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
3822 		}
3823 	}
3824 
3825 	(void) snprintf(errbuf, sizeof (errbuf),
3826 	    dgettext(TEXT_DOMAIN, "cannot rename '%s'"), zhp->zfs_name);
3827 
3828 	if (getzoneid() == GLOBAL_ZONEID &&
3829 	    zfs_prop_get_int(zhp, ZFS_PROP_ZONED)) {
3830 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3831 		    "dataset is used in a non-global zone"));
3832 		return (zfs_error(hdl, EZFS_ZONED, errbuf));
3833 	}
3834 
3835 	if (recursive) {
3836 		struct destroydata dd;
3837 
3838 		parentname = zfs_strdup(zhp->zfs_hdl, zhp->zfs_name);
3839 		if (parentname == NULL) {
3840 			ret = -1;
3841 			goto error;
3842 		}
3843 		delim = strchr(parentname, '@');
3844 		*delim = '\0';
3845 		zhrp = zfs_open(zhp->zfs_hdl, parentname, ZFS_TYPE_DATASET);
3846 		if (zhrp == NULL) {
3847 			ret = -1;
3848 			goto error;
3849 		}
3850 
3851 		dd.snapname = delim + 1;
3852 		dd.gotone = B_FALSE;
3853 		dd.closezhp = B_TRUE;
3854 
3855 		/* We remove any zvol links prior to renaming them */
3856 		ret = zfs_iter_filesystems(zhrp, zfs_remove_link_cb, &dd);
3857 		if (ret) {
3858 			goto error;
3859 		}
3860 	} else {
3861 		if ((cl = changelist_gather(zhp, ZFS_PROP_NAME, 0)) == NULL)
3862 			return (-1);
3863 
3864 		if (changelist_haszonedchild(cl)) {
3865 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3866 			    "child dataset with inherited mountpoint is used "
3867 			    "in a non-global zone"));
3868 			(void) zfs_error(hdl, EZFS_ZONED, errbuf);
3869 			goto error;
3870 		}
3871 
3872 		if ((ret = changelist_prefix(cl)) != 0)
3873 			goto error;
3874 	}
3875 
3876 	if (ZFS_IS_VOLUME(zhp))
3877 		zc.zc_objset_type = DMU_OST_ZVOL;
3878 	else
3879 		zc.zc_objset_type = DMU_OST_ZFS;
3880 
3881 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
3882 	(void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
3883 
3884 	zc.zc_cookie = recursive;
3885 
3886 	if ((ret = zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_RENAME, &zc)) != 0) {
3887 		/*
3888 		 * if it was recursive, the one that actually failed will
3889 		 * be in zc.zc_name
3890 		 */
3891 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3892 		    "cannot rename to '%s'"), zc.zc_name);
3893 
3894 		if (recursive && errno == EEXIST) {
3895 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3896 			    "a child dataset already has a snapshot "
3897 			    "with the new name"));
3898 			(void) zfs_error(hdl, EZFS_EXISTS, errbuf);
3899 		} else {
3900 			(void) zfs_standard_error(zhp->zfs_hdl, errno, errbuf);
3901 		}
3902 
3903 		/*
3904 		 * On failure, we still want to remount any filesystems that
3905 		 * were previously mounted, so we don't alter the system state.
3906 		 */
3907 		if (recursive) {
3908 			struct createdata cd;
3909 
3910 			/* only create links for datasets that had existed */
3911 			cd.cd_snapname = delim + 1;
3912 			cd.cd_ifexists = B_TRUE;
3913 			(void) zfs_iter_filesystems(zhrp, zfs_create_link_cb,
3914 			    &cd);
3915 		} else {
3916 			(void) changelist_postfix(cl);
3917 		}
3918 	} else {
3919 		if (recursive) {
3920 			struct createdata cd;
3921 
3922 			/* only create links for datasets that had existed */
3923 			cd.cd_snapname = strchr(target, '@') + 1;
3924 			cd.cd_ifexists = B_TRUE;
3925 			ret = zfs_iter_filesystems(zhrp, zfs_create_link_cb,
3926 			    &cd);
3927 		} else {
3928 			changelist_rename(cl, zfs_get_name(zhp), target);
3929 			ret = changelist_postfix(cl);
3930 		}
3931 	}
3932 
3933 error:
3934 	if (parentname) {
3935 		free(parentname);
3936 	}
3937 	if (zhrp) {
3938 		zfs_close(zhrp);
3939 	}
3940 	if (cl) {
3941 		changelist_free(cl);
3942 	}
3943 	return (ret);
3944 }
3945 
3946 /*
3947  * Given a zvol dataset, issue the ioctl to create the appropriate minor node,
3948  * poke devfsadm to create the /dev link, and then wait for the link to appear.
3949  */
3950 int
3951 zvol_create_link(libzfs_handle_t *hdl, const char *dataset)
3952 {
3953 	return (zvol_create_link_common(hdl, dataset, B_FALSE));
3954 }
3955 
3956 static int
3957 zvol_create_link_common(libzfs_handle_t *hdl, const char *dataset, int ifexists)
3958 {
3959 	zfs_cmd_t zc = { 0 };
3960 	di_devlink_handle_t dhdl;
3961 	priv_set_t *priv_effective;
3962 	int privileged;
3963 
3964 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
3965 
3966 	/*
3967 	 * Issue the appropriate ioctl.
3968 	 */
3969 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CREATE_MINOR, &zc) != 0) {
3970 		switch (errno) {
3971 		case EEXIST:
3972 			/*
3973 			 * Silently ignore the case where the link already
3974 			 * exists.  This allows 'zfs volinit' to be run multiple
3975 			 * times without errors.
3976 			 */
3977 			return (0);
3978 
3979 		case ENOENT:
3980 			/*
3981 			 * Dataset does not exist in the kernel.  If we
3982 			 * don't care (see zfs_rename), then ignore the
3983 			 * error quietly.
3984 			 */
3985 			if (ifexists) {
3986 				return (0);
3987 			}
3988 
3989 			/* FALLTHROUGH */
3990 
3991 		default:
3992 			return (zfs_standard_error_fmt(hdl, errno,
3993 			    dgettext(TEXT_DOMAIN, "cannot create device links "
3994 			    "for '%s'"), dataset));
3995 		}
3996 	}
3997 
3998 	/*
3999 	 * If privileged call devfsadm and wait for the links to
4000 	 * magically appear.
4001 	 * Otherwise, print out an informational message.
4002 	 */
4003 
4004 	priv_effective = priv_allocset();
4005 	(void) getppriv(PRIV_EFFECTIVE, priv_effective);
4006 	privileged = (priv_isfullset(priv_effective) == B_TRUE);
4007 	priv_freeset(priv_effective);
4008 
4009 	if (privileged) {
4010 		if ((dhdl = di_devlink_init(ZFS_DRIVER,
4011 		    DI_MAKE_LINK)) == NULL) {
4012 			zfs_error_aux(hdl, strerror(errno));
4013 			(void) zfs_standard_error_fmt(hdl, EZFS_DEVLINKS,
4014 			    dgettext(TEXT_DOMAIN, "cannot create device links "
4015 			    "for '%s'"), dataset);
4016 			(void) ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc);
4017 			return (-1);
4018 		} else {
4019 			(void) di_devlink_fini(&dhdl);
4020 		}
4021 	} else {
4022 		char pathname[MAXPATHLEN];
4023 		struct stat64 statbuf;
4024 		int i;
4025 
4026 #define	MAX_WAIT	10
4027 
4028 		/*
4029 		 * This is the poor mans way of waiting for the link
4030 		 * to show up.  If after 10 seconds we still don't
4031 		 * have it, then print out a message.
4032 		 */
4033 		(void) snprintf(pathname, sizeof (pathname), "/dev/zvol/dsk/%s",
4034 		    dataset);
4035 
4036 		for (i = 0; i != MAX_WAIT; i++) {
4037 			if (stat64(pathname, &statbuf) == 0)
4038 				break;
4039 			(void) sleep(1);
4040 		}
4041 		if (i == MAX_WAIT)
4042 			(void) printf(gettext("%s may not be immediately "
4043 			    "available\n"), pathname);
4044 	}
4045 
4046 	return (0);
4047 }
4048 
4049 /*
4050  * Remove a minor node for the given zvol and the associated /dev links.
4051  */
4052 int
4053 zvol_remove_link(libzfs_handle_t *hdl, const char *dataset)
4054 {
4055 	zfs_cmd_t zc = { 0 };
4056 
4057 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
4058 
4059 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_REMOVE_MINOR, &zc) != 0) {
4060 		switch (errno) {
4061 		case ENXIO:
4062 			/*
4063 			 * Silently ignore the case where the link no longer
4064 			 * exists, so that 'zfs volfini' can be run multiple
4065 			 * times without errors.
4066 			 */
4067 			return (0);
4068 
4069 		default:
4070 			return (zfs_standard_error_fmt(hdl, errno,
4071 			    dgettext(TEXT_DOMAIN, "cannot remove device "
4072 			    "links for '%s'"), dataset));
4073 		}
4074 	}
4075 
4076 	return (0);
4077 }
4078 
4079 nvlist_t *
4080 zfs_get_user_props(zfs_handle_t *zhp)
4081 {
4082 	return (zhp->zfs_user_props);
4083 }
4084 
4085 /*
4086  * This function is used by 'zfs list' to determine the exact set of columns to
4087  * display, and their maximum widths.  This does two main things:
4088  *
4089  *      - If this is a list of all properties, then expand the list to include
4090  *        all native properties, and set a flag so that for each dataset we look
4091  *        for new unique user properties and add them to the list.
4092  *
4093  *      - For non fixed-width properties, keep track of the maximum width seen
4094  *        so that we can size the column appropriately.
4095  */
4096 int
4097 zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp)
4098 {
4099 	libzfs_handle_t *hdl = zhp->zfs_hdl;
4100 	zprop_list_t *entry;
4101 	zprop_list_t **last, **start;
4102 	nvlist_t *userprops, *propval;
4103 	nvpair_t *elem;
4104 	char *strval;
4105 	char buf[ZFS_MAXPROPLEN];
4106 
4107 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_DATASET) != 0)
4108 		return (-1);
4109 
4110 	userprops = zfs_get_user_props(zhp);
4111 
4112 	entry = *plp;
4113 	if (entry->pl_all && nvlist_next_nvpair(userprops, NULL) != NULL) {
4114 		/*
4115 		 * Go through and add any user properties as necessary.  We
4116 		 * start by incrementing our list pointer to the first
4117 		 * non-native property.
4118 		 */
4119 		start = plp;
4120 		while (*start != NULL) {
4121 			if ((*start)->pl_prop == ZPROP_INVAL)
4122 				break;
4123 			start = &(*start)->pl_next;
4124 		}
4125 
4126 		elem = NULL;
4127 		while ((elem = nvlist_next_nvpair(userprops, elem)) != NULL) {
4128 			/*
4129 			 * See if we've already found this property in our list.
4130 			 */
4131 			for (last = start; *last != NULL;
4132 			    last = &(*last)->pl_next) {
4133 				if (strcmp((*last)->pl_user_prop,
4134 				    nvpair_name(elem)) == 0)
4135 					break;
4136 			}
4137 
4138 			if (*last == NULL) {
4139 				if ((entry = zfs_alloc(hdl,
4140 				    sizeof (zprop_list_t))) == NULL ||
4141 				    ((entry->pl_user_prop = zfs_strdup(hdl,
4142 				    nvpair_name(elem)))) == NULL) {
4143 					free(entry);
4144 					return (-1);
4145 				}
4146 
4147 				entry->pl_prop = ZPROP_INVAL;
4148 				entry->pl_width = strlen(nvpair_name(elem));
4149 				entry->pl_all = B_TRUE;
4150 				*last = entry;
4151 			}
4152 		}
4153 	}
4154 
4155 	/*
4156 	 * Now go through and check the width of any non-fixed columns
4157 	 */
4158 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
4159 		if (entry->pl_fixed)
4160 			continue;
4161 
4162 		if (entry->pl_prop != ZPROP_INVAL) {
4163 			if (zfs_prop_get(zhp, entry->pl_prop,
4164 			    buf, sizeof (buf), NULL, NULL, 0, B_FALSE) == 0) {
4165 				if (strlen(buf) > entry->pl_width)
4166 					entry->pl_width = strlen(buf);
4167 			}
4168 		} else if (nvlist_lookup_nvlist(userprops,
4169 		    entry->pl_user_prop, &propval)  == 0) {
4170 			verify(nvlist_lookup_string(propval,
4171 			    ZPROP_VALUE, &strval) == 0);
4172 			if (strlen(strval) > entry->pl_width)
4173 				entry->pl_width = strlen(strval);
4174 		}
4175 	}
4176 
4177 	return (0);
4178 }
4179 
4180 int
4181 zfs_iscsi_perm_check(libzfs_handle_t *hdl, char *dataset, ucred_t *cred)
4182 {
4183 	zfs_cmd_t zc = { 0 };
4184 	nvlist_t *nvp;
4185 	gid_t gid;
4186 	uid_t uid;
4187 	const gid_t *groups;
4188 	int group_cnt;
4189 	int error;
4190 
4191 	if (nvlist_alloc(&nvp, NV_UNIQUE_NAME, 0) != 0)
4192 		return (no_memory(hdl));
4193 
4194 	uid = ucred_geteuid(cred);
4195 	gid = ucred_getegid(cred);
4196 	group_cnt = ucred_getgroups(cred, &groups);
4197 
4198 	if (uid == (uid_t)-1 || gid == (uid_t)-1 || group_cnt == (uid_t)-1)
4199 		return (1);
4200 
4201 	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_UID, uid) != 0) {
4202 		nvlist_free(nvp);
4203 		return (1);
4204 	}
4205 
4206 	if (nvlist_add_uint32(nvp, ZFS_DELEG_PERM_GID, gid) != 0) {
4207 		nvlist_free(nvp);
4208 		return (1);
4209 	}
4210 
4211 	if (nvlist_add_uint32_array(nvp,
4212 	    ZFS_DELEG_PERM_GROUPS, (uint32_t *)groups, group_cnt) != 0) {
4213 		nvlist_free(nvp);
4214 		return (1);
4215 	}
4216 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
4217 
4218 	if (zcmd_write_src_nvlist(hdl, &zc, nvp))
4219 		return (-1);
4220 
4221 	error = ioctl(hdl->libzfs_fd, ZFS_IOC_ISCSI_PERM_CHECK, &zc);
4222 	nvlist_free(nvp);
4223 	return (error);
4224 }
4225 
4226 int
4227 zfs_deleg_share_nfs(libzfs_handle_t *hdl, char *dataset, char *path,
4228     void *export, void *sharetab, int sharemax, zfs_share_op_t operation)
4229 {
4230 	zfs_cmd_t zc = { 0 };
4231 	int error;
4232 
4233 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
4234 	(void) strlcpy(zc.zc_value, path, sizeof (zc.zc_value));
4235 	zc.zc_share.z_sharedata = (uint64_t)(uintptr_t)sharetab;
4236 	zc.zc_share.z_exportdata = (uint64_t)(uintptr_t)export;
4237 	zc.zc_share.z_sharetype = operation;
4238 	zc.zc_share.z_sharemax = sharemax;
4239 
4240 	error = ioctl(hdl->libzfs_fd, ZFS_IOC_SHARE, &zc);
4241 	return (error);
4242 }
4243