xref: /freebsd/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c (revision f73124b077d867990cbcb4d903b48be2ca55e4ca)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2014, 2022 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright 2017 RackTop Systems.
28  * Copyright (c) 2018 Datto Inc.
29  * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
30  */
31 
32 /*
33  * Routines to manage ZFS mounts.  We separate all the nasty routines that have
34  * to deal with the OS.  The following functions are the main entry points --
35  * they are used by mount and unmount and when changing a filesystem's
36  * mountpoint.
37  *
38  *	zfs_is_mounted()
39  *	zfs_mount()
40  *	zfs_mount_at()
41  *	zfs_unmount()
42  *	zfs_unmountall()
43  *
44  * This file also contains the functions used to manage sharing filesystems:
45  *
46  *	zfs_is_shared()
47  *	zfs_share()
48  *	zfs_unshare()
49  *	zfs_unshareall()
50  *	zfs_commit_shares()
51  *
52  * The following functions are available for pool consumers, and will
53  * mount/unmount and share/unshare all datasets within pool:
54  *
55  *	zpool_enable_datasets()
56  *	zpool_disable_datasets()
57  */
58 
59 #include <dirent.h>
60 #include <dlfcn.h>
61 #include <errno.h>
62 #include <fcntl.h>
63 #include <libgen.h>
64 #include <libintl.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <string.h>
68 #include <unistd.h>
69 #include <zone.h>
70 #include <sys/mntent.h>
71 #include <sys/mount.h>
72 #include <sys/stat.h>
73 #include <sys/vfs.h>
74 #include <sys/dsl_crypt.h>
75 
76 #include <libzfs.h>
77 #include <libzutil.h>
78 
79 #include "libzfs_impl.h"
80 #include <thread_pool.h>
81 
82 #include <libshare.h>
83 #include <sys/systeminfo.h>
84 #define	MAXISALEN	257	/* based on sysinfo(2) man page */
85 
static int mount_tp_nthr = 512;	/* tpool threads for multi-threaded mounting */

/* Forward declaration: handed to tpool_dispatch() by zfs_dispatch_mount(). */
static void zfs_mount_task(void *);

/*
 * Per-protocol table, indexed by enum sa_protocol.  Each entry names the
 * dataset property that holds the share options for the protocol and the
 * libzfs error codes reported when sharing or unsharing over it fails.
 */
static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = {
	[SA_PROTOCOL_NFS] =
	    {ZFS_PROP_SHARENFS, EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
	[SA_PROTOCOL_SMB] =
	    {ZFS_PROP_SHARESMB, EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
};

/*
 * Default protocol list, terminated by SA_NO_PROTOCOL.  The share routines
 * in this file substitute it when the caller passes a NULL protocol list.
 */
static const enum sa_protocol share_all_proto[SA_PROTOCOL_COUNT + 1] = {
	SA_PROTOCOL_NFS,
	SA_PROTOCOL_SMB,
	SA_NO_PROTOCOL
};
102 
103 
104 
105 static boolean_t
106 dir_is_empty_stat(const char *dirname)
107 {
108 	struct stat st;
109 
110 	/*
111 	 * We only want to return false if the given path is a non empty
112 	 * directory, all other errors are handled elsewhere.
113 	 */
114 	if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {
115 		return (B_TRUE);
116 	}
117 
118 	/*
119 	 * An empty directory will still have two entries in it, one
120 	 * entry for each of "." and "..".
121 	 */
122 	if (st.st_size > 2) {
123 		return (B_FALSE);
124 	}
125 
126 	return (B_TRUE);
127 }
128 
129 static boolean_t
130 dir_is_empty_readdir(const char *dirname)
131 {
132 	DIR *dirp;
133 	struct dirent64 *dp;
134 	int dirfd;
135 
136 	if ((dirfd = openat(AT_FDCWD, dirname,
137 	    O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {
138 		return (B_TRUE);
139 	}
140 
141 	if ((dirp = fdopendir(dirfd)) == NULL) {
142 		(void) close(dirfd);
143 		return (B_TRUE);
144 	}
145 
146 	while ((dp = readdir64(dirp)) != NULL) {
147 
148 		if (strcmp(dp->d_name, ".") == 0 ||
149 		    strcmp(dp->d_name, "..") == 0)
150 			continue;
151 
152 		(void) closedir(dirp);
153 		return (B_FALSE);
154 	}
155 
156 	(void) closedir(dirp);
157 	return (B_TRUE);
158 }
159 
160 /*
161  * Returns true if the specified directory is empty.  If we can't open the
162  * directory at all, return true so that the mount can fail with a more
163  * informative error message.
164  */
165 static boolean_t
166 dir_is_empty(const char *dirname)
167 {
168 	struct statfs64 st;
169 
170 	/*
171 	 * If the statvfs call fails or the filesystem is not a ZFS
172 	 * filesystem, fall back to the slow path which uses readdir.
173 	 */
174 	if ((statfs64(dirname, &st) != 0) ||
175 	    (st.f_type != ZFS_SUPER_MAGIC)) {
176 		return (dir_is_empty_readdir(dirname));
177 	}
178 
179 	/*
180 	 * At this point, we know the provided path is on a ZFS
181 	 * filesystem, so we can use stat instead of readdir to
182 	 * determine if the directory is empty or not. We try to avoid
183 	 * using readdir because that requires opening "dirname"; this
184 	 * open file descriptor can potentially end up in a child
185 	 * process if there's a concurrent fork, thus preventing the
186 	 * zfs_mount() from otherwise succeeding (the open file
187 	 * descriptor inherited by the child process will cause the
188 	 * parent's mount to fail with EBUSY). The performance
189 	 * implications of replacing the open, read, and close with a
190 	 * single stat is nice; but is not the main motivation for the
191 	 * added complexity.
192 	 */
193 	return (dir_is_empty_stat(dirname));
194 }
195 
196 /*
197  * Checks to see if the mount is active.  If the filesystem is mounted, we fill
198  * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
199  * 0.
200  */
201 boolean_t
202 is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
203 {
204 	struct mnttab entry;
205 
206 	if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
207 		return (B_FALSE);
208 
209 	if (where != NULL)
210 		*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
211 
212 	return (B_TRUE);
213 }
214 
215 boolean_t
216 zfs_is_mounted(zfs_handle_t *zhp, char **where)
217 {
218 	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
219 }
220 
221 /*
222  * Checks any higher order concerns about whether the given dataset is
223  * mountable, false otherwise.  zfs_is_mountable_internal specifically assumes
224  * that the caller has verified the sanity of mounting the dataset at
225  * its mountpoint to the extent the caller wants.
226  */
227 static boolean_t
228 zfs_is_mountable_internal(zfs_handle_t *zhp)
229 {
230 	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
231 	    getzoneid() == GLOBAL_ZONEID)
232 		return (B_FALSE);
233 
234 	return (B_TRUE);
235 }
236 
237 /*
238  * Returns true if the given dataset is mountable, false otherwise.  Returns the
239  * mountpoint in 'buf'.
240  */
241 static boolean_t
242 zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
243     zprop_source_t *source, int flags)
244 {
245 	char sourceloc[MAXNAMELEN];
246 	zprop_source_t sourcetype;
247 
248 	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type,
249 	    B_FALSE))
250 		return (B_FALSE);
251 
252 	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
253 	    &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
254 
255 	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
256 	    strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
257 		return (B_FALSE);
258 
259 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
260 		return (B_FALSE);
261 
262 	if (!zfs_is_mountable_internal(zhp))
263 		return (B_FALSE);
264 
265 	if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE))
266 		return (B_FALSE);
267 
268 	if (source)
269 		*source = sourcetype;
270 
271 	return (B_TRUE);
272 }
273 
/*
 * The filesystem is mounted by invoking the system mount utility rather
 * than by the system call mount(2).  This ensures that the /etc/mtab
 * file is correctly locked for the update.  Performing our own locking
 * and /etc/mtab update requires making an unsafe assumption about how
 * the mount utility performs its locking.  Unfortunately, this also means
 * that in the case of a mount failure we do not have the exact errno.  We
 * must make do with the return value from the mount process.
 *
 * In the long term, a shared library called libmount is under development
 * which provides a common API to address the locking and errno issues.
 * Once the standard mount utility has been updated to use this library
 * we can add an autoconf check to conditionally use it.
 *
 * http://www.kernel.org/pub/linux/utils/util-linux/libmount-docs/index.html
 */
290 
291 static int
292 zfs_add_option(zfs_handle_t *zhp, char *options, int len,
293     zfs_prop_t prop, const char *on, const char *off)
294 {
295 	const char *source;
296 	uint64_t value;
297 
298 	/* Skip adding duplicate default options */
299 	if ((strstr(options, on) != NULL) || (strstr(options, off) != NULL))
300 		return (0);
301 
302 	/*
303 	 * zfs_prop_get_int() is not used to ensure our mount options
304 	 * are not influenced by the current /proc/self/mounts contents.
305 	 */
306 	value = getprop_uint64(zhp, prop, &source);
307 
308 	(void) strlcat(options, ",", len);
309 	(void) strlcat(options, value ? on : off, len);
310 
311 	return (0);
312 }
313 
314 static int
315 zfs_add_options(zfs_handle_t *zhp, char *options, int len)
316 {
317 	int error = 0;
318 
319 	error = zfs_add_option(zhp, options, len,
320 	    ZFS_PROP_ATIME, MNTOPT_ATIME, MNTOPT_NOATIME);
321 	/*
322 	 * don't add relatime/strictatime when atime=off, otherwise strictatime
323 	 * will force atime=on
324 	 */
325 	if (strstr(options, MNTOPT_NOATIME) == NULL) {
326 		error = zfs_add_option(zhp, options, len,
327 		    ZFS_PROP_RELATIME, MNTOPT_RELATIME, MNTOPT_STRICTATIME);
328 	}
329 	error = error ? error : zfs_add_option(zhp, options, len,
330 	    ZFS_PROP_DEVICES, MNTOPT_DEVICES, MNTOPT_NODEVICES);
331 	error = error ? error : zfs_add_option(zhp, options, len,
332 	    ZFS_PROP_EXEC, MNTOPT_EXEC, MNTOPT_NOEXEC);
333 	error = error ? error : zfs_add_option(zhp, options, len,
334 	    ZFS_PROP_READONLY, MNTOPT_RO, MNTOPT_RW);
335 	error = error ? error : zfs_add_option(zhp, options, len,
336 	    ZFS_PROP_SETUID, MNTOPT_SETUID, MNTOPT_NOSETUID);
337 	error = error ? error : zfs_add_option(zhp, options, len,
338 	    ZFS_PROP_NBMAND, MNTOPT_NBMAND, MNTOPT_NONBMAND);
339 
340 	return (error);
341 }
342 
343 int
344 zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
345 {
346 	char mountpoint[ZFS_MAXPROPLEN];
347 
348 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL,
349 	    flags))
350 		return (0);
351 
352 	return (zfs_mount_at(zhp, options, flags, mountpoint));
353 }
354 
/*
 * Mount the given filesystem at 'mountpoint'.
 *
 * 'options' is a comma-separated mount option string; NULL selects
 * MNTOPT_DEFAULTS.  'flags' is consulted for MS_CRYPT (try to load a missing
 * encryption key) and MS_OVERLAY (allow mounting over a non-empty directory)
 * and is passed on to do_mount().
 *
 * Returns 0 on success, and also when zfs_is_mountable_internal() rejects the
 * dataset, in which case nothing is mounted.  On failure the return value is
 * either the result of zfs_error_fmt(), the status of the failing key-loading
 * call, or hdl->libzfs_error when the encryption root cannot be opened.
 */
int
zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags,
    const char *mountpoint)
{
	struct stat buf;
	char mntopts[MNT_LINE_MAX];
	char overlay[ZFS_MAXPROPLEN];
	char prop_encroot[MAXNAMELEN];
	boolean_t is_encroot;
	zfs_handle_t *encroot_hp = zhp;
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	uint64_t keystatus;
	int remount = 0, rc;

	/* Start from the caller's options, or the defaults if none given. */
	if (options == NULL) {
		(void) strlcpy(mntopts, MNTOPT_DEFAULTS, sizeof (mntopts));
	} else {
		(void) strlcpy(mntopts, options, sizeof (mntopts));
	}

	/* A remount skips the empty-directory check and replaces the cache. */
	if (strstr(mntopts, MNTOPT_REMOUNT) != NULL)
		remount = 1;

	/* Potentially duplicates some checks if invoked by zfs_mount(). */
	if (!zfs_is_mountable_internal(zhp))
		return (0);

	/*
	 * If the pool is imported read-only then all mounts must be read-only
	 */
	if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
		(void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts));

	/*
	 * Append default mount options which apply to the mount point.
	 * This is done because under Linux (unlike Solaris) multiple mount
	 * points may reference a single super block.  This means that just
	 * given a super block there is no back reference to update the per
	 * mount point options.
	 */
	rc = zfs_add_options(zhp, mntopts, sizeof (mntopts));
	if (rc) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "default options unavailable"));
		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
		    mountpoint));
	}

	/*
	 * If the filesystem is encrypted the key must be loaded in order to
	 * mount. If the key isn't loaded, the MS_CRYPT flag decides whether
	 * or not we attempt to load the keys. Note: we must call
	 * zfs_refresh_properties() here since some callers of this function
	 * (most notably zpool_enable_datasets()) may implicitly load our key
	 * by loading the parent's key first.
	 */
	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
		zfs_refresh_properties(zhp);
		keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);

		/*
		 * If the key is unavailable and MS_CRYPT is set give the
		 * user a chance to enter the key. Otherwise just fail
		 * immediately.
		 */
		if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
			if (flags & MS_CRYPT) {
				rc = zfs_crypto_get_encryption_root(zhp,
				    &is_encroot, prop_encroot);
				if (rc) {
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "Failed to get encryption root for "
					    "'%s'."), zfs_get_name(zhp));
					return (rc);
				}

				/*
				 * The key is loaded on the encryption root.
				 * When that is a different dataset, open a
				 * temporary handle for it.
				 */
				if (!is_encroot) {
					encroot_hp = zfs_open(hdl, prop_encroot,
					    ZFS_TYPE_DATASET);
					if (encroot_hp == NULL)
						return (hdl->libzfs_error);
				}

				rc = zfs_crypto_load_key(encroot_hp,
				    B_FALSE, NULL);

				/* Only close the handle we opened above. */
				if (!is_encroot)
					zfs_close(encroot_hp);
				if (rc)
					return (rc);
			} else {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "encryption key not loaded"));
				return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
				    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
				    mountpoint));
			}
		}

	}

	/*
	 * Append the zfsutil option so that the mount helper allows the mount
	 */
	strlcat(mntopts, "," MNTOPT_ZFSUTIL, sizeof (mntopts));

	/* Create the directory if it doesn't already exist */
	if (lstat(mountpoint, &buf) != 0) {
		if (mkdirp(mountpoint, 0755) != 0) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "failed to create mountpoint: %s"),
			    zfs_strerror(errno));
			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
			    mountpoint));
		}
	}

	/*
	 * Overlay mounts are enabled by default but may be disabled
	 * via the 'overlay' property. The -O flag remains for compatibility.
	 */
	if (!(flags & MS_OVERLAY)) {
		if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,
		    sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) {
			if (strcmp(overlay, "on") == 0) {
				flags |= MS_OVERLAY;
			}
		}
	}

	/*
	 * Determine if the mountpoint is empty.  If it is not, refuse to
	 * perform the mount.  We don't perform this check if 'remount' is
	 * specified or if the overlay option (-O) is given.
	 */
	if ((flags & MS_OVERLAY) == 0 && !remount &&
	    !dir_is_empty(mountpoint)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "directory is not empty"));
		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
	}

	/* perform the mount */
	rc = do_mount(zhp, mountpoint, mntopts, flags);
	if (rc) {
		/*
		 * Generic errors are nasty, but there are just way too many
		 * from mount(), and they're well-understood.  We pick a few
		 * common ones to improve upon.
		 */
		if (rc == EBUSY) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "mountpoint or dataset is busy"));
		} else if (rc == EPERM) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "Insufficient privileges"));
		} else if (rc == ENOTSUP) {
			int spa_version;

			VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "Can't mount a version %llu "
			    "file system on a version %d pool. Pool must be"
			    " upgraded to mount this file system."),
			    (u_longlong_t)zfs_prop_get_int(zhp,
			    ZFS_PROP_VERSION), spa_version);
		} else {
			zfs_error_aux(hdl, "%s", zfs_strerror(rc));
		}
		/* Note: this message names the dataset, not the mountpoint. */
		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
		    zhp->zfs_name));
	}

	/* remove the mounted entry before re-adding on remount */
	if (remount)
		libzfs_mnttab_remove(hdl, zhp->zfs_name);

	/* add the mounted entry into our cache */
	libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, mntopts);
	return (0);
}
543 
544 /*
545  * Unmount a single filesystem.
546  */
547 static int
548 unmount_one(zfs_handle_t *zhp, const char *mountpoint, int flags)
549 {
550 	int error;
551 
552 	error = do_unmount(zhp, mountpoint, flags);
553 	if (error != 0) {
554 		int libzfs_err;
555 
556 		switch (error) {
557 		case EBUSY:
558 			libzfs_err = EZFS_BUSY;
559 			break;
560 		case EIO:
561 			libzfs_err = EZFS_IO;
562 			break;
563 		case ENOENT:
564 			libzfs_err = EZFS_NOENT;
565 			break;
566 		case ENOMEM:
567 			libzfs_err = EZFS_NOMEM;
568 			break;
569 		case EPERM:
570 			libzfs_err = EZFS_PERM;
571 			break;
572 		default:
573 			libzfs_err = EZFS_UMOUNTFAILED;
574 		}
575 		if (zhp) {
576 			return (zfs_error_fmt(zhp->zfs_hdl, libzfs_err,
577 			    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
578 			    mountpoint));
579 		} else {
580 			return (-1);
581 		}
582 	}
583 
584 	return (0);
585 }
586 
587 /*
588  * Unmount the given filesystem.
589  */
590 int
591 zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
592 {
593 	libzfs_handle_t *hdl = zhp->zfs_hdl;
594 	struct mnttab entry;
595 	char *mntpt = NULL;
596 	boolean_t encroot, unmounted = B_FALSE;
597 
598 	/* check to see if we need to unmount the filesystem */
599 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
600 	    libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
601 		/*
602 		 * mountpoint may have come from a call to
603 		 * getmnt/getmntany if it isn't NULL. If it is NULL,
604 		 * we know it comes from libzfs_mnttab_find which can
605 		 * then get freed later. We strdup it to play it safe.
606 		 */
607 		if (mountpoint == NULL)
608 			mntpt = zfs_strdup(hdl, entry.mnt_mountp);
609 		else
610 			mntpt = zfs_strdup(hdl, mountpoint);
611 
612 		/*
613 		 * Unshare and unmount the filesystem
614 		 */
615 		if (zfs_unshare(zhp, mntpt, share_all_proto) != 0) {
616 			free(mntpt);
617 			return (-1);
618 		}
619 		zfs_commit_shares(NULL);
620 
621 		if (unmount_one(zhp, mntpt, flags) != 0) {
622 			free(mntpt);
623 			(void) zfs_share(zhp, NULL);
624 			zfs_commit_shares(NULL);
625 			return (-1);
626 		}
627 
628 		libzfs_mnttab_remove(hdl, zhp->zfs_name);
629 		free(mntpt);
630 		unmounted = B_TRUE;
631 	}
632 
633 	/*
634 	 * If the MS_CRYPT flag is provided we must ensure we attempt to
635 	 * unload the dataset's key regardless of whether we did any work
636 	 * to unmount it. We only do this for encryption roots.
637 	 */
638 	if ((flags & MS_CRYPT) != 0 &&
639 	    zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
640 		zfs_refresh_properties(zhp);
641 
642 		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0 &&
643 		    unmounted) {
644 			(void) zfs_mount(zhp, NULL, 0);
645 			return (-1);
646 		}
647 
648 		if (encroot && zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
649 		    ZFS_KEYSTATUS_AVAILABLE &&
650 		    zfs_crypto_unload_key(zhp) != 0) {
651 			(void) zfs_mount(zhp, NULL, 0);
652 			return (-1);
653 		}
654 	}
655 
656 	zpool_disable_volume_os(zhp->zfs_name);
657 
658 	return (0);
659 }
660 
661 /*
662  * Unmount this filesystem and any children inheriting the mountpoint property.
663  * To do this, just act like we're changing the mountpoint property, but don't
664  * remount the filesystems afterwards.
665  */
666 int
667 zfs_unmountall(zfs_handle_t *zhp, int flags)
668 {
669 	prop_changelist_t *clp;
670 	int ret;
671 
672 	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
673 	    CL_GATHER_ITER_MOUNTED, flags);
674 	if (clp == NULL)
675 		return (-1);
676 
677 	ret = changelist_prefix(clp);
678 	changelist_free(clp);
679 
680 	return (ret);
681 }
682 
683 /*
684  * Unshare a filesystem by mountpoint.
685  */
686 static int
687 unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
688     enum sa_protocol proto)
689 {
690 	int err = sa_disable_share(mountpoint, proto);
691 	if (err != SA_OK)
692 		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
693 		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
694 		    name, sa_errorstr(err)));
695 
696 	return (0);
697 }
698 
699 /*
700  * Share the given filesystem according to the options in the specified
701  * protocol specific properties (sharenfs, sharesmb).  We rely
702  * on "libshare" to do the dirty work for us.
703  */
704 int
705 zfs_share(zfs_handle_t *zhp, const enum sa_protocol *proto)
706 {
707 	char mountpoint[ZFS_MAXPROPLEN];
708 	char shareopts[ZFS_MAXPROPLEN];
709 	char sourcestr[ZFS_MAXPROPLEN];
710 	const enum sa_protocol *curr_proto;
711 	zprop_source_t sourcetype;
712 	int err = 0;
713 
714 	if (proto == NULL)
715 		proto = share_all_proto;
716 
717 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0))
718 		return (0);
719 
720 	for (curr_proto = proto; *curr_proto != SA_NO_PROTOCOL; curr_proto++) {
721 		/*
722 		 * Return success if there are no share options.
723 		 */
724 		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
725 		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
726 		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
727 		    strcmp(shareopts, "off") == 0)
728 			continue;
729 
730 		/*
731 		 * If the 'zoned' property is set, then zfs_is_mountable()
732 		 * will have already bailed out if we are in the global zone.
733 		 * But local zones cannot be NFS servers, so we ignore it for
734 		 * local zones as well.
735 		 */
736 		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
737 			continue;
738 
739 		err = sa_enable_share(zfs_get_name(zhp), mountpoint, shareopts,
740 		    *curr_proto);
741 		if (err != SA_OK) {
742 			return (zfs_error_fmt(zhp->zfs_hdl,
743 			    proto_table[*curr_proto].p_share_err,
744 			    dgettext(TEXT_DOMAIN, "cannot share '%s: %s'"),
745 			    zfs_get_name(zhp), sa_errorstr(err)));
746 		}
747 
748 	}
749 	return (0);
750 }
751 
752 /*
753  * Check to see if the filesystem is currently shared.
754  */
755 boolean_t
756 zfs_is_shared(zfs_handle_t *zhp, char **where,
757     const enum sa_protocol *proto)
758 {
759 	char *mountpoint;
760 	if (proto == NULL)
761 		proto = share_all_proto;
762 
763 	if (ZFS_IS_VOLUME(zhp))
764 		return (B_FALSE);
765 
766 	if (!zfs_is_mounted(zhp, &mountpoint))
767 		return (B_FALSE);
768 
769 	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
770 		if (sa_is_shared(mountpoint, *p)) {
771 			if (where != NULL)
772 				*where = mountpoint;
773 			else
774 				free(mountpoint);
775 			return (B_TRUE);
776 		}
777 
778 	free(mountpoint);
779 	return (B_FALSE);
780 }
781 
782 void
783 zfs_commit_shares(const enum sa_protocol *proto)
784 {
785 	if (proto == NULL)
786 		proto = share_all_proto;
787 
788 	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
789 		sa_commit_shares(*p);
790 }
791 
792 void
793 zfs_truncate_shares(const enum sa_protocol *proto)
794 {
795 	if (proto == NULL)
796 		proto = share_all_proto;
797 
798 	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
799 		sa_truncate_shares(*p);
800 }
801 
802 /*
803  * Unshare the given filesystem.
804  */
805 int
806 zfs_unshare(zfs_handle_t *zhp, const char *mountpoint,
807     const enum sa_protocol *proto)
808 {
809 	libzfs_handle_t *hdl = zhp->zfs_hdl;
810 	struct mnttab entry;
811 
812 	if (proto == NULL)
813 		proto = share_all_proto;
814 
815 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
816 	    libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
817 
818 		/* check to see if need to unmount the filesystem */
819 		const char *mntpt = mountpoint ?: entry.mnt_mountp;
820 
821 		for (const enum sa_protocol *curr_proto = proto;
822 		    *curr_proto != SA_NO_PROTOCOL; curr_proto++)
823 			if (sa_is_shared(mntpt, *curr_proto) &&
824 			    unshare_one(hdl, zhp->zfs_name,
825 			    mntpt, *curr_proto) != 0)
826 					return (-1);
827 	}
828 
829 	return (0);
830 }
831 
832 /*
833  * Same as zfs_unmountall(), but for NFS and SMB unshares.
834  */
835 int
836 zfs_unshareall(zfs_handle_t *zhp, const enum sa_protocol *proto)
837 {
838 	prop_changelist_t *clp;
839 	int ret;
840 
841 	if (proto == NULL)
842 		proto = share_all_proto;
843 
844 	clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
845 	if (clp == NULL)
846 		return (-1);
847 
848 	ret = changelist_unshare(clp, proto);
849 	changelist_free(clp);
850 
851 	return (ret);
852 }
853 
854 /*
855  * Remove the mountpoint associated with the current dataset, if necessary.
856  * We only remove the underlying directory if:
857  *
858  *	- The mountpoint is not 'none' or 'legacy'
859  *	- The mountpoint is non-empty
860  *	- The mountpoint is the default or inherited
861  *	- The 'zoned' property is set, or we're in a local zone
862  *
863  * Any other directories we leave alone.
864  */
865 void
866 remove_mountpoint(zfs_handle_t *zhp)
867 {
868 	char mountpoint[ZFS_MAXPROPLEN];
869 	zprop_source_t source;
870 
871 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
872 	    &source, 0))
873 		return;
874 
875 	if (source == ZPROP_SRC_DEFAULT ||
876 	    source == ZPROP_SRC_INHERITED) {
877 		/*
878 		 * Try to remove the directory, silently ignoring any errors.
879 		 * The filesystem may have since been removed or moved around,
880 		 * and this error isn't really useful to the administrator in
881 		 * any way.
882 		 */
883 		(void) rmdir(mountpoint);
884 	}
885 }
886 
887 /*
888  * Add the given zfs handle to the cb_handles array, dynamically reallocating
889  * the array if it is out of space.
890  */
891 void
892 libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
893 {
894 	if (cbp->cb_alloc == cbp->cb_used) {
895 		size_t newsz;
896 		zfs_handle_t **newhandles;
897 
898 		newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
899 		newhandles = zfs_realloc(zhp->zfs_hdl,
900 		    cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
901 		    newsz * sizeof (zfs_handle_t *));
902 		cbp->cb_handles = newhandles;
903 		cbp->cb_alloc = newsz;
904 	}
905 	cbp->cb_handles[cbp->cb_used++] = zhp;
906 }
907 
908 /*
909  * Recursive helper function used during file system enumeration
910  */
911 static int
912 zfs_iter_cb(zfs_handle_t *zhp, void *data)
913 {
914 	get_all_cb_t *cbp = data;
915 
916 	if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
917 		zfs_close(zhp);
918 		return (0);
919 	}
920 
921 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
922 		zfs_close(zhp);
923 		return (0);
924 	}
925 
926 	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
927 	    ZFS_KEYSTATUS_UNAVAILABLE) {
928 		zfs_close(zhp);
929 		return (0);
930 	}
931 
932 	/*
933 	 * If this filesystem is inconsistent and has a receive resume
934 	 * token, we can not mount it.
935 	 */
936 	if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
937 	    zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
938 	    NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
939 		zfs_close(zhp);
940 		return (0);
941 	}
942 
943 	libzfs_add_handle(cbp, zhp);
944 	if (zfs_iter_filesystems_v2(zhp, 0, zfs_iter_cb, cbp) != 0) {
945 		zfs_close(zhp);
946 		return (-1);
947 	}
948 	return (0);
949 }
950 
951 /*
952  * Sort comparator that compares two mountpoint paths. We sort these paths so
953  * that subdirectories immediately follow their parents. This means that we
954  * effectively treat the '/' character as the lowest value non-nul char.
955  * Since filesystems from non-global zones can have the same mountpoint
956  * as other filesystems, the comparator sorts global zone filesystems to
957  * the top of the list. This means that the global zone will traverse the
958  * filesystem list in the correct order and can stop when it sees the
959  * first zoned filesystem. In a non-global zone, only the delegated
960  * filesystems are seen.
961  *
962  * An example sorted list using this comparator would look like:
963  *
964  * /foo
965  * /foo/bar
966  * /foo/bar/baz
967  * /foo/baz
968  * /foo.bar
969  * /foo (NGZ1)
970  * /foo (NGZ2)
971  *
972  * The mounting code depends on this ordering to deterministically iterate
973  * over filesystems in order to spawn parallel mount tasks.
974  */
975 static int
976 mountpoint_cmp(const void *arga, const void *argb)
977 {
978 	zfs_handle_t *const *zap = arga;
979 	zfs_handle_t *za = *zap;
980 	zfs_handle_t *const *zbp = argb;
981 	zfs_handle_t *zb = *zbp;
982 	char mounta[MAXPATHLEN];
983 	char mountb[MAXPATHLEN];
984 	const char *a = mounta;
985 	const char *b = mountb;
986 	boolean_t gota, gotb;
987 	uint64_t zoneda, zonedb;
988 
989 	zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED);
990 	zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED);
991 	if (zoneda && !zonedb)
992 		return (1);
993 	if (!zoneda && zonedb)
994 		return (-1);
995 
996 	gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
997 	if (gota) {
998 		verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
999 		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
1000 	}
1001 	gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
1002 	if (gotb) {
1003 		verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
1004 		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
1005 	}
1006 
1007 	if (gota && gotb) {
1008 		while (*a != '\0' && (*a == *b)) {
1009 			a++;
1010 			b++;
1011 		}
1012 		if (*a == *b)
1013 			return (0);
1014 		if (*a == '\0')
1015 			return (-1);
1016 		if (*b == '\0')
1017 			return (1);
1018 		if (*a == '/')
1019 			return (-1);
1020 		if (*b == '/')
1021 			return (1);
1022 		return (*a < *b ? -1 : *a > *b);
1023 	}
1024 
1025 	if (gota)
1026 		return (-1);
1027 	if (gotb)
1028 		return (1);
1029 
1030 	/*
1031 	 * If neither filesystem has a mountpoint, revert to sorting by
1032 	 * dataset name.
1033 	 */
1034 	return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
1035 }
1036 
1037 /*
1038  * Return true if path2 is a child of path1 or path2 equals path1 or
1039  * path1 is "/" (path2 is always a child of "/").
1040  */
1041 static boolean_t
1042 libzfs_path_contains(const char *path1, const char *path2)
1043 {
1044 	return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 ||
1045 	    (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'));
1046 }
1047 
1048 /*
1049  * Given a mountpoint specified by idx in the handles array, find the first
1050  * non-descendent of that mountpoint and return its index. Descendant paths
1051  * start with the parent's path. This function relies on the ordering
1052  * enforced by mountpoint_cmp().
1053  */
1054 static int
1055 non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
1056 {
1057 	char parent[ZFS_MAXPROPLEN];
1058 	char child[ZFS_MAXPROPLEN];
1059 	int i;
1060 
1061 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
1062 	    sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
1063 
1064 	for (i = idx + 1; i < num_handles; i++) {
1065 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
1066 		    sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1067 		if (!libzfs_path_contains(parent, child))
1068 			break;
1069 	}
1070 	return (i);
1071 }
1072 
1073 typedef struct mnt_param {
1074 	libzfs_handle_t	*mnt_hdl;
1075 	tpool_t		*mnt_tp;
1076 	zfs_handle_t	**mnt_zhps; /* filesystems to mount */
1077 	size_t		mnt_num_handles;
1078 	int		mnt_idx;	/* Index of selected entry to mount */
1079 	zfs_iter_f	mnt_func;
1080 	void		*mnt_data;
1081 } mnt_param_t;
1082 
1083 /*
1084  * Allocate and populate the parameter struct for mount function, and
1085  * schedule mounting of the entry selected by idx.
1086  */
1087 static void
1088 zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
1089     size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp)
1090 {
1091 	mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
1092 
1093 	mnt_param->mnt_hdl = hdl;
1094 	mnt_param->mnt_tp = tp;
1095 	mnt_param->mnt_zhps = handles;
1096 	mnt_param->mnt_num_handles = num_handles;
1097 	mnt_param->mnt_idx = idx;
1098 	mnt_param->mnt_func = func;
1099 	mnt_param->mnt_data = data;
1100 
1101 	(void) tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param);
1102 }
1103 
/*
 * State shared by the mount and share callbacks for the duration of a
 * zpool_enable_datasets() call.
 */
struct mount_state {
	/*
	 * Becomes -1 as soon as any mount (or share) operation fails.
	 * Several threads may store to this field at the same time; no
	 * synchronization is used because the only value ever written by
	 * the callbacks is -1.
	 */
	int		ms_mntstatus;
	int		ms_mntflags;	/* flags handed to zfs_mount() */
	const char	*ms_mntopts;	/* options handed to zfs_mount() */
};
typedef struct mount_state mount_state_t;
1118 
1119 static int
1120 zfs_mount_one(zfs_handle_t *zhp, void *arg)
1121 {
1122 	mount_state_t *ms = arg;
1123 	int ret = 0;
1124 
1125 	/*
1126 	 * don't attempt to mount encrypted datasets with
1127 	 * unloaded keys
1128 	 */
1129 	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
1130 	    ZFS_KEYSTATUS_UNAVAILABLE)
1131 		return (0);
1132 
1133 	if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
1134 		ret = ms->ms_mntstatus = -1;
1135 	return (ret);
1136 }
1137 
1138 static int
1139 zfs_share_one(zfs_handle_t *zhp, void *arg)
1140 {
1141 	mount_state_t *ms = arg;
1142 	int ret = 0;
1143 
1144 	if (zfs_share(zhp, NULL) != 0)
1145 		ret = ms->ms_mntstatus = -1;
1146 	return (ret);
1147 }
1148 
1149 /*
1150  * Thread pool function to mount one file system. On completion, it finds and
1151  * schedules its children to be mounted. This depends on the sorting done in
1152  * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
1153  * each descending from the previous) will have no parallelism since we always
1154  * have to wait for the parent to finish mounting before we can schedule
1155  * its children.
1156  */
1157 static void
1158 zfs_mount_task(void *arg)
1159 {
1160 	mnt_param_t *mp = arg;
1161 	int idx = mp->mnt_idx;
1162 	zfs_handle_t **handles = mp->mnt_zhps;
1163 	size_t num_handles = mp->mnt_num_handles;
1164 	char mountpoint[ZFS_MAXPROPLEN];
1165 
1166 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
1167 	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
1168 
1169 	if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
1170 		goto out;
1171 
1172 	/*
1173 	 * We dispatch tasks to mount filesystems with mountpoints underneath
1174 	 * this one. We do this by dispatching the next filesystem with a
1175 	 * descendant mountpoint of the one we just mounted, then skip all of
1176 	 * its descendants, dispatch the next descendant mountpoint, and so on.
1177 	 * The non_descendant_idx() function skips over filesystems that are
1178 	 * descendants of the filesystem we just dispatched.
1179 	 */
1180 	for (int i = idx + 1; i < num_handles;
1181 	    i = non_descendant_idx(handles, num_handles, i)) {
1182 		char child[ZFS_MAXPROPLEN];
1183 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
1184 		    child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1185 
1186 		if (!libzfs_path_contains(mountpoint, child))
1187 			break; /* not a descendant, return */
1188 		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
1189 		    mp->mnt_func, mp->mnt_data, mp->mnt_tp);
1190 	}
1191 
1192 out:
1193 	free(mp);
1194 }
1195 
1196 /*
1197  * Issue the func callback for each ZFS handle contained in the handles
1198  * array. This function is used to mount all datasets, and so this function
1199  * guarantees that filesystems for parent mountpoints are called before their
1200  * children. As such, before issuing any callbacks, we first sort the array
1201  * of handles by mountpoint.
1202  *
1203  * Callbacks are issued in one of two ways:
1204  *
1205  * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
1206  *    environment variable is set, then we issue callbacks sequentially.
1207  *
1208  * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
1209  *    environment variable is not set, then we use a tpool to dispatch threads
1210  *    to mount filesystems in parallel. This function dispatches tasks to mount
1211  *    the filesystems at the top-level mountpoints, and these tasks in turn
1212  *    are responsible for recursively mounting filesystems in their children
1213  *    mountpoints.
1214  */
1215 void
1216 zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
1217     size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
1218 {
1219 	zoneid_t zoneid = getzoneid();
1220 
1221 	/*
1222 	 * The ZFS_SERIAL_MOUNT environment variable is an undocumented
1223 	 * variable that can be used as a convenience to do a/b comparison
1224 	 * of serial vs. parallel mounting.
1225 	 */
1226 	boolean_t serial_mount = !parallel ||
1227 	    (getenv("ZFS_SERIAL_MOUNT") != NULL);
1228 
1229 	/*
1230 	 * Sort the datasets by mountpoint. See mountpoint_cmp for details
1231 	 * of how these are sorted.
1232 	 */
1233 	qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
1234 
1235 	if (serial_mount) {
1236 		for (int i = 0; i < num_handles; i++) {
1237 			func(handles[i], data);
1238 		}
1239 		return;
1240 	}
1241 
1242 	/*
1243 	 * Issue the callback function for each dataset using a parallel
1244 	 * algorithm that uses a thread pool to manage threads.
1245 	 */
1246 	tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL);
1247 
1248 	/*
1249 	 * There may be multiple "top level" mountpoints outside of the pool's
1250 	 * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
1251 	 * these.
1252 	 */
1253 	for (int i = 0; i < num_handles;
1254 	    i = non_descendant_idx(handles, num_handles, i)) {
1255 		/*
1256 		 * Since the mountpoints have been sorted so that the zoned
1257 		 * filesystems are at the end, a zoned filesystem seen from
1258 		 * the global zone means that we're done.
1259 		 */
1260 		if (zoneid == GLOBAL_ZONEID &&
1261 		    zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))
1262 			break;
1263 		zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
1264 		    tp);
1265 	}
1266 
1267 	tpool_wait(tp);	/* wait for all scheduled mounts to complete */
1268 	tpool_destroy(tp);
1269 }
1270 
1271 /*
1272  * Mount and share all datasets within the given pool.  This assumes that no
1273  * datasets within the pool are currently mounted.
1274  */
1275 int
1276 zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
1277 {
1278 	get_all_cb_t cb = { 0 };
1279 	mount_state_t ms = { 0 };
1280 	zfs_handle_t *zfsp;
1281 	int ret = 0;
1282 
1283 	if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
1284 	    ZFS_TYPE_DATASET)) == NULL)
1285 		goto out;
1286 
1287 	/*
1288 	 * Gather all non-snapshot datasets within the pool. Start by adding
1289 	 * the root filesystem for this pool to the list, and then iterate
1290 	 * over all child filesystems.
1291 	 */
1292 	libzfs_add_handle(&cb, zfsp);
1293 	if (zfs_iter_filesystems_v2(zfsp, 0, zfs_iter_cb, &cb) != 0)
1294 		goto out;
1295 
1296 	/*
1297 	 * Mount all filesystems
1298 	 */
1299 	ms.ms_mntopts = mntopts;
1300 	ms.ms_mntflags = flags;
1301 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1302 	    zfs_mount_one, &ms, B_TRUE);
1303 	if (ms.ms_mntstatus != 0)
1304 		ret = EZFS_MOUNTFAILED;
1305 
1306 	/*
1307 	 * Share all filesystems that need to be shared. This needs to be
1308 	 * a separate pass because libshare is not mt-safe, and so we need
1309 	 * to share serially.
1310 	 */
1311 	ms.ms_mntstatus = 0;
1312 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1313 	    zfs_share_one, &ms, B_FALSE);
1314 	if (ms.ms_mntstatus != 0)
1315 		ret = EZFS_SHAREFAILED;
1316 	else
1317 		zfs_commit_shares(NULL);
1318 
1319 out:
1320 	for (int i = 0; i < cb.cb_used; i++)
1321 		zfs_close(cb.cb_handles[i]);
1322 	free(cb.cb_handles);
1323 
1324 	return (ret);
1325 }
1326 
/*
 * One mounted filesystem found in the mount table: the path it is mounted
 * on and, when it could be opened, the dataset handle that goes with it.
 */
struct sets_s {
	char *mountpoint;
	zfs_handle_t *dataset;	/* may be NULL */
};

/*
 * qsort() comparator for arrays of struct sets_s.
 *
 * The strcmp() operands are deliberately swapped so that the array ends up
 * in descending mountpoint order.  A nested mountpoint has its parent's path
 * as a strict prefix and therefore compares greater, so it sorts ahead of
 * its parent.
 */
static int
mountpoint_compare(const void *a, const void *b)
{
	/* No casts: const void * converts implicitly and stays const. */
	const struct sets_s *mounta = a;
	const struct sets_s *mountb = b;

	return (strcmp(mountb->mountpoint, mounta->mountpoint));
}
1341 /*
1342  * Unshare and unmount all datasets within the given pool.  We don't want to
1343  * rely on traversing the DSL to discover the filesystems within the pool,
1344  * because this may be expensive (if not all of them are mounted), and can fail
1345  * arbitrarily (on I/O error, for example).  Instead, we walk /proc/self/mounts
1346  * and gather all the filesystems that are currently mounted.
1347  */
1348 int
1349 zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
1350 {
1351 	int used, alloc;
1352 	FILE *mnttab;
1353 	struct mnttab entry;
1354 	size_t namelen;
1355 	struct sets_s *sets = NULL;
1356 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1357 	int i;
1358 	int ret = -1;
1359 	int flags = (force ? MS_FORCE : 0);
1360 
1361 	namelen = strlen(zhp->zpool_name);
1362 
1363 	if ((mnttab = fopen(MNTTAB, "re")) == NULL)
1364 		return (ENOENT);
1365 
1366 	used = alloc = 0;
1367 	while (getmntent(mnttab, &entry) == 0) {
1368 		/*
1369 		 * Ignore non-ZFS entries.
1370 		 */
1371 		if (entry.mnt_fstype == NULL ||
1372 		    strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
1373 			continue;
1374 
1375 		/*
1376 		 * Ignore filesystems not within this pool.
1377 		 */
1378 		if (entry.mnt_mountp == NULL ||
1379 		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
1380 		    (entry.mnt_special[namelen] != '/' &&
1381 		    entry.mnt_special[namelen] != '\0'))
1382 			continue;
1383 
1384 		/*
1385 		 * At this point we've found a filesystem within our pool.  Add
1386 		 * it to our growing list.
1387 		 */
1388 		if (used == alloc) {
1389 			if (alloc == 0) {
1390 				sets = zfs_alloc(hdl,
1391 				    8 * sizeof (struct sets_s));
1392 				alloc = 8;
1393 			} else {
1394 				sets = zfs_realloc(hdl, sets,
1395 				    alloc * sizeof (struct sets_s),
1396 				    alloc * 2 * sizeof (struct sets_s));
1397 
1398 				alloc *= 2;
1399 			}
1400 		}
1401 
1402 		sets[used].mountpoint = zfs_strdup(hdl, entry.mnt_mountp);
1403 
1404 		/*
1405 		 * This is allowed to fail, in case there is some I/O error.  It
1406 		 * is only used to determine if we need to remove the underlying
1407 		 * mountpoint, so failure is not fatal.
1408 		 */
1409 		sets[used].dataset = make_dataset_handle(hdl,
1410 		    entry.mnt_special);
1411 
1412 		used++;
1413 	}
1414 
1415 	/*
1416 	 * At this point, we have the entire list of filesystems, so sort it by
1417 	 * mountpoint.
1418 	 */
1419 	if (used != 0)
1420 		qsort(sets, used, sizeof (struct sets_s), mountpoint_compare);
1421 
1422 	/*
1423 	 * Walk through and first unshare everything.
1424 	 */
1425 	for (i = 0; i < used; i++) {
1426 		for (enum sa_protocol p = 0; p < SA_PROTOCOL_COUNT; ++p) {
1427 			if (sa_is_shared(sets[i].mountpoint, p) &&
1428 			    unshare_one(hdl, sets[i].mountpoint,
1429 			    sets[i].mountpoint, p) != 0)
1430 				goto out;
1431 		}
1432 	}
1433 	zfs_commit_shares(NULL);
1434 
1435 	/*
1436 	 * Now unmount everything, removing the underlying directories as
1437 	 * appropriate.
1438 	 */
1439 	for (i = 0; i < used; i++) {
1440 		if (unmount_one(sets[i].dataset, sets[i].mountpoint,
1441 		    flags) != 0)
1442 			goto out;
1443 	}
1444 
1445 	for (i = 0; i < used; i++) {
1446 		if (sets[i].dataset)
1447 			remove_mountpoint(sets[i].dataset);
1448 	}
1449 
1450 	zpool_disable_datasets_os(zhp, force);
1451 
1452 	ret = 0;
1453 out:
1454 	(void) fclose(mnttab);
1455 	for (i = 0; i < used; i++) {
1456 		if (sets[i].dataset)
1457 			zfs_close(sets[i].dataset);
1458 		free(sets[i].mountpoint);
1459 	}
1460 	free(sets);
1461 
1462 	return (ret);
1463 }
1464