xref: /freebsd/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2014, 2022 by Delphix. All rights reserved.
26  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
27  * Copyright 2017 RackTop Systems.
28  * Copyright (c) 2018 Datto Inc.
29  * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
30  */
31 
32 /*
33  * Routines to manage ZFS mounts.  We separate all the nasty routines that have
34  * to deal with the OS.  The following functions are the main entry points --
35  * they are used by mount and unmount and when changing a filesystem's
36  * mountpoint.
37  *
38  *	zfs_is_mounted()
39  *	zfs_mount()
40  *	zfs_mount_at()
41  *	zfs_unmount()
42  *	zfs_unmountall()
43  *
44  * This file also contains the functions used to manage sharing filesystems:
45  *
46  *	zfs_is_shared()
47  *	zfs_share()
48  *	zfs_unshare()
49  *	zfs_unshareall()
50  *	zfs_commit_shares()
51  *
52  * The following functions are available for pool consumers, and will
53  * mount/unmount and share/unshare all datasets within pool:
54  *
55  *	zpool_enable_datasets()
56  *	zpool_disable_datasets()
57  */
58 
59 #include <dirent.h>
60 #include <dlfcn.h>
61 #include <errno.h>
62 #include <fcntl.h>
63 #include <libgen.h>
64 #include <libintl.h>
65 #include <stdio.h>
66 #include <stdlib.h>
67 #include <string.h>
68 #include <unistd.h>
69 #include <zone.h>
70 #include <sys/mntent.h>
71 #include <sys/mount.h>
72 #include <sys/stat.h>
73 #include <sys/vfs.h>
74 #include <sys/dsl_crypt.h>
75 
76 #include <libzfs.h>
77 #include <libzutil.h>
78 
79 #include "libzfs_impl.h"
80 #include <thread_pool.h>
81 
82 #include <libshare.h>
83 #include <sys/systeminfo.h>
84 #define	MAXISALEN	257	/* based on sysinfo(2) man page */
85 
86 static void zfs_mount_task(void *);
87 
/*
 * Per-protocol share metadata, indexed by enum sa_protocol.  Each entry
 * names the dataset property that controls sharing for that protocol and
 * the libzfs error codes reported when sharing or unsharing fails.
 */
static const proto_table_t proto_table[SA_PROTOCOL_COUNT] = {
	[SA_PROTOCOL_NFS] =
	    {ZFS_PROP_SHARENFS, EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
	[SA_PROTOCOL_SMB] =
	    {ZFS_PROP_SHARESMB, EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
};
94 
/*
 * SA_NO_PROTOCOL-terminated list of every share protocol.  The share
 * routines in this file fall back to it when the caller passes a NULL
 * protocol list.
 */
static const enum sa_protocol share_all_proto[SA_PROTOCOL_COUNT + 1] = {
	SA_PROTOCOL_NFS,
	SA_PROTOCOL_SMB,
	SA_NO_PROTOCOL
};
100 
101 
102 
103 static boolean_t
104 dir_is_empty_stat(const char *dirname)
105 {
106 	struct stat st;
107 
108 	/*
109 	 * We only want to return false if the given path is a non empty
110 	 * directory, all other errors are handled elsewhere.
111 	 */
112 	if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {
113 		return (B_TRUE);
114 	}
115 
116 	/*
117 	 * An empty directory will still have two entries in it, one
118 	 * entry for each of "." and "..".
119 	 */
120 	if (st.st_size > 2) {
121 		return (B_FALSE);
122 	}
123 
124 	return (B_TRUE);
125 }
126 
127 static boolean_t
128 dir_is_empty_readdir(const char *dirname)
129 {
130 	DIR *dirp;
131 	struct dirent64 *dp;
132 	int dirfd;
133 
134 	if ((dirfd = openat(AT_FDCWD, dirname,
135 	    O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {
136 		return (B_TRUE);
137 	}
138 
139 	if ((dirp = fdopendir(dirfd)) == NULL) {
140 		(void) close(dirfd);
141 		return (B_TRUE);
142 	}
143 
144 	while ((dp = readdir64(dirp)) != NULL) {
145 
146 		if (strcmp(dp->d_name, ".") == 0 ||
147 		    strcmp(dp->d_name, "..") == 0)
148 			continue;
149 
150 		(void) closedir(dirp);
151 		return (B_FALSE);
152 	}
153 
154 	(void) closedir(dirp);
155 	return (B_TRUE);
156 }
157 
158 /*
159  * Returns true if the specified directory is empty.  If we can't open the
160  * directory at all, return true so that the mount can fail with a more
161  * informative error message.
162  */
163 static boolean_t
164 dir_is_empty(const char *dirname)
165 {
166 	struct statfs64 st;
167 
168 	/*
169 	 * If the statvfs call fails or the filesystem is not a ZFS
170 	 * filesystem, fall back to the slow path which uses readdir.
171 	 */
172 	if ((statfs64(dirname, &st) != 0) ||
173 	    (st.f_type != ZFS_SUPER_MAGIC)) {
174 		return (dir_is_empty_readdir(dirname));
175 	}
176 
177 	/*
178 	 * At this point, we know the provided path is on a ZFS
179 	 * filesystem, so we can use stat instead of readdir to
180 	 * determine if the directory is empty or not. We try to avoid
181 	 * using readdir because that requires opening "dirname"; this
182 	 * open file descriptor can potentially end up in a child
183 	 * process if there's a concurrent fork, thus preventing the
184 	 * zfs_mount() from otherwise succeeding (the open file
185 	 * descriptor inherited by the child process will cause the
186 	 * parent's mount to fail with EBUSY). The performance
187 	 * implications of replacing the open, read, and close with a
188 	 * single stat is nice; but is not the main motivation for the
189 	 * added complexity.
190 	 */
191 	return (dir_is_empty_stat(dirname));
192 }
193 
194 /*
195  * Checks to see if the mount is active.  If the filesystem is mounted, we fill
196  * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
197  * 0.
198  */
199 boolean_t
200 is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
201 {
202 	struct mnttab entry;
203 
204 	if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
205 		return (B_FALSE);
206 
207 	if (where != NULL)
208 		*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
209 
210 	return (B_TRUE);
211 }
212 
213 boolean_t
214 zfs_is_mounted(zfs_handle_t *zhp, char **where)
215 {
216 	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
217 }
218 
219 /*
220  * Checks any higher order concerns about whether the given dataset is
221  * mountable, false otherwise.  zfs_is_mountable_internal specifically assumes
222  * that the caller has verified the sanity of mounting the dataset at
223  * its mountpoint to the extent the caller wants.
224  */
225 static boolean_t
226 zfs_is_mountable_internal(zfs_handle_t *zhp)
227 {
228 	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
229 	    getzoneid() == GLOBAL_ZONEID)
230 		return (B_FALSE);
231 
232 	return (B_TRUE);
233 }
234 
235 /*
236  * Returns true if the given dataset is mountable, false otherwise.  Returns the
237  * mountpoint in 'buf'.
238  */
239 static boolean_t
240 zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
241     zprop_source_t *source, int flags)
242 {
243 	char sourceloc[MAXNAMELEN];
244 	zprop_source_t sourcetype;
245 
246 	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type,
247 	    B_FALSE))
248 		return (B_FALSE);
249 
250 	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
251 	    &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
252 
253 	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
254 	    strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
255 		return (B_FALSE);
256 
257 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
258 		return (B_FALSE);
259 
260 	if (!zfs_is_mountable_internal(zhp))
261 		return (B_FALSE);
262 
263 	if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE))
264 		return (B_FALSE);
265 
266 	if (source)
267 		*source = sourcetype;
268 
269 	return (B_TRUE);
270 }
271 
272 /*
273  * The filesystem is mounted by invoking the system mount utility rather
274  * than by the system call mount(2).  This ensures that the /etc/mtab
275  * file is correctly locked for the update.  Performing our own locking
276  * and /etc/mtab update requires making an unsafe assumption about how
277  * the mount utility performs its locking.  Unfortunately, this also means
278  * in the case of a mount failure we do not have the exact errno.  We must
 * make do with the return value from the mount process.
280  *
281  * In the long term a shared library called libmount is under development
282  * which provides a common API to address the locking and errno issues.
283  * Once the standard mount utility has been updated to use this library
284  * we can add an autoconf check to conditionally use it.
285  *
286  * http://www.kernel.org/pub/linux/utils/util-linux/libmount-docs/index.html
287  */
288 
289 static int
290 zfs_add_option(zfs_handle_t *zhp, char *options, int len,
291     zfs_prop_t prop, const char *on, const char *off)
292 {
293 	const char *source;
294 	uint64_t value;
295 
296 	/* Skip adding duplicate default options */
297 	if ((strstr(options, on) != NULL) || (strstr(options, off) != NULL))
298 		return (0);
299 
300 	/*
301 	 * zfs_prop_get_int() is not used to ensure our mount options
302 	 * are not influenced by the current /proc/self/mounts contents.
303 	 */
304 	value = getprop_uint64(zhp, prop, &source);
305 
306 	(void) strlcat(options, ",", len);
307 	(void) strlcat(options, value ? on : off, len);
308 
309 	return (0);
310 }
311 
312 static int
313 zfs_add_options(zfs_handle_t *zhp, char *options, int len)
314 {
315 	int error = 0;
316 
317 	error = zfs_add_option(zhp, options, len,
318 	    ZFS_PROP_ATIME, MNTOPT_ATIME, MNTOPT_NOATIME);
319 	/*
320 	 * don't add relatime/strictatime when atime=off, otherwise strictatime
321 	 * will force atime=on
322 	 */
323 	if (strstr(options, MNTOPT_NOATIME) == NULL) {
324 		error = zfs_add_option(zhp, options, len,
325 		    ZFS_PROP_RELATIME, MNTOPT_RELATIME, MNTOPT_STRICTATIME);
326 	}
327 	error = error ? error : zfs_add_option(zhp, options, len,
328 	    ZFS_PROP_DEVICES, MNTOPT_DEVICES, MNTOPT_NODEVICES);
329 	error = error ? error : zfs_add_option(zhp, options, len,
330 	    ZFS_PROP_EXEC, MNTOPT_EXEC, MNTOPT_NOEXEC);
331 	error = error ? error : zfs_add_option(zhp, options, len,
332 	    ZFS_PROP_READONLY, MNTOPT_RO, MNTOPT_RW);
333 	error = error ? error : zfs_add_option(zhp, options, len,
334 	    ZFS_PROP_SETUID, MNTOPT_SETUID, MNTOPT_NOSETUID);
335 	error = error ? error : zfs_add_option(zhp, options, len,
336 	    ZFS_PROP_NBMAND, MNTOPT_NBMAND, MNTOPT_NONBMAND);
337 
338 	return (error);
339 }
340 
341 int
342 zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
343 {
344 	char mountpoint[ZFS_MAXPROPLEN];
345 
346 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL,
347 	    flags))
348 		return (0);
349 
350 	return (zfs_mount_at(zhp, options, flags, mountpoint));
351 }
352 
/*
 * Mount the given filesystem at 'mountpoint'.
 *
 * 'options' is a comma-separated mount option string; NULL selects
 * MNTOPT_DEFAULTS.  The string is extended with "ro" for a read-only pool,
 * with the property-derived defaults from zfs_add_options(), and with the
 * zfsutil option.  'flags' is consulted for MS_CRYPT (try to load a missing
 * encryption key) and MS_OVERLAY (allow mounting on a non-empty directory)
 * and is then passed on to do_mount().
 *
 * Returns 0 on success, and also when zfs_is_mountable_internal() rejects
 * the dataset, in which case nothing is mounted.  On failure the return
 * value is whatever zfs_error_fmt(), the zfs_crypto_*() helpers or
 * hdl->libzfs_error supplied.
 */
int
zfs_mount_at(zfs_handle_t *zhp, const char *options, int flags,
    const char *mountpoint)
{
	struct stat buf;
	char mntopts[MNT_LINE_MAX];
	char overlay[ZFS_MAXPROPLEN];
	char prop_encroot[MAXNAMELEN];
	boolean_t is_encroot;
	zfs_handle_t *encroot_hp = zhp;
	libzfs_handle_t *hdl = zhp->zfs_hdl;
	uint64_t keystatus;
	int remount = 0, rc;

	if (options == NULL) {
		(void) strlcpy(mntopts, MNTOPT_DEFAULTS, sizeof (mntopts));
	} else {
		(void) strlcpy(mntopts, options, sizeof (mntopts));
	}

	/* A remount skips the empty-directory check further down. */
	if (strstr(mntopts, MNTOPT_REMOUNT) != NULL)
		remount = 1;

	/* Potentially duplicates some checks if invoked by zfs_mount(). */
	if (!zfs_is_mountable_internal(zhp))
		return (0);

	/*
	 * If the pool is imported read-only then all mounts must be read-only
	 */
	if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
		(void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts));

	/*
	 * Append default mount options which apply to the mount point.
	 * This is done because under Linux (unlike Solaris) multiple mount
	 * points may reference a single super block.  This means that just
	 * given a super block there is no back reference to update the per
	 * mount point options.
	 */
	rc = zfs_add_options(zhp, mntopts, sizeof (mntopts));
	if (rc) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "default options unavailable"));
		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
		    mountpoint));
	}

	/*
	 * If the filesystem is encrypted the key must be loaded in order to
	 * mount. If the key isn't loaded, the MS_CRYPT flag decides whether
	 * or not we attempt to load the keys. Note: we must call
	 * zfs_refresh_properties() here since some callers of this function
	 * (most notably zpool_enable_datasets()) may implicitly load our key
	 * by loading the parent's key first.
	 */
	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
		zfs_refresh_properties(zhp);
		keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);

		/*
		 * If the key is unavailable and MS_CRYPT is set give the
		 * user a chance to enter the key. Otherwise just fail
		 * immediately.
		 */
		if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
			if (flags & MS_CRYPT) {
				rc = zfs_crypto_get_encryption_root(zhp,
				    &is_encroot, prop_encroot);
				if (rc) {
					zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
					    "Failed to get encryption root for "
					    "'%s'."), zfs_get_name(zhp));
					return (rc);
				}

				/*
				 * The key is loaded on the encryption root;
				 * open it when it is a different dataset.
				 */
				if (!is_encroot) {
					encroot_hp = zfs_open(hdl, prop_encroot,
					    ZFS_TYPE_DATASET);
					if (encroot_hp == NULL)
						return (hdl->libzfs_error);
				}

				rc = zfs_crypto_load_key(encroot_hp,
				    B_FALSE, NULL);

				/* Only close the handle opened above. */
				if (!is_encroot)
					zfs_close(encroot_hp);
				if (rc)
					return (rc);
			} else {
				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
				    "encryption key not loaded"));
				return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
				    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
				    mountpoint));
			}
		}

	}

	/*
	 * Append zfsutil option so the mount helper allows the mount
	 */
	strlcat(mntopts, "," MNTOPT_ZFSUTIL, sizeof (mntopts));

	/* Create the directory if it doesn't already exist */
	if (lstat(mountpoint, &buf) != 0) {
		if (mkdirp(mountpoint, 0755) != 0) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "failed to create mountpoint: %s"),
			    zfs_strerror(errno));
			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
			    mountpoint));
		}
	}

	/*
	 * Overlay mounts are enabled by default but may be disabled
	 * via the 'overlay' property. The -O flag remains for compatibility.
	 */
	if (!(flags & MS_OVERLAY)) {
		if (zfs_prop_get(zhp, ZFS_PROP_OVERLAY, overlay,
		    sizeof (overlay), NULL, NULL, 0, B_FALSE) == 0) {
			if (strcmp(overlay, "on") == 0) {
				flags |= MS_OVERLAY;
			}
		}
	}

	/*
	 * Determine if the mountpoint is empty.  If it is not, refuse to
	 * perform the mount.  We don't perform this check if 'remount' is
	 * specified or if the overlay option (-O) is given.
	 */
	if ((flags & MS_OVERLAY) == 0 && !remount &&
	    !dir_is_empty(mountpoint)) {
		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
		    "directory is not empty"));
		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
	}

	/* perform the mount */
	rc = do_mount(zhp, mountpoint, mntopts, flags);
	if (rc) {
		/*
		 * Generic errors are nasty, but there are just way too many
		 * from mount(), and they're well-understood.  We pick a few
		 * common ones to improve upon.
		 */
		if (rc == EBUSY) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "mountpoint or dataset is busy"));
		} else if (rc == EPERM) {
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "Insufficient privileges"));
		} else if (rc == ENOTSUP) {
			int spa_version;

			VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
			    "Can't mount a version %llu "
			    "file system on a version %d pool. Pool must be"
			    " upgraded to mount this file system."),
			    (u_longlong_t)zfs_prop_get_int(zhp,
			    ZFS_PROP_VERSION), spa_version);
		} else {
			zfs_error_aux(hdl, "%s", zfs_strerror(rc));
		}
		/* Unlike the earlier failures, this one names the dataset. */
		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
		    zhp->zfs_name));
	}

	/* remove the mounted entry before re-adding on remount */
	if (remount)
		libzfs_mnttab_remove(hdl, zhp->zfs_name);

	/* add the mounted entry into our cache */
	libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint, mntopts);
	return (0);
}
541 
542 /*
543  * Unmount a single filesystem.
544  */
545 static int
546 unmount_one(zfs_handle_t *zhp, const char *mountpoint, int flags)
547 {
548 	int error;
549 
550 	error = do_unmount(zhp, mountpoint, flags);
551 	if (error != 0) {
552 		int libzfs_err;
553 
554 		switch (error) {
555 		case EBUSY:
556 			libzfs_err = EZFS_BUSY;
557 			break;
558 		case EIO:
559 			libzfs_err = EZFS_IO;
560 			break;
561 		case ENOENT:
562 			libzfs_err = EZFS_NOENT;
563 			break;
564 		case ENOMEM:
565 			libzfs_err = EZFS_NOMEM;
566 			break;
567 		case EPERM:
568 			libzfs_err = EZFS_PERM;
569 			break;
570 		default:
571 			libzfs_err = EZFS_UMOUNTFAILED;
572 		}
573 		if (zhp) {
574 			return (zfs_error_fmt(zhp->zfs_hdl, libzfs_err,
575 			    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
576 			    mountpoint));
577 		} else {
578 			return (-1);
579 		}
580 	}
581 
582 	return (0);
583 }
584 
585 /*
586  * Unmount the given filesystem.
587  */
588 int
589 zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
590 {
591 	libzfs_handle_t *hdl = zhp->zfs_hdl;
592 	struct mnttab entry;
593 	char *mntpt = NULL;
594 	boolean_t encroot, unmounted = B_FALSE;
595 
596 	/* check to see if we need to unmount the filesystem */
597 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
598 	    libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
599 		/*
600 		 * mountpoint may have come from a call to
601 		 * getmnt/getmntany if it isn't NULL. If it is NULL,
602 		 * we know it comes from libzfs_mnttab_find which can
603 		 * then get freed later. We strdup it to play it safe.
604 		 */
605 		if (mountpoint == NULL)
606 			mntpt = zfs_strdup(hdl, entry.mnt_mountp);
607 		else
608 			mntpt = zfs_strdup(hdl, mountpoint);
609 
610 		/*
611 		 * Unshare and unmount the filesystem
612 		 */
613 		if (zfs_unshare(zhp, mntpt, share_all_proto) != 0) {
614 			free(mntpt);
615 			return (-1);
616 		}
617 		zfs_commit_shares(NULL);
618 
619 		if (unmount_one(zhp, mntpt, flags) != 0) {
620 			free(mntpt);
621 			(void) zfs_share(zhp, NULL);
622 			zfs_commit_shares(NULL);
623 			return (-1);
624 		}
625 
626 		libzfs_mnttab_remove(hdl, zhp->zfs_name);
627 		free(mntpt);
628 		unmounted = B_TRUE;
629 	}
630 
631 	/*
632 	 * If the MS_CRYPT flag is provided we must ensure we attempt to
633 	 * unload the dataset's key regardless of whether we did any work
634 	 * to unmount it. We only do this for encryption roots.
635 	 */
636 	if ((flags & MS_CRYPT) != 0 &&
637 	    zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
638 		zfs_refresh_properties(zhp);
639 
640 		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0 &&
641 		    unmounted) {
642 			(void) zfs_mount(zhp, NULL, 0);
643 			return (-1);
644 		}
645 
646 		if (encroot && zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
647 		    ZFS_KEYSTATUS_AVAILABLE &&
648 		    zfs_crypto_unload_key(zhp) != 0) {
649 			(void) zfs_mount(zhp, NULL, 0);
650 			return (-1);
651 		}
652 	}
653 
654 	zpool_disable_volume_os(zhp->zfs_name);
655 
656 	return (0);
657 }
658 
659 /*
660  * Unmount this filesystem and any children inheriting the mountpoint property.
661  * To do this, just act like we're changing the mountpoint property, but don't
662  * remount the filesystems afterwards.
663  */
664 int
665 zfs_unmountall(zfs_handle_t *zhp, int flags)
666 {
667 	prop_changelist_t *clp;
668 	int ret;
669 
670 	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
671 	    CL_GATHER_ITER_MOUNTED, flags);
672 	if (clp == NULL)
673 		return (-1);
674 
675 	ret = changelist_prefix(clp);
676 	changelist_free(clp);
677 
678 	return (ret);
679 }
680 
681 /*
682  * Unshare a filesystem by mountpoint.
683  */
684 static int
685 unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
686     enum sa_protocol proto)
687 {
688 	int err = sa_disable_share(mountpoint, proto);
689 	if (err != SA_OK)
690 		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
691 		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
692 		    name, sa_errorstr(err)));
693 
694 	return (0);
695 }
696 
697 /*
698  * Share the given filesystem according to the options in the specified
699  * protocol specific properties (sharenfs, sharesmb).  We rely
700  * on "libshare" to do the dirty work for us.
701  */
702 int
703 zfs_share(zfs_handle_t *zhp, const enum sa_protocol *proto)
704 {
705 	char mountpoint[ZFS_MAXPROPLEN];
706 	char shareopts[ZFS_MAXPROPLEN];
707 	char sourcestr[ZFS_MAXPROPLEN];
708 	const enum sa_protocol *curr_proto;
709 	zprop_source_t sourcetype;
710 	int err = 0;
711 
712 	if (proto == NULL)
713 		proto = share_all_proto;
714 
715 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0))
716 		return (0);
717 
718 	for (curr_proto = proto; *curr_proto != SA_NO_PROTOCOL; curr_proto++) {
719 		/*
720 		 * Return success if there are no share options.
721 		 */
722 		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
723 		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
724 		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
725 		    strcmp(shareopts, "off") == 0)
726 			continue;
727 
728 		/*
729 		 * If the 'zoned' property is set, then zfs_is_mountable()
730 		 * will have already bailed out if we are in the global zone.
731 		 * But local zones cannot be NFS servers, so we ignore it for
732 		 * local zones as well.
733 		 */
734 		if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED))
735 			continue;
736 
737 		err = sa_enable_share(zfs_get_name(zhp), mountpoint, shareopts,
738 		    *curr_proto);
739 		if (err != SA_OK) {
740 			return (zfs_error_fmt(zhp->zfs_hdl,
741 			    proto_table[*curr_proto].p_share_err,
742 			    dgettext(TEXT_DOMAIN, "cannot share '%s: %s'"),
743 			    zfs_get_name(zhp), sa_errorstr(err)));
744 		}
745 
746 	}
747 	return (0);
748 }
749 
750 /*
751  * Check to see if the filesystem is currently shared.
752  */
753 boolean_t
754 zfs_is_shared(zfs_handle_t *zhp, char **where,
755     const enum sa_protocol *proto)
756 {
757 	char *mountpoint;
758 	if (proto == NULL)
759 		proto = share_all_proto;
760 
761 	if (ZFS_IS_VOLUME(zhp))
762 		return (B_FALSE);
763 
764 	if (!zfs_is_mounted(zhp, &mountpoint))
765 		return (B_FALSE);
766 
767 	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
768 		if (sa_is_shared(mountpoint, *p)) {
769 			if (where != NULL)
770 				*where = mountpoint;
771 			else
772 				free(mountpoint);
773 			return (B_TRUE);
774 		}
775 
776 	free(mountpoint);
777 	return (B_FALSE);
778 }
779 
780 void
781 zfs_commit_shares(const enum sa_protocol *proto)
782 {
783 	if (proto == NULL)
784 		proto = share_all_proto;
785 
786 	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
787 		sa_commit_shares(*p);
788 }
789 
790 void
791 zfs_truncate_shares(const enum sa_protocol *proto)
792 {
793 	if (proto == NULL)
794 		proto = share_all_proto;
795 
796 	for (const enum sa_protocol *p = proto; *p != SA_NO_PROTOCOL; ++p)
797 		sa_truncate_shares(*p);
798 }
799 
800 /*
801  * Unshare the given filesystem.
802  */
803 int
804 zfs_unshare(zfs_handle_t *zhp, const char *mountpoint,
805     const enum sa_protocol *proto)
806 {
807 	libzfs_handle_t *hdl = zhp->zfs_hdl;
808 	struct mnttab entry;
809 
810 	if (proto == NULL)
811 		proto = share_all_proto;
812 
813 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
814 	    libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
815 
816 		/* check to see if need to unmount the filesystem */
817 		const char *mntpt = mountpoint ?: entry.mnt_mountp;
818 
819 		for (const enum sa_protocol *curr_proto = proto;
820 		    *curr_proto != SA_NO_PROTOCOL; curr_proto++)
821 			if (sa_is_shared(mntpt, *curr_proto) &&
822 			    unshare_one(hdl, zhp->zfs_name,
823 			    mntpt, *curr_proto) != 0)
824 					return (-1);
825 	}
826 
827 	return (0);
828 }
829 
830 /*
831  * Same as zfs_unmountall(), but for NFS and SMB unshares.
832  */
833 int
834 zfs_unshareall(zfs_handle_t *zhp, const enum sa_protocol *proto)
835 {
836 	prop_changelist_t *clp;
837 	int ret;
838 
839 	if (proto == NULL)
840 		proto = share_all_proto;
841 
842 	clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
843 	if (clp == NULL)
844 		return (-1);
845 
846 	ret = changelist_unshare(clp, proto);
847 	changelist_free(clp);
848 
849 	return (ret);
850 }
851 
852 /*
853  * Remove the mountpoint associated with the current dataset, if necessary.
854  * We only remove the underlying directory if:
855  *
856  *	- The mountpoint is not 'none' or 'legacy'
857  *	- The mountpoint is non-empty
858  *	- The mountpoint is the default or inherited
859  *	- The 'zoned' property is set, or we're in a local zone
860  *
861  * Any other directories we leave alone.
862  */
863 void
864 remove_mountpoint(zfs_handle_t *zhp)
865 {
866 	char mountpoint[ZFS_MAXPROPLEN];
867 	zprop_source_t source;
868 
869 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
870 	    &source, 0))
871 		return;
872 
873 	if (source == ZPROP_SRC_DEFAULT ||
874 	    source == ZPROP_SRC_INHERITED) {
875 		/*
876 		 * Try to remove the directory, silently ignoring any errors.
877 		 * The filesystem may have since been removed or moved around,
878 		 * and this error isn't really useful to the administrator in
879 		 * any way.
880 		 */
881 		(void) rmdir(mountpoint);
882 	}
883 }
884 
885 /*
886  * Add the given zfs handle to the cb_handles array, dynamically reallocating
887  * the array if it is out of space.
888  */
889 void
890 libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
891 {
892 	if (cbp->cb_alloc == cbp->cb_used) {
893 		size_t newsz;
894 		zfs_handle_t **newhandles;
895 
896 		newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
897 		newhandles = zfs_realloc(zhp->zfs_hdl,
898 		    cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
899 		    newsz * sizeof (zfs_handle_t *));
900 		cbp->cb_handles = newhandles;
901 		cbp->cb_alloc = newsz;
902 	}
903 	cbp->cb_handles[cbp->cb_used++] = zhp;
904 }
905 
906 /*
907  * Recursive helper function used during file system enumeration
908  */
909 static int
910 zfs_iter_cb(zfs_handle_t *zhp, void *data)
911 {
912 	get_all_cb_t *cbp = data;
913 
914 	if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
915 		zfs_close(zhp);
916 		return (0);
917 	}
918 
919 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
920 		zfs_close(zhp);
921 		return (0);
922 	}
923 
924 	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
925 	    ZFS_KEYSTATUS_UNAVAILABLE) {
926 		zfs_close(zhp);
927 		return (0);
928 	}
929 
930 	/*
931 	 * If this filesystem is inconsistent and has a receive resume
932 	 * token, we can not mount it.
933 	 */
934 	if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
935 	    zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
936 	    NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
937 		zfs_close(zhp);
938 		return (0);
939 	}
940 
941 	libzfs_add_handle(cbp, zhp);
942 	if (zfs_iter_filesystems_v2(zhp, 0, zfs_iter_cb, cbp) != 0) {
943 		zfs_close(zhp);
944 		return (-1);
945 	}
946 	return (0);
947 }
948 
949 /*
950  * Sort comparator that compares two mountpoint paths. We sort these paths so
951  * that subdirectories immediately follow their parents. This means that we
952  * effectively treat the '/' character as the lowest value non-nul char.
953  * Since filesystems from non-global zones can have the same mountpoint
954  * as other filesystems, the comparator sorts global zone filesystems to
955  * the top of the list. This means that the global zone will traverse the
956  * filesystem list in the correct order and can stop when it sees the
957  * first zoned filesystem. In a non-global zone, only the delegated
958  * filesystems are seen.
959  *
960  * An example sorted list using this comparator would look like:
961  *
962  * /foo
963  * /foo/bar
964  * /foo/bar/baz
965  * /foo/baz
966  * /foo.bar
967  * /foo (NGZ1)
968  * /foo (NGZ2)
969  *
970  * The mounting code depends on this ordering to deterministically iterate
971  * over filesystems in order to spawn parallel mount tasks.
972  */
973 static int
974 mountpoint_cmp(const void *arga, const void *argb)
975 {
976 	zfs_handle_t *const *zap = arga;
977 	zfs_handle_t *za = *zap;
978 	zfs_handle_t *const *zbp = argb;
979 	zfs_handle_t *zb = *zbp;
980 	char mounta[MAXPATHLEN];
981 	char mountb[MAXPATHLEN];
982 	const char *a = mounta;
983 	const char *b = mountb;
984 	boolean_t gota, gotb;
985 	uint64_t zoneda, zonedb;
986 
987 	zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED);
988 	zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED);
989 	if (zoneda && !zonedb)
990 		return (1);
991 	if (!zoneda && zonedb)
992 		return (-1);
993 
994 	gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
995 	if (gota) {
996 		verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
997 		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
998 	}
999 	gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
1000 	if (gotb) {
1001 		verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
1002 		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
1003 	}
1004 
1005 	if (gota && gotb) {
1006 		while (*a != '\0' && (*a == *b)) {
1007 			a++;
1008 			b++;
1009 		}
1010 		if (*a == *b)
1011 			return (0);
1012 		if (*a == '\0')
1013 			return (-1);
1014 		if (*b == '\0')
1015 			return (1);
1016 		if (*a == '/')
1017 			return (-1);
1018 		if (*b == '/')
1019 			return (1);
1020 		return (*a < *b ? -1 : *a > *b);
1021 	}
1022 
1023 	if (gota)
1024 		return (-1);
1025 	if (gotb)
1026 		return (1);
1027 
1028 	/*
1029 	 * If neither filesystem has a mountpoint, revert to sorting by
1030 	 * dataset name.
1031 	 */
1032 	return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
1033 }
1034 
1035 /*
1036  * Return true if path2 is a child of path1 or path2 equals path1 or
1037  * path1 is "/" (path2 is always a child of "/").
1038  */
1039 static boolean_t
1040 libzfs_path_contains(const char *path1, const char *path2)
1041 {
1042 	return (strcmp(path1, path2) == 0 || strcmp(path1, "/") == 0 ||
1043 	    (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'));
1044 }
1045 
1046 /*
1047  * Given a mountpoint specified by idx in the handles array, find the first
1048  * non-descendent of that mountpoint and return its index. Descendant paths
1049  * start with the parent's path. This function relies on the ordering
1050  * enforced by mountpoint_cmp().
1051  */
1052 static int
1053 non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
1054 {
1055 	char parent[ZFS_MAXPROPLEN];
1056 	char child[ZFS_MAXPROPLEN];
1057 	int i;
1058 
1059 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
1060 	    sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
1061 
1062 	for (i = idx + 1; i < num_handles; i++) {
1063 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
1064 		    sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1065 		if (!libzfs_path_contains(parent, child))
1066 			break;
1067 	}
1068 	return (i);
1069 }
1070 
1071 typedef struct mnt_param {
1072 	libzfs_handle_t	*mnt_hdl;
1073 	tpool_t		*mnt_tp;
1074 	zfs_handle_t	**mnt_zhps; /* filesystems to mount */
1075 	size_t		mnt_num_handles;
1076 	int		mnt_idx;	/* Index of selected entry to mount */
1077 	zfs_iter_f	mnt_func;
1078 	void		*mnt_data;
1079 } mnt_param_t;
1080 
1081 /*
1082  * Allocate and populate the parameter struct for mount function, and
1083  * schedule mounting of the entry selected by idx.
1084  */
1085 static void
1086 zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
1087     size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp)
1088 {
1089 	mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
1090 
1091 	mnt_param->mnt_hdl = hdl;
1092 	mnt_param->mnt_tp = tp;
1093 	mnt_param->mnt_zhps = handles;
1094 	mnt_param->mnt_num_handles = num_handles;
1095 	mnt_param->mnt_idx = idx;
1096 	mnt_param->mnt_func = func;
1097 	mnt_param->mnt_data = data;
1098 
1099 	if (tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param)) {
1100 		/* Could not dispatch to thread pool; execute directly */
1101 		zfs_mount_task((void*)mnt_param);
1102 	}
1103 }
1104 
/*
 * State shared by the mount and share callbacks for the duration of a
 * zpool_enable_datasets() call.
 */
typedef struct mount_state {
	/*
	 * Overall result: stays 0 unless a callback fails, in which case it
	 * is set to -1. Several threads may store to it at the same time,
	 * but since -1 is the only value ever written no synchronization is
	 * used.
	 */
	int		ms_mntstatus;
	/* flags forwarded to zfs_mount() */
	int		ms_mntflags;
	/* option string forwarded to zfs_mount() */
	const char	*ms_mntopts;
} mount_state_t;
1119 
1120 static int
1121 zfs_mount_one(zfs_handle_t *zhp, void *arg)
1122 {
1123 	mount_state_t *ms = arg;
1124 	int ret = 0;
1125 
1126 	/*
1127 	 * don't attempt to mount encrypted datasets with
1128 	 * unloaded keys
1129 	 */
1130 	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
1131 	    ZFS_KEYSTATUS_UNAVAILABLE)
1132 		return (0);
1133 
1134 	if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
1135 		ret = ms->ms_mntstatus = -1;
1136 	return (ret);
1137 }
1138 
1139 static int
1140 zfs_share_one(zfs_handle_t *zhp, void *arg)
1141 {
1142 	mount_state_t *ms = arg;
1143 	int ret = 0;
1144 
1145 	if (zfs_share(zhp, NULL) != 0)
1146 		ret = ms->ms_mntstatus = -1;
1147 	return (ret);
1148 }
1149 
1150 /*
1151  * Thread pool function to mount one file system. On completion, it finds and
1152  * schedules its children to be mounted. This depends on the sorting done in
1153  * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
1154  * each descending from the previous) will have no parallelism since we always
1155  * have to wait for the parent to finish mounting before we can schedule
1156  * its children.
1157  */
1158 static void
1159 zfs_mount_task(void *arg)
1160 {
1161 	mnt_param_t *mp = arg;
1162 	int idx = mp->mnt_idx;
1163 	zfs_handle_t **handles = mp->mnt_zhps;
1164 	size_t num_handles = mp->mnt_num_handles;
1165 	char mountpoint[ZFS_MAXPROPLEN];
1166 
1167 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
1168 	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
1169 
1170 	if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
1171 		goto out;
1172 
1173 	/*
1174 	 * We dispatch tasks to mount filesystems with mountpoints underneath
1175 	 * this one. We do this by dispatching the next filesystem with a
1176 	 * descendant mountpoint of the one we just mounted, then skip all of
1177 	 * its descendants, dispatch the next descendant mountpoint, and so on.
1178 	 * The non_descendant_idx() function skips over filesystems that are
1179 	 * descendants of the filesystem we just dispatched.
1180 	 */
1181 	for (int i = idx + 1; i < num_handles;
1182 	    i = non_descendant_idx(handles, num_handles, i)) {
1183 		char child[ZFS_MAXPROPLEN];
1184 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
1185 		    child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1186 
1187 		if (!libzfs_path_contains(mountpoint, child))
1188 			break; /* not a descendant, return */
1189 		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
1190 		    mp->mnt_func, mp->mnt_data, mp->mnt_tp);
1191 	}
1192 
1193 out:
1194 	free(mp);
1195 }
1196 
1197 /*
1198  * Issue the func callback for each ZFS handle contained in the handles
1199  * array. This function is used to mount all datasets, and so this function
1200  * guarantees that filesystems for parent mountpoints are called before their
1201  * children. As such, before issuing any callbacks, we first sort the array
1202  * of handles by mountpoint.
1203  *
1204  * Callbacks are issued in one of two ways:
1205  *
1206  * 1. Sequentially: If the nthr argument is <= 1 or the ZFS_SERIAL_MOUNT
1207  *    environment variable is set, then we issue callbacks sequentially.
1208  *
1209  * 2. In parallel: If the nthr argument is > 1 and the ZFS_SERIAL_MOUNT
1210  *    environment variable is not set, then we use a tpool to dispatch threads
1211  *    to mount filesystems in parallel. This function dispatches tasks to mount
1212  *    the filesystems at the top-level mountpoints, and these tasks in turn
1213  *    are responsible for recursively mounting filesystems in their children
1214  *    mountpoints.  The value of the nthr argument will be the number of worker
1215  *    threads for the thread pool.
1216  */
1217 void
1218 zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
1219     size_t num_handles, zfs_iter_f func, void *data, uint_t nthr)
1220 {
1221 	zoneid_t zoneid = getzoneid();
1222 
1223 	/*
1224 	 * The ZFS_SERIAL_MOUNT environment variable is an undocumented
1225 	 * variable that can be used as a convenience to do a/b comparison
1226 	 * of serial vs. parallel mounting.
1227 	 */
1228 	boolean_t serial_mount = nthr <= 1 ||
1229 	    (getenv("ZFS_SERIAL_MOUNT") != NULL);
1230 
1231 	/*
1232 	 * Sort the datasets by mountpoint. See mountpoint_cmp for details
1233 	 * of how these are sorted.
1234 	 */
1235 	qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
1236 
1237 	if (serial_mount) {
1238 		for (int i = 0; i < num_handles; i++) {
1239 			func(handles[i], data);
1240 		}
1241 		return;
1242 	}
1243 
1244 	/*
1245 	 * Issue the callback function for each dataset using a parallel
1246 	 * algorithm that uses a thread pool to manage threads.
1247 	 */
1248 	tpool_t *tp = tpool_create(1, nthr, 0, NULL);
1249 
1250 	/*
1251 	 * There may be multiple "top level" mountpoints outside of the pool's
1252 	 * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
1253 	 * these.
1254 	 */
1255 	for (int i = 0; i < num_handles;
1256 	    i = non_descendant_idx(handles, num_handles, i)) {
1257 		/*
1258 		 * Since the mountpoints have been sorted so that the zoned
1259 		 * filesystems are at the end, a zoned filesystem seen from
1260 		 * the global zone means that we're done.
1261 		 */
1262 		if (zoneid == GLOBAL_ZONEID &&
1263 		    zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))
1264 			break;
1265 		zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
1266 		    tp);
1267 	}
1268 
1269 	tpool_wait(tp);	/* wait for all scheduled mounts to complete */
1270 	tpool_destroy(tp);
1271 }
1272 
1273 /*
1274  * Mount and share all datasets within the given pool.  This assumes that no
1275  * datasets within the pool are currently mounted.  nthr will be number of
1276  * worker threads to use while mounting datasets.
1277  */
1278 int
1279 zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags,
1280     uint_t nthr)
1281 {
1282 	get_all_cb_t cb = { 0 };
1283 	mount_state_t ms = { 0 };
1284 	zfs_handle_t *zfsp;
1285 	int ret = 0;
1286 
1287 	if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
1288 	    ZFS_TYPE_DATASET)) == NULL)
1289 		goto out;
1290 
1291 	/*
1292 	 * Gather all non-snapshot datasets within the pool. Start by adding
1293 	 * the root filesystem for this pool to the list, and then iterate
1294 	 * over all child filesystems.
1295 	 */
1296 	libzfs_add_handle(&cb, zfsp);
1297 	if (zfs_iter_filesystems_v2(zfsp, 0, zfs_iter_cb, &cb) != 0)
1298 		goto out;
1299 
1300 	/*
1301 	 * Mount all filesystems
1302 	 */
1303 	ms.ms_mntopts = mntopts;
1304 	ms.ms_mntflags = flags;
1305 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1306 	    zfs_mount_one, &ms, nthr);
1307 	if (ms.ms_mntstatus != 0)
1308 		ret = EZFS_MOUNTFAILED;
1309 
1310 	/*
1311 	 * Share all filesystems that need to be shared. This needs to be
1312 	 * a separate pass because libshare is not mt-safe, and so we need
1313 	 * to share serially.
1314 	 */
1315 	ms.ms_mntstatus = 0;
1316 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1317 	    zfs_share_one, &ms, 1);
1318 	if (ms.ms_mntstatus != 0)
1319 		ret = EZFS_SHAREFAILED;
1320 	else
1321 		zfs_commit_shares(NULL);
1322 
1323 out:
1324 	for (int i = 0; i < cb.cb_used; i++)
1325 		zfs_close(cb.cb_handles[i]);
1326 	free(cb.cb_handles);
1327 
1328 	return (ret);
1329 }
1330 
1331 struct sets_s {
1332 	char *mountpoint;
1333 	zfs_handle_t *dataset;
1334 };
1335 
1336 static int
1337 mountpoint_compare(const void *a, const void *b)
1338 {
1339 	const struct sets_s *mounta = (struct sets_s *)a;
1340 	const struct sets_s *mountb = (struct sets_s *)b;
1341 
1342 	return (strcmp(mountb->mountpoint, mounta->mountpoint));
1343 }
1344 
1345 /*
1346  * Unshare and unmount all datasets within the given pool.  We don't want to
1347  * rely on traversing the DSL to discover the filesystems within the pool,
1348  * because this may be expensive (if not all of them are mounted), and can fail
1349  * arbitrarily (on I/O error, for example).  Instead, we walk /proc/self/mounts
1350  * and gather all the filesystems that are currently mounted.
1351  */
1352 int
1353 zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
1354 {
1355 	int used, alloc;
1356 	FILE *mnttab;
1357 	struct mnttab entry;
1358 	size_t namelen;
1359 	struct sets_s *sets = NULL;
1360 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1361 	int i;
1362 	int ret = -1;
1363 	int flags = (force ? MS_FORCE : 0);
1364 
1365 	namelen = strlen(zhp->zpool_name);
1366 
1367 	if ((mnttab = fopen(MNTTAB, "re")) == NULL)
1368 		return (ENOENT);
1369 
1370 	used = alloc = 0;
1371 	while (getmntent(mnttab, &entry) == 0) {
1372 		/*
1373 		 * Ignore non-ZFS entries.
1374 		 */
1375 		if (entry.mnt_fstype == NULL ||
1376 		    strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
1377 			continue;
1378 
1379 		/*
1380 		 * Ignore filesystems not within this pool.
1381 		 */
1382 		if (entry.mnt_mountp == NULL ||
1383 		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
1384 		    (entry.mnt_special[namelen] != '/' &&
1385 		    entry.mnt_special[namelen] != '\0'))
1386 			continue;
1387 
1388 		/*
1389 		 * At this point we've found a filesystem within our pool.  Add
1390 		 * it to our growing list.
1391 		 */
1392 		if (used == alloc) {
1393 			if (alloc == 0) {
1394 				sets = zfs_alloc(hdl,
1395 				    8 * sizeof (struct sets_s));
1396 				alloc = 8;
1397 			} else {
1398 				sets = zfs_realloc(hdl, sets,
1399 				    alloc * sizeof (struct sets_s),
1400 				    alloc * 2 * sizeof (struct sets_s));
1401 
1402 				alloc *= 2;
1403 			}
1404 		}
1405 
1406 		sets[used].mountpoint = zfs_strdup(hdl, entry.mnt_mountp);
1407 
1408 		/*
1409 		 * This is allowed to fail, in case there is some I/O error.  It
1410 		 * is only used to determine if we need to remove the underlying
1411 		 * mountpoint, so failure is not fatal.
1412 		 */
1413 		sets[used].dataset = make_dataset_handle(hdl,
1414 		    entry.mnt_special);
1415 
1416 		used++;
1417 	}
1418 
1419 	/*
1420 	 * At this point, we have the entire list of filesystems, so sort it by
1421 	 * mountpoint.
1422 	 */
1423 	if (used != 0)
1424 		qsort(sets, used, sizeof (struct sets_s), mountpoint_compare);
1425 
1426 	/*
1427 	 * Walk through and first unshare everything.
1428 	 */
1429 	for (i = 0; i < used; i++) {
1430 		for (enum sa_protocol p = 0; p < SA_PROTOCOL_COUNT; ++p) {
1431 			if (sa_is_shared(sets[i].mountpoint, p) &&
1432 			    unshare_one(hdl, sets[i].mountpoint,
1433 			    sets[i].mountpoint, p) != 0)
1434 				goto out;
1435 		}
1436 	}
1437 	zfs_commit_shares(NULL);
1438 
1439 	/*
1440 	 * Now unmount everything, removing the underlying directories as
1441 	 * appropriate.
1442 	 */
1443 	for (i = 0; i < used; i++) {
1444 		if (unmount_one(sets[i].dataset, sets[i].mountpoint,
1445 		    flags) != 0)
1446 			goto out;
1447 	}
1448 
1449 	for (i = 0; i < used; i++) {
1450 		if (sets[i].dataset)
1451 			remove_mountpoint(sets[i].dataset);
1452 	}
1453 
1454 	zpool_disable_datasets_os(zhp, force);
1455 
1456 	ret = 0;
1457 out:
1458 	(void) fclose(mnttab);
1459 	for (i = 0; i < used; i++) {
1460 		if (sets[i].dataset)
1461 			zfs_close(sets[i].dataset);
1462 		free(sets[i].mountpoint);
1463 	}
1464 	free(sets);
1465 
1466 	return (ret);
1467 }
1468