xref: /illumos-gate/usr/src/lib/libzfs/common/libzfs_mount.c (revision 9164a50bf932130cbb5097a16f6986873ce0e6e5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2019 Nexenta Systems, Inc.
28  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
29  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
30  * Copyright 2017 Joyent, Inc.
31  * Copyright 2017 RackTop Systems.
32  * Copyright 2018 OmniOS Community Edition (OmniOSce) Association.
33  */
34 
35 /*
36  * Routines to manage ZFS mounts.  We separate all the nasty routines that have
37  * to deal with the OS.  The following functions are the main entry points --
38  * they are used by mount and unmount and when changing a filesystem's
39  * mountpoint.
40  *
41  *	zfs_is_mounted()
42  *	zfs_mount()
43  *	zfs_unmount()
44  *	zfs_unmountall()
45  *
 * This file also contains the functions used to manage sharing filesystems via
 * NFS and SMB:
48  *
49  *	zfs_is_shared()
50  *	zfs_share()
51  *	zfs_unshare()
52  *
53  *	zfs_is_shared_nfs()
54  *	zfs_is_shared_smb()
55  *	zfs_share_proto()
 *	zfs_shareall()
57  *	zfs_unshare_nfs()
58  *	zfs_unshare_smb()
59  *	zfs_unshareall_nfs()
60  *	zfs_unshareall_smb()
61  *	zfs_unshareall()
62  *	zfs_unshareall_bypath()
63  *
64  * The following functions are available for pool consumers, and will
65  * mount/unmount and share/unshare all datasets within pool:
66  *
67  *	zpool_enable_datasets()
68  *	zpool_disable_datasets()
69  */
70 
71 #include <dirent.h>
72 #include <dlfcn.h>
73 #include <errno.h>
74 #include <fcntl.h>
75 #include <libgen.h>
76 #include <libintl.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <strings.h>
80 #include <unistd.h>
81 #include <zone.h>
82 #include <sys/mntent.h>
83 #include <sys/mount.h>
84 #include <sys/stat.h>
85 #include <sys/statvfs.h>
86 #include <sys/dsl_crypt.h>
87 
88 #include <libzfs.h>
89 
90 #include "libzfs_impl.h"
91 #include "libzfs_taskq.h"
92 
93 #include <libshare.h>
94 #include <sys/systeminfo.h>
/* Size of the ISA name buffer passed to sysinfo() in _zfs_init_libshare(). */
#define	MAXISALEN	257	/* based on sysinfo(2) man page */

static int mount_tq_nthr = 512;	/* taskq threads for multi-threaded mounting */

/*
 * Forward declarations.  zfs_share_proto() and zfs_is_shared_proto() are
 * called earlier in this file than they are defined.
 */
static void zfs_mount_task(void *);
static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
    zfs_share_proto_t);
103 
/*
 * The share protocols table must be in the same order as the zfs_share_proto_t
 * enum in libzfs_impl.h
 *
 * The table is indexed directly by a zfs_share_proto_t value throughout this
 * file (proto_table[proto]), and is sized by PROTO_END.
 */
typedef struct {
	zfs_prop_t p_prop;	/* dataset property holding the share options */
	char *p_name;		/* protocol name as used in sharetab/libshare */
	int p_share_err;	/* EZFS_* code reported when sharing fails */
	int p_unshare_err;	/* EZFS_* code reported when unsharing fails */
} proto_table_t;

proto_table_t proto_table[PROTO_END] = {
	{ZFS_PROP_SHARENFS, "nfs", EZFS_SHARENFSFAILED, EZFS_UNSHARENFSFAILED},
	{ZFS_PROP_SHARESMB, "smb", EZFS_SHARESMBFAILED, EZFS_UNSHARESMBFAILED},
};

/*
 * Protocol lists handed to the *_proto() routines.  Each list is terminated
 * by PROTO_END, which is what the iteration loops in this file test for.
 */

/* NFS only */
zfs_share_proto_t nfs_only[] = {
	PROTO_NFS,
	PROTO_END
};

/* SMB only */
zfs_share_proto_t smb_only[] = {
	PROTO_SMB,
	PROTO_END
};
/* every protocol in proto_table */
zfs_share_proto_t share_all_proto[] = {
	PROTO_NFS,
	PROTO_SMB,
	PROTO_END
};
134 
135 /*
136  * Search the sharetab for the given mountpoint and protocol, returning
137  * a zfs_share_type_t value.
138  */
139 static zfs_share_type_t
140 is_shared(libzfs_handle_t *hdl, const char *mountpoint, zfs_share_proto_t proto)
141 {
142 	char buf[MAXPATHLEN], *tab;
143 	char *ptr;
144 
145 	if (hdl->libzfs_sharetab == NULL)
146 		return (SHARED_NOT_SHARED);
147 
148 	(void) fseek(hdl->libzfs_sharetab, 0, SEEK_SET);
149 
150 	while (fgets(buf, sizeof (buf), hdl->libzfs_sharetab) != NULL) {
151 
152 		/* the mountpoint is the first entry on each line */
153 		if ((tab = strchr(buf, '\t')) == NULL)
154 			continue;
155 
156 		*tab = '\0';
157 		if (strcmp(buf, mountpoint) == 0) {
158 			/*
159 			 * the protocol field is the third field
160 			 * skip over second field
161 			 */
162 			ptr = ++tab;
163 			if ((tab = strchr(ptr, '\t')) == NULL)
164 				continue;
165 			ptr = ++tab;
166 			if ((tab = strchr(ptr, '\t')) == NULL)
167 				continue;
168 			*tab = '\0';
169 			if (strcmp(ptr,
170 			    proto_table[proto].p_name) == 0) {
171 				switch (proto) {
172 				case PROTO_NFS:
173 					return (SHARED_NFS);
174 				case PROTO_SMB:
175 					return (SHARED_SMB);
176 				default:
177 					return (0);
178 				}
179 			}
180 		}
181 	}
182 
183 	return (SHARED_NOT_SHARED);
184 }
185 
186 static boolean_t
187 dir_is_empty_stat(const char *dirname)
188 {
189 	struct stat st;
190 
191 	/*
192 	 * We only want to return false if the given path is a non empty
193 	 * directory, all other errors are handled elsewhere.
194 	 */
195 	if (stat(dirname, &st) < 0 || !S_ISDIR(st.st_mode)) {
196 		return (B_TRUE);
197 	}
198 
199 	/*
200 	 * An empty directory will still have two entries in it, one
201 	 * entry for each of "." and "..".
202 	 */
203 	if (st.st_size > 2) {
204 		return (B_FALSE);
205 	}
206 
207 	return (B_TRUE);
208 }
209 
210 static boolean_t
211 dir_is_empty_readdir(const char *dirname)
212 {
213 	DIR *dirp;
214 	struct dirent64 *dp;
215 	int dirfd;
216 
217 	if ((dirfd = openat(AT_FDCWD, dirname,
218 	    O_RDONLY | O_NDELAY | O_LARGEFILE | O_CLOEXEC, 0)) < 0) {
219 		return (B_TRUE);
220 	}
221 
222 	if ((dirp = fdopendir(dirfd)) == NULL) {
223 		(void) close(dirfd);
224 		return (B_TRUE);
225 	}
226 
227 	while ((dp = readdir64(dirp)) != NULL) {
228 
229 		if (strcmp(dp->d_name, ".") == 0 ||
230 		    strcmp(dp->d_name, "..") == 0)
231 			continue;
232 
233 		(void) closedir(dirp);
234 		return (B_FALSE);
235 	}
236 
237 	(void) closedir(dirp);
238 	return (B_TRUE);
239 }
240 
241 /*
242  * Returns true if the specified directory is empty.  If we can't open the
243  * directory at all, return true so that the mount can fail with a more
244  * informative error message.
245  */
246 static boolean_t
247 dir_is_empty(const char *dirname)
248 {
249 	struct statvfs64 st;
250 
251 	/*
252 	 * If the statvfs call fails or the filesystem is not a ZFS
253 	 * filesystem, fall back to the slow path which uses readdir.
254 	 */
255 	if ((statvfs64(dirname, &st) != 0) ||
256 	    (strcmp(st.f_basetype, "zfs") != 0)) {
257 		return (dir_is_empty_readdir(dirname));
258 	}
259 
260 	/*
261 	 * At this point, we know the provided path is on a ZFS
262 	 * filesystem, so we can use stat instead of readdir to
263 	 * determine if the directory is empty or not. We try to avoid
264 	 * using readdir because that requires opening "dirname"; this
265 	 * open file descriptor can potentially end up in a child
266 	 * process if there's a concurrent fork, thus preventing the
267 	 * zfs_mount() from otherwise succeeding (the open file
268 	 * descriptor inherited by the child process will cause the
269 	 * parent's mount to fail with EBUSY). The performance
270 	 * implications of replacing the open, read, and close with a
271 	 * single stat is nice; but is not the main motivation for the
272 	 * added complexity.
273 	 */
274 	return (dir_is_empty_stat(dirname));
275 }
276 
277 /*
278  * Checks to see if the mount is active.  If the filesystem is mounted, we fill
279  * in 'where' with the current mountpoint, and return 1.  Otherwise, we return
280  * 0.
281  */
282 boolean_t
283 is_mounted(libzfs_handle_t *zfs_hdl, const char *special, char **where)
284 {
285 	struct mnttab entry;
286 
287 	if (libzfs_mnttab_find(zfs_hdl, special, &entry) != 0)
288 		return (B_FALSE);
289 
290 	if (where != NULL)
291 		*where = zfs_strdup(zfs_hdl, entry.mnt_mountp);
292 
293 	return (B_TRUE);
294 }
295 
296 boolean_t
297 zfs_is_mounted(zfs_handle_t *zhp, char **where)
298 {
299 	return (is_mounted(zhp->zfs_hdl, zfs_get_name(zhp), where));
300 }
301 
302 /*
303  * Returns true if the given dataset is mountable, false otherwise.  Returns the
304  * mountpoint in 'buf'.
305  */
306 static boolean_t
307 zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
308     zprop_source_t *source)
309 {
310 	char sourceloc[MAXNAMELEN];
311 	zprop_source_t sourcetype;
312 
313 	if (!zfs_prop_valid_for_type(ZFS_PROP_MOUNTPOINT, zhp->zfs_type,
314 	    B_FALSE))
315 		return (B_FALSE);
316 
317 	verify(zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, buf, buflen,
318 	    &sourcetype, sourceloc, sizeof (sourceloc), B_FALSE) == 0);
319 
320 	if (strcmp(buf, ZFS_MOUNTPOINT_NONE) == 0 ||
321 	    strcmp(buf, ZFS_MOUNTPOINT_LEGACY) == 0)
322 		return (B_FALSE);
323 
324 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_OFF)
325 		return (B_FALSE);
326 
327 	if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
328 	    getzoneid() == GLOBAL_ZONEID)
329 		return (B_FALSE);
330 
331 	if (source)
332 		*source = sourcetype;
333 
334 	return (B_TRUE);
335 }
336 
337 /*
338  * Mount the given filesystem.
339  */
340 int
341 zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
342 {
343 	struct stat buf;
344 	char mountpoint[ZFS_MAXPROPLEN];
345 	char mntopts[MNT_LINE_MAX];
346 	libzfs_handle_t *hdl = zhp->zfs_hdl;
347 	uint64_t keystatus;
348 	int rc;
349 
350 	if (options == NULL)
351 		mntopts[0] = '\0';
352 	else
353 		(void) strlcpy(mntopts, options, sizeof (mntopts));
354 
355 	/*
356 	 * If the pool is imported read-only then all mounts must be read-only
357 	 */
358 	if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
359 		flags |= MS_RDONLY;
360 
361 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
362 		return (0);
363 
364 	/*
365 	 * If the filesystem is encrypted the key must be loaded  in order to
366 	 * mount. If the key isn't loaded, the MS_CRYPT flag decides whether
367 	 * or not we attempt to load the keys. Note: we must call
368 	 * zfs_refresh_properties() here since some callers of this function
369 	 * (most notably zpool_enable_datasets()) may implicitly load our key
370 	 * by loading the parent's key first.
371 	 */
372 	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
373 		zfs_refresh_properties(zhp);
374 		keystatus = zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS);
375 
376 		/*
377 		 * If the key is unavailable and MS_CRYPT is set give the
378 		 * user a chance to enter the key. Otherwise just fail
379 		 * immediately.
380 		 */
381 		if (keystatus == ZFS_KEYSTATUS_UNAVAILABLE) {
382 			if (flags & MS_CRYPT) {
383 				rc = zfs_crypto_load_key(zhp, B_FALSE, NULL);
384 				if (rc != 0)
385 					return (rc);
386 			} else {
387 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
388 				    "encryption key not loaded"));
389 				return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
390 				    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
391 				    mountpoint));
392 			}
393 		}
394 
395 	}
396 
397 	/* Create the directory if it doesn't already exist */
398 	if (lstat(mountpoint, &buf) != 0) {
399 		if (mkdirp(mountpoint, 0755) != 0) {
400 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
401 			    "failed to create mountpoint"));
402 			return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
403 			    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
404 			    mountpoint));
405 		}
406 	}
407 
408 	/*
409 	 * Determine if the mountpoint is empty.  If so, refuse to perform the
410 	 * mount.  We don't perform this check if MS_OVERLAY is specified, which
411 	 * would defeat the point.  We also avoid this check if 'remount' is
412 	 * specified.
413 	 */
414 	if ((flags & MS_OVERLAY) == 0 &&
415 	    strstr(mntopts, MNTOPT_REMOUNT) == NULL &&
416 	    !dir_is_empty(mountpoint)) {
417 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
418 		    "directory is not empty"));
419 		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
420 		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"), mountpoint));
421 	}
422 
423 	/* perform the mount */
424 	if (mount(zfs_get_name(zhp), mountpoint, MS_OPTIONSTR | flags,
425 	    MNTTYPE_ZFS, NULL, 0, mntopts, sizeof (mntopts)) != 0) {
426 		/*
427 		 * Generic errors are nasty, but there are just way too many
428 		 * from mount(), and they're well-understood.  We pick a few
429 		 * common ones to improve upon.
430 		 */
431 		if (errno == EBUSY) {
432 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
433 			    "mountpoint or dataset is busy"));
434 		} else if (errno == EPERM) {
435 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
436 			    "Insufficient privileges"));
437 		} else if (errno == ENOTSUP) {
438 			char buf[256];
439 			int spa_version;
440 
441 			VERIFY(zfs_spa_version(zhp, &spa_version) == 0);
442 			(void) snprintf(buf, sizeof (buf),
443 			    dgettext(TEXT_DOMAIN, "Can't mount a version %lld "
444 			    "file system on a version %d pool. Pool must be"
445 			    " upgraded to mount this file system."),
446 			    (u_longlong_t)zfs_prop_get_int(zhp,
447 			    ZFS_PROP_VERSION), spa_version);
448 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, buf));
449 		} else {
450 			zfs_error_aux(hdl, strerror(errno));
451 		}
452 		return (zfs_error_fmt(hdl, EZFS_MOUNTFAILED,
453 		    dgettext(TEXT_DOMAIN, "cannot mount '%s'"),
454 		    zhp->zfs_name));
455 	}
456 
457 	/* add the mounted entry into our cache */
458 	libzfs_mnttab_add(hdl, zfs_get_name(zhp), mountpoint,
459 	    mntopts);
460 	return (0);
461 }
462 
463 /*
464  * Unmount a single filesystem.
465  */
466 static int
467 unmount_one(libzfs_handle_t *hdl, const char *mountpoint, int flags)
468 {
469 	if (umount2(mountpoint, flags) != 0) {
470 		zfs_error_aux(hdl, strerror(errno));
471 		return (zfs_error_fmt(hdl, EZFS_UMOUNTFAILED,
472 		    dgettext(TEXT_DOMAIN, "cannot unmount '%s'"),
473 		    mountpoint));
474 	}
475 
476 	return (0);
477 }
478 
479 /*
480  * Unmount the given filesystem.
481  */
482 int
483 zfs_unmount(zfs_handle_t *zhp, const char *mountpoint, int flags)
484 {
485 	libzfs_handle_t *hdl = zhp->zfs_hdl;
486 	struct mnttab entry;
487 	char *mntpt = NULL;
488 
489 	/* check to see if we need to unmount the filesystem */
490 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
491 	    libzfs_mnttab_find(hdl, zhp->zfs_name, &entry) == 0)) {
492 		/*
493 		 * mountpoint may have come from a call to
494 		 * getmnt/getmntany if it isn't NULL. If it is NULL,
495 		 * we know it comes from libzfs_mnttab_find which can
496 		 * then get freed later. We strdup it to play it safe.
497 		 */
498 		if (mountpoint == NULL)
499 			mntpt = zfs_strdup(hdl, entry.mnt_mountp);
500 		else
501 			mntpt = zfs_strdup(hdl, mountpoint);
502 
503 		/*
504 		 * Unshare and unmount the filesystem
505 		 */
506 		if (zfs_unshare_proto(zhp, mntpt, share_all_proto) != 0)
507 			return (-1);
508 
509 		if (unmount_one(hdl, mntpt, flags) != 0) {
510 			free(mntpt);
511 			(void) zfs_shareall(zhp);
512 			return (-1);
513 		}
514 		libzfs_mnttab_remove(hdl, zhp->zfs_name);
515 		free(mntpt);
516 	}
517 
518 	return (0);
519 }
520 
521 /*
522  * Unmount this filesystem and any children inheriting the mountpoint property.
523  * To do this, just act like we're changing the mountpoint property, but don't
524  * remount the filesystems afterwards.
525  */
526 int
527 zfs_unmountall(zfs_handle_t *zhp, int flags)
528 {
529 	prop_changelist_t *clp;
530 	int ret;
531 
532 	clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT, 0, flags);
533 	if (clp == NULL)
534 		return (-1);
535 
536 	ret = changelist_prefix(clp);
537 	changelist_free(clp);
538 
539 	return (ret);
540 }
541 
542 boolean_t
543 zfs_is_shared(zfs_handle_t *zhp)
544 {
545 	zfs_share_type_t rc = 0;
546 	zfs_share_proto_t *curr_proto;
547 
548 	if (ZFS_IS_VOLUME(zhp))
549 		return (B_FALSE);
550 
551 	for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
552 	    curr_proto++)
553 		rc |= zfs_is_shared_proto(zhp, NULL, *curr_proto);
554 
555 	return (rc ? B_TRUE : B_FALSE);
556 }
557 
558 int
559 zfs_share(zfs_handle_t *zhp)
560 {
561 	assert(!ZFS_IS_VOLUME(zhp));
562 	return (zfs_share_proto(zhp, share_all_proto));
563 }
564 
565 int
566 zfs_unshare(zfs_handle_t *zhp)
567 {
568 	assert(!ZFS_IS_VOLUME(zhp));
569 	return (zfs_unshareall(zhp));
570 }
571 
572 /*
573  * Check to see if the filesystem is currently shared.
574  */
575 zfs_share_type_t
576 zfs_is_shared_proto(zfs_handle_t *zhp, char **where, zfs_share_proto_t proto)
577 {
578 	char *mountpoint;
579 	zfs_share_type_t rc;
580 
581 	if (!zfs_is_mounted(zhp, &mountpoint))
582 		return (SHARED_NOT_SHARED);
583 
584 	if ((rc = is_shared(zhp->zfs_hdl, mountpoint, proto))
585 	    != SHARED_NOT_SHARED) {
586 		if (where != NULL)
587 			*where = mountpoint;
588 		else
589 			free(mountpoint);
590 		return (rc);
591 	} else {
592 		free(mountpoint);
593 		return (SHARED_NOT_SHARED);
594 	}
595 }
596 
597 boolean_t
598 zfs_is_shared_nfs(zfs_handle_t *zhp, char **where)
599 {
600 	return (zfs_is_shared_proto(zhp, where,
601 	    PROTO_NFS) != SHARED_NOT_SHARED);
602 }
603 
604 boolean_t
605 zfs_is_shared_smb(zfs_handle_t *zhp, char **where)
606 {
607 	return (zfs_is_shared_proto(zhp, where,
608 	    PROTO_SMB) != SHARED_NOT_SHARED);
609 }
610 
/*
 * Make sure things will work if libshare isn't installed by using
 * wrapper functions that check to see that the pointers to functions
 * initialized in _zfs_init_libshare() are actually present.
 *
 * Each pointer below is looked up with dlsym() in _zfs_init_libshare() under
 * the libshare symbol name obtained by dropping the leading underscore.  A
 * pointer stays NULL when libshare could not be loaded.
 */

static sa_handle_t (*_sa_init)(int);
static sa_handle_t (*_sa_init_arg)(int, void *);
static int (*_sa_service)(sa_handle_t);
static void (*_sa_fini)(sa_handle_t);
static sa_share_t (*_sa_find_share)(sa_handle_t, char *);
static int (*_sa_enable_share)(sa_share_t, char *);
static int (*_sa_disable_share)(sa_share_t, char *);
static char *(*_sa_errorstr)(int);
static int (*_sa_parse_legacy_options)(sa_group_t, char *, char *);
static boolean_t (*_sa_needs_refresh)(sa_handle_t *);
static libzfs_handle_t *(*_sa_get_zfs_handle)(sa_handle_t);
static int (* _sa_get_zfs_share)(sa_handle_t, char *, zfs_handle_t *);
static void (*_sa_update_sharetab_ts)(sa_handle_t);
630 
631 /*
632  * _zfs_init_libshare()
633  *
634  * Find the libshare.so.1 entry points that we use here and save the
635  * values to be used later. This is triggered by the runtime loader.
636  * Make sure the correct ISA version is loaded.
637  */
638 
639 #pragma init(_zfs_init_libshare)
640 static void
641 _zfs_init_libshare(void)
642 {
643 	void *libshare;
644 	char path[MAXPATHLEN];
645 	char isa[MAXISALEN];
646 
647 #if defined(_LP64)
648 	if (sysinfo(SI_ARCHITECTURE_64, isa, MAXISALEN) == -1)
649 		isa[0] = '\0';
650 #else
651 	isa[0] = '\0';
652 #endif
653 	(void) snprintf(path, MAXPATHLEN,
654 	    "/usr/lib/%s/libshare.so.1", isa);
655 
656 	if ((libshare = dlopen(path, RTLD_LAZY | RTLD_GLOBAL)) != NULL) {
657 		_sa_init = (sa_handle_t (*)(int))dlsym(libshare, "sa_init");
658 		_sa_init_arg = (sa_handle_t (*)(int, void *))dlsym(libshare,
659 		    "sa_init_arg");
660 		_sa_fini = (void (*)(sa_handle_t))dlsym(libshare, "sa_fini");
661 		_sa_service = (int (*)(sa_handle_t))dlsym(libshare,
662 		    "sa_service");
663 		_sa_find_share = (sa_share_t (*)(sa_handle_t, char *))
664 		    dlsym(libshare, "sa_find_share");
665 		_sa_enable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
666 		    "sa_enable_share");
667 		_sa_disable_share = (int (*)(sa_share_t, char *))dlsym(libshare,
668 		    "sa_disable_share");
669 		_sa_errorstr = (char *(*)(int))dlsym(libshare, "sa_errorstr");
670 		_sa_parse_legacy_options = (int (*)(sa_group_t, char *, char *))
671 		    dlsym(libshare, "sa_parse_legacy_options");
672 		_sa_needs_refresh = (boolean_t (*)(sa_handle_t *))
673 		    dlsym(libshare, "sa_needs_refresh");
674 		_sa_get_zfs_handle = (libzfs_handle_t *(*)(sa_handle_t))
675 		    dlsym(libshare, "sa_get_zfs_handle");
676 		_sa_get_zfs_share = (int (*)(sa_handle_t, char *,
677 		    zfs_handle_t *)) dlsym(libshare, "sa_get_zfs_share");
678 		_sa_update_sharetab_ts = (void (*)(sa_handle_t))
679 		    dlsym(libshare, "sa_update_sharetab_ts");
680 		if (_sa_init == NULL || _sa_init_arg == NULL ||
681 		    _sa_fini == NULL || _sa_find_share == NULL ||
682 		    _sa_enable_share == NULL || _sa_disable_share == NULL ||
683 		    _sa_errorstr == NULL || _sa_parse_legacy_options == NULL ||
684 		    _sa_needs_refresh == NULL || _sa_get_zfs_handle == NULL ||
685 		    _sa_get_zfs_share == NULL || _sa_service == NULL ||
686 		    _sa_update_sharetab_ts == NULL) {
687 			_sa_init = NULL;
688 			_sa_init_arg = NULL;
689 			_sa_service = NULL;
690 			_sa_fini = NULL;
691 			_sa_disable_share = NULL;
692 			_sa_enable_share = NULL;
693 			_sa_errorstr = NULL;
694 			_sa_parse_legacy_options = NULL;
695 			(void) dlclose(libshare);
696 			_sa_needs_refresh = NULL;
697 			_sa_get_zfs_handle = NULL;
698 			_sa_get_zfs_share = NULL;
699 			_sa_update_sharetab_ts = NULL;
700 		}
701 	}
702 }
703 
704 /*
705  * zfs_init_libshare(zhandle, service)
706  *
707  * Initialize the libshare API if it hasn't already been initialized.
708  * In all cases it returns 0 if it succeeded and an error if not. The
709  * service value is which part(s) of the API to initialize and is a
710  * direct map to the libshare sa_init(service) interface.
711  */
712 static int
713 zfs_init_libshare_impl(libzfs_handle_t *zhandle, int service, void *arg)
714 {
715 	/*
716 	 * libshare is either not installed or we're in a branded zone. The
717 	 * rest of the wrapper functions around the libshare calls already
718 	 * handle NULL function pointers, but we don't want the callers of
719 	 * zfs_init_libshare() to fail prematurely if libshare is not available.
720 	 */
721 	if (_sa_init == NULL)
722 		return (SA_OK);
723 
724 	/*
725 	 * Attempt to refresh libshare. This is necessary if there was a cache
726 	 * miss for a new ZFS dataset that was just created, or if state of the
727 	 * sharetab file has changed since libshare was last initialized. We
728 	 * want to make sure so check timestamps to see if a different process
729 	 * has updated any of the configuration. If there was some non-ZFS
730 	 * change, we need to re-initialize the internal cache.
731 	 */
732 	if (_sa_needs_refresh != NULL &&
733 	    _sa_needs_refresh(zhandle->libzfs_sharehdl)) {
734 		zfs_uninit_libshare(zhandle);
735 		zhandle->libzfs_sharehdl = _sa_init_arg(service, arg);
736 	}
737 
738 	if (zhandle && zhandle->libzfs_sharehdl == NULL)
739 		zhandle->libzfs_sharehdl = _sa_init_arg(service, arg);
740 
741 	if (zhandle->libzfs_sharehdl == NULL)
742 		return (SA_NO_MEMORY);
743 
744 	return (SA_OK);
745 }
746 int
747 zfs_init_libshare(libzfs_handle_t *zhandle, int service)
748 {
749 	return (zfs_init_libshare_impl(zhandle, service, NULL));
750 }
751 
752 int
753 zfs_init_libshare_arg(libzfs_handle_t *zhandle, int service, void *arg)
754 {
755 	return (zfs_init_libshare_impl(zhandle, service, arg));
756 }
757 
758 
759 /*
760  * zfs_uninit_libshare(zhandle)
761  *
762  * Uninitialize the libshare API if it hasn't already been
763  * uninitialized. It is OK to call multiple times.
764  */
765 void
766 zfs_uninit_libshare(libzfs_handle_t *zhandle)
767 {
768 	if (zhandle != NULL && zhandle->libzfs_sharehdl != NULL) {
769 		if (_sa_fini != NULL)
770 			_sa_fini(zhandle->libzfs_sharehdl);
771 		zhandle->libzfs_sharehdl = NULL;
772 	}
773 }
774 
775 /*
776  * zfs_parse_options(options, proto)
777  *
778  * Call the legacy parse interface to get the protocol specific
779  * options using the NULL arg to indicate that this is a "parse" only.
780  */
781 int
782 zfs_parse_options(char *options, zfs_share_proto_t proto)
783 {
784 	if (_sa_parse_legacy_options != NULL) {
785 		return (_sa_parse_legacy_options(NULL, options,
786 		    proto_table[proto].p_name));
787 	}
788 	return (SA_CONFIG_ERR);
789 }
790 
791 /*
792  * zfs_sa_find_share(handle, path)
793  *
794  * wrapper around sa_find_share to find a share path in the
795  * configuration.
796  */
797 static sa_share_t
798 zfs_sa_find_share(sa_handle_t handle, char *path)
799 {
800 	if (_sa_find_share != NULL)
801 		return (_sa_find_share(handle, path));
802 	return (NULL);
803 }
804 
805 /*
806  * zfs_sa_enable_share(share, proto)
807  *
808  * Wrapper for sa_enable_share which enables a share for a specified
809  * protocol.
810  */
811 static int
812 zfs_sa_enable_share(sa_share_t share, char *proto)
813 {
814 	if (_sa_enable_share != NULL)
815 		return (_sa_enable_share(share, proto));
816 	return (SA_CONFIG_ERR);
817 }
818 
819 /*
820  * zfs_sa_disable_share(share, proto)
821  *
822  * Wrapper for sa_enable_share which disables a share for a specified
823  * protocol.
824  */
825 static int
826 zfs_sa_disable_share(sa_share_t share, char *proto)
827 {
828 	if (_sa_disable_share != NULL)
829 		return (_sa_disable_share(share, proto));
830 	return (SA_CONFIG_ERR);
831 }
832 
833 /*
834  * Share the given filesystem according to the options in the specified
835  * protocol specific properties (sharenfs, sharesmb).  We rely
836  * on "libshare" to the dirty work for us.
837  */
838 static int
839 zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
840 {
841 	char mountpoint[ZFS_MAXPROPLEN];
842 	char shareopts[ZFS_MAXPROPLEN];
843 	char sourcestr[ZFS_MAXPROPLEN];
844 	libzfs_handle_t *hdl = zhp->zfs_hdl;
845 	sa_share_t share;
846 	zfs_share_proto_t *curr_proto;
847 	zprop_source_t sourcetype;
848 	int service = SA_INIT_ONE_SHARE_FROM_HANDLE;
849 	int ret;
850 
851 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
852 		return (0);
853 
854 	/*
855 	 * Function may be called in a loop from higher up stack, with libshare
856 	 * initialized for multiple shares (SA_INIT_SHARE_API_SELECTIVE).
857 	 * zfs_init_libshare_arg will refresh the handle's cache if necessary.
858 	 * In this case we do not want to switch to per share initialization.
859 	 * Specify SA_INIT_SHARE_API to do full refresh, if refresh required.
860 	 */
861 	if ((hdl->libzfs_sharehdl != NULL) && (_sa_service != NULL) &&
862 	    (_sa_service(hdl->libzfs_sharehdl) ==
863 	    SA_INIT_SHARE_API_SELECTIVE)) {
864 		service = SA_INIT_SHARE_API;
865 	}
866 
867 	for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
868 		/*
869 		 * Return success if there are no share options.
870 		 */
871 		if (zfs_prop_get(zhp, proto_table[*curr_proto].p_prop,
872 		    shareopts, sizeof (shareopts), &sourcetype, sourcestr,
873 		    ZFS_MAXPROPLEN, B_FALSE) != 0 ||
874 		    strcmp(shareopts, "off") == 0)
875 			continue;
876 		ret = zfs_init_libshare_arg(hdl, service, zhp);
877 		if (ret != SA_OK) {
878 			(void) zfs_error_fmt(hdl, EZFS_SHARENFSFAILED,
879 			    dgettext(TEXT_DOMAIN, "cannot share '%s': %s"),
880 			    zfs_get_name(zhp), _sa_errorstr != NULL ?
881 			    _sa_errorstr(ret) : "");
882 			return (-1);
883 		}
884 
885 		share = zfs_sa_find_share(hdl->libzfs_sharehdl, mountpoint);
886 		if (share == NULL) {
887 			/*
888 			 * This may be a new file system that was just
889 			 * created so isn't in the internal cache.
890 			 * Rather than reloading the entire configuration,
891 			 * we can add just this one share to the cache.
892 			 */
893 			if ((_sa_get_zfs_share == NULL) ||
894 			    (_sa_get_zfs_share(hdl->libzfs_sharehdl, "zfs", zhp)
895 			    != SA_OK)) {
896 				(void) zfs_error_fmt(hdl,
897 				    proto_table[*curr_proto].p_share_err,
898 				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
899 				    zfs_get_name(zhp));
900 				return (-1);
901 			}
902 			share = zfs_sa_find_share(hdl->libzfs_sharehdl,
903 			    mountpoint);
904 		}
905 		if (share != NULL) {
906 			int err;
907 			err = zfs_sa_enable_share(share,
908 			    proto_table[*curr_proto].p_name);
909 			if (err != SA_OK) {
910 				(void) zfs_error_fmt(hdl,
911 				    proto_table[*curr_proto].p_share_err,
912 				    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
913 				    zfs_get_name(zhp));
914 				return (-1);
915 			}
916 		} else {
917 			(void) zfs_error_fmt(hdl,
918 			    proto_table[*curr_proto].p_share_err,
919 			    dgettext(TEXT_DOMAIN, "cannot share '%s'"),
920 			    zfs_get_name(zhp));
921 			return (-1);
922 		}
923 
924 	}
925 	return (0);
926 }
927 
928 
929 int
930 zfs_share_nfs(zfs_handle_t *zhp)
931 {
932 	return (zfs_share_proto(zhp, nfs_only));
933 }
934 
935 int
936 zfs_share_smb(zfs_handle_t *zhp)
937 {
938 	return (zfs_share_proto(zhp, smb_only));
939 }
940 
941 int
942 zfs_shareall(zfs_handle_t *zhp)
943 {
944 	return (zfs_share_proto(zhp, share_all_proto));
945 }
946 
947 /*
948  * Unshare a filesystem by mountpoint.
949  */
950 static int
951 unshare_one(libzfs_handle_t *hdl, const char *name, const char *mountpoint,
952     zfs_share_proto_t proto)
953 {
954 	sa_share_t share;
955 	int err;
956 	char *mntpt;
957 	int service = SA_INIT_ONE_SHARE_FROM_NAME;
958 
959 	/*
960 	 * Mountpoint could get trashed if libshare calls getmntany
961 	 * which it does during API initialization, so strdup the
962 	 * value.
963 	 */
964 	mntpt = zfs_strdup(hdl, mountpoint);
965 
966 	/*
967 	 * Function may be called in a loop from higher up stack, with libshare
968 	 * initialized for multiple shares (SA_INIT_SHARE_API_SELECTIVE).
969 	 * zfs_init_libshare_arg will refresh the handle's cache if necessary.
970 	 * In this case we do not want to switch to per share initialization.
971 	 * Specify SA_INIT_SHARE_API to do full refresh, if refresh required.
972 	 */
973 	if ((hdl->libzfs_sharehdl != NULL) && (_sa_service != NULL) &&
974 	    (_sa_service(hdl->libzfs_sharehdl) ==
975 	    SA_INIT_SHARE_API_SELECTIVE)) {
976 		service = SA_INIT_SHARE_API;
977 	}
978 
979 	err = zfs_init_libshare_arg(hdl, service, (void *)name);
980 	if (err != SA_OK) {
981 		free(mntpt);	/* don't need the copy anymore */
982 		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
983 		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
984 		    name, _sa_errorstr(err)));
985 	}
986 
987 	share = zfs_sa_find_share(hdl->libzfs_sharehdl, mntpt);
988 	free(mntpt);	/* don't need the copy anymore */
989 
990 	if (share != NULL) {
991 		err = zfs_sa_disable_share(share, proto_table[proto].p_name);
992 		if (err != SA_OK) {
993 			return (zfs_error_fmt(hdl,
994 			    proto_table[proto].p_unshare_err,
995 			    dgettext(TEXT_DOMAIN, "cannot unshare '%s': %s"),
996 			    name, _sa_errorstr(err)));
997 		}
998 	} else {
999 		return (zfs_error_fmt(hdl, proto_table[proto].p_unshare_err,
1000 		    dgettext(TEXT_DOMAIN, "cannot unshare '%s': not found"),
1001 		    name));
1002 	}
1003 	return (0);
1004 }
1005 
1006 /*
1007  * Unshare the given filesystem.
1008  */
1009 int
1010 zfs_unshare_proto(zfs_handle_t *zhp, const char *mountpoint,
1011     zfs_share_proto_t *proto)
1012 {
1013 	libzfs_handle_t *hdl = zhp->zfs_hdl;
1014 	struct mnttab entry;
1015 	char *mntpt = NULL;
1016 
1017 	/* check to see if need to unmount the filesystem */
1018 	rewind(zhp->zfs_hdl->libzfs_mnttab);
1019 	if (mountpoint != NULL)
1020 		mountpoint = mntpt = zfs_strdup(hdl, mountpoint);
1021 
1022 	if (mountpoint != NULL || ((zfs_get_type(zhp) == ZFS_TYPE_FILESYSTEM) &&
1023 	    libzfs_mnttab_find(hdl, zfs_get_name(zhp), &entry) == 0)) {
1024 		zfs_share_proto_t *curr_proto;
1025 
1026 		if (mountpoint == NULL)
1027 			mntpt = zfs_strdup(zhp->zfs_hdl, entry.mnt_mountp);
1028 
1029 		for (curr_proto = proto; *curr_proto != PROTO_END;
1030 		    curr_proto++) {
1031 
1032 			if (is_shared(hdl, mntpt, *curr_proto) &&
1033 			    unshare_one(hdl, zhp->zfs_name,
1034 			    mntpt, *curr_proto) != 0) {
1035 				if (mntpt != NULL)
1036 					free(mntpt);
1037 				return (-1);
1038 			}
1039 		}
1040 	}
1041 	if (mntpt != NULL)
1042 		free(mntpt);
1043 
1044 	return (0);
1045 }
1046 
1047 int
1048 zfs_unshare_nfs(zfs_handle_t *zhp, const char *mountpoint)
1049 {
1050 	return (zfs_unshare_proto(zhp, mountpoint, nfs_only));
1051 }
1052 
1053 int
1054 zfs_unshare_smb(zfs_handle_t *zhp, const char *mountpoint)
1055 {
1056 	return (zfs_unshare_proto(zhp, mountpoint, smb_only));
1057 }
1058 
1059 /*
1060  * Same as zfs_unmountall(), but for NFS and SMB unshares.
1061  */
1062 int
1063 zfs_unshareall_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
1064 {
1065 	prop_changelist_t *clp;
1066 	int ret;
1067 
1068 	clp = changelist_gather(zhp, ZFS_PROP_SHARENFS, 0, 0);
1069 	if (clp == NULL)
1070 		return (-1);
1071 
1072 	ret = changelist_unshare(clp, proto);
1073 	changelist_free(clp);
1074 
1075 	return (ret);
1076 }
1077 
1078 int
1079 zfs_unshareall_nfs(zfs_handle_t *zhp)
1080 {
1081 	return (zfs_unshareall_proto(zhp, nfs_only));
1082 }
1083 
1084 int
1085 zfs_unshareall_smb(zfs_handle_t *zhp)
1086 {
1087 	return (zfs_unshareall_proto(zhp, smb_only));
1088 }
1089 
1090 int
1091 zfs_unshareall(zfs_handle_t *zhp)
1092 {
1093 	return (zfs_unshareall_proto(zhp, share_all_proto));
1094 }
1095 
1096 int
1097 zfs_unshareall_bypath(zfs_handle_t *zhp, const char *mountpoint)
1098 {
1099 	return (zfs_unshare_proto(zhp, mountpoint, share_all_proto));
1100 }
1101 
1102 /*
1103  * Remove the mountpoint associated with the current dataset, if necessary.
1104  * We only remove the underlying directory if:
1105  *
1106  *	- The mountpoint is not 'none' or 'legacy'
1107  *	- The mountpoint is non-empty
1108  *	- The mountpoint is the default or inherited
1109  *	- The 'zoned' property is set, or we're in a local zone
1110  *
1111  * Any other directories we leave alone.
1112  */
1113 void
1114 remove_mountpoint(zfs_handle_t *zhp)
1115 {
1116 	char mountpoint[ZFS_MAXPROPLEN];
1117 	zprop_source_t source;
1118 
1119 	if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
1120 	    &source))
1121 		return;
1122 
1123 	if (source == ZPROP_SRC_DEFAULT ||
1124 	    source == ZPROP_SRC_INHERITED) {
1125 		/*
1126 		 * Try to remove the directory, silently ignoring any errors.
1127 		 * The filesystem may have since been removed or moved around,
1128 		 * and this error isn't really useful to the administrator in
1129 		 * any way.
1130 		 */
1131 		(void) rmdir(mountpoint);
1132 	}
1133 }
1134 
1135 /*
1136  * Add the given zfs handle to the cb_handles array, dynamically reallocating
1137  * the array if it is out of space.
1138  */
1139 void
1140 libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
1141 {
1142 	if (cbp->cb_alloc == cbp->cb_used) {
1143 		size_t newsz;
1144 		zfs_handle_t **newhandles;
1145 
1146 		newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
1147 		newhandles = zfs_realloc(zhp->zfs_hdl,
1148 		    cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
1149 		    newsz * sizeof (zfs_handle_t *));
1150 		cbp->cb_handles = newhandles;
1151 		cbp->cb_alloc = newsz;
1152 	}
1153 	cbp->cb_handles[cbp->cb_used++] = zhp;
1154 }
1155 
1156 /*
1157  * Recursive helper function used during file system enumeration
1158  */
1159 static int
1160 zfs_iter_cb(zfs_handle_t *zhp, void *data)
1161 {
1162 	get_all_cb_t *cbp = data;
1163 
1164 	if (!(zfs_get_type(zhp) & ZFS_TYPE_FILESYSTEM)) {
1165 		zfs_close(zhp);
1166 		return (0);
1167 	}
1168 
1169 	if (zfs_prop_get_int(zhp, ZFS_PROP_CANMOUNT) == ZFS_CANMOUNT_NOAUTO) {
1170 		zfs_close(zhp);
1171 		return (0);
1172 	}
1173 
1174 	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
1175 	    ZFS_KEYSTATUS_UNAVAILABLE) {
1176 		zfs_close(zhp);
1177 		return (0);
1178 	}
1179 
1180 	/*
1181 	 * If this filesystem is inconsistent and has a receive resume
1182 	 * token, we can not mount it.
1183 	 */
1184 	if (zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
1185 	    zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
1186 	    NULL, 0, NULL, NULL, 0, B_TRUE) == 0) {
1187 		zfs_close(zhp);
1188 		return (0);
1189 	}
1190 
1191 	libzfs_add_handle(cbp, zhp);
1192 	if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) {
1193 		zfs_close(zhp);
1194 		return (-1);
1195 	}
1196 	return (0);
1197 }
1198 
1199 /*
1200  * Sort comparator that compares two mountpoint paths. We sort these paths so
1201  * that subdirectories immediately follow their parents. This means that we
1202  * effectively treat the '/' character as the lowest value non-nul char.
1203  * Since filesystems from non-global zones can have the same mountpoint
1204  * as other filesystems, the comparator sorts global zone filesystems to
1205  * the top of the list. This means that the global zone will traverse the
1206  * filesystem list in the correct order and can stop when it sees the
1207  * first zoned filesystem. In a non-global zone, only the delegated
1208  * filesystems are seen.
1209  *
1210  * An example sorted list using this comparator would look like:
1211  *
1212  * /foo
1213  * /foo/bar
1214  * /foo/bar/baz
1215  * /foo/baz
1216  * /foo.bar
1217  * /foo (NGZ1)
1218  * /foo (NGZ2)
1219  *
1220  * The mounting code depends on this ordering to deterministically iterate
1221  * over filesystems in order to spawn parallel mount tasks.
1222  */
1223 static int
1224 mountpoint_cmp(const void *arga, const void *argb)
1225 {
1226 	zfs_handle_t *const *zap = arga;
1227 	zfs_handle_t *za = *zap;
1228 	zfs_handle_t *const *zbp = argb;
1229 	zfs_handle_t *zb = *zbp;
1230 	char mounta[MAXPATHLEN];
1231 	char mountb[MAXPATHLEN];
1232 	const char *a = mounta;
1233 	const char *b = mountb;
1234 	boolean_t gota, gotb;
1235 	uint64_t zoneda, zonedb;
1236 
1237 	zoneda = zfs_prop_get_int(za, ZFS_PROP_ZONED);
1238 	zonedb = zfs_prop_get_int(zb, ZFS_PROP_ZONED);
1239 	if (zoneda && !zonedb)
1240 		return (1);
1241 	if (!zoneda && zonedb)
1242 		return (-1);
1243 
1244 	gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
1245 	if (gota) {
1246 		verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
1247 		    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
1248 	}
1249 	gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
1250 	if (gotb) {
1251 		verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
1252 		    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
1253 	}
1254 
1255 	if (gota && gotb) {
1256 		while (*a != '\0' && (*a == *b)) {
1257 			a++;
1258 			b++;
1259 		}
1260 		if (*a == *b)
1261 			return (0);
1262 		if (*a == '\0')
1263 			return (-1);
1264 		if (*b == '\0')
1265 			return (1);
1266 		if (*a == '/')
1267 			return (-1);
1268 		if (*b == '/')
1269 			return (1);
1270 		return (*a < *b ? -1 : *a > *b);
1271 	}
1272 
1273 	if (gota)
1274 		return (-1);
1275 	if (gotb)
1276 		return (1);
1277 
1278 	/*
1279 	 * If neither filesystem has a mountpoint, revert to sorting by
1280 	 * dataset name.
1281 	 */
1282 	return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
1283 }
1284 
1285 /*
1286  * Return true if path2 is a child of path1.
1287  */
1288 static boolean_t
1289 libzfs_path_contains(const char *path1, const char *path2)
1290 {
1291 	return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/');
1292 }
1293 
1294 /*
1295  * Given a mountpoint specified by idx in the handles array, find the first
1296  * non-descendent of that mountpoint and return its index. Descendant paths
1297  * start with the parent's path. This function relies on the ordering
1298  * enforced by mountpoint_cmp().
1299  */
1300 static int
1301 non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
1302 {
1303 	char parent[ZFS_MAXPROPLEN];
1304 	char child[ZFS_MAXPROPLEN];
1305 	int i;
1306 
1307 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
1308 	    sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
1309 
1310 	for (i = idx + 1; i < num_handles; i++) {
1311 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
1312 		    sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1313 		if (!libzfs_path_contains(parent, child))
1314 			break;
1315 	}
1316 	return (i);
1317 }
1318 
1319 typedef struct mnt_param {
1320 	libzfs_handle_t	*mnt_hdl;
1321 	zfs_taskq_t	*mnt_tq;
1322 	zfs_handle_t	**mnt_zhps; /* filesystems to mount */
1323 	size_t		mnt_num_handles;
1324 	int		mnt_idx;	/* Index of selected entry to mount */
1325 	zfs_iter_f	mnt_func;
1326 	void		*mnt_data;
1327 } mnt_param_t;
1328 
1329 /*
1330  * Allocate and populate the parameter struct for mount function, and
1331  * schedule mounting of the entry selected by idx.
1332  */
1333 static void
1334 zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
1335     size_t num_handles, int idx, zfs_iter_f func, void *data, zfs_taskq_t *tq)
1336 {
1337 	mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
1338 
1339 	mnt_param->mnt_hdl = hdl;
1340 	mnt_param->mnt_tq = tq;
1341 	mnt_param->mnt_zhps = handles;
1342 	mnt_param->mnt_num_handles = num_handles;
1343 	mnt_param->mnt_idx = idx;
1344 	mnt_param->mnt_func = func;
1345 	mnt_param->mnt_data = data;
1346 
1347 	(void) zfs_taskq_dispatch(tq, zfs_mount_task, (void*)mnt_param,
1348 	    ZFS_TQ_SLEEP);
1349 }
1350 
/*
 * State shared by the mount and share callbacks during a call to
 * zpool_enable_datasets().
 */
typedef struct mount_state {
	/*
	 * Overall status: set to -1 as soon as any mount (or share) fails.
	 * Several threads may store to this field concurrently, but since
	 * the only value ever written by them is -1, no synchronization is
	 * needed.
	 */
	int		ms_mntstatus;
	int		ms_mntflags;	/* flags passed to zfs_mount() */
	const char	*ms_mntopts;	/* options passed to zfs_mount() */
} mount_state_t;
1365 
1366 static int
1367 zfs_mount_one(zfs_handle_t *zhp, void *arg)
1368 {
1369 	mount_state_t *ms = arg;
1370 	int ret = 0;
1371 
1372 	if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
1373 	    ZFS_KEYSTATUS_UNAVAILABLE)
1374 		return (0);
1375 
1376 	if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
1377 		ret = ms->ms_mntstatus = -1;
1378 	return (ret);
1379 }
1380 
1381 static int
1382 zfs_share_one(zfs_handle_t *zhp, void *arg)
1383 {
1384 	mount_state_t *ms = arg;
1385 	int ret = 0;
1386 
1387 	if (zfs_share(zhp) != 0)
1388 		ret = ms->ms_mntstatus = -1;
1389 	return (ret);
1390 }
1391 
1392 /*
1393  * Task queue function to mount one file system. On completion, it finds and
1394  * schedules its children to be mounted. This depends on the sorting done in
1395  * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
1396  * each descending from the previous) will have no parallelism since we always
1397  * have to wait for the parent to finish mounting before we can schedule
1398  * its children.
1399  */
1400 static void
1401 zfs_mount_task(void *arg)
1402 {
1403 	mnt_param_t *mp = arg;
1404 	int idx = mp->mnt_idx;
1405 	zfs_handle_t **handles = mp->mnt_zhps;
1406 	size_t num_handles = mp->mnt_num_handles;
1407 	char mountpoint[ZFS_MAXPROPLEN];
1408 
1409 	verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
1410 	    sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
1411 
1412 	if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
1413 		return;
1414 
1415 	/*
1416 	 * We dispatch tasks to mount filesystems with mountpoints underneath
1417 	 * this one. We do this by dispatching the next filesystem with a
1418 	 * descendant mountpoint of the one we just mounted, then skip all of
1419 	 * its descendants, dispatch the next descendant mountpoint, and so on.
1420 	 * The non_descendant_idx() function skips over filesystems that are
1421 	 * descendants of the filesystem we just dispatched.
1422 	 */
1423 	for (int i = idx + 1; i < num_handles;
1424 	    i = non_descendant_idx(handles, num_handles, i)) {
1425 		char child[ZFS_MAXPROPLEN];
1426 		verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
1427 		    child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
1428 
1429 		if (!libzfs_path_contains(mountpoint, child))
1430 			break; /* not a descendant, return */
1431 		zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
1432 		    mp->mnt_func, mp->mnt_data, mp->mnt_tq);
1433 	}
1434 	free(mp);
1435 }
1436 
1437 /*
1438  * Issue the func callback for each ZFS handle contained in the handles
1439  * array. This function is used to mount all datasets, and so this function
1440  * guarantees that filesystems for parent mountpoints are called before their
1441  * children. As such, before issuing any callbacks, we first sort the array
1442  * of handles by mountpoint.
1443  *
1444  * Callbacks are issued in one of two ways:
1445  *
1446  * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
1447  *    environment variable is set, then we issue callbacks sequentially.
1448  *
1449  * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
1450  *    environment variable is not set, then we use a taskq to dispatch threads
1451  *    to mount filesystems is parallel. This function dispatches tasks to mount
1452  *    the filesystems at the top-level mountpoints, and these tasks in turn
1453  *    are responsible for recursively mounting filesystems in their children
1454  *    mountpoints.
1455  */
1456 void
1457 zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
1458     size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
1459 {
1460 	zoneid_t zoneid = getzoneid();
1461 
1462 	/*
1463 	 * The ZFS_SERIAL_MOUNT environment variable is an undocumented
1464 	 * variable that can be used as a convenience to do a/b comparison
1465 	 * of serial vs. parallel mounting.
1466 	 */
1467 	boolean_t serial_mount = !parallel ||
1468 	    (getenv("ZFS_SERIAL_MOUNT") != NULL);
1469 
1470 	/*
1471 	 * Sort the datasets by mountpoint. See mountpoint_cmp for details
1472 	 * of how these are sorted.
1473 	 */
1474 	qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
1475 
1476 	if (serial_mount) {
1477 		for (int i = 0; i < num_handles; i++) {
1478 			func(handles[i], data);
1479 		}
1480 		return;
1481 	}
1482 
1483 	/*
1484 	 * Issue the callback function for each dataset using a parallel
1485 	 * algorithm that uses a taskq to manage threads.
1486 	 */
1487 	zfs_taskq_t *tq = zfs_taskq_create("mount_taskq", mount_tq_nthr, 0,
1488 	    mount_tq_nthr, mount_tq_nthr, ZFS_TASKQ_PREPOPULATE);
1489 
1490 	/*
1491 	 * There may be multiple "top level" mountpoints outside of the pool's
1492 	 * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
1493 	 * these.
1494 	 */
1495 	for (int i = 0; i < num_handles;
1496 	    i = non_descendant_idx(handles, num_handles, i)) {
1497 		/*
1498 		 * Since the mountpoints have been sorted so that the zoned
1499 		 * filesystems are at the end, a zoned filesystem seen from
1500 		 * the global zone means that we're done.
1501 		 */
1502 		if (zoneid == GLOBAL_ZONEID &&
1503 		    zfs_prop_get_int(handles[i], ZFS_PROP_ZONED))
1504 			break;
1505 		zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
1506 		    tq);
1507 	}
1508 
1509 	zfs_taskq_wait(tq); /* wait for all scheduled mounts to complete */
1510 	zfs_taskq_destroy(tq);
1511 }
1512 
1513 /*
1514  * Mount and share all datasets within the given pool.  This assumes that no
1515  * datasets within the pool are currently mounted.
1516  */
1517 #pragma weak zpool_mount_datasets = zpool_enable_datasets
1518 int
1519 zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
1520 {
1521 	get_all_cb_t cb = { 0 };
1522 	mount_state_t ms = { 0 };
1523 	zfs_handle_t *zfsp;
1524 	sa_init_selective_arg_t sharearg;
1525 	int ret = 0;
1526 
1527 	if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
1528 	    ZFS_TYPE_DATASET)) == NULL)
1529 		goto out;
1530 
1531 
1532 	/*
1533 	 * Gather all non-snapshot datasets within the pool. Start by adding
1534 	 * the root filesystem for this pool to the list, and then iterate
1535 	 * over all child filesystems.
1536 	 */
1537 	libzfs_add_handle(&cb, zfsp);
1538 	if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0)
1539 		goto out;
1540 
1541 	ms.ms_mntopts = mntopts;
1542 	ms.ms_mntflags = flags;
1543 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1544 	    zfs_mount_one, &ms, B_TRUE);
1545 	if (ms.ms_mntstatus != 0)
1546 		ret = ms.ms_mntstatus;
1547 
1548 	/*
1549 	 * Initialize libshare SA_INIT_SHARE_API_SELECTIVE here
1550 	 * to avoid unnecessary load/unload of the libshare API
1551 	 * per shared dataset downstream.
1552 	 */
1553 	sharearg.zhandle_arr = cb.cb_handles;
1554 	sharearg.zhandle_len = cb.cb_used;
1555 	if ((ret = zfs_init_libshare_arg(zhp->zpool_hdl,
1556 	    SA_INIT_SHARE_API_SELECTIVE, &sharearg)) != 0)
1557 		goto out;
1558 
1559 	ms.ms_mntstatus = 0;
1560 	zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
1561 	    zfs_share_one, &ms, B_FALSE);
1562 	if (ms.ms_mntstatus != 0)
1563 		ret = ms.ms_mntstatus;
1564 
1565 out:
1566 	for (int i = 0; i < cb.cb_used; i++)
1567 		zfs_close(cb.cb_handles[i]);
1568 	free(cb.cb_handles);
1569 
1570 	return (ret);
1571 }
1572 
/*
 * qsort() comparator for an array of C strings.  The operands are passed to
 * strcmp() in reverse, which yields a descending sort.
 */
static int
mountpoint_compare(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return (strcmp(*rhs, *lhs));
}
1581 
1582 /* alias for 2002/240 */
1583 #pragma weak zpool_unmount_datasets = zpool_disable_datasets
1584 /*
1585  * Unshare and unmount all datasets within the given pool.  We don't want to
1586  * rely on traversing the DSL to discover the filesystems within the pool,
1587  * because this may be expensive (if not all of them are mounted), and can fail
1588  * arbitrarily (on I/O error, for example).  Instead, we walk /etc/mnttab and
1589  * gather all the filesystems that are currently mounted.
1590  */
1591 int
1592 zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
1593 {
1594 	int used, alloc;
1595 	struct mnttab entry;
1596 	size_t namelen;
1597 	char **mountpoints = NULL;
1598 	zfs_handle_t **datasets = NULL;
1599 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1600 	int i;
1601 	int ret = -1;
1602 	int flags = (force ? MS_FORCE : 0);
1603 	sa_init_selective_arg_t sharearg;
1604 
1605 	namelen = strlen(zhp->zpool_name);
1606 
1607 	rewind(hdl->libzfs_mnttab);
1608 	used = alloc = 0;
1609 	while (getmntent(hdl->libzfs_mnttab, &entry) == 0) {
1610 		/*
1611 		 * Ignore non-ZFS entries.
1612 		 */
1613 		if (entry.mnt_fstype == NULL ||
1614 		    strcmp(entry.mnt_fstype, MNTTYPE_ZFS) != 0)
1615 			continue;
1616 
1617 		/*
1618 		 * Ignore filesystems not within this pool.
1619 		 */
1620 		if (entry.mnt_mountp == NULL ||
1621 		    strncmp(entry.mnt_special, zhp->zpool_name, namelen) != 0 ||
1622 		    (entry.mnt_special[namelen] != '/' &&
1623 		    entry.mnt_special[namelen] != '\0'))
1624 			continue;
1625 
1626 		/*
1627 		 * At this point we've found a filesystem within our pool.  Add
1628 		 * it to our growing list.
1629 		 */
1630 		if (used == alloc) {
1631 			if (alloc == 0) {
1632 				if ((mountpoints = zfs_alloc(hdl,
1633 				    8 * sizeof (void *))) == NULL)
1634 					goto out;
1635 
1636 				if ((datasets = zfs_alloc(hdl,
1637 				    8 * sizeof (void *))) == NULL)
1638 					goto out;
1639 
1640 				alloc = 8;
1641 			} else {
1642 				void *ptr;
1643 
1644 				if ((ptr = zfs_realloc(hdl, mountpoints,
1645 				    alloc * sizeof (void *),
1646 				    alloc * 2 * sizeof (void *))) == NULL)
1647 					goto out;
1648 				mountpoints = ptr;
1649 
1650 				if ((ptr = zfs_realloc(hdl, datasets,
1651 				    alloc * sizeof (void *),
1652 				    alloc * 2 * sizeof (void *))) == NULL)
1653 					goto out;
1654 				datasets = ptr;
1655 
1656 				alloc *= 2;
1657 			}
1658 		}
1659 
1660 		if ((mountpoints[used] = zfs_strdup(hdl,
1661 		    entry.mnt_mountp)) == NULL)
1662 			goto out;
1663 
1664 		/*
1665 		 * This is allowed to fail, in case there is some I/O error.  It
1666 		 * is only used to determine if we need to remove the underlying
1667 		 * mountpoint, so failure is not fatal.
1668 		 */
1669 		datasets[used] = make_dataset_handle(hdl, entry.mnt_special);
1670 
1671 		used++;
1672 	}
1673 
1674 	/*
1675 	 * At this point, we have the entire list of filesystems, so sort it by
1676 	 * mountpoint.
1677 	 */
1678 	sharearg.zhandle_arr = datasets;
1679 	sharearg.zhandle_len = used;
1680 	ret = zfs_init_libshare_arg(hdl, SA_INIT_SHARE_API_SELECTIVE,
1681 	    &sharearg);
1682 	if (ret != 0)
1683 		goto out;
1684 	qsort(mountpoints, used, sizeof (char *), mountpoint_compare);
1685 
1686 	/*
1687 	 * Walk through and first unshare everything.
1688 	 */
1689 	for (i = 0; i < used; i++) {
1690 		zfs_share_proto_t *curr_proto;
1691 		for (curr_proto = share_all_proto; *curr_proto != PROTO_END;
1692 		    curr_proto++) {
1693 			if (is_shared(hdl, mountpoints[i], *curr_proto) &&
1694 			    unshare_one(hdl, mountpoints[i],
1695 			    mountpoints[i], *curr_proto) != 0)
1696 				goto out;
1697 		}
1698 	}
1699 
1700 	/*
1701 	 * Now unmount everything, removing the underlying directories as
1702 	 * appropriate.
1703 	 */
1704 	for (i = 0; i < used; i++) {
1705 		if (unmount_one(hdl, mountpoints[i], flags) != 0)
1706 			goto out;
1707 	}
1708 
1709 	for (i = 0; i < used; i++) {
1710 		if (datasets[i])
1711 			remove_mountpoint(datasets[i]);
1712 	}
1713 
1714 	ret = 0;
1715 out:
1716 	for (i = 0; i < used; i++) {
1717 		if (datasets[i])
1718 			zfs_close(datasets[i]);
1719 		free(mountpoints[i]);
1720 	}
1721 	free(datasets);
1722 	free(mountpoints);
1723 
1724 	return (ret);
1725 }
1726