xref: /illumos-gate/usr/src/cmd/zoneadm/zfs.c (revision b70bf3ee79d81df3ccc36e553e0ff11049a2b51a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26  * Copyright (c) 2016 Martin Matuska. All rights reserved.
27  * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
28  */
29 
30 /*
31  * This file contains the functions used to support the ZFS integration
32  * with zones.  This includes validation (e.g. zonecfg dataset), cloning,
33  * file system creation and destruction.
34  */
35 
36 #include <stdio.h>
37 #include <errno.h>
38 #include <unistd.h>
39 #include <string.h>
40 #include <locale.h>
41 #include <libintl.h>
42 #include <sys/stat.h>
43 #include <sys/statvfs.h>
44 #include <libgen.h>
45 #include <libzonecfg.h>
46 #include <sys/mnttab.h>
47 #include <libzfs.h>
48 #include <sys/mntent.h>
49 #include <values.h>
50 #include <strings.h>
51 #include <assert.h>
52 
53 #include "zoneadm.h"
54 
/*
 * libzfs library handle passed to the libzfs calls made in this file
 * (zfs_open, zfs_iter_root, zfs_snapshot, zfs_create, ...).
 * NOTE(review): it is not initialized anywhere in the visible code;
 * presumably it is set up elsewhere before these functions run -- confirm.
 */
libzfs_handle_t *g_zfs;
56 
57 typedef struct zfs_mount_data {
58 	char		*match_name;
59 	zfs_handle_t	*match_handle;
60 } zfs_mount_data_t;
61 
/* Bookkeeping shared by the get_snap_max() and rename_snap() callbacks. */
struct zfs_snapshot_data {
	char	*match_name;	/* zonename@SUNWzone */
	int	len;		/* strlen of match_name */
	int	max;		/* highest digit appended to snap name */
	int	num;		/* number of snapshots to rename */
	int	cntr;		/* counter for renaming snapshots */
};
typedef struct zfs_snapshot_data zfs_snapshot_data_t;
69 
70 typedef struct clone_data {
71 	zfs_handle_t	*clone_zhp;	/* clone dataset to promote */
72 	time_t		origin_creation; /* snapshot creation time of clone */
73 	const char	*snapshot;	/* snapshot of dataset being demoted */
74 } clone_data_t;
75 
76 /*
77  * A ZFS file system iterator call-back function which returns the
78  * zfs_handle_t for a ZFS file system on the specified mount point.
79  */
80 static int
81 match_mountpoint(zfs_handle_t *zhp, void *data)
82 {
83 	int			res;
84 	zfs_mount_data_t	*cbp;
85 	char			mp[ZFS_MAXPROPLEN];
86 
87 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
88 		zfs_close(zhp);
89 		return (0);
90 	}
91 
92 	/*
93 	 * First check if the dataset is mounted.
94 	 * If not, move on to iterating child datasets which may still be
95 	 * mounted.
96 	 */
97 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
98 	    0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
99 		goto children;
100 	}
101 
102 	/*
103 	 * Now check mount point.
104 	 * Move on to children if it cannot be retrieved.
105 	 */
106 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
107 	    0, B_FALSE) != 0) {
108 		goto children;
109 	}
110 
111 	cbp = (zfs_mount_data_t *)data;
112 
113 	if (strcmp(mp, "legacy") == 0) {
114 		/* If legacy, must look in mnttab for mountpoint. */
115 		FILE		*fp;
116 		struct mnttab	entry;
117 		const char	*nm;
118 
119 		nm = zfs_get_name(zhp);
120 		if ((fp = fopen(MNTTAB, "r")) == NULL) {
121 			zfs_close(zhp);
122 			return (0);
123 		}
124 
125 		while (getmntent(fp, &entry) == 0) {
126 			if (strcmp(nm, entry.mnt_special) == 0) {
127 				if (strcmp(entry.mnt_mountp, cbp->match_name)
128 				    == 0) {
129 					(void) fclose(fp);
130 					cbp->match_handle = zhp;
131 					return (1);
132 				}
133 				break;
134 			}
135 		}
136 		(void) fclose(fp);
137 
138 	} else if (strcmp(mp, cbp->match_name) == 0) {
139 		cbp->match_handle = zhp;
140 		return (1);
141 	}
142 
143 children:
144 	/* Iterate over any nested datasets. */
145 	res = zfs_iter_filesystems(zhp, match_mountpoint, data);
146 	zfs_close(zhp);
147 	return (res);
148 }
149 
150 /*
151  * Get ZFS handle for the specified mount point.
152  */
153 static zfs_handle_t *
154 mount2zhandle(char *mountpoint)
155 {
156 	zfs_mount_data_t	cb;
157 
158 	cb.match_name = mountpoint;
159 	cb.match_handle = NULL;
160 	(void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
161 	return (cb.match_handle);
162 }
163 
164 /*
165  * Check if there is already a file system (zfs or any other type) mounted on
166  * path.
167  */
168 static boolean_t
169 is_mountpnt(char *path)
170 {
171 	FILE		*fp;
172 	struct mnttab	entry;
173 
174 	if ((fp = fopen(MNTTAB, "r")) == NULL)
175 		return (B_FALSE);
176 
177 	while (getmntent(fp, &entry) == 0) {
178 		if (strcmp(path, entry.mnt_mountp) == 0) {
179 			(void) fclose(fp);
180 			return (B_TRUE);
181 		}
182 	}
183 
184 	(void) fclose(fp);
185 	return (B_FALSE);
186 }
187 
188 /*
189  * Run the brand's pre-snapshot hook before we take a ZFS snapshot of the zone.
190  */
191 static int
192 pre_snapshot(char *presnapbuf)
193 {
194 	int status;
195 
196 	/* No brand-specific handler */
197 	if (presnapbuf[0] == '\0')
198 		return (Z_OK);
199 
200 	/* Run the hook */
201 	status = do_subproc(presnapbuf);
202 	if ((status = subproc_status(gettext("brand-specific presnapshot"),
203 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
204 		return (Z_ERR);
205 
206 	return (Z_OK);
207 }
208 
209 /*
210  * Run the brand's post-snapshot hook after we take a ZFS snapshot of the zone.
211  */
212 static int
213 post_snapshot(char *postsnapbuf)
214 {
215 	int status;
216 
217 	/* No brand-specific handler */
218 	if (postsnapbuf[0] == '\0')
219 		return (Z_OK);
220 
221 	/* Run the hook */
222 	status = do_subproc(postsnapbuf);
223 	if ((status = subproc_status(gettext("brand-specific postsnapshot"),
224 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
225 		return (Z_ERR);
226 
227 	return (Z_OK);
228 }
229 
230 /*
231  * This is a ZFS snapshot iterator call-back function which returns the
232  * highest number of SUNWzone snapshots that have been taken.
233  */
234 static int
235 get_snap_max(zfs_handle_t *zhp, void *data)
236 {
237 	int			res;
238 	zfs_snapshot_data_t	*cbp;
239 
240 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
241 		zfs_close(zhp);
242 		return (0);
243 	}
244 
245 	cbp = (zfs_snapshot_data_t *)data;
246 
247 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) == 0) {
248 		char	*nump;
249 		int	num;
250 
251 		cbp->num++;
252 		nump = (char *)(zfs_get_name(zhp) + cbp->len);
253 		num = atoi(nump);
254 		if (num > cbp->max)
255 			cbp->max = num;
256 	}
257 
258 	res = zfs_iter_snapshots(zhp, B_FALSE, get_snap_max, data);
259 	zfs_close(zhp);
260 	return (res);
261 }
262 
263 /*
264  * Take a ZFS snapshot to be used for cloning the zone.
265  */
266 static int
267 take_snapshot(zfs_handle_t *zhp, char *snapshot_name, int snap_size,
268     char *presnapbuf, char *postsnapbuf)
269 {
270 	int			res;
271 	char			template[ZFS_MAX_DATASET_NAME_LEN];
272 	zfs_snapshot_data_t	cb;
273 
274 	/*
275 	 * First we need to figure out the next available name for the
276 	 * zone snapshot.  Look through the list of zones snapshots for
277 	 * this file system to determine the maximum snapshot name.
278 	 */
279 	if (snprintf(template, sizeof (template), "%s@SUNWzone",
280 	    zfs_get_name(zhp)) >=  sizeof (template))
281 		return (Z_ERR);
282 
283 	cb.match_name = template;
284 	cb.len = strlen(template);
285 	cb.max = 0;
286 
287 	if (zfs_iter_snapshots(zhp, B_FALSE, get_snap_max, &cb) != 0)
288 		return (Z_ERR);
289 
290 	cb.max++;
291 
292 	if (snprintf(snapshot_name, snap_size, "%s@SUNWzone%d",
293 	    zfs_get_name(zhp), cb.max) >= snap_size)
294 		return (Z_ERR);
295 
296 	if (pre_snapshot(presnapbuf) != Z_OK)
297 		return (Z_ERR);
298 	res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE, NULL);
299 	if (post_snapshot(postsnapbuf) != Z_OK)
300 		return (Z_ERR);
301 
302 	if (res != 0)
303 		return (Z_ERR);
304 	return (Z_OK);
305 }
306 
307 /*
308  * We are using an explicit snapshot from some earlier point in time so
309  * we need to validate it.  Run the brand specific hook.
310  */
311 static int
312 validate_snapshot(char *snapshot_name, char *snap_path, char *validsnapbuf)
313 {
314 	int status;
315 	char cmdbuf[MAXPATHLEN];
316 
317 	/* No brand-specific handler */
318 	if (validsnapbuf[0] == '\0')
319 		return (Z_OK);
320 
321 	/* pass args - snapshot_name & snap_path */
322 	if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %s %s", validsnapbuf,
323 	    snapshot_name, snap_path) >= sizeof (cmdbuf)) {
324 		zerror("Command line too long");
325 		return (Z_ERR);
326 	}
327 
328 	/* Run the hook */
329 	status = do_subproc(cmdbuf);
330 	if ((status = subproc_status(gettext("brand-specific validatesnapshot"),
331 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
332 		return (Z_ERR);
333 
334 	return (Z_OK);
335 }
336 
337 /*
338  * Remove the sw inventory file from inside this zonepath that we picked up out
339  * of the snapshot.
340  */
341 static int
342 clean_out_clone()
343 {
344 	int err;
345 	zone_dochandle_t handle;
346 
347 	if ((handle = zonecfg_init_handle()) == NULL) {
348 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
349 		return (Z_ERR);
350 	}
351 
352 	if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
353 		errno = err;
354 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
355 		zonecfg_fini_handle(handle);
356 		return (Z_ERR);
357 	}
358 
359 	zonecfg_rm_detached(handle, B_FALSE);
360 	zonecfg_fini_handle(handle);
361 
362 	return (Z_OK);
363 }
364 
365 /*
366  * Make a ZFS clone on zonepath from snapshot_name.
367  */
368 static int
369 clone_snap(char *snapshot_name, char *zonepath)
370 {
371 	int		res = Z_OK;
372 	int		err;
373 	zfs_handle_t	*zhp;
374 	zfs_handle_t	*clone;
375 	nvlist_t	*props = NULL;
376 
377 	if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
378 		return (Z_NO_ENTRY);
379 
380 	(void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
381 
382 	/*
383 	 * We turn off zfs SHARENFS and SHARESMB properties on the
384 	 * zoneroot dataset in order to prevent the GZ from sharing
385 	 * NGZ data by accident.
386 	 */
387 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
388 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
389 	    "off") != 0) ||
390 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
391 	    "off") != 0)) {
392 		nvlist_free(props);
393 		(void) fprintf(stderr, gettext("could not create ZFS clone "
394 		    "%s: out of memory\n"), zonepath);
395 		return (Z_ERR);
396 	}
397 
398 	err = zfs_clone(zhp, zonepath, props);
399 	zfs_close(zhp);
400 
401 	nvlist_free(props);
402 
403 	if (err != 0)
404 		return (Z_ERR);
405 
406 	/* create the mountpoint if necessary */
407 	if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_DATASET)) == NULL)
408 		return (Z_ERR);
409 
410 	/*
411 	 * The clone has been created so we need to print a diagnostic
412 	 * message if one of the following steps fails for some reason.
413 	 */
414 	if (zfs_mount(clone, NULL, 0) != 0) {
415 		(void) fprintf(stderr, gettext("could not mount ZFS clone "
416 		    "%s\n"), zfs_get_name(clone));
417 		res = Z_ERR;
418 
419 	} else if (clean_out_clone() != Z_OK) {
420 		(void) fprintf(stderr, gettext("could not remove the "
421 		    "software inventory from ZFS clone %s\n"),
422 		    zfs_get_name(clone));
423 		res = Z_ERR;
424 	}
425 
426 	zfs_close(clone);
427 	return (res);
428 }
429 
430 /*
431  * This function takes a zonepath and attempts to determine what the ZFS
432  * file system name (not mountpoint) should be for that path.  We do not
433  * assume that zonepath is an existing directory or ZFS fs since we use
434  * this function as part of the process of creating a new ZFS fs or clone.
435  *
436  * The way this works is that we look at the parent directory of the zonepath
437  * to see if it is a ZFS fs.  If it is, we get the name of that ZFS fs and
438  * append the last component of the zonepath to generate the ZFS name for the
439  * zonepath.  This matches the algorithm that ZFS uses for automatically
440  * mounting a new fs after it is created.
441  *
442  * Although a ZFS fs can be mounted anywhere, we don't worry about handling
443  * all of the complexity that a user could possibly configure with arbitrary
444  * mounts since there is no way to generate a ZFS name from a random path in
445  * the file system.  We only try to handle the automatic mounts that ZFS does
446  * for each file system.  ZFS restricts this so that a new fs must be created
447  * in an existing parent ZFS fs.  It then automatically mounts the new fs
448  * directly under the mountpoint for the parent fs using the last component
449  * of the name as the mountpoint directory.
450  *
451  * For example:
452  *    Name			Mountpoint
453  *    space/eng/dev/test/zone1	/project1/eng/dev/test/zone1
454  *
455  * Return Z_OK if the path mapped to a ZFS file system name, otherwise return
456  * Z_ERR.
457  */
458 static int
459 path2name(char *zonepath, char *zfs_name, int len)
460 {
461 	int		res;
462 	char		*bnm, *dnm, *dname, *bname;
463 	zfs_handle_t	*zhp;
464 	struct stat	stbuf;
465 
466 	/*
467 	 * We need two tmp strings to handle paths directly in / (e.g. /foo)
468 	 * since dirname will overwrite the first char after "/" in this case.
469 	 */
470 	if ((bnm = strdup(zonepath)) == NULL)
471 		return (Z_ERR);
472 
473 	if ((dnm = strdup(zonepath)) == NULL) {
474 		free(bnm);
475 		return (Z_ERR);
476 	}
477 
478 	bname = basename(bnm);
479 	dname = dirname(dnm);
480 
481 	/*
482 	 * This is a quick test to save iterating over all of the zfs datasets
483 	 * on the system (which can be a lot).  If the parent dir is not in a
484 	 * ZFS fs, then we're done.
485 	 */
486 	if (stat(dname, &stbuf) != 0 || !S_ISDIR(stbuf.st_mode) ||
487 	    strcmp(stbuf.st_fstype, MNTTYPE_ZFS) != 0) {
488 		free(bnm);
489 		free(dnm);
490 		return (Z_ERR);
491 	}
492 
493 	/* See if the parent directory is its own ZFS dataset. */
494 	if ((zhp = mount2zhandle(dname)) == NULL) {
495 		/*
496 		 * The parent is not a ZFS dataset so we can't automatically
497 		 * create a dataset on the given path.
498 		 */
499 		free(bnm);
500 		free(dnm);
501 		return (Z_ERR);
502 	}
503 
504 	res = snprintf(zfs_name, len, "%s/%s", zfs_get_name(zhp), bname);
505 
506 	free(bnm);
507 	free(dnm);
508 	zfs_close(zhp);
509 	if (res >= len)
510 		return (Z_ERR);
511 
512 	return (Z_OK);
513 }
514 
515 /*
516  * A ZFS file system iterator call-back function used to determine if the
517  * file system has dependents (snapshots & clones).
518  */
519 /* ARGSUSED */
520 static int
521 has_dependent(zfs_handle_t *zhp, void *data)
522 {
523 	zfs_close(zhp);
524 	return (1);
525 }
526 
527 /*
528  * Given a snapshot name, get the file system path where the snapshot lives.
529  * A snapshot name is of the form fs_name@snap_name.  For example, snapshot
530  * pl/zones/z1@SUNWzone1 would have a path of
531  * /pl/zones/z1/.zfs/snapshot/SUNWzone1.
532  */
533 static int
534 snap2path(char *snap_name, char *path, int len)
535 {
536 	char		*p;
537 	zfs_handle_t	*zhp;
538 	char		mp[ZFS_MAXPROPLEN];
539 
540 	if ((p = strrchr(snap_name, '@')) == NULL)
541 		return (Z_ERR);
542 
543 	/* Get the file system name from the snap_name. */
544 	*p = '\0';
545 	zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_DATASET);
546 	*p = '@';
547 	if (zhp == NULL)
548 		return (Z_ERR);
549 
550 	/* Get the file system mount point. */
551 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
552 	    0, B_FALSE) != 0) {
553 		zfs_close(zhp);
554 		return (Z_ERR);
555 	}
556 	zfs_close(zhp);
557 
558 	p++;
559 	if (snprintf(path, len, "%s/.zfs/snapshot/%s", mp, p) >= len)
560 		return (Z_ERR);
561 
562 	return (Z_OK);
563 }
564 
565 /*
566  * This callback function is used to iterate through a snapshot's dependencies
567  * to find a filesystem that is a direct clone of the snapshot being iterated.
568  */
569 static int
570 get_direct_clone(zfs_handle_t *zhp, void *data)
571 {
572 	clone_data_t	*cd = data;
573 	char		origin[ZFS_MAX_DATASET_NAME_LEN];
574 	char		ds_path[ZFS_MAX_DATASET_NAME_LEN];
575 
576 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
577 		zfs_close(zhp);
578 		return (0);
579 	}
580 
581 	(void) strlcpy(ds_path, zfs_get_name(zhp), sizeof (ds_path));
582 
583 	/* Make sure this is a direct clone of the snapshot we're iterating. */
584 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
585 	    NULL, 0, B_FALSE) != 0 || strcmp(origin, cd->snapshot) != 0) {
586 		zfs_close(zhp);
587 		return (0);
588 	}
589 
590 	if (cd->clone_zhp != NULL)
591 		zfs_close(cd->clone_zhp);
592 
593 	cd->clone_zhp = zhp;
594 	return (1);
595 }
596 
597 /*
598  * A ZFS file system iterator call-back function used to determine the clone
599  * to promote.  This function finds the youngest (i.e. last one taken) snapshot
600  * that has a clone.  If found, it returns a reference to that clone in the
601  * callback data.
602  */
603 static int
604 find_clone(zfs_handle_t *zhp, void *data)
605 {
606 	clone_data_t	*cd = data;
607 	time_t		snap_creation;
608 	int		zret = 0;
609 
610 	/* If snapshot has no clones, skip it */
611 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) == 0) {
612 		zfs_close(zhp);
613 		return (0);
614 	}
615 
616 	cd->snapshot = zfs_get_name(zhp);
617 
618 	/* Get the creation time of this snapshot */
619 	snap_creation = (time_t)zfs_prop_get_int(zhp, ZFS_PROP_CREATION);
620 
621 	/*
622 	 * If this snapshot's creation time is greater than (i.e. younger than)
623 	 * the current youngest snapshot found, iterate this snapshot to
624 	 * get the right clone.
625 	 */
626 	if (snap_creation >= cd->origin_creation) {
627 		/*
628 		 * Iterate the dependents of this snapshot to find a clone
629 		 * that's a direct dependent.
630 		 */
631 		if ((zret = zfs_iter_dependents(zhp, B_FALSE, get_direct_clone,
632 		    cd)) == -1) {
633 			zfs_close(zhp);
634 			return (1);
635 		} else if (zret == 1) {
636 			/*
637 			 * Found a clone, update the origin_creation time
638 			 * in the callback data.
639 			 */
640 			cd->origin_creation = snap_creation;
641 		}
642 	}
643 
644 	zfs_close(zhp);
645 	return (0);
646 }
647 
648 /*
649  * A ZFS file system iterator call-back function used to remove standalone
650  * snapshots.
651  */
652 /* ARGSUSED */
653 static int
654 rm_snap(zfs_handle_t *zhp, void *data)
655 {
656 	/* If snapshot has clones, something is wrong */
657 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) != 0) {
658 		zfs_close(zhp);
659 		return (1);
660 	}
661 
662 	if (zfs_unmount(zhp, NULL, 0) == 0) {
663 		(void) zfs_destroy(zhp, B_FALSE);
664 	}
665 
666 	zfs_close(zhp);
667 	return (0);
668 }
669 
670 /*
671  * A ZFS snapshot iterator call-back function which renames snapshots.
672  */
673 static int
674 rename_snap(zfs_handle_t *zhp, void *data)
675 {
676 	int			res;
677 	zfs_snapshot_data_t	*cbp;
678 	char			template[ZFS_MAX_DATASET_NAME_LEN];
679 
680 	cbp = (zfs_snapshot_data_t *)data;
681 
682 	/*
683 	 * When renaming snapshots with the iterator, the iterator can see
684 	 * the same snapshot after we've renamed up in the namespace.  To
685 	 * prevent this we check the count for the number of snapshots we have
686 	 * to rename and stop at that point.
687 	 */
688 	if (cbp->cntr >= cbp->num) {
689 		zfs_close(zhp);
690 		return (0);
691 	}
692 
693 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
694 		zfs_close(zhp);
695 		return (0);
696 	}
697 
698 	/* Only rename the snapshots we automatically generate when we clone. */
699 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) != 0) {
700 		zfs_close(zhp);
701 		return (0);
702 	}
703 
704 	(void) snprintf(template, sizeof (template), "%s%d", cbp->match_name,
705 	    cbp->max++);
706 
707 	res = (zfs_rename(zhp, template, B_FALSE, B_FALSE) != 0);
708 	if (res != 0)
709 		(void) fprintf(stderr, gettext("failed to rename snapshot %s "
710 		    "to %s: %s\n"), zfs_get_name(zhp), template,
711 		    libzfs_error_description(g_zfs));
712 
713 	cbp->cntr++;
714 
715 	zfs_close(zhp);
716 	return (res);
717 }
718 
719 /*
720  * Rename the source dataset's snapshots that are automatically generated when
721  * we clone a zone so that there won't be a name collision when we promote the
722  * cloned dataset.  Once the snapshots have been renamed, then promote the
723  * clone.
724  *
725  * The snapshot rename process gets the highest number on the snapshot names
726  * (the format is zonename@SUNWzoneXX where XX are digits) on both the source
727  * and clone datasets, then renames the source dataset snapshots starting at
728  * the next number.
729  */
730 static int
731 promote_clone(zfs_handle_t *src_zhp, zfs_handle_t *cln_zhp)
732 {
733 	zfs_snapshot_data_t	sd;
734 	char			nm[ZFS_MAX_DATASET_NAME_LEN];
735 	char			template[ZFS_MAX_DATASET_NAME_LEN];
736 
737 	(void) strlcpy(nm, zfs_get_name(cln_zhp), sizeof (nm));
738 	/*
739 	 * Start by getting the clone's snapshot max which we use
740 	 * during the rename of the original dataset's snapshots.
741 	 */
742 	(void) snprintf(template, sizeof (template), "%s@SUNWzone", nm);
743 	sd.match_name = template;
744 	sd.len = strlen(template);
745 	sd.max = 0;
746 
747 	if (zfs_iter_snapshots(cln_zhp, B_FALSE, get_snap_max, &sd) != 0)
748 		return (Z_ERR);
749 
750 	/*
751 	 * Now make sure the source's snapshot max is at least as high as
752 	 * the clone's snapshot max.
753 	 */
754 	(void) snprintf(template, sizeof (template), "%s@SUNWzone",
755 	    zfs_get_name(src_zhp));
756 	sd.match_name = template;
757 	sd.len = strlen(template);
758 	sd.num = 0;
759 
760 	if (zfs_iter_snapshots(src_zhp, B_FALSE, get_snap_max, &sd) != 0)
761 		return (Z_ERR);
762 
763 	/*
764 	 * Now rename the source dataset's snapshots so there's no
765 	 * conflict when we promote the clone.
766 	 */
767 	sd.max++;
768 	sd.cntr = 0;
769 	if (zfs_iter_snapshots(src_zhp, B_FALSE, rename_snap, &sd) != 0)
770 		return (Z_ERR);
771 
772 	/* close and reopen the clone dataset to get the latest info */
773 	zfs_close(cln_zhp);
774 	if ((cln_zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
775 		return (Z_ERR);
776 
777 	if (zfs_promote(cln_zhp) != 0) {
778 		(void) fprintf(stderr, gettext("failed to promote %s: %s\n"),
779 		    nm, libzfs_error_description(g_zfs));
780 		return (Z_ERR);
781 	}
782 
783 	zfs_close(cln_zhp);
784 	return (Z_OK);
785 }
786 
787 /*
788  * Promote the youngest clone.  That clone will then become the origin of all
789  * of the other clones that were hanging off of the source dataset.
790  */
791 int
792 promote_all_clones(zfs_handle_t *zhp)
793 {
794 	clone_data_t	cd;
795 	char		nm[ZFS_MAX_DATASET_NAME_LEN];
796 
797 	cd.clone_zhp = NULL;
798 	cd.origin_creation = 0;
799 	cd.snapshot = NULL;
800 
801 	if (zfs_iter_snapshots(zhp, B_FALSE, find_clone, &cd) != 0) {
802 		zfs_close(zhp);
803 		return (Z_ERR);
804 	}
805 
806 	/* Nothing to promote. */
807 	if (cd.clone_zhp == NULL)
808 		return (Z_OK);
809 
810 	/* Found the youngest clone to promote.  Promote it. */
811 	if (promote_clone(zhp, cd.clone_zhp) != 0) {
812 		zfs_close(cd.clone_zhp);
813 		zfs_close(zhp);
814 		return (Z_ERR);
815 	}
816 
817 	/* close and reopen the main dataset to get the latest info */
818 	(void) strlcpy(nm, zfs_get_name(zhp), sizeof (nm));
819 	zfs_close(zhp);
820 	if ((zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
821 		return (Z_ERR);
822 
823 	return (Z_OK);
824 }
825 
826 /*
827  * Clone a pre-existing ZFS snapshot, either by making a direct ZFS clone, if
828  * possible, or by copying the data from the snapshot to the zonepath.
829  */
830 int
831 clone_snapshot_zfs(char *snap_name, char *zonepath, char *validatesnap)
832 {
833 	int	err = Z_OK;
834 	char	clone_name[MAXPATHLEN];
835 	char	snap_path[MAXPATHLEN];
836 
837 	if (snap2path(snap_name, snap_path, sizeof (snap_path)) != Z_OK) {
838 		(void) fprintf(stderr, gettext("unable to find path for %s.\n"),
839 		    snap_name);
840 		return (Z_ERR);
841 	}
842 
843 	if (validate_snapshot(snap_name, snap_path, validatesnap) != Z_OK)
844 		return (Z_NO_ENTRY);
845 
846 	/*
847 	 * The zonepath cannot be ZFS cloned, try to copy the data from
848 	 * within the snapshot to the zonepath.
849 	 */
850 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
851 		if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
852 			if (clean_out_clone() != Z_OK)
853 				(void) fprintf(stderr,
854 				    gettext("could not remove the "
855 				    "software inventory from %s\n"), zonepath);
856 
857 		return (err);
858 	}
859 
860 	if ((err = clone_snap(snap_name, clone_name)) != Z_OK) {
861 		if (err != Z_NO_ENTRY) {
862 			/*
863 			 * Cloning the snapshot failed.  Fall back to trying
864 			 * to install the zone by copying from the snapshot.
865 			 */
866 			if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
867 				if (clean_out_clone() != Z_OK)
868 					(void) fprintf(stderr,
869 					    gettext("could not remove the "
870 					    "software inventory from %s\n"),
871 					    zonepath);
872 		} else {
873 			/*
874 			 * The snapshot is unusable for some reason so restore
875 			 * the zone state to configured since we were unable to
876 			 * actually do anything about getting the zone
877 			 * installed.
878 			 */
879 			int tmp;
880 
881 			if ((tmp = zone_set_state(target_zone,
882 			    ZONE_STATE_CONFIGURED)) != Z_OK) {
883 				errno = tmp;
884 				zperror2(target_zone,
885 				    gettext("could not set state"));
886 			}
887 		}
888 	}
889 
890 	return (err);
891 }
892 
893 /*
894  * Attempt to clone a source_zone to a target zonepath by using a ZFS clone.
895  */
896 int
897 clone_zfs(char *source_zonepath, char *zonepath, char *presnapbuf,
898     char *postsnapbuf)
899 {
900 	zfs_handle_t	*zhp;
901 	char		clone_name[MAXPATHLEN];
902 	char		snap_name[MAXPATHLEN];
903 
904 	/*
905 	 * Try to get a zfs handle for the source_zonepath.  If this fails
906 	 * the source_zonepath is not ZFS so return an error.
907 	 */
908 	if ((zhp = mount2zhandle(source_zonepath)) == NULL)
909 		return (Z_ERR);
910 
911 	/*
912 	 * Check if there is a file system already mounted on zonepath.  If so,
913 	 * we can't clone to the path so we should fall back to copying.
914 	 */
915 	if (is_mountpnt(zonepath)) {
916 		zfs_close(zhp);
917 		(void) fprintf(stderr,
918 		    gettext("A file system is already mounted on %s,\n"
919 		    "preventing use of a ZFS clone.\n"), zonepath);
920 		return (Z_ERR);
921 	}
922 
923 	/*
924 	 * Instead of using path2name to get the clone name from the zonepath,
925 	 * we could generate a name from the source zone ZFS name.  However,
926 	 * this would mean we would create the clone under the ZFS fs of the
927 	 * source instead of what the zonepath says.  For example,
928 	 *
929 	 * source_zonepath		zonepath
930 	 * /pl/zones/dev/z1		/pl/zones/deploy/z2
931 	 *
932 	 * We don't want the clone to be under "dev", we want it under
933 	 * "deploy", so that we can leverage the normal attribute inheritance
934 	 * that ZFS provides in the fs hierarchy.
935 	 */
936 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
937 		zfs_close(zhp);
938 		return (Z_ERR);
939 	}
940 
941 	if (take_snapshot(zhp, snap_name, sizeof (snap_name), presnapbuf,
942 	    postsnapbuf) != Z_OK) {
943 		zfs_close(zhp);
944 		return (Z_ERR);
945 	}
946 	zfs_close(zhp);
947 
948 	if (clone_snap(snap_name, clone_name) != Z_OK) {
949 		/* Clean up the snapshot we just took. */
950 		if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
951 		    != NULL) {
952 			if (zfs_unmount(zhp, NULL, 0) == 0)
953 				(void) zfs_destroy(zhp, B_FALSE);
954 			zfs_close(zhp);
955 		}
956 
957 		return (Z_ERR);
958 	}
959 
960 	(void) printf(gettext("Instead of copying, a ZFS clone has been "
961 	    "created for this zone.\n"));
962 
963 	return (Z_OK);
964 }
965 
966 /*
967  * Attempt to create a ZFS file system for the specified zonepath.
968  * We either will successfully create a ZFS file system and get it mounted
969  * on the zonepath or we don't.  The caller doesn't care since a regular
970  * directory is used for the zonepath if no ZFS file system is mounted there.
971  */
972 void
973 create_zfs_zonepath(char *zonepath)
974 {
975 	zfs_handle_t	*zhp;
976 	char		zfs_name[MAXPATHLEN];
977 	nvlist_t	*props = NULL;
978 
979 	if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
980 		return;
981 
982 	/* Check if the dataset already exists. */
983 	if ((zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) != NULL) {
984 		zfs_close(zhp);
985 		return;
986 	}
987 
988 	/*
989 	 * We turn off zfs SHARENFS and SHARESMB properties on the
990 	 * zoneroot dataset in order to prevent the GZ from sharing
991 	 * NGZ data by accident.
992 	 */
993 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
994 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
995 	    "off") != 0) ||
996 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
997 	    "off") != 0)) {
998 		nvlist_free(props);
999 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1000 		    "out of memory\n"), zfs_name);
1001 	}
1002 
1003 	if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, props) != 0 ||
1004 	    (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) == NULL) {
1005 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1006 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1007 		nvlist_free(props);
1008 		return;
1009 	}
1010 
1011 	nvlist_free(props);
1012 
1013 	if (zfs_mount(zhp, NULL, 0) != 0) {
1014 		(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
1015 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1016 		(void) zfs_destroy(zhp, B_FALSE);
1017 	} else {
1018 		if (chmod(zonepath, S_IRWXU) != 0) {
1019 			(void) fprintf(stderr, gettext("file system %s "
1020 			    "successfully created, but chmod %o failed: %s\n"),
1021 			    zfs_name, S_IRWXU, strerror(errno));
1022 			(void) destroy_zfs(zonepath);
1023 		} else {
1024 			(void) printf(gettext("A ZFS file system has been "
1025 			    "created for this zone.\n"));
1026 		}
1027 	}
1028 
1029 	zfs_close(zhp);
1030 }
1031 
1032 /*
1033  * If the zonepath is a ZFS file system, attempt to destroy it.  We return Z_OK
1034  * if we were able to zfs_destroy the zonepath, otherwise we return Z_ERR
1035  * which means the caller should clean up the zonepath in the traditional
1036  * way.
1037  */
1038 int
1039 destroy_zfs(char *zonepath)
1040 {
1041 	zfs_handle_t	*zhp;
1042 	boolean_t	is_clone = B_FALSE;
1043 	char		origin[ZFS_MAXPROPLEN];
1044 
1045 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1046 		return (Z_ERR);
1047 
1048 	if (promote_all_clones(zhp) != 0)
1049 		return (Z_ERR);
1050 
1051 	/* Now cleanup any snapshots remaining. */
1052 	if (zfs_iter_snapshots(zhp, B_FALSE, rm_snap, NULL) != 0) {
1053 		zfs_close(zhp);
1054 		return (Z_ERR);
1055 	}
1056 
1057 	/*
1058 	 * We can't destroy the file system if it has still has dependents.
1059 	 * There shouldn't be any at this point, but we'll double check.
1060 	 */
1061 	if (zfs_iter_dependents(zhp, B_TRUE, has_dependent, NULL) != 0) {
1062 		(void) fprintf(stderr, gettext("zfs destroy %s failed: the "
1063 		    "dataset still has dependents\n"), zfs_get_name(zhp));
1064 		zfs_close(zhp);
1065 		return (Z_ERR);
1066 	}
1067 
1068 	/*
1069 	 * This might be a clone.  Try to get the snapshot so we can attempt
1070 	 * to destroy that as well.
1071 	 */
1072 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
1073 	    NULL, 0, B_FALSE) == 0)
1074 		is_clone = B_TRUE;
1075 
1076 	if (zfs_unmount(zhp, NULL, 0) != 0) {
1077 		(void) fprintf(stderr, gettext("zfs unmount %s failed: %s\n"),
1078 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1079 		zfs_close(zhp);
1080 		return (Z_ERR);
1081 	}
1082 
1083 	if (zfs_destroy(zhp, B_FALSE) != 0) {
1084 		/*
1085 		 * If the destroy fails for some reason, try to remount
1086 		 * the file system so that we can use "rm -rf" to clean up
1087 		 * instead.
1088 		 */
1089 		(void) fprintf(stderr, gettext("zfs destroy %s failed: %s\n"),
1090 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1091 		(void) zfs_mount(zhp, NULL, 0);
1092 		zfs_close(zhp);
1093 		return (Z_ERR);
1094 	}
1095 
1096 	/*
1097 	 * If the zone has ever been moved then the mountpoint dir will not be
1098 	 * cleaned up by the zfs_destroy().  To handle this case try to clean
1099 	 * it up now but don't worry if it fails, that will be normal.
1100 	 */
1101 	(void) rmdir(zonepath);
1102 
1103 	(void) printf(gettext("The ZFS file system for this zone has been "
1104 	    "destroyed.\n"));
1105 
1106 	if (is_clone) {
1107 		zfs_handle_t	*ohp;
1108 
1109 		/*
1110 		 * Try to clean up the snapshot that the clone was taken from.
1111 		 */
1112 		if ((ohp = zfs_open(g_zfs, origin,
1113 		    ZFS_TYPE_SNAPSHOT)) != NULL) {
1114 			if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
1115 			    NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
1116 				(void) zfs_destroy(ohp, B_FALSE);
1117 			zfs_close(ohp);
1118 		}
1119 	}
1120 
1121 	zfs_close(zhp);
1122 	return (Z_OK);
1123 }
1124 
1125 /*
1126  * Return true if the path is its own zfs file system.  We determine this
1127  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
1128  * if it is a different fs.
1129  */
1130 boolean_t
1131 is_zonepath_zfs(char *zonepath)
1132 {
1133 	int res;
1134 	char *path;
1135 	char *parent;
1136 	struct statvfs64 buf1, buf2;
1137 
1138 	if (statvfs64(zonepath, &buf1) != 0)
1139 		return (B_FALSE);
1140 
1141 	if (strcmp(buf1.f_basetype, "zfs") != 0)
1142 		return (B_FALSE);
1143 
1144 	if ((path = strdup(zonepath)) == NULL)
1145 		return (B_FALSE);
1146 
1147 	parent = dirname(path);
1148 	res = statvfs64(parent, &buf2);
1149 	free(path);
1150 
1151 	if (res != 0)
1152 		return (B_FALSE);
1153 
1154 	if (buf1.f_fsid == buf2.f_fsid)
1155 		return (B_FALSE);
1156 
1157 	return (B_TRUE);
1158 }
1159 
1160 /*
1161  * Implement the fast move of a ZFS file system by simply updating the
1162  * mountpoint.  Since it is file system already, we don't have the
1163  * issue of cross-file system copying.
1164  */
1165 int
1166 move_zfs(char *zonepath, char *new_zonepath)
1167 {
1168 	int		ret = Z_ERR;
1169 	zfs_handle_t	*zhp;
1170 
1171 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1172 		return (Z_ERR);
1173 
1174 	if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1175 	    new_zonepath) == 0) {
1176 		/*
1177 		 * Clean up the old mount point.  We ignore any failure since
1178 		 * the zone is already successfully mounted on the new path.
1179 		 */
1180 		(void) rmdir(zonepath);
1181 		ret = Z_OK;
1182 	}
1183 
1184 	zfs_close(zhp);
1185 
1186 	return (ret);
1187 }
1188 
1189 /*
1190  * Validate that the given dataset exists on the system, and that neither it nor
1191  * its children are zvols.
1192  *
1193  * Note that we don't do anything with the 'zoned' property here.  All
1194  * management is done in zoneadmd when the zone is actually rebooted.  This
1195  * allows us to automatically set the zoned property even when a zone is
1196  * rebooted by the administrator.
1197  */
1198 int
1199 verify_datasets(zone_dochandle_t handle)
1200 {
1201 	int return_code = Z_OK;
1202 	struct zone_dstab dstab;
1203 	zfs_handle_t *zhp;
1204 	char propbuf[ZFS_MAXPROPLEN];
1205 	char source[ZFS_MAX_DATASET_NAME_LEN];
1206 	zprop_source_t srctype;
1207 
1208 	if (zonecfg_setdsent(handle) != Z_OK) {
1209 		/*
1210 		 * TRANSLATION_NOTE
1211 		 * zfs and dataset are literals that should not be translated.
1212 		 */
1213 		(void) fprintf(stderr, gettext("could not verify zfs datasets: "
1214 		    "unable to enumerate datasets\n"));
1215 		return (Z_ERR);
1216 	}
1217 
1218 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
1219 
1220 		if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
1221 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1222 			(void) fprintf(stderr, gettext("could not verify zfs "
1223 			    "dataset %s: %s\n"), dstab.zone_dataset_name,
1224 			    libzfs_error_description(g_zfs));
1225 			return_code = Z_ERR;
1226 			continue;
1227 		}
1228 
1229 		if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf,
1230 		    sizeof (propbuf), &srctype, source,
1231 		    sizeof (source), 0) == 0 &&
1232 		    (srctype == ZPROP_SRC_INHERITED)) {
1233 			(void) fprintf(stderr, gettext("could not verify zfs "
1234 			    "dataset %s: mountpoint cannot be inherited\n"),
1235 			    dstab.zone_dataset_name);
1236 			return_code = Z_ERR;
1237 			zfs_close(zhp);
1238 			continue;
1239 		}
1240 
1241 		zfs_close(zhp);
1242 	}
1243 	(void) zonecfg_enddsent(handle);
1244 
1245 	return (return_code);
1246 }
1247 
1248 /*
1249  * Verify that the ZFS dataset exists, and its mountpoint
1250  * property is set to "legacy".
1251  */
1252 int
1253 verify_fs_zfs(struct zone_fstab *fstab)
1254 {
1255 	zfs_handle_t *zhp;
1256 	char propbuf[ZFS_MAXPROPLEN];
1257 
1258 	if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
1259 	    ZFS_TYPE_DATASET)) == NULL) {
1260 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1261 		    "could not access zfs dataset '%s'\n"),
1262 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1263 		return (Z_ERR);
1264 	}
1265 
1266 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
1267 		(void) fprintf(stderr, gettext("cannot verify fs %s: "
1268 		    "'%s' is not a file system\n"),
1269 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1270 		zfs_close(zhp);
1271 		return (Z_ERR);
1272 	}
1273 
1274 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1275 	    NULL, NULL, 0, 0) != 0 || strcmp(propbuf, "legacy") != 0) {
1276 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1277 		    "zfs '%s' mountpoint is not \"legacy\"\n"),
1278 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1279 		zfs_close(zhp);
1280 		return (Z_ERR);
1281 	}
1282 
1283 	zfs_close(zhp);
1284 	return (Z_OK);
1285 }
1286 
1287 /*
1288  * Destroy the specified mnttab structure that was created by mnttab_dup().
1289  * NOTE: The structure's mnt_time field isn't freed.
1290  */
1291 static void
1292 mnttab_destroy(struct mnttab *tabp)
1293 {
1294 	assert(tabp != NULL);
1295 
1296 	free(tabp->mnt_mountp);
1297 	free(tabp->mnt_special);
1298 	free(tabp->mnt_fstype);
1299 	free(tabp->mnt_mntopts);
1300 	free(tabp);
1301 }
1302 
1303 /*
1304  * Duplicate the specified mnttab structure.  The mnt_mountp and mnt_time
1305  * fields aren't duplicated.  This function returns a pointer to the new mnttab
1306  * structure or NULL if an error occurred.  If an error occurs, then this
1307  * function sets errno to reflect the error.  mnttab structures created by
1308  * this function should be destroyed via mnttab_destroy().
1309  */
1310 static struct mnttab *
1311 mnttab_dup(const struct mnttab *srcp)
1312 {
1313 	struct mnttab *retval;
1314 
1315 	assert(srcp != NULL);
1316 
1317 	retval = (struct mnttab *)calloc(1, sizeof (*retval));
1318 	if (retval == NULL) {
1319 		errno = ENOMEM;
1320 		return (NULL);
1321 	}
1322 	if (srcp->mnt_special != NULL) {
1323 		retval->mnt_special = strdup(srcp->mnt_special);
1324 		if (retval->mnt_special == NULL)
1325 			goto err;
1326 	}
1327 	if (srcp->mnt_fstype != NULL) {
1328 		retval->mnt_fstype = strdup(srcp->mnt_fstype);
1329 		if (retval->mnt_fstype == NULL)
1330 			goto err;
1331 	}
1332 	retval->mnt_mntopts = (char *)malloc(MAX_MNTOPT_STR * sizeof (char));
1333 	if (retval->mnt_mntopts == NULL)
1334 		goto err;
1335 	if (srcp->mnt_mntopts != NULL) {
1336 		if (strlcpy(retval->mnt_mntopts, srcp->mnt_mntopts,
1337 		    MAX_MNTOPT_STR * sizeof (char)) >= MAX_MNTOPT_STR *
1338 		    sizeof (char)) {
1339 			mnttab_destroy(retval);
1340 			errno = EOVERFLOW; /* similar to mount(2) behavior */
1341 			return (NULL);
1342 		}
1343 	} else {
1344 		retval->mnt_mntopts[0] = '\0';
1345 	}
1346 	return (retval);
1347 
1348 err:
1349 	mnttab_destroy(retval);
1350 	errno = ENOMEM;
1351 	return (NULL);
1352 }
1353 
1354 /*
1355  * Determine whether the specified ZFS dataset's mountpoint property is set
1356  * to "legacy".  If the specified dataset does not have a legacy mountpoint,
1357  * then the string pointer to which the mountpoint argument points is assigned
1358  * a dynamically-allocated string containing the dataset's mountpoint
1359  * property.  If the dataset's mountpoint property is "legacy" or a libzfs
1360  * error occurs, then the string pointer to which the mountpoint argument
1361  * points isn't modified.
1362  *
1363  * This function returns B_TRUE if it doesn't encounter any fatal errors.
1364  * It returns B_FALSE if it encounters a fatal error and sets errno to the
1365  * appropriate error code.
1366  */
1367 static boolean_t
1368 get_zfs_non_legacy_mountpoint(const char *dataset_name, char **mountpoint)
1369 {
1370 	zfs_handle_t *zhp;
1371 	char propbuf[ZFS_MAXPROPLEN];
1372 
1373 	assert(dataset_name != NULL);
1374 	assert(mountpoint != NULL);
1375 
1376 	if ((zhp = zfs_open(g_zfs, dataset_name, ZFS_TYPE_DATASET)) == NULL) {
1377 		errno = EINVAL;
1378 		return (B_FALSE);
1379 	}
1380 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1381 	    NULL, NULL, 0, 0) != 0) {
1382 		zfs_close(zhp);
1383 		errno = EINVAL;
1384 		return (B_FALSE);
1385 	}
1386 	zfs_close(zhp);
1387 	if (strcmp(propbuf, "legacy") != 0) {
1388 		if ((*mountpoint = strdup(propbuf)) == NULL) {
1389 			errno = ENOMEM;
1390 			return (B_FALSE);
1391 		}
1392 	}
1393 	return (B_TRUE);
1394 }
1395 
1396 
1397 /*
1398  * This zonecfg_find_mounts() callback records information about mounts of
1399  * interest in a zonepath.  It also tallies the number of zone
1400  * root overlay mounts and the number of unexpected mounts found.
1401  * This function outputs errors using zerror() if it finds unexpected
1402  * mounts.  cookiep should point to an initialized zone_mounts_t structure.
1403  *
1404  * This function returns zero on success and a nonzero value on failure.
1405  */
1406 static int
1407 zone_mounts_cb(const struct mnttab *mountp, void *cookiep)
1408 {
1409 	zone_mounts_t *mounts;
1410 	const char *zone_mount_dir;
1411 
1412 	assert(mountp != NULL);
1413 	assert(cookiep != NULL);
1414 
1415 	mounts = (zone_mounts_t *)cookiep;
1416 	zone_mount_dir = mountp->mnt_mountp + mounts->zonepath_len;
1417 	if (strcmp(zone_mount_dir, "/root") == 0) {
1418 		/*
1419 		 * Check for an overlay mount.  If we already detected a /root
1420 		 * mount, then the current mount must be an overlay mount.
1421 		 */
1422 		if (mounts->root_mnttab != NULL) {
1423 			mounts->num_root_overlay_mounts++;
1424 			return (0);
1425 		}
1426 
1427 		/*
1428 		 * Store the root mount's mnttab information in the
1429 		 * zone_mounts_t structure for future use.
1430 		 */
1431 		if ((mounts->root_mnttab = mnttab_dup(mountp)) == NULL) {
1432 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1433 			return (-1);
1434 		}
1435 
1436 		/*
1437 		 * Determine if the filesystem is a ZFS filesystem with a
1438 		 * non-legacy mountpoint.  If it is, then set the root
1439 		 * filesystem's mnttab's mnt_mountp field to a non-NULL
1440 		 * value, which will serve as a flag to indicate this special
1441 		 * condition.
1442 		 */
1443 		if (strcmp(mountp->mnt_fstype, MNTTYPE_ZFS) == 0 &&
1444 		    get_zfs_non_legacy_mountpoint(mountp->mnt_special,
1445 		    &mounts->root_mnttab->mnt_mountp) != B_TRUE) {
1446 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1447 			return (-1);
1448 		}
1449 	} else {
1450 		/*
1451 		 * An unexpected mount was found.  Notify the user.
1452 		 */
1453 		if (mounts->num_unexpected_mounts == 0)
1454 			zerror(gettext("These file systems are mounted on "
1455 			    "subdirectories of %s.\n"), mounts->zonepath);
1456 		mounts->num_unexpected_mounts++;
1457 		(void) zfm_print(mountp, NULL);
1458 	}
1459 	return (0);
1460 }
1461 
1462 /*
1463  * Initialize the specified zone_mounts_t structure for the given zonepath.
1464  * If this function succeeds, it returns zero and the specified zone_mounts_t
1465  * structure contains information about mounts in the specified zonepath.
1466  * The function returns a nonzero value if it fails.  The zone_mounts_t
1467  * structure doesn't need be destroyed via zone_mounts_destroy() if this
1468  * function fails.
1469  */
1470 int
1471 zone_mounts_init(zone_mounts_t *mounts, const char *zonepath)
1472 {
1473 	assert(mounts != NULL);
1474 	assert(zonepath != NULL);
1475 
1476 	bzero(mounts, sizeof (*mounts));
1477 	if ((mounts->zonepath = strdup(zonepath)) == NULL) {
1478 		zerror(gettext("the process ran out of memory while checking "
1479 		    "for mounts in zonepath %s."), zonepath);
1480 		return (-1);
1481 	}
1482 	mounts->zonepath_len = strlen(zonepath);
1483 	if (zonecfg_find_mounts((char *)zonepath, zone_mounts_cb, mounts) ==
1484 	    -1) {
1485 		zerror(gettext("an error occurred while checking for mounts "
1486 		    "in zonepath %s."), zonepath);
1487 		zone_mounts_destroy(mounts);
1488 		return (-1);
1489 	}
1490 	return (0);
1491 }
1492 
1493 /*
1494  * Destroy the memory used by the specified zone_mounts_t structure's fields.
1495  * This function doesn't free the memory occupied by the structure itself
1496  * (i.e., it doesn't free the parameter).
1497  */
1498 void
1499 zone_mounts_destroy(zone_mounts_t *mounts)
1500 {
1501 	assert(mounts != NULL);
1502 
1503 	free(mounts->zonepath);
1504 	if (mounts->root_mnttab != NULL)
1505 		mnttab_destroy(mounts->root_mnttab);
1506 }
1507 
1508 /*
1509  * Mount a moving zone's root filesystem (if it had a root filesystem mount
1510  * prior to the move) using the specified zonepath.  mounts should refer to
1511  * the zone_mounts_t structure describing the zone's mount information.
1512  *
1513  * This function returns zero if the mount succeeds and a nonzero value
1514  * if it doesn't.
1515  */
1516 int
1517 zone_mount_rootfs(zone_mounts_t *mounts, const char *zonepath)
1518 {
1519 	char zoneroot[MAXPATHLEN];
1520 	struct mnttab *mtab;
1521 	int flags;
1522 
1523 	assert(mounts != NULL);
1524 	assert(zonepath != NULL);
1525 
1526 	/*
1527 	 * If there isn't a root filesystem, then don't do anything.
1528 	 */
1529 	mtab = mounts->root_mnttab;
1530 	if (mtab == NULL)
1531 		return (0);
1532 
1533 	/*
1534 	 * Determine the root filesystem's new mountpoint.
1535 	 */
1536 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1537 	    sizeof (zoneroot)) {
1538 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1539 		return (-1);
1540 	}
1541 
1542 	/*
1543 	 * If the root filesystem is a non-legacy ZFS filesystem (i.e., if it's
1544 	 * mnt_mountp field is non-NULL), then make the filesystem's new
1545 	 * mount point its mountpoint property and mount the filesystem.
1546 	 */
1547 	if (mtab->mnt_mountp != NULL) {
1548 		zfs_handle_t *zhp;
1549 
1550 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1551 		    ZFS_TYPE_DATASET)) == NULL) {
1552 			zerror(gettext("could not get ZFS handle for the zone's"
1553 			    " root filesystem"));
1554 			return (-1);
1555 		}
1556 		if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1557 		    zoneroot) != 0) {
1558 			zerror(gettext("could not modify zone's root "
1559 			    "filesystem's mountpoint property"));
1560 			zfs_close(zhp);
1561 			return (-1);
1562 		}
1563 		if (zfs_mount(zhp, mtab->mnt_mntopts, 0) != 0) {
1564 			zerror(gettext("unable to mount zone root %s: %s"),
1565 			    zoneroot, libzfs_error_description(g_zfs));
1566 			if (zfs_prop_set(zhp,
1567 			    zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1568 			    mtab->mnt_mountp) != 0)
1569 				zerror(gettext("unable to restore zone's root "
1570 				    "filesystem's mountpoint property"));
1571 			zfs_close(zhp);
1572 			return (-1);
1573 		}
1574 		zfs_close(zhp);
1575 		return (0);
1576 	}
1577 
1578 	/*
1579 	 * The root filesystem is either a legacy-mounted ZFS filesystem or
1580 	 * a non-ZFS filesystem.  Use mount(2) to mount the root filesystem.
1581 	 */
1582 	if (mtab->mnt_mntopts != NULL)
1583 		flags = MS_OPTIONSTR;
1584 	else
1585 		flags = 0;
1586 	if (mount(mtab->mnt_special, zoneroot, flags, mtab->mnt_fstype, NULL, 0,
1587 	    mtab->mnt_mntopts, MAX_MNTOPT_STR * sizeof (char)) != 0) {
1588 		flags = errno;
1589 		zerror(gettext("unable to mount zone root %s: %s"), zoneroot,
1590 		    strerror(flags));
1591 		return (-1);
1592 	}
1593 	return (0);
1594 }
1595 
1596 /*
1597  * Unmount a moving zone's root filesystem (if such a mount exists) using the
1598  * specified zonepath.  mounts should refer to the zone_mounts_t structure
1599  * describing the zone's mount information.  If force is B_TRUE, then if the
1600  * unmount fails, then the function will try to forcibly unmount the zone's root
1601  * filesystem.
1602  *
1603  * This function returns zero if the unmount (forced or otherwise) succeeds;
1604  * otherwise, it returns a nonzero value.
1605  */
1606 int
1607 zone_unmount_rootfs(zone_mounts_t *mounts, const char *zonepath,
1608     boolean_t force)
1609 {
1610 	char zoneroot[MAXPATHLEN];
1611 	struct mnttab *mtab;
1612 	int err;
1613 
1614 	assert(mounts != NULL);
1615 	assert(zonepath != NULL);
1616 
1617 	/*
1618 	 * If there isn't a root filesystem, then don't do anything.
1619 	 */
1620 	mtab = mounts->root_mnttab;
1621 	if (mtab == NULL)
1622 		return (0);
1623 
1624 	/*
1625 	 * Determine the root filesystem's mountpoint.
1626 	 */
1627 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1628 	    sizeof (zoneroot)) {
1629 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1630 		return (-1);
1631 	}
1632 
1633 	/*
1634 	 * If the root filesystem is a non-legacy ZFS fileystem, then unmount
1635 	 * the filesystem via libzfs.
1636 	 */
1637 	if (mtab->mnt_mountp != NULL) {
1638 		zfs_handle_t *zhp;
1639 
1640 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1641 		    ZFS_TYPE_DATASET)) == NULL) {
1642 			zerror(gettext("could not get ZFS handle for the zone's"
1643 			    " root filesystem"));
1644 			return (-1);
1645 		}
1646 		if (zfs_unmount(zhp, zoneroot, 0) != 0) {
1647 			if (force && zfs_unmount(zhp, zoneroot, MS_FORCE) ==
1648 			    0) {
1649 				zfs_close(zhp);
1650 				return (0);
1651 			}
1652 			zerror(gettext("unable to unmount zone root %s: %s"),
1653 			    zoneroot, libzfs_error_description(g_zfs));
1654 			zfs_close(zhp);
1655 			return (-1);
1656 		}
1657 		zfs_close(zhp);
1658 		return (0);
1659 	}
1660 
1661 	/*
1662 	 * Use umount(2) to unmount the root filesystem.  If this fails, then
1663 	 * forcibly unmount it if the force flag is set.
1664 	 */
1665 	if (umount(zoneroot) != 0) {
1666 		if (force && umount2(zoneroot, MS_FORCE) == 0)
1667 			return (0);
1668 		err = errno;
1669 		zerror(gettext("unable to unmount zone root %s: %s"), zoneroot,
1670 		    strerror(err));
1671 		return (-1);
1672 	}
1673 	return (0);
1674 }
1675 
1676 int
1677 init_zfs(void)
1678 {
1679 	if ((g_zfs = libzfs_init()) == NULL) {
1680 		(void) fprintf(stderr, gettext("failed to initialize ZFS "
1681 		    "library\n"));
1682 		return (Z_ERR);
1683 	}
1684 
1685 	return (Z_OK);
1686 }
1687