xref: /titanic_51/usr/src/cmd/zoneadm/zfs.c (revision 05ead181677a01a3a118f8b89ce79361113e34cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * This file contains the functions used to support the ZFS integration
28  * with zones.  This includes validation (e.g. zonecfg dataset), cloning,
29  * file system creation and destruction.
30  */
31 
32 #include <stdio.h>
33 #include <errno.h>
34 #include <unistd.h>
35 #include <string.h>
36 #include <locale.h>
37 #include <libintl.h>
38 #include <sys/stat.h>
39 #include <sys/statvfs.h>
40 #include <libgen.h>
41 #include <libzonecfg.h>
42 #include <sys/mnttab.h>
43 #include <libzfs.h>
44 #include <sys/mntent.h>
45 #include <values.h>
46 #include <strings.h>
47 #include <assert.h>
48 
49 #include "zoneadm.h"
50 
/*
 * Library handle handed to the libzfs entry points used in this file
 * (zfs_open(), zfs_iter_root(), zfs_snapshot(), zfs_create(), ...).
 * It is defined here; nothing in this part of the file initializes it.
 */
libzfs_handle_t *g_zfs;
52 
/*
 * Callback data for match_mountpoint(): the mount point being searched for
 * and, once a match is found, the handle of the file system mounted there.
 */
typedef struct zfs_mount_data {
	char		*match_name;	/* mount point path to look for */
	zfs_handle_t	*match_handle;	/* matching dataset, else untouched */
} zfs_mount_data_t;
57 
/*
 * Callback data shared by the snapshot iterators get_snap_max() and
 * rename_snap().  get_snap_max() updates max and num; rename_snap() consumes
 * max as the next suffix to assign and uses cntr/num to know when to stop.
 */
typedef struct zfs_snapshot_data {
	char	*match_name;	/* zonename@SUNWzone */
	int	len;		/* strlen of match_name */
	int	max;		/* highest digit appended to snap name */
	int	num;		/* number of snapshots to rename */
	int	cntr;		/* counter for renaming snapshots */
} zfs_snapshot_data_t;
65 
/*
 * Callback data used by find_clone() and get_direct_clone() while searching
 * a dataset's snapshots for the clone that should be promoted.
 */
typedef struct clone_data {
	zfs_handle_t	*clone_zhp;	/* clone dataset to promote */
	time_t		origin_creation; /* snapshot creation time of clone */
	const char	*snapshot;	/* snapshot of dataset being demoted */
} clone_data_t;
71 
72 /*
73  * A ZFS file system iterator call-back function which is used to validate
74  * datasets imported into the zone.
75  */
76 /* ARGSUSED */
77 static int
78 check_zvol(zfs_handle_t *zhp, void *unused)
79 {
80 	int ret;
81 
82 	if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) {
83 		/*
84 		 * TRANSLATION_NOTE
85 		 * zfs and dataset are literals that should not be translated.
86 		 */
87 		(void) fprintf(stderr, gettext("cannot verify zfs dataset %s: "
88 		    "volumes cannot be specified as a zone dataset resource\n"),
89 		    zfs_get_name(zhp));
90 		ret = -1;
91 	} else {
92 		ret = zfs_iter_children(zhp, check_zvol, NULL);
93 	}
94 
95 	zfs_close(zhp);
96 
97 	return (ret);
98 }
99 
100 /*
101  * A ZFS file system iterator call-back function which returns the
102  * zfs_handle_t for a ZFS file system on the specified mount point.
103  */
104 static int
105 match_mountpoint(zfs_handle_t *zhp, void *data)
106 {
107 	int			res;
108 	zfs_mount_data_t	*cbp;
109 	char			mp[ZFS_MAXPROPLEN];
110 
111 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
112 		zfs_close(zhp);
113 		return (0);
114 	}
115 
116 	/* First check if the dataset is mounted. */
117 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
118 	    0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
119 		zfs_close(zhp);
120 		return (0);
121 	}
122 
123 	/* Now check mount point. */
124 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
125 	    0, B_FALSE) != 0) {
126 		zfs_close(zhp);
127 		return (0);
128 	}
129 
130 	cbp = (zfs_mount_data_t *)data;
131 
132 	if (strcmp(mp, "legacy") == 0) {
133 		/* If legacy, must look in mnttab for mountpoint. */
134 		FILE		*fp;
135 		struct mnttab	entry;
136 		const char	*nm;
137 
138 		nm = zfs_get_name(zhp);
139 		if ((fp = fopen(MNTTAB, "r")) == NULL) {
140 			zfs_close(zhp);
141 			return (0);
142 		}
143 
144 		while (getmntent(fp, &entry) == 0) {
145 			if (strcmp(nm, entry.mnt_special) == 0) {
146 				if (strcmp(entry.mnt_mountp, cbp->match_name)
147 				    == 0) {
148 					(void) fclose(fp);
149 					cbp->match_handle = zhp;
150 					return (1);
151 				}
152 				break;
153 			}
154 		}
155 		(void) fclose(fp);
156 
157 	} else if (strcmp(mp, cbp->match_name) == 0) {
158 		cbp->match_handle = zhp;
159 		return (1);
160 	}
161 
162 	/* Iterate over any nested datasets. */
163 	res = zfs_iter_filesystems(zhp, match_mountpoint, data);
164 	zfs_close(zhp);
165 	return (res);
166 }
167 
168 /*
169  * Get ZFS handle for the specified mount point.
170  */
171 static zfs_handle_t *
172 mount2zhandle(char *mountpoint)
173 {
174 	zfs_mount_data_t	cb;
175 
176 	cb.match_name = mountpoint;
177 	cb.match_handle = NULL;
178 	(void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
179 	return (cb.match_handle);
180 }
181 
182 /*
183  * Check if there is already a file system (zfs or any other type) mounted on
184  * path.
185  */
186 static boolean_t
187 is_mountpnt(char *path)
188 {
189 	FILE		*fp;
190 	struct mnttab	entry;
191 
192 	if ((fp = fopen(MNTTAB, "r")) == NULL)
193 		return (B_FALSE);
194 
195 	while (getmntent(fp, &entry) == 0) {
196 		if (strcmp(path, entry.mnt_mountp) == 0) {
197 			(void) fclose(fp);
198 			return (B_TRUE);
199 		}
200 	}
201 
202 	(void) fclose(fp);
203 	return (B_FALSE);
204 }
205 
206 /*
207  * Run the brand's pre-snapshot hook before we take a ZFS snapshot of the zone.
208  */
209 static int
210 pre_snapshot(char *presnapbuf)
211 {
212 	int status;
213 
214 	/* No brand-specific handler */
215 	if (presnapbuf[0] == '\0')
216 		return (Z_OK);
217 
218 	/* Run the hook */
219 	status = do_subproc(presnapbuf);
220 	if ((status = subproc_status(gettext("brand-specific presnapshot"),
221 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
222 		return (Z_ERR);
223 
224 	return (Z_OK);
225 }
226 
227 /*
228  * Run the brand's post-snapshot hook after we take a ZFS snapshot of the zone.
229  */
230 static int
231 post_snapshot(char *postsnapbuf)
232 {
233 	int status;
234 
235 	/* No brand-specific handler */
236 	if (postsnapbuf[0] == '\0')
237 		return (Z_OK);
238 
239 	/* Run the hook */
240 	status = do_subproc(postsnapbuf);
241 	if ((status = subproc_status(gettext("brand-specific postsnapshot"),
242 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
243 		return (Z_ERR);
244 
245 	return (Z_OK);
246 }
247 
248 /*
249  * This is a ZFS snapshot iterator call-back function which returns the
250  * highest number of SUNWzone snapshots that have been taken.
251  */
252 static int
253 get_snap_max(zfs_handle_t *zhp, void *data)
254 {
255 	int			res;
256 	zfs_snapshot_data_t	*cbp;
257 
258 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
259 		zfs_close(zhp);
260 		return (0);
261 	}
262 
263 	cbp = (zfs_snapshot_data_t *)data;
264 
265 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) == 0) {
266 		char	*nump;
267 		int	num;
268 
269 		cbp->num++;
270 		nump = (char *)(zfs_get_name(zhp) + cbp->len);
271 		num = atoi(nump);
272 		if (num > cbp->max)
273 			cbp->max = num;
274 	}
275 
276 	res = zfs_iter_snapshots(zhp, get_snap_max, data);
277 	zfs_close(zhp);
278 	return (res);
279 }
280 
281 /*
282  * Take a ZFS snapshot to be used for cloning the zone.
283  */
284 static int
285 take_snapshot(zfs_handle_t *zhp, char *snapshot_name, int snap_size,
286     char *presnapbuf, char *postsnapbuf)
287 {
288 	int			res;
289 	char			template[ZFS_MAXNAMELEN];
290 	zfs_snapshot_data_t	cb;
291 
292 	/*
293 	 * First we need to figure out the next available name for the
294 	 * zone snapshot.  Look through the list of zones snapshots for
295 	 * this file system to determine the maximum snapshot name.
296 	 */
297 	if (snprintf(template, sizeof (template), "%s@SUNWzone",
298 	    zfs_get_name(zhp)) >=  sizeof (template))
299 		return (Z_ERR);
300 
301 	cb.match_name = template;
302 	cb.len = strlen(template);
303 	cb.max = 0;
304 
305 	if (zfs_iter_snapshots(zhp, get_snap_max, &cb) != 0)
306 		return (Z_ERR);
307 
308 	cb.max++;
309 
310 	if (snprintf(snapshot_name, snap_size, "%s@SUNWzone%d",
311 	    zfs_get_name(zhp), cb.max) >= snap_size)
312 		return (Z_ERR);
313 
314 	if (pre_snapshot(presnapbuf) != Z_OK)
315 		return (Z_ERR);
316 	res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE, NULL);
317 	if (post_snapshot(postsnapbuf) != Z_OK)
318 		return (Z_ERR);
319 
320 	if (res != 0)
321 		return (Z_ERR);
322 	return (Z_OK);
323 }
324 
325 /*
326  * We are using an explicit snapshot from some earlier point in time so
327  * we need to validate it.  Run the brand specific hook.
328  */
329 static int
330 validate_snapshot(char *snapshot_name, char *snap_path, char *validsnapbuf)
331 {
332 	int status;
333 	char cmdbuf[MAXPATHLEN];
334 
335 	/* No brand-specific handler */
336 	if (validsnapbuf[0] == '\0')
337 		return (Z_OK);
338 
339 	/* pass args - snapshot_name & snap_path */
340 	if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %s %s", validsnapbuf,
341 	    snapshot_name, snap_path) >= sizeof (cmdbuf)) {
342 		zerror("Command line too long");
343 		return (Z_ERR);
344 	}
345 
346 	/* Run the hook */
347 	status = do_subproc(cmdbuf);
348 	if ((status = subproc_status(gettext("brand-specific validatesnapshot"),
349 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
350 		return (Z_ERR);
351 
352 	return (Z_OK);
353 }
354 
355 /*
356  * Remove the sw inventory file from inside this zonepath that we picked up out
357  * of the snapshot.
358  */
359 static int
360 clean_out_clone()
361 {
362 	int err;
363 	zone_dochandle_t handle;
364 
365 	if ((handle = zonecfg_init_handle()) == NULL) {
366 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
367 		return (Z_ERR);
368 	}
369 
370 	if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
371 		errno = err;
372 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
373 		zonecfg_fini_handle(handle);
374 		return (Z_ERR);
375 	}
376 
377 	zonecfg_rm_detached(handle, B_FALSE);
378 	zonecfg_fini_handle(handle);
379 
380 	return (Z_OK);
381 }
382 
383 /*
384  * Make a ZFS clone on zonepath from snapshot_name.
385  */
386 static int
387 clone_snap(char *snapshot_name, char *zonepath)
388 {
389 	int		res = Z_OK;
390 	int		err;
391 	zfs_handle_t	*zhp;
392 	zfs_handle_t	*clone;
393 	nvlist_t	*props = NULL;
394 
395 	if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
396 		return (Z_NO_ENTRY);
397 
398 	(void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
399 
400 	/*
401 	 * We turn off zfs SHARENFS and SHARESMB properties on the
402 	 * zoneroot dataset in order to prevent the GZ from sharing
403 	 * NGZ data by accident.
404 	 */
405 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
406 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
407 	    "off") != 0) ||
408 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
409 	    "off") != 0)) {
410 		if (props != NULL)
411 			nvlist_free(props);
412 		(void) fprintf(stderr, gettext("could not create ZFS clone "
413 		    "%s: out of memory\n"), zonepath);
414 		return (Z_ERR);
415 	}
416 
417 	err = zfs_clone(zhp, zonepath, props);
418 	zfs_close(zhp);
419 
420 	nvlist_free(props);
421 
422 	if (err != 0)
423 		return (Z_ERR);
424 
425 	/* create the mountpoint if necessary */
426 	if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_DATASET)) == NULL)
427 		return (Z_ERR);
428 
429 	/*
430 	 * The clone has been created so we need to print a diagnostic
431 	 * message if one of the following steps fails for some reason.
432 	 */
433 	if (zfs_mount(clone, NULL, 0) != 0) {
434 		(void) fprintf(stderr, gettext("could not mount ZFS clone "
435 		    "%s\n"), zfs_get_name(clone));
436 		res = Z_ERR;
437 
438 	} else if (clean_out_clone() != Z_OK) {
439 		(void) fprintf(stderr, gettext("could not remove the "
440 		    "software inventory from ZFS clone %s\n"),
441 		    zfs_get_name(clone));
442 		res = Z_ERR;
443 	}
444 
445 	zfs_close(clone);
446 	return (res);
447 }
448 
449 /*
450  * This function takes a zonepath and attempts to determine what the ZFS
451  * file system name (not mountpoint) should be for that path.  We do not
452  * assume that zonepath is an existing directory or ZFS fs since we use
453  * this function as part of the process of creating a new ZFS fs or clone.
454  *
455  * The way this works is that we look at the parent directory of the zonepath
456  * to see if it is a ZFS fs.  If it is, we get the name of that ZFS fs and
457  * append the last component of the zonepath to generate the ZFS name for the
458  * zonepath.  This matches the algorithm that ZFS uses for automatically
459  * mounting a new fs after it is created.
460  *
461  * Although a ZFS fs can be mounted anywhere, we don't worry about handling
462  * all of the complexity that a user could possibly configure with arbitrary
463  * mounts since there is no way to generate a ZFS name from a random path in
464  * the file system.  We only try to handle the automatic mounts that ZFS does
465  * for each file system.  ZFS restricts this so that a new fs must be created
466  * in an existing parent ZFS fs.  It then automatically mounts the new fs
467  * directly under the mountpoint for the parent fs using the last component
468  * of the name as the mountpoint directory.
469  *
470  * For example:
471  *    Name			Mountpoint
472  *    space/eng/dev/test/zone1	/project1/eng/dev/test/zone1
473  *
474  * Return Z_OK if the path mapped to a ZFS file system name, otherwise return
475  * Z_ERR.
476  */
477 static int
478 path2name(char *zonepath, char *zfs_name, int len)
479 {
480 	int		res;
481 	char		*bnm, *dnm, *dname, *bname;
482 	zfs_handle_t	*zhp;
483 	struct stat	stbuf;
484 
485 	/*
486 	 * We need two tmp strings to handle paths directly in / (e.g. /foo)
487 	 * since dirname will overwrite the first char after "/" in this case.
488 	 */
489 	if ((bnm = strdup(zonepath)) == NULL)
490 		return (Z_ERR);
491 
492 	if ((dnm = strdup(zonepath)) == NULL) {
493 		free(bnm);
494 		return (Z_ERR);
495 	}
496 
497 	bname = basename(bnm);
498 	dname = dirname(dnm);
499 
500 	/*
501 	 * This is a quick test to save iterating over all of the zfs datasets
502 	 * on the system (which can be a lot).  If the parent dir is not in a
503 	 * ZFS fs, then we're done.
504 	 */
505 	if (stat(dname, &stbuf) != 0 || !S_ISDIR(stbuf.st_mode) ||
506 	    strcmp(stbuf.st_fstype, MNTTYPE_ZFS) != 0) {
507 		free(bnm);
508 		free(dnm);
509 		return (Z_ERR);
510 	}
511 
512 	/* See if the parent directory is its own ZFS dataset. */
513 	if ((zhp = mount2zhandle(dname)) == NULL) {
514 		/*
515 		 * The parent is not a ZFS dataset so we can't automatically
516 		 * create a dataset on the given path.
517 		 */
518 		free(bnm);
519 		free(dnm);
520 		return (Z_ERR);
521 	}
522 
523 	res = snprintf(zfs_name, len, "%s/%s", zfs_get_name(zhp), bname);
524 
525 	free(bnm);
526 	free(dnm);
527 	zfs_close(zhp);
528 	if (res >= len)
529 		return (Z_ERR);
530 
531 	return (Z_OK);
532 }
533 
534 /*
535  * A ZFS file system iterator call-back function used to determine if the
536  * file system has dependents (snapshots & clones).
537  */
538 /* ARGSUSED */
539 static int
540 has_dependent(zfs_handle_t *zhp, void *data)
541 {
542 	zfs_close(zhp);
543 	return (1);
544 }
545 
546 /*
547  * Given a snapshot name, get the file system path where the snapshot lives.
548  * A snapshot name is of the form fs_name@snap_name.  For example, snapshot
549  * pl/zones/z1@SUNWzone1 would have a path of
550  * /pl/zones/z1/.zfs/snapshot/SUNWzone1.
551  */
552 static int
553 snap2path(char *snap_name, char *path, int len)
554 {
555 	char		*p;
556 	zfs_handle_t	*zhp;
557 	char		mp[ZFS_MAXPROPLEN];
558 
559 	if ((p = strrchr(snap_name, '@')) == NULL)
560 		return (Z_ERR);
561 
562 	/* Get the file system name from the snap_name. */
563 	*p = '\0';
564 	zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_DATASET);
565 	*p = '@';
566 	if (zhp == NULL)
567 		return (Z_ERR);
568 
569 	/* Get the file system mount point. */
570 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
571 	    0, B_FALSE) != 0) {
572 		zfs_close(zhp);
573 		return (Z_ERR);
574 	}
575 	zfs_close(zhp);
576 
577 	p++;
578 	if (snprintf(path, len, "%s/.zfs/snapshot/%s", mp, p) >= len)
579 		return (Z_ERR);
580 
581 	return (Z_OK);
582 }
583 
584 /*
585  * This callback function is used to iterate through a snapshot's dependencies
586  * to find a filesystem that is a direct clone of the snapshot being iterated.
587  */
588 static int
589 get_direct_clone(zfs_handle_t *zhp, void *data)
590 {
591 	clone_data_t	*cd = data;
592 	char		origin[ZFS_MAXNAMELEN];
593 	char		ds_path[ZFS_MAXNAMELEN];
594 
595 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
596 		zfs_close(zhp);
597 		return (0);
598 	}
599 
600 	(void) strlcpy(ds_path, zfs_get_name(zhp), sizeof (ds_path));
601 
602 	/* Make sure this is a direct clone of the snapshot we're iterating. */
603 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
604 	    NULL, 0, B_FALSE) != 0 || strcmp(origin, cd->snapshot) != 0) {
605 		zfs_close(zhp);
606 		return (0);
607 	}
608 
609 	if (cd->clone_zhp != NULL)
610 		zfs_close(cd->clone_zhp);
611 
612 	cd->clone_zhp = zhp;
613 	return (1);
614 }
615 
616 /*
617  * A ZFS file system iterator call-back function used to determine the clone
618  * to promote.  This function finds the youngest (i.e. last one taken) snapshot
619  * that has a clone.  If found, it returns a reference to that clone in the
620  * callback data.
621  */
622 static int
623 find_clone(zfs_handle_t *zhp, void *data)
624 {
625 	clone_data_t	*cd = data;
626 	time_t		snap_creation;
627 	int		zret = 0;
628 
629 	/* If snapshot has no clones, skip it */
630 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) == 0) {
631 		zfs_close(zhp);
632 		return (0);
633 	}
634 
635 	cd->snapshot = zfs_get_name(zhp);
636 
637 	/* Get the creation time of this snapshot */
638 	snap_creation = (time_t)zfs_prop_get_int(zhp, ZFS_PROP_CREATION);
639 
640 	/*
641 	 * If this snapshot's creation time is greater than (i.e. younger than)
642 	 * the current youngest snapshot found, iterate this snapshot to
643 	 * get the right clone.
644 	 */
645 	if (snap_creation >= cd->origin_creation) {
646 		/*
647 		 * Iterate the dependents of this snapshot to find a clone
648 		 * that's a direct dependent.
649 		 */
650 		if ((zret = zfs_iter_dependents(zhp, B_FALSE, get_direct_clone,
651 		    cd)) == -1) {
652 			zfs_close(zhp);
653 			return (1);
654 		} else if (zret == 1) {
655 			/*
656 			 * Found a clone, update the origin_creation time
657 			 * in the callback data.
658 			 */
659 			cd->origin_creation = snap_creation;
660 		}
661 	}
662 
663 	zfs_close(zhp);
664 	return (0);
665 }
666 
667 /*
668  * A ZFS file system iterator call-back function used to remove standalone
669  * snapshots.
670  */
671 /* ARGSUSED */
672 static int
673 rm_snap(zfs_handle_t *zhp, void *data)
674 {
675 	/* If snapshot has clones, something is wrong */
676 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) != 0) {
677 		zfs_close(zhp);
678 		return (1);
679 	}
680 
681 	if (zfs_unmount(zhp, NULL, 0) == 0) {
682 		(void) zfs_destroy(zhp, B_FALSE);
683 	}
684 
685 	zfs_close(zhp);
686 	return (0);
687 }
688 
689 /*
690  * A ZFS snapshot iterator call-back function which renames snapshots.
691  */
692 static int
693 rename_snap(zfs_handle_t *zhp, void *data)
694 {
695 	int			res;
696 	zfs_snapshot_data_t	*cbp;
697 	char			template[ZFS_MAXNAMELEN];
698 
699 	cbp = (zfs_snapshot_data_t *)data;
700 
701 	/*
702 	 * When renaming snapshots with the iterator, the iterator can see
703 	 * the same snapshot after we've renamed up in the namespace.  To
704 	 * prevent this we check the count for the number of snapshots we have
705 	 * to rename and stop at that point.
706 	 */
707 	if (cbp->cntr >= cbp->num) {
708 		zfs_close(zhp);
709 		return (0);
710 	}
711 
712 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
713 		zfs_close(zhp);
714 		return (0);
715 	}
716 
717 	/* Only rename the snapshots we automatically generate when we clone. */
718 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) != 0) {
719 		zfs_close(zhp);
720 		return (0);
721 	}
722 
723 	(void) snprintf(template, sizeof (template), "%s%d", cbp->match_name,
724 	    cbp->max++);
725 
726 	res = (zfs_rename(zhp, template, B_FALSE) != 0);
727 	if (res != 0)
728 		(void) fprintf(stderr, gettext("failed to rename snapshot %s "
729 		    "to %s: %s\n"), zfs_get_name(zhp), template,
730 		    libzfs_error_description(g_zfs));
731 
732 	cbp->cntr++;
733 
734 	zfs_close(zhp);
735 	return (res);
736 }
737 
738 /*
739  * Rename the source dataset's snapshots that are automatically generated when
740  * we clone a zone so that there won't be a name collision when we promote the
741  * cloned dataset.  Once the snapshots have been renamed, then promote the
742  * clone.
743  *
744  * The snapshot rename process gets the highest number on the snapshot names
745  * (the format is zonename@SUNWzoneXX where XX are digits) on both the source
746  * and clone datasets, then renames the source dataset snapshots starting at
747  * the next number.
748  */
749 static int
750 promote_clone(zfs_handle_t *src_zhp, zfs_handle_t *cln_zhp)
751 {
752 	zfs_snapshot_data_t	sd;
753 	char			nm[ZFS_MAXNAMELEN];
754 	char			template[ZFS_MAXNAMELEN];
755 
756 	(void) strlcpy(nm, zfs_get_name(cln_zhp), sizeof (nm));
757 	/*
758 	 * Start by getting the clone's snapshot max which we use
759 	 * during the rename of the original dataset's snapshots.
760 	 */
761 	(void) snprintf(template, sizeof (template), "%s@SUNWzone", nm);
762 	sd.match_name = template;
763 	sd.len = strlen(template);
764 	sd.max = 0;
765 
766 	if (zfs_iter_snapshots(cln_zhp, get_snap_max, &sd) != 0)
767 		return (Z_ERR);
768 
769 	/*
770 	 * Now make sure the source's snapshot max is at least as high as
771 	 * the clone's snapshot max.
772 	 */
773 	(void) snprintf(template, sizeof (template), "%s@SUNWzone",
774 	    zfs_get_name(src_zhp));
775 	sd.match_name = template;
776 	sd.len = strlen(template);
777 	sd.num = 0;
778 
779 	if (zfs_iter_snapshots(src_zhp, get_snap_max, &sd) != 0)
780 		return (Z_ERR);
781 
782 	/*
783 	 * Now rename the source dataset's snapshots so there's no
784 	 * conflict when we promote the clone.
785 	 */
786 	sd.max++;
787 	sd.cntr = 0;
788 	if (zfs_iter_snapshots(src_zhp, rename_snap, &sd) != 0)
789 		return (Z_ERR);
790 
791 	/* close and reopen the clone dataset to get the latest info */
792 	zfs_close(cln_zhp);
793 	if ((cln_zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
794 		return (Z_ERR);
795 
796 	if (zfs_promote(cln_zhp) != 0) {
797 		(void) fprintf(stderr, gettext("failed to promote %s: %s\n"),
798 		    nm, libzfs_error_description(g_zfs));
799 		return (Z_ERR);
800 	}
801 
802 	zfs_close(cln_zhp);
803 	return (Z_OK);
804 }
805 
806 /*
807  * Promote the youngest clone.  That clone will then become the origin of all
808  * of the other clones that were hanging off of the source dataset.
809  */
810 int
811 promote_all_clones(zfs_handle_t *zhp)
812 {
813 	clone_data_t	cd;
814 	char		nm[ZFS_MAXNAMELEN];
815 
816 	cd.clone_zhp = NULL;
817 	cd.origin_creation = 0;
818 	cd.snapshot = NULL;
819 
820 	if (zfs_iter_snapshots(zhp, find_clone, &cd) != 0) {
821 		zfs_close(zhp);
822 		return (Z_ERR);
823 	}
824 
825 	/* Nothing to promote. */
826 	if (cd.clone_zhp == NULL)
827 		return (Z_OK);
828 
829 	/* Found the youngest clone to promote.  Promote it. */
830 	if (promote_clone(zhp, cd.clone_zhp) != 0) {
831 		zfs_close(cd.clone_zhp);
832 		zfs_close(zhp);
833 		return (Z_ERR);
834 	}
835 
836 	/* close and reopen the main dataset to get the latest info */
837 	(void) strlcpy(nm, zfs_get_name(zhp), sizeof (nm));
838 	zfs_close(zhp);
839 	if ((zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
840 		return (Z_ERR);
841 
842 	return (Z_OK);
843 }
844 
845 /*
846  * Clone a pre-existing ZFS snapshot, either by making a direct ZFS clone, if
847  * possible, or by copying the data from the snapshot to the zonepath.
848  */
849 int
850 clone_snapshot_zfs(char *snap_name, char *zonepath, char *validatesnap)
851 {
852 	int	err = Z_OK;
853 	char	clone_name[MAXPATHLEN];
854 	char	snap_path[MAXPATHLEN];
855 
856 	if (snap2path(snap_name, snap_path, sizeof (snap_path)) != Z_OK) {
857 		(void) fprintf(stderr, gettext("unable to find path for %s.\n"),
858 		    snap_name);
859 		return (Z_ERR);
860 	}
861 
862 	if (validate_snapshot(snap_name, snap_path, validatesnap) != Z_OK)
863 		return (Z_NO_ENTRY);
864 
865 	/*
866 	 * The zonepath cannot be ZFS cloned, try to copy the data from
867 	 * within the snapshot to the zonepath.
868 	 */
869 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
870 		if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
871 			if (clean_out_clone() != Z_OK)
872 				(void) fprintf(stderr,
873 				    gettext("could not remove the "
874 				    "software inventory from %s\n"), zonepath);
875 
876 		return (err);
877 	}
878 
879 	if ((err = clone_snap(snap_name, clone_name)) != Z_OK) {
880 		if (err != Z_NO_ENTRY) {
881 			/*
882 			 * Cloning the snapshot failed.  Fall back to trying
883 			 * to install the zone by copying from the snapshot.
884 			 */
885 			if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
886 				if (clean_out_clone() != Z_OK)
887 					(void) fprintf(stderr,
888 					    gettext("could not remove the "
889 					    "software inventory from %s\n"),
890 					    zonepath);
891 		} else {
892 			/*
893 			 * The snapshot is unusable for some reason so restore
894 			 * the zone state to configured since we were unable to
895 			 * actually do anything about getting the zone
896 			 * installed.
897 			 */
898 			int tmp;
899 
900 			if ((tmp = zone_set_state(target_zone,
901 			    ZONE_STATE_CONFIGURED)) != Z_OK) {
902 				errno = tmp;
903 				zperror2(target_zone,
904 				    gettext("could not set state"));
905 			}
906 		}
907 	}
908 
909 	return (err);
910 }
911 
912 /*
913  * Attempt to clone a source_zone to a target zonepath by using a ZFS clone.
914  */
915 int
916 clone_zfs(char *source_zonepath, char *zonepath, char *presnapbuf,
917     char *postsnapbuf)
918 {
919 	zfs_handle_t	*zhp;
920 	char		clone_name[MAXPATHLEN];
921 	char		snap_name[MAXPATHLEN];
922 
923 	/*
924 	 * Try to get a zfs handle for the source_zonepath.  If this fails
925 	 * the source_zonepath is not ZFS so return an error.
926 	 */
927 	if ((zhp = mount2zhandle(source_zonepath)) == NULL)
928 		return (Z_ERR);
929 
930 	/*
931 	 * Check if there is a file system already mounted on zonepath.  If so,
932 	 * we can't clone to the path so we should fall back to copying.
933 	 */
934 	if (is_mountpnt(zonepath)) {
935 		zfs_close(zhp);
936 		(void) fprintf(stderr,
937 		    gettext("A file system is already mounted on %s,\n"
938 		    "preventing use of a ZFS clone.\n"), zonepath);
939 		return (Z_ERR);
940 	}
941 
942 	/*
943 	 * Instead of using path2name to get the clone name from the zonepath,
944 	 * we could generate a name from the source zone ZFS name.  However,
945 	 * this would mean we would create the clone under the ZFS fs of the
946 	 * source instead of what the zonepath says.  For example,
947 	 *
948 	 * source_zonepath		zonepath
949 	 * /pl/zones/dev/z1		/pl/zones/deploy/z2
950 	 *
951 	 * We don't want the clone to be under "dev", we want it under
952 	 * "deploy", so that we can leverage the normal attribute inheritance
953 	 * that ZFS provides in the fs hierarchy.
954 	 */
955 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
956 		zfs_close(zhp);
957 		return (Z_ERR);
958 	}
959 
960 	if (take_snapshot(zhp, snap_name, sizeof (snap_name), presnapbuf,
961 	    postsnapbuf) != Z_OK) {
962 		zfs_close(zhp);
963 		return (Z_ERR);
964 	}
965 	zfs_close(zhp);
966 
967 	if (clone_snap(snap_name, clone_name) != Z_OK) {
968 		/* Clean up the snapshot we just took. */
969 		if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
970 		    != NULL) {
971 			if (zfs_unmount(zhp, NULL, 0) == 0)
972 				(void) zfs_destroy(zhp, B_FALSE);
973 			zfs_close(zhp);
974 		}
975 
976 		return (Z_ERR);
977 	}
978 
979 	(void) printf(gettext("Instead of copying, a ZFS clone has been "
980 	    "created for this zone.\n"));
981 
982 	return (Z_OK);
983 }
984 
985 /*
986  * Attempt to create a ZFS file system for the specified zonepath.
987  * We either will successfully create a ZFS file system and get it mounted
988  * on the zonepath or we don't.  The caller doesn't care since a regular
989  * directory is used for the zonepath if no ZFS file system is mounted there.
990  */
991 void
992 create_zfs_zonepath(char *zonepath)
993 {
994 	zfs_handle_t	*zhp;
995 	char		zfs_name[MAXPATHLEN];
996 	nvlist_t	*props = NULL;
997 
998 	if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
999 		return;
1000 
1001 	/* Check if the dataset already exists. */
1002 	if ((zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) != NULL) {
1003 		zfs_close(zhp);
1004 		return;
1005 	}
1006 
1007 	/*
1008 	 * We turn off zfs SHARENFS and SHARESMB properties on the
1009 	 * zoneroot dataset in order to prevent the GZ from sharing
1010 	 * NGZ data by accident.
1011 	 */
1012 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
1013 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
1014 	    "off") != 0) ||
1015 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
1016 	    "off") != 0)) {
1017 		if (props != NULL)
1018 			nvlist_free(props);
1019 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1020 		    "out of memory\n"), zfs_name);
1021 	}
1022 
1023 	if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, props) != 0 ||
1024 	    (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) == NULL) {
1025 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1026 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1027 		nvlist_free(props);
1028 		return;
1029 	}
1030 
1031 	nvlist_free(props);
1032 
1033 	if (zfs_mount(zhp, NULL, 0) != 0) {
1034 		(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
1035 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1036 		(void) zfs_destroy(zhp, B_FALSE);
1037 	} else {
1038 		if (chmod(zonepath, S_IRWXU) != 0) {
1039 			(void) fprintf(stderr, gettext("file system %s "
1040 			    "successfully created, but chmod %o failed: %s\n"),
1041 			    zfs_name, S_IRWXU, strerror(errno));
1042 			(void) destroy_zfs(zonepath);
1043 		} else {
1044 			(void) printf(gettext("A ZFS file system has been "
1045 			    "created for this zone.\n"));
1046 		}
1047 	}
1048 
1049 	zfs_close(zhp);
1050 }
1051 
1052 /*
1053  * If the zonepath is a ZFS file system, attempt to destroy it.  We return Z_OK
1054  * if we were able to zfs_destroy the zonepath, otherwise we return Z_ERR
1055  * which means the caller should clean up the zonepath in the traditional
1056  * way.
1057  */
1058 int
1059 destroy_zfs(char *zonepath)
1060 {
1061 	zfs_handle_t	*zhp;
1062 	boolean_t	is_clone = B_FALSE;
1063 	char		origin[ZFS_MAXPROPLEN];
1064 
1065 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1066 		return (Z_ERR);
1067 
1068 	if (promote_all_clones(zhp) != 0)
1069 		return (Z_ERR);
1070 
1071 	/* Now cleanup any snapshots remaining. */
1072 	if (zfs_iter_snapshots(zhp, rm_snap, NULL) != 0) {
1073 		zfs_close(zhp);
1074 		return (Z_ERR);
1075 	}
1076 
1077 	/*
1078 	 * We can't destroy the file system if it still has dependents.
1079 	 * There shouldn't be any at this point, but we'll double check.
1080 	 */
1081 	if (zfs_iter_dependents(zhp, B_TRUE, has_dependent, NULL) != 0) {
1082 		(void) fprintf(stderr, gettext("zfs destroy %s failed: the "
1083 		    "dataset still has dependents\n"), zfs_get_name(zhp));
1084 		zfs_close(zhp);
1085 		return (Z_ERR);
1086 	}
1087 
1088 	/*
1089 	 * This might be a clone.  Try to get the snapshot so we can attempt
1090 	 * to destroy that as well.
1091 	 */
1092 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
1093 	    NULL, 0, B_FALSE) == 0)
1094 		is_clone = B_TRUE;
1095 
1096 	if (zfs_unmount(zhp, NULL, 0) != 0) {
1097 		(void) fprintf(stderr, gettext("zfs unmount %s failed: %s\n"),
1098 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1099 		zfs_close(zhp);
1100 		return (Z_ERR);
1101 	}
1102 
1103 	if (zfs_destroy(zhp, B_FALSE) != 0) {
1104 		/*
1105 		 * If the destroy fails for some reason, try to remount
1106 		 * the file system so that we can use "rm -rf" to clean up
1107 		 * instead.
1108 		 */
1109 		(void) fprintf(stderr, gettext("zfs destroy %s failed: %s\n"),
1110 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1111 		(void) zfs_mount(zhp, NULL, 0);
1112 		zfs_close(zhp);
1113 		return (Z_ERR);
1114 	}
1115 
1116 	/*
1117 	 * If the zone has ever been moved then the mountpoint dir will not be
1118 	 * cleaned up by the zfs_destroy().  To handle this case try to clean
1119 	 * it up now but don't worry if it fails, that will be normal.
1120 	 */
1121 	(void) rmdir(zonepath);
1122 
1123 	(void) printf(gettext("The ZFS file system for this zone has been "
1124 	    "destroyed.\n"));
1125 
1126 	if (is_clone) {
1127 		zfs_handle_t	*ohp;
1128 
1129 		/*
1130 		 * Try to clean up the snapshot that the clone was taken from.
1131 		 */
1132 		if ((ohp = zfs_open(g_zfs, origin,
1133 		    ZFS_TYPE_SNAPSHOT)) != NULL) {
1134 			if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
1135 			    NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
1136 				(void) zfs_destroy(ohp, B_FALSE);
1137 			zfs_close(ohp);
1138 		}
1139 	}
1140 
1141 	zfs_close(zhp);
1142 	return (Z_OK);
1143 }
1144 
1145 /*
1146  * Return true if the path is its own zfs file system.  We determine this
1147  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
1148  * if it is a different fs.
1149  */
1150 boolean_t
1151 is_zonepath_zfs(char *zonepath)
1152 {
1153 	int res;
1154 	char *path;
1155 	char *parent;
1156 	struct statvfs64 buf1, buf2;
1157 
1158 	if (statvfs64(zonepath, &buf1) != 0)
1159 		return (B_FALSE);
1160 
1161 	if (strcmp(buf1.f_basetype, "zfs") != 0)
1162 		return (B_FALSE);
1163 
1164 	if ((path = strdup(zonepath)) == NULL)
1165 		return (B_FALSE);
1166 
1167 	parent = dirname(path);
1168 	res = statvfs64(parent, &buf2);
1169 	free(path);
1170 
1171 	if (res != 0)
1172 		return (B_FALSE);
1173 
1174 	if (buf1.f_fsid == buf2.f_fsid)
1175 		return (B_FALSE);
1176 
1177 	return (B_TRUE);
1178 }
1179 
1180 /*
1181  * Implement the fast move of a ZFS file system by simply updating the
1182  * mountpoint.  Since it is file system already, we don't have the
1183  * issue of cross-file system copying.
1184  */
1185 int
1186 move_zfs(char *zonepath, char *new_zonepath)
1187 {
1188 	int		ret = Z_ERR;
1189 	zfs_handle_t	*zhp;
1190 
1191 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1192 		return (Z_ERR);
1193 
1194 	if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1195 	    new_zonepath) == 0) {
1196 		/*
1197 		 * Clean up the old mount point.  We ignore any failure since
1198 		 * the zone is already successfully mounted on the new path.
1199 		 */
1200 		(void) rmdir(zonepath);
1201 		ret = Z_OK;
1202 	}
1203 
1204 	zfs_close(zhp);
1205 
1206 	return (ret);
1207 }
1208 
1209 /*
1210  * Validate that the given dataset exists on the system, and that neither it nor
1211  * its children are zvols.
1212  *
1213  * Note that we don't do anything with the 'zoned' property here.  All
1214  * management is done in zoneadmd when the zone is actually rebooted.  This
1215  * allows us to automatically set the zoned property even when a zone is
1216  * rebooted by the administrator.
1217  */
1218 int
1219 verify_datasets(zone_dochandle_t handle)
1220 {
1221 	int return_code = Z_OK;
1222 	struct zone_dstab dstab;
1223 	zfs_handle_t *zhp;
1224 	char propbuf[ZFS_MAXPROPLEN];
1225 	char source[ZFS_MAXNAMELEN];
1226 	zprop_source_t srctype;
1227 
1228 	if (zonecfg_setdsent(handle) != Z_OK) {
1229 		/*
1230 		 * TRANSLATION_NOTE
1231 		 * zfs and dataset are literals that should not be translated.
1232 		 */
1233 		(void) fprintf(stderr, gettext("could not verify zfs datasets: "
1234 		    "unable to enumerate datasets\n"));
1235 		return (Z_ERR);
1236 	}
1237 
1238 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
1239 
1240 		if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
1241 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1242 			(void) fprintf(stderr, gettext("could not verify zfs "
1243 			    "dataset %s: %s\n"), dstab.zone_dataset_name,
1244 			    libzfs_error_description(g_zfs));
1245 			return_code = Z_ERR;
1246 			continue;
1247 		}
1248 
1249 		if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf,
1250 		    sizeof (propbuf), &srctype, source,
1251 		    sizeof (source), 0) == 0 &&
1252 		    (srctype == ZPROP_SRC_INHERITED)) {
1253 			(void) fprintf(stderr, gettext("could not verify zfs "
1254 			    "dataset %s: mountpoint cannot be inherited\n"),
1255 			    dstab.zone_dataset_name);
1256 			return_code = Z_ERR;
1257 			zfs_close(zhp);
1258 			continue;
1259 		}
1260 
1261 		if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) {
1262 			(void) fprintf(stderr, gettext("cannot verify zfs "
1263 			    "dataset %s: volumes cannot be specified as a "
1264 			    "zone dataset resource\n"),
1265 			    dstab.zone_dataset_name);
1266 			return_code = Z_ERR;
1267 		}
1268 
1269 		if (zfs_iter_children(zhp, check_zvol, NULL) != 0)
1270 			return_code = Z_ERR;
1271 
1272 		zfs_close(zhp);
1273 	}
1274 	(void) zonecfg_enddsent(handle);
1275 
1276 	return (return_code);
1277 }
1278 
1279 /*
1280  * Verify that the ZFS dataset exists, and its mountpoint
1281  * property is set to "legacy".
1282  */
1283 int
1284 verify_fs_zfs(struct zone_fstab *fstab)
1285 {
1286 	zfs_handle_t *zhp;
1287 	char propbuf[ZFS_MAXPROPLEN];
1288 
1289 	if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
1290 	    ZFS_TYPE_DATASET)) == NULL) {
1291 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1292 		    "could not access zfs dataset '%s'\n"),
1293 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1294 		return (Z_ERR);
1295 	}
1296 
1297 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
1298 		(void) fprintf(stderr, gettext("cannot verify fs %s: "
1299 		    "'%s' is not a file system\n"),
1300 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1301 		zfs_close(zhp);
1302 		return (Z_ERR);
1303 	}
1304 
1305 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1306 	    NULL, NULL, 0, 0) != 0 || strcmp(propbuf, "legacy") != 0) {
1307 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1308 		    "zfs '%s' mountpoint is not \"legacy\"\n"),
1309 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1310 		zfs_close(zhp);
1311 		return (Z_ERR);
1312 	}
1313 
1314 	zfs_close(zhp);
1315 	return (Z_OK);
1316 }
1317 
1318 /*
1319  * Destroy the specified mnttab structure that was created by mnttab_dup().
1320  * NOTE: The structure's mnt_time field isn't freed.
1321  */
1322 static void
1323 mnttab_destroy(struct mnttab *tabp)
1324 {
1325 	assert(tabp != NULL);
1326 
1327 	free(tabp->mnt_mountp);
1328 	free(tabp->mnt_special);
1329 	free(tabp->mnt_fstype);
1330 	free(tabp->mnt_mntopts);
1331 	free(tabp);
1332 }
1333 
1334 /*
1335  * Duplicate the specified mnttab structure.  The mnt_mountp and mnt_time
1336  * fields aren't duplicated.  This function returns a pointer to the new mnttab
1337  * structure or NULL if an error occurred.  If an error occurs, then this
1338  * function sets errno to reflect the error.  mnttab structures created by
1339  * this function should be destroyed via mnttab_destroy().
1340  */
1341 static struct mnttab *
1342 mnttab_dup(const struct mnttab *srcp)
1343 {
1344 	struct mnttab *retval;
1345 
1346 	assert(srcp != NULL);
1347 
1348 	retval = (struct mnttab *)calloc(1, sizeof (*retval));
1349 	if (retval == NULL) {
1350 		errno = ENOMEM;
1351 		return (NULL);
1352 	}
1353 	if (srcp->mnt_special != NULL) {
1354 		retval->mnt_special = strdup(srcp->mnt_special);
1355 		if (retval->mnt_special == NULL)
1356 			goto err;
1357 	}
1358 	if (srcp->mnt_fstype != NULL) {
1359 		retval->mnt_fstype = strdup(srcp->mnt_fstype);
1360 		if (retval->mnt_fstype == NULL)
1361 			goto err;
1362 	}
1363 	retval->mnt_mntopts = (char *)malloc(MAX_MNTOPT_STR * sizeof (char));
1364 	if (retval->mnt_mntopts == NULL)
1365 		goto err;
1366 	if (srcp->mnt_mntopts != NULL) {
1367 		if (strlcpy(retval->mnt_mntopts, srcp->mnt_mntopts,
1368 		    MAX_MNTOPT_STR * sizeof (char)) >= MAX_MNTOPT_STR *
1369 		    sizeof (char)) {
1370 			mnttab_destroy(retval);
1371 			errno = EOVERFLOW; /* similar to mount(2) behavior */
1372 			return (NULL);
1373 		}
1374 	} else {
1375 		retval->mnt_mntopts[0] = '\0';
1376 	}
1377 	return (retval);
1378 
1379 err:
1380 	mnttab_destroy(retval);
1381 	errno = ENOMEM;
1382 	return (NULL);
1383 }
1384 
1385 /*
1386  * Determine whether the specified ZFS dataset's mountpoint property is set
1387  * to "legacy".  If the specified dataset does not have a legacy mountpoint,
1388  * then the string pointer to which the mountpoint argument points is assigned
1389  * a dynamically-allocated string containing the dataset's mountpoint
1390  * property.  If the dataset's mountpoint property is "legacy" or a libzfs
1391  * error occurs, then the string pointer to which the mountpoint argument
1392  * points isn't modified.
1393  *
1394  * This function returns B_TRUE if it doesn't encounter any fatal errors.
1395  * It returns B_FALSE if it encounters a fatal error and sets errno to the
1396  * appropriate error code.
1397  */
1398 static boolean_t
1399 get_zfs_non_legacy_mountpoint(const char *dataset_name, char **mountpoint)
1400 {
1401 	zfs_handle_t *zhp;
1402 	char propbuf[ZFS_MAXPROPLEN];
1403 
1404 	assert(dataset_name != NULL);
1405 	assert(mountpoint != NULL);
1406 
1407 	if ((zhp = zfs_open(g_zfs, dataset_name, ZFS_TYPE_DATASET)) == NULL) {
1408 		errno = EINVAL;
1409 		return (B_FALSE);
1410 	}
1411 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1412 	    NULL, NULL, 0, 0) != 0) {
1413 		zfs_close(zhp);
1414 		errno = EINVAL;
1415 		return (B_FALSE);
1416 	}
1417 	zfs_close(zhp);
1418 	if (strcmp(propbuf, "legacy") != 0) {
1419 		if ((*mountpoint = strdup(propbuf)) == NULL) {
1420 			errno = ENOMEM;
1421 			return (B_FALSE);
1422 		}
1423 	}
1424 	return (B_TRUE);
1425 }
1426 
1427 
1428 /*
1429  * This zonecfg_find_mounts() callback records information about mounts of
1430  * interest in a zonepath.  It also tallies the number of zone
1431  * root overlay mounts and the number of unexpected mounts found.
1432  * This function outputs errors using zerror() if it finds unexpected
1433  * mounts.  cookiep should point to an initialized zone_mounts_t structure.
1434  *
1435  * This function returns zero on success and a nonzero value on failure.
1436  */
1437 static int
1438 zone_mounts_cb(const struct mnttab *mountp, void *cookiep)
1439 {
1440 	zone_mounts_t *mounts;
1441 	const char *zone_mount_dir;
1442 
1443 	assert(mountp != NULL);
1444 	assert(cookiep != NULL);
1445 
1446 	mounts = (zone_mounts_t *)cookiep;
1447 	zone_mount_dir = mountp->mnt_mountp + mounts->zonepath_len;
1448 	if (strcmp(zone_mount_dir, "/root") == 0) {
1449 		/*
1450 		 * Check for an overlay mount.  If we already detected a /root
1451 		 * mount, then the current mount must be an overlay mount.
1452 		 */
1453 		if (mounts->root_mnttab != NULL) {
1454 			mounts->num_root_overlay_mounts++;
1455 			return (0);
1456 		}
1457 
1458 		/*
1459 		 * Store the root mount's mnttab information in the
1460 		 * zone_mounts_t structure for future use.
1461 		 */
1462 		if ((mounts->root_mnttab = mnttab_dup(mountp)) == NULL) {
1463 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1464 			return (-1);
1465 		}
1466 
1467 		/*
1468 		 * Determine if the filesystem is a ZFS filesystem with a
1469 		 * non-legacy mountpoint.  If it is, then set the root
1470 		 * filesystem's mnttab's mnt_mountp field to a non-NULL
1471 		 * value, which will serve as a flag to indicate this special
1472 		 * condition.
1473 		 */
1474 		if (strcmp(mountp->mnt_fstype, MNTTYPE_ZFS) == 0 &&
1475 		    get_zfs_non_legacy_mountpoint(mountp->mnt_special,
1476 		    &mounts->root_mnttab->mnt_mountp) != B_TRUE) {
1477 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1478 			return (-1);
1479 		}
1480 	} else {
1481 		/*
1482 		 * An unexpected mount was found.  Notify the user.
1483 		 */
1484 		if (mounts->num_unexpected_mounts == 0)
1485 			zerror(gettext("These file systems are mounted on "
1486 			    "subdirectories of %s.\n"), mounts->zonepath);
1487 		mounts->num_unexpected_mounts++;
1488 		(void) zfm_print(mountp, NULL);
1489 	}
1490 	return (0);
1491 }
1492 
1493 /*
1494  * Initialize the specified zone_mounts_t structure for the given zonepath.
1495  * If this function succeeds, it returns zero and the specified zone_mounts_t
1496  * structure contains information about mounts in the specified zonepath.
1497  * The function returns a nonzero value if it fails.  The zone_mounts_t
1498  * structure doesn't need be destroyed via zone_mounts_destroy() if this
1499  * function fails.
1500  */
1501 int
1502 zone_mounts_init(zone_mounts_t *mounts, const char *zonepath)
1503 {
1504 	assert(mounts != NULL);
1505 	assert(zonepath != NULL);
1506 
1507 	bzero(mounts, sizeof (*mounts));
1508 	if ((mounts->zonepath = strdup(zonepath)) == NULL) {
1509 		zerror(gettext("the process ran out of memory while checking "
1510 		    "for mounts in zonepath %s."), zonepath);
1511 		return (-1);
1512 	}
1513 	mounts->zonepath_len = strlen(zonepath);
1514 	if (zonecfg_find_mounts((char *)zonepath, zone_mounts_cb, mounts) ==
1515 	    -1) {
1516 		zerror(gettext("an error occurred while checking for mounts "
1517 		    "in zonepath %s."), zonepath);
1518 		zone_mounts_destroy(mounts);
1519 		return (-1);
1520 	}
1521 	return (0);
1522 }
1523 
1524 /*
1525  * Destroy the memory used by the specified zone_mounts_t structure's fields.
1526  * This function doesn't free the memory occupied by the structure itself
1527  * (i.e., it doesn't free the parameter).
1528  */
1529 void
1530 zone_mounts_destroy(zone_mounts_t *mounts)
1531 {
1532 	assert(mounts != NULL);
1533 
1534 	free(mounts->zonepath);
1535 	if (mounts->root_mnttab != NULL)
1536 		mnttab_destroy(mounts->root_mnttab);
1537 }
1538 
1539 /*
1540  * Mount a moving zone's root filesystem (if it had a root filesystem mount
1541  * prior to the move) using the specified zonepath.  mounts should refer to
1542  * the zone_mounts_t structure describing the zone's mount information.
1543  *
1544  * This function returns zero if the mount succeeds and a nonzero value
1545  * if it doesn't.
1546  */
1547 int
1548 zone_mount_rootfs(zone_mounts_t *mounts, const char *zonepath)
1549 {
1550 	char zoneroot[MAXPATHLEN];
1551 	struct mnttab *mtab;
1552 	int flags;
1553 
1554 	assert(mounts != NULL);
1555 	assert(zonepath != NULL);
1556 
1557 	/*
1558 	 * If there isn't a root filesystem, then don't do anything.
1559 	 */
1560 	mtab = mounts->root_mnttab;
1561 	if (mtab == NULL)
1562 		return (0);
1563 
1564 	/*
1565 	 * Determine the root filesystem's new mountpoint.
1566 	 */
1567 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1568 	    sizeof (zoneroot)) {
1569 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1570 		return (-1);
1571 	}
1572 
1573 	/*
1574 	 * If the root filesystem is a non-legacy ZFS filesystem (i.e., if it's
1575 	 * mnt_mountp field is non-NULL), then make the filesystem's new
1576 	 * mount point its mountpoint property and mount the filesystem.
1577 	 */
1578 	if (mtab->mnt_mountp != NULL) {
1579 		zfs_handle_t *zhp;
1580 
1581 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1582 		    ZFS_TYPE_DATASET)) == NULL) {
1583 			zerror(gettext("could not get ZFS handle for the zone's"
1584 			    " root filesystem"));
1585 			return (-1);
1586 		}
1587 		if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1588 		    zoneroot) != 0) {
1589 			zerror(gettext("could not modify zone's root "
1590 			    "filesystem's mountpoint property"));
1591 			zfs_close(zhp);
1592 			return (-1);
1593 		}
1594 		if (zfs_mount(zhp, mtab->mnt_mntopts, 0) != 0) {
1595 			zerror(gettext("unable to mount zone root %s: %s"),
1596 			    zoneroot, libzfs_error_description(g_zfs));
1597 			if (zfs_prop_set(zhp,
1598 			    zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1599 			    mtab->mnt_mountp) != 0)
1600 				zerror(gettext("unable to restore zone's root "
1601 				    "filesystem's mountpoint property"));
1602 			zfs_close(zhp);
1603 			return (-1);
1604 		}
1605 		zfs_close(zhp);
1606 		return (0);
1607 	}
1608 
1609 	/*
1610 	 * The root filesystem is either a legacy-mounted ZFS filesystem or
1611 	 * a non-ZFS filesystem.  Use mount(2) to mount the root filesystem.
1612 	 */
1613 	if (mtab->mnt_mntopts != NULL)
1614 		flags = MS_OPTIONSTR;
1615 	else
1616 		flags = 0;
1617 	if (mount(mtab->mnt_special, zoneroot, flags, mtab->mnt_fstype, NULL, 0,
1618 	    mtab->mnt_mntopts, MAX_MNTOPT_STR * sizeof (char)) != 0) {
1619 		flags = errno;
1620 		zerror(gettext("unable to mount zone root %s: %s"), zoneroot,
1621 		    strerror(flags));
1622 		return (-1);
1623 	}
1624 	return (0);
1625 }
1626 
1627 /*
1628  * Unmount a moving zone's root filesystem (if such a mount exists) using the
1629  * specified zonepath.  mounts should refer to the zone_mounts_t structure
1630  * describing the zone's mount information.  If force is B_TRUE, then if the
1631  * unmount fails, then the function will try to forcibly unmount the zone's root
1632  * filesystem.
1633  *
1634  * This function returns zero if the unmount (forced or otherwise) succeeds;
1635  * otherwise, it returns a nonzero value.
1636  */
1637 int
1638 zone_unmount_rootfs(zone_mounts_t *mounts, const char *zonepath,
1639     boolean_t force)
1640 {
1641 	char zoneroot[MAXPATHLEN];
1642 	struct mnttab *mtab;
1643 	int err;
1644 
1645 	assert(mounts != NULL);
1646 	assert(zonepath != NULL);
1647 
1648 	/*
1649 	 * If there isn't a root filesystem, then don't do anything.
1650 	 */
1651 	mtab = mounts->root_mnttab;
1652 	if (mtab == NULL)
1653 		return (0);
1654 
1655 	/*
1656 	 * Determine the root filesystem's mountpoint.
1657 	 */
1658 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1659 	    sizeof (zoneroot)) {
1660 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1661 		return (-1);
1662 	}
1663 
1664 	/*
1665 	 * If the root filesystem is a non-legacy ZFS fileystem, then unmount
1666 	 * the filesystem via libzfs.
1667 	 */
1668 	if (mtab->mnt_mountp != NULL) {
1669 		zfs_handle_t *zhp;
1670 
1671 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1672 		    ZFS_TYPE_DATASET)) == NULL) {
1673 			zerror(gettext("could not get ZFS handle for the zone's"
1674 			    " root filesystem"));
1675 			return (-1);
1676 		}
1677 		if (zfs_unmount(zhp, zoneroot, 0) != 0) {
1678 			if (force && zfs_unmount(zhp, zoneroot, MS_FORCE) ==
1679 			    0) {
1680 				zfs_close(zhp);
1681 				return (0);
1682 			}
1683 			zerror(gettext("unable to unmount zone root %s: %s"),
1684 			    zoneroot, libzfs_error_description(g_zfs));
1685 			zfs_close(zhp);
1686 			return (-1);
1687 		}
1688 		zfs_close(zhp);
1689 		return (0);
1690 	}
1691 
1692 	/*
1693 	 * Use umount(2) to unmount the root filesystem.  If this fails, then
1694 	 * forcibly unmount it if the force flag is set.
1695 	 */
1696 	if (umount(zoneroot) != 0) {
1697 		if (force && umount2(zoneroot, MS_FORCE) == 0)
1698 			return (0);
1699 		err = errno;
1700 		zerror(gettext("unable to unmount zone root %s: %s"), zoneroot,
1701 		    strerror(err));
1702 		return (-1);
1703 	}
1704 	return (0);
1705 }
1706 
1707 int
1708 init_zfs(void)
1709 {
1710 	if ((g_zfs = libzfs_init()) == NULL) {
1711 		(void) fprintf(stderr, gettext("failed to initialize ZFS "
1712 		    "library\n"));
1713 		return (Z_ERR);
1714 	}
1715 
1716 	return (Z_OK);
1717 }
1718