xref: /titanic_41/usr/src/cmd/zoneadm/zfs.c (revision 989f28072d20c73ae0955d6a1e3e2fc74831cb39)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012 by Delphix. All rights reserved.
25  */
26 
27 /*
28  * This file contains the functions used to support the ZFS integration
29  * with zones.  This includes validation (e.g. zonecfg dataset), cloning,
30  * file system creation and destruction.
31  */
32 
33 #include <stdio.h>
34 #include <errno.h>
35 #include <unistd.h>
36 #include <string.h>
37 #include <locale.h>
38 #include <libintl.h>
39 #include <sys/stat.h>
40 #include <sys/statvfs.h>
41 #include <libgen.h>
42 #include <libzonecfg.h>
43 #include <sys/mnttab.h>
44 #include <libzfs.h>
45 #include <sys/mntent.h>
46 #include <values.h>
47 #include <strings.h>
48 #include <assert.h>
49 
50 #include "zoneadm.h"
51 
52 libzfs_handle_t *g_zfs;
53 
54 typedef struct zfs_mount_data {
55 	char		*match_name;
56 	zfs_handle_t	*match_handle;
57 } zfs_mount_data_t;
58 
/*
 * Callback state shared by get_snap_max() and rename_snap() while walking
 * the automatically generated "SUNWzone" snapshots of a dataset.
 */
typedef struct zfs_snapshot_data {
	/* snapshot name prefix to match, e.g. zonename@SUNWzone */
	char	*match_name;
	/* strlen of match_name */
	int	len;
	/* highest number found appended to a matching snapshot name */
	int	max;
	/* number of matching snapshots, i.e. how many need renaming */
	int	num;
	/* number of snapshots rename_snap() has processed so far */
	int	cntr;
} zfs_snapshot_data_t;
66 
67 typedef struct clone_data {
68 	zfs_handle_t	*clone_zhp;	/* clone dataset to promote */
69 	time_t		origin_creation; /* snapshot creation time of clone */
70 	const char	*snapshot;	/* snapshot of dataset being demoted */
71 } clone_data_t;
72 
73 /*
74  * A ZFS file system iterator call-back function which is used to validate
75  * datasets imported into the zone.
76  */
77 /* ARGSUSED */
78 static int
79 check_zvol(zfs_handle_t *zhp, void *unused)
80 {
81 	int ret;
82 
83 	if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) {
84 		/*
85 		 * TRANSLATION_NOTE
86 		 * zfs and dataset are literals that should not be translated.
87 		 */
88 		(void) fprintf(stderr, gettext("cannot verify zfs dataset %s: "
89 		    "volumes cannot be specified as a zone dataset resource\n"),
90 		    zfs_get_name(zhp));
91 		ret = -1;
92 	} else {
93 		ret = zfs_iter_children(zhp, check_zvol, NULL);
94 	}
95 
96 	zfs_close(zhp);
97 
98 	return (ret);
99 }
100 
101 /*
102  * A ZFS file system iterator call-back function which returns the
103  * zfs_handle_t for a ZFS file system on the specified mount point.
104  */
105 static int
106 match_mountpoint(zfs_handle_t *zhp, void *data)
107 {
108 	int			res;
109 	zfs_mount_data_t	*cbp;
110 	char			mp[ZFS_MAXPROPLEN];
111 
112 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
113 		zfs_close(zhp);
114 		return (0);
115 	}
116 
117 	/* First check if the dataset is mounted. */
118 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
119 	    0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
120 		zfs_close(zhp);
121 		return (0);
122 	}
123 
124 	/* Now check mount point. */
125 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
126 	    0, B_FALSE) != 0) {
127 		zfs_close(zhp);
128 		return (0);
129 	}
130 
131 	cbp = (zfs_mount_data_t *)data;
132 
133 	if (strcmp(mp, "legacy") == 0) {
134 		/* If legacy, must look in mnttab for mountpoint. */
135 		FILE		*fp;
136 		struct mnttab	entry;
137 		const char	*nm;
138 
139 		nm = zfs_get_name(zhp);
140 		if ((fp = fopen(MNTTAB, "r")) == NULL) {
141 			zfs_close(zhp);
142 			return (0);
143 		}
144 
145 		while (getmntent(fp, &entry) == 0) {
146 			if (strcmp(nm, entry.mnt_special) == 0) {
147 				if (strcmp(entry.mnt_mountp, cbp->match_name)
148 				    == 0) {
149 					(void) fclose(fp);
150 					cbp->match_handle = zhp;
151 					return (1);
152 				}
153 				break;
154 			}
155 		}
156 		(void) fclose(fp);
157 
158 	} else if (strcmp(mp, cbp->match_name) == 0) {
159 		cbp->match_handle = zhp;
160 		return (1);
161 	}
162 
163 	/* Iterate over any nested datasets. */
164 	res = zfs_iter_filesystems(zhp, match_mountpoint, data);
165 	zfs_close(zhp);
166 	return (res);
167 }
168 
169 /*
170  * Get ZFS handle for the specified mount point.
171  */
172 static zfs_handle_t *
173 mount2zhandle(char *mountpoint)
174 {
175 	zfs_mount_data_t	cb;
176 
177 	cb.match_name = mountpoint;
178 	cb.match_handle = NULL;
179 	(void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
180 	return (cb.match_handle);
181 }
182 
183 /*
184  * Check if there is already a file system (zfs or any other type) mounted on
185  * path.
186  */
187 static boolean_t
188 is_mountpnt(char *path)
189 {
190 	FILE		*fp;
191 	struct mnttab	entry;
192 
193 	if ((fp = fopen(MNTTAB, "r")) == NULL)
194 		return (B_FALSE);
195 
196 	while (getmntent(fp, &entry) == 0) {
197 		if (strcmp(path, entry.mnt_mountp) == 0) {
198 			(void) fclose(fp);
199 			return (B_TRUE);
200 		}
201 	}
202 
203 	(void) fclose(fp);
204 	return (B_FALSE);
205 }
206 
207 /*
208  * Run the brand's pre-snapshot hook before we take a ZFS snapshot of the zone.
209  */
210 static int
211 pre_snapshot(char *presnapbuf)
212 {
213 	int status;
214 
215 	/* No brand-specific handler */
216 	if (presnapbuf[0] == '\0')
217 		return (Z_OK);
218 
219 	/* Run the hook */
220 	status = do_subproc(presnapbuf);
221 	if ((status = subproc_status(gettext("brand-specific presnapshot"),
222 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
223 		return (Z_ERR);
224 
225 	return (Z_OK);
226 }
227 
228 /*
229  * Run the brand's post-snapshot hook after we take a ZFS snapshot of the zone.
230  */
231 static int
232 post_snapshot(char *postsnapbuf)
233 {
234 	int status;
235 
236 	/* No brand-specific handler */
237 	if (postsnapbuf[0] == '\0')
238 		return (Z_OK);
239 
240 	/* Run the hook */
241 	status = do_subproc(postsnapbuf);
242 	if ((status = subproc_status(gettext("brand-specific postsnapshot"),
243 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
244 		return (Z_ERR);
245 
246 	return (Z_OK);
247 }
248 
249 /*
250  * This is a ZFS snapshot iterator call-back function which returns the
251  * highest number of SUNWzone snapshots that have been taken.
252  */
253 static int
254 get_snap_max(zfs_handle_t *zhp, void *data)
255 {
256 	int			res;
257 	zfs_snapshot_data_t	*cbp;
258 
259 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
260 		zfs_close(zhp);
261 		return (0);
262 	}
263 
264 	cbp = (zfs_snapshot_data_t *)data;
265 
266 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) == 0) {
267 		char	*nump;
268 		int	num;
269 
270 		cbp->num++;
271 		nump = (char *)(zfs_get_name(zhp) + cbp->len);
272 		num = atoi(nump);
273 		if (num > cbp->max)
274 			cbp->max = num;
275 	}
276 
277 	res = zfs_iter_snapshots(zhp, get_snap_max, data);
278 	zfs_close(zhp);
279 	return (res);
280 }
281 
282 /*
283  * Take a ZFS snapshot to be used for cloning the zone.
284  */
285 static int
286 take_snapshot(zfs_handle_t *zhp, char *snapshot_name, int snap_size,
287     char *presnapbuf, char *postsnapbuf)
288 {
289 	int			res;
290 	char			template[ZFS_MAXNAMELEN];
291 	zfs_snapshot_data_t	cb;
292 
293 	/*
294 	 * First we need to figure out the next available name for the
295 	 * zone snapshot.  Look through the list of zones snapshots for
296 	 * this file system to determine the maximum snapshot name.
297 	 */
298 	if (snprintf(template, sizeof (template), "%s@SUNWzone",
299 	    zfs_get_name(zhp)) >=  sizeof (template))
300 		return (Z_ERR);
301 
302 	cb.match_name = template;
303 	cb.len = strlen(template);
304 	cb.max = 0;
305 
306 	if (zfs_iter_snapshots(zhp, get_snap_max, &cb) != 0)
307 		return (Z_ERR);
308 
309 	cb.max++;
310 
311 	if (snprintf(snapshot_name, snap_size, "%s@SUNWzone%d",
312 	    zfs_get_name(zhp), cb.max) >= snap_size)
313 		return (Z_ERR);
314 
315 	if (pre_snapshot(presnapbuf) != Z_OK)
316 		return (Z_ERR);
317 	res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE, NULL);
318 	if (post_snapshot(postsnapbuf) != Z_OK)
319 		return (Z_ERR);
320 
321 	if (res != 0)
322 		return (Z_ERR);
323 	return (Z_OK);
324 }
325 
326 /*
327  * We are using an explicit snapshot from some earlier point in time so
328  * we need to validate it.  Run the brand specific hook.
329  */
330 static int
331 validate_snapshot(char *snapshot_name, char *snap_path, char *validsnapbuf)
332 {
333 	int status;
334 	char cmdbuf[MAXPATHLEN];
335 
336 	/* No brand-specific handler */
337 	if (validsnapbuf[0] == '\0')
338 		return (Z_OK);
339 
340 	/* pass args - snapshot_name & snap_path */
341 	if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %s %s", validsnapbuf,
342 	    snapshot_name, snap_path) >= sizeof (cmdbuf)) {
343 		zerror("Command line too long");
344 		return (Z_ERR);
345 	}
346 
347 	/* Run the hook */
348 	status = do_subproc(cmdbuf);
349 	if ((status = subproc_status(gettext("brand-specific validatesnapshot"),
350 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
351 		return (Z_ERR);
352 
353 	return (Z_OK);
354 }
355 
356 /*
357  * Remove the sw inventory file from inside this zonepath that we picked up out
358  * of the snapshot.
359  */
360 static int
361 clean_out_clone()
362 {
363 	int err;
364 	zone_dochandle_t handle;
365 
366 	if ((handle = zonecfg_init_handle()) == NULL) {
367 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
368 		return (Z_ERR);
369 	}
370 
371 	if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
372 		errno = err;
373 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
374 		zonecfg_fini_handle(handle);
375 		return (Z_ERR);
376 	}
377 
378 	zonecfg_rm_detached(handle, B_FALSE);
379 	zonecfg_fini_handle(handle);
380 
381 	return (Z_OK);
382 }
383 
384 /*
385  * Make a ZFS clone on zonepath from snapshot_name.
386  */
387 static int
388 clone_snap(char *snapshot_name, char *zonepath)
389 {
390 	int		res = Z_OK;
391 	int		err;
392 	zfs_handle_t	*zhp;
393 	zfs_handle_t	*clone;
394 	nvlist_t	*props = NULL;
395 
396 	if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
397 		return (Z_NO_ENTRY);
398 
399 	(void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
400 
401 	/*
402 	 * We turn off zfs SHARENFS and SHARESMB properties on the
403 	 * zoneroot dataset in order to prevent the GZ from sharing
404 	 * NGZ data by accident.
405 	 */
406 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
407 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
408 	    "off") != 0) ||
409 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
410 	    "off") != 0)) {
411 		if (props != NULL)
412 			nvlist_free(props);
413 		(void) fprintf(stderr, gettext("could not create ZFS clone "
414 		    "%s: out of memory\n"), zonepath);
415 		return (Z_ERR);
416 	}
417 
418 	err = zfs_clone(zhp, zonepath, props);
419 	zfs_close(zhp);
420 
421 	nvlist_free(props);
422 
423 	if (err != 0)
424 		return (Z_ERR);
425 
426 	/* create the mountpoint if necessary */
427 	if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_DATASET)) == NULL)
428 		return (Z_ERR);
429 
430 	/*
431 	 * The clone has been created so we need to print a diagnostic
432 	 * message if one of the following steps fails for some reason.
433 	 */
434 	if (zfs_mount(clone, NULL, 0) != 0) {
435 		(void) fprintf(stderr, gettext("could not mount ZFS clone "
436 		    "%s\n"), zfs_get_name(clone));
437 		res = Z_ERR;
438 
439 	} else if (clean_out_clone() != Z_OK) {
440 		(void) fprintf(stderr, gettext("could not remove the "
441 		    "software inventory from ZFS clone %s\n"),
442 		    zfs_get_name(clone));
443 		res = Z_ERR;
444 	}
445 
446 	zfs_close(clone);
447 	return (res);
448 }
449 
450 /*
451  * This function takes a zonepath and attempts to determine what the ZFS
452  * file system name (not mountpoint) should be for that path.  We do not
453  * assume that zonepath is an existing directory or ZFS fs since we use
454  * this function as part of the process of creating a new ZFS fs or clone.
455  *
456  * The way this works is that we look at the parent directory of the zonepath
457  * to see if it is a ZFS fs.  If it is, we get the name of that ZFS fs and
458  * append the last component of the zonepath to generate the ZFS name for the
459  * zonepath.  This matches the algorithm that ZFS uses for automatically
460  * mounting a new fs after it is created.
461  *
462  * Although a ZFS fs can be mounted anywhere, we don't worry about handling
463  * all of the complexity that a user could possibly configure with arbitrary
464  * mounts since there is no way to generate a ZFS name from a random path in
465  * the file system.  We only try to handle the automatic mounts that ZFS does
466  * for each file system.  ZFS restricts this so that a new fs must be created
467  * in an existing parent ZFS fs.  It then automatically mounts the new fs
468  * directly under the mountpoint for the parent fs using the last component
469  * of the name as the mountpoint directory.
470  *
471  * For example:
472  *    Name			Mountpoint
473  *    space/eng/dev/test/zone1	/project1/eng/dev/test/zone1
474  *
475  * Return Z_OK if the path mapped to a ZFS file system name, otherwise return
476  * Z_ERR.
477  */
478 static int
479 path2name(char *zonepath, char *zfs_name, int len)
480 {
481 	int		res;
482 	char		*bnm, *dnm, *dname, *bname;
483 	zfs_handle_t	*zhp;
484 	struct stat	stbuf;
485 
486 	/*
487 	 * We need two tmp strings to handle paths directly in / (e.g. /foo)
488 	 * since dirname will overwrite the first char after "/" in this case.
489 	 */
490 	if ((bnm = strdup(zonepath)) == NULL)
491 		return (Z_ERR);
492 
493 	if ((dnm = strdup(zonepath)) == NULL) {
494 		free(bnm);
495 		return (Z_ERR);
496 	}
497 
498 	bname = basename(bnm);
499 	dname = dirname(dnm);
500 
501 	/*
502 	 * This is a quick test to save iterating over all of the zfs datasets
503 	 * on the system (which can be a lot).  If the parent dir is not in a
504 	 * ZFS fs, then we're done.
505 	 */
506 	if (stat(dname, &stbuf) != 0 || !S_ISDIR(stbuf.st_mode) ||
507 	    strcmp(stbuf.st_fstype, MNTTYPE_ZFS) != 0) {
508 		free(bnm);
509 		free(dnm);
510 		return (Z_ERR);
511 	}
512 
513 	/* See if the parent directory is its own ZFS dataset. */
514 	if ((zhp = mount2zhandle(dname)) == NULL) {
515 		/*
516 		 * The parent is not a ZFS dataset so we can't automatically
517 		 * create a dataset on the given path.
518 		 */
519 		free(bnm);
520 		free(dnm);
521 		return (Z_ERR);
522 	}
523 
524 	res = snprintf(zfs_name, len, "%s/%s", zfs_get_name(zhp), bname);
525 
526 	free(bnm);
527 	free(dnm);
528 	zfs_close(zhp);
529 	if (res >= len)
530 		return (Z_ERR);
531 
532 	return (Z_OK);
533 }
534 
535 /*
536  * A ZFS file system iterator call-back function used to determine if the
537  * file system has dependents (snapshots & clones).
538  */
539 /* ARGSUSED */
540 static int
541 has_dependent(zfs_handle_t *zhp, void *data)
542 {
543 	zfs_close(zhp);
544 	return (1);
545 }
546 
547 /*
548  * Given a snapshot name, get the file system path where the snapshot lives.
549  * A snapshot name is of the form fs_name@snap_name.  For example, snapshot
550  * pl/zones/z1@SUNWzone1 would have a path of
551  * /pl/zones/z1/.zfs/snapshot/SUNWzone1.
552  */
553 static int
554 snap2path(char *snap_name, char *path, int len)
555 {
556 	char		*p;
557 	zfs_handle_t	*zhp;
558 	char		mp[ZFS_MAXPROPLEN];
559 
560 	if ((p = strrchr(snap_name, '@')) == NULL)
561 		return (Z_ERR);
562 
563 	/* Get the file system name from the snap_name. */
564 	*p = '\0';
565 	zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_DATASET);
566 	*p = '@';
567 	if (zhp == NULL)
568 		return (Z_ERR);
569 
570 	/* Get the file system mount point. */
571 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
572 	    0, B_FALSE) != 0) {
573 		zfs_close(zhp);
574 		return (Z_ERR);
575 	}
576 	zfs_close(zhp);
577 
578 	p++;
579 	if (snprintf(path, len, "%s/.zfs/snapshot/%s", mp, p) >= len)
580 		return (Z_ERR);
581 
582 	return (Z_OK);
583 }
584 
585 /*
586  * This callback function is used to iterate through a snapshot's dependencies
587  * to find a filesystem that is a direct clone of the snapshot being iterated.
588  */
589 static int
590 get_direct_clone(zfs_handle_t *zhp, void *data)
591 {
592 	clone_data_t	*cd = data;
593 	char		origin[ZFS_MAXNAMELEN];
594 	char		ds_path[ZFS_MAXNAMELEN];
595 
596 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
597 		zfs_close(zhp);
598 		return (0);
599 	}
600 
601 	(void) strlcpy(ds_path, zfs_get_name(zhp), sizeof (ds_path));
602 
603 	/* Make sure this is a direct clone of the snapshot we're iterating. */
604 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
605 	    NULL, 0, B_FALSE) != 0 || strcmp(origin, cd->snapshot) != 0) {
606 		zfs_close(zhp);
607 		return (0);
608 	}
609 
610 	if (cd->clone_zhp != NULL)
611 		zfs_close(cd->clone_zhp);
612 
613 	cd->clone_zhp = zhp;
614 	return (1);
615 }
616 
617 /*
618  * A ZFS file system iterator call-back function used to determine the clone
619  * to promote.  This function finds the youngest (i.e. last one taken) snapshot
620  * that has a clone.  If found, it returns a reference to that clone in the
621  * callback data.
622  */
623 static int
624 find_clone(zfs_handle_t *zhp, void *data)
625 {
626 	clone_data_t	*cd = data;
627 	time_t		snap_creation;
628 	int		zret = 0;
629 
630 	/* If snapshot has no clones, skip it */
631 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) == 0) {
632 		zfs_close(zhp);
633 		return (0);
634 	}
635 
636 	cd->snapshot = zfs_get_name(zhp);
637 
638 	/* Get the creation time of this snapshot */
639 	snap_creation = (time_t)zfs_prop_get_int(zhp, ZFS_PROP_CREATION);
640 
641 	/*
642 	 * If this snapshot's creation time is greater than (i.e. younger than)
643 	 * the current youngest snapshot found, iterate this snapshot to
644 	 * get the right clone.
645 	 */
646 	if (snap_creation >= cd->origin_creation) {
647 		/*
648 		 * Iterate the dependents of this snapshot to find a clone
649 		 * that's a direct dependent.
650 		 */
651 		if ((zret = zfs_iter_dependents(zhp, B_FALSE, get_direct_clone,
652 		    cd)) == -1) {
653 			zfs_close(zhp);
654 			return (1);
655 		} else if (zret == 1) {
656 			/*
657 			 * Found a clone, update the origin_creation time
658 			 * in the callback data.
659 			 */
660 			cd->origin_creation = snap_creation;
661 		}
662 	}
663 
664 	zfs_close(zhp);
665 	return (0);
666 }
667 
668 /*
669  * A ZFS file system iterator call-back function used to remove standalone
670  * snapshots.
671  */
672 /* ARGSUSED */
673 static int
674 rm_snap(zfs_handle_t *zhp, void *data)
675 {
676 	/* If snapshot has clones, something is wrong */
677 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) != 0) {
678 		zfs_close(zhp);
679 		return (1);
680 	}
681 
682 	if (zfs_unmount(zhp, NULL, 0) == 0) {
683 		(void) zfs_destroy(zhp, B_FALSE);
684 	}
685 
686 	zfs_close(zhp);
687 	return (0);
688 }
689 
690 /*
691  * A ZFS snapshot iterator call-back function which renames snapshots.
692  */
693 static int
694 rename_snap(zfs_handle_t *zhp, void *data)
695 {
696 	int			res;
697 	zfs_snapshot_data_t	*cbp;
698 	char			template[ZFS_MAXNAMELEN];
699 
700 	cbp = (zfs_snapshot_data_t *)data;
701 
702 	/*
703 	 * When renaming snapshots with the iterator, the iterator can see
704 	 * the same snapshot after we've renamed up in the namespace.  To
705 	 * prevent this we check the count for the number of snapshots we have
706 	 * to rename and stop at that point.
707 	 */
708 	if (cbp->cntr >= cbp->num) {
709 		zfs_close(zhp);
710 		return (0);
711 	}
712 
713 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
714 		zfs_close(zhp);
715 		return (0);
716 	}
717 
718 	/* Only rename the snapshots we automatically generate when we clone. */
719 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) != 0) {
720 		zfs_close(zhp);
721 		return (0);
722 	}
723 
724 	(void) snprintf(template, sizeof (template), "%s%d", cbp->match_name,
725 	    cbp->max++);
726 
727 	res = (zfs_rename(zhp, template, B_FALSE, B_FALSE) != 0);
728 	if (res != 0)
729 		(void) fprintf(stderr, gettext("failed to rename snapshot %s "
730 		    "to %s: %s\n"), zfs_get_name(zhp), template,
731 		    libzfs_error_description(g_zfs));
732 
733 	cbp->cntr++;
734 
735 	zfs_close(zhp);
736 	return (res);
737 }
738 
739 /*
740  * Rename the source dataset's snapshots that are automatically generated when
741  * we clone a zone so that there won't be a name collision when we promote the
742  * cloned dataset.  Once the snapshots have been renamed, then promote the
743  * clone.
744  *
745  * The snapshot rename process gets the highest number on the snapshot names
746  * (the format is zonename@SUNWzoneXX where XX are digits) on both the source
747  * and clone datasets, then renames the source dataset snapshots starting at
748  * the next number.
749  */
750 static int
751 promote_clone(zfs_handle_t *src_zhp, zfs_handle_t *cln_zhp)
752 {
753 	zfs_snapshot_data_t	sd;
754 	char			nm[ZFS_MAXNAMELEN];
755 	char			template[ZFS_MAXNAMELEN];
756 
757 	(void) strlcpy(nm, zfs_get_name(cln_zhp), sizeof (nm));
758 	/*
759 	 * Start by getting the clone's snapshot max which we use
760 	 * during the rename of the original dataset's snapshots.
761 	 */
762 	(void) snprintf(template, sizeof (template), "%s@SUNWzone", nm);
763 	sd.match_name = template;
764 	sd.len = strlen(template);
765 	sd.max = 0;
766 
767 	if (zfs_iter_snapshots(cln_zhp, get_snap_max, &sd) != 0)
768 		return (Z_ERR);
769 
770 	/*
771 	 * Now make sure the source's snapshot max is at least as high as
772 	 * the clone's snapshot max.
773 	 */
774 	(void) snprintf(template, sizeof (template), "%s@SUNWzone",
775 	    zfs_get_name(src_zhp));
776 	sd.match_name = template;
777 	sd.len = strlen(template);
778 	sd.num = 0;
779 
780 	if (zfs_iter_snapshots(src_zhp, get_snap_max, &sd) != 0)
781 		return (Z_ERR);
782 
783 	/*
784 	 * Now rename the source dataset's snapshots so there's no
785 	 * conflict when we promote the clone.
786 	 */
787 	sd.max++;
788 	sd.cntr = 0;
789 	if (zfs_iter_snapshots(src_zhp, rename_snap, &sd) != 0)
790 		return (Z_ERR);
791 
792 	/* close and reopen the clone dataset to get the latest info */
793 	zfs_close(cln_zhp);
794 	if ((cln_zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
795 		return (Z_ERR);
796 
797 	if (zfs_promote(cln_zhp) != 0) {
798 		(void) fprintf(stderr, gettext("failed to promote %s: %s\n"),
799 		    nm, libzfs_error_description(g_zfs));
800 		return (Z_ERR);
801 	}
802 
803 	zfs_close(cln_zhp);
804 	return (Z_OK);
805 }
806 
807 /*
808  * Promote the youngest clone.  That clone will then become the origin of all
809  * of the other clones that were hanging off of the source dataset.
810  */
811 int
812 promote_all_clones(zfs_handle_t *zhp)
813 {
814 	clone_data_t	cd;
815 	char		nm[ZFS_MAXNAMELEN];
816 
817 	cd.clone_zhp = NULL;
818 	cd.origin_creation = 0;
819 	cd.snapshot = NULL;
820 
821 	if (zfs_iter_snapshots(zhp, find_clone, &cd) != 0) {
822 		zfs_close(zhp);
823 		return (Z_ERR);
824 	}
825 
826 	/* Nothing to promote. */
827 	if (cd.clone_zhp == NULL)
828 		return (Z_OK);
829 
830 	/* Found the youngest clone to promote.  Promote it. */
831 	if (promote_clone(zhp, cd.clone_zhp) != 0) {
832 		zfs_close(cd.clone_zhp);
833 		zfs_close(zhp);
834 		return (Z_ERR);
835 	}
836 
837 	/* close and reopen the main dataset to get the latest info */
838 	(void) strlcpy(nm, zfs_get_name(zhp), sizeof (nm));
839 	zfs_close(zhp);
840 	if ((zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
841 		return (Z_ERR);
842 
843 	return (Z_OK);
844 }
845 
846 /*
847  * Clone a pre-existing ZFS snapshot, either by making a direct ZFS clone, if
848  * possible, or by copying the data from the snapshot to the zonepath.
849  */
850 int
851 clone_snapshot_zfs(char *snap_name, char *zonepath, char *validatesnap)
852 {
853 	int	err = Z_OK;
854 	char	clone_name[MAXPATHLEN];
855 	char	snap_path[MAXPATHLEN];
856 
857 	if (snap2path(snap_name, snap_path, sizeof (snap_path)) != Z_OK) {
858 		(void) fprintf(stderr, gettext("unable to find path for %s.\n"),
859 		    snap_name);
860 		return (Z_ERR);
861 	}
862 
863 	if (validate_snapshot(snap_name, snap_path, validatesnap) != Z_OK)
864 		return (Z_NO_ENTRY);
865 
866 	/*
867 	 * The zonepath cannot be ZFS cloned, try to copy the data from
868 	 * within the snapshot to the zonepath.
869 	 */
870 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
871 		if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
872 			if (clean_out_clone() != Z_OK)
873 				(void) fprintf(stderr,
874 				    gettext("could not remove the "
875 				    "software inventory from %s\n"), zonepath);
876 
877 		return (err);
878 	}
879 
880 	if ((err = clone_snap(snap_name, clone_name)) != Z_OK) {
881 		if (err != Z_NO_ENTRY) {
882 			/*
883 			 * Cloning the snapshot failed.  Fall back to trying
884 			 * to install the zone by copying from the snapshot.
885 			 */
886 			if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
887 				if (clean_out_clone() != Z_OK)
888 					(void) fprintf(stderr,
889 					    gettext("could not remove the "
890 					    "software inventory from %s\n"),
891 					    zonepath);
892 		} else {
893 			/*
894 			 * The snapshot is unusable for some reason so restore
895 			 * the zone state to configured since we were unable to
896 			 * actually do anything about getting the zone
897 			 * installed.
898 			 */
899 			int tmp;
900 
901 			if ((tmp = zone_set_state(target_zone,
902 			    ZONE_STATE_CONFIGURED)) != Z_OK) {
903 				errno = tmp;
904 				zperror2(target_zone,
905 				    gettext("could not set state"));
906 			}
907 		}
908 	}
909 
910 	return (err);
911 }
912 
913 /*
914  * Attempt to clone a source_zone to a target zonepath by using a ZFS clone.
915  */
916 int
917 clone_zfs(char *source_zonepath, char *zonepath, char *presnapbuf,
918     char *postsnapbuf)
919 {
920 	zfs_handle_t	*zhp;
921 	char		clone_name[MAXPATHLEN];
922 	char		snap_name[MAXPATHLEN];
923 
924 	/*
925 	 * Try to get a zfs handle for the source_zonepath.  If this fails
926 	 * the source_zonepath is not ZFS so return an error.
927 	 */
928 	if ((zhp = mount2zhandle(source_zonepath)) == NULL)
929 		return (Z_ERR);
930 
931 	/*
932 	 * Check if there is a file system already mounted on zonepath.  If so,
933 	 * we can't clone to the path so we should fall back to copying.
934 	 */
935 	if (is_mountpnt(zonepath)) {
936 		zfs_close(zhp);
937 		(void) fprintf(stderr,
938 		    gettext("A file system is already mounted on %s,\n"
939 		    "preventing use of a ZFS clone.\n"), zonepath);
940 		return (Z_ERR);
941 	}
942 
943 	/*
944 	 * Instead of using path2name to get the clone name from the zonepath,
945 	 * we could generate a name from the source zone ZFS name.  However,
946 	 * this would mean we would create the clone under the ZFS fs of the
947 	 * source instead of what the zonepath says.  For example,
948 	 *
949 	 * source_zonepath		zonepath
950 	 * /pl/zones/dev/z1		/pl/zones/deploy/z2
951 	 *
952 	 * We don't want the clone to be under "dev", we want it under
953 	 * "deploy", so that we can leverage the normal attribute inheritance
954 	 * that ZFS provides in the fs hierarchy.
955 	 */
956 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
957 		zfs_close(zhp);
958 		return (Z_ERR);
959 	}
960 
961 	if (take_snapshot(zhp, snap_name, sizeof (snap_name), presnapbuf,
962 	    postsnapbuf) != Z_OK) {
963 		zfs_close(zhp);
964 		return (Z_ERR);
965 	}
966 	zfs_close(zhp);
967 
968 	if (clone_snap(snap_name, clone_name) != Z_OK) {
969 		/* Clean up the snapshot we just took. */
970 		if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
971 		    != NULL) {
972 			if (zfs_unmount(zhp, NULL, 0) == 0)
973 				(void) zfs_destroy(zhp, B_FALSE);
974 			zfs_close(zhp);
975 		}
976 
977 		return (Z_ERR);
978 	}
979 
980 	(void) printf(gettext("Instead of copying, a ZFS clone has been "
981 	    "created for this zone.\n"));
982 
983 	return (Z_OK);
984 }
985 
986 /*
987  * Attempt to create a ZFS file system for the specified zonepath.
988  * We either will successfully create a ZFS file system and get it mounted
989  * on the zonepath or we don't.  The caller doesn't care since a regular
990  * directory is used for the zonepath if no ZFS file system is mounted there.
991  */
992 void
993 create_zfs_zonepath(char *zonepath)
994 {
995 	zfs_handle_t	*zhp;
996 	char		zfs_name[MAXPATHLEN];
997 	nvlist_t	*props = NULL;
998 
999 	if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
1000 		return;
1001 
1002 	/* Check if the dataset already exists. */
1003 	if ((zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) != NULL) {
1004 		zfs_close(zhp);
1005 		return;
1006 	}
1007 
1008 	/*
1009 	 * We turn off zfs SHARENFS and SHARESMB properties on the
1010 	 * zoneroot dataset in order to prevent the GZ from sharing
1011 	 * NGZ data by accident.
1012 	 */
1013 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
1014 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
1015 	    "off") != 0) ||
1016 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
1017 	    "off") != 0)) {
1018 		if (props != NULL)
1019 			nvlist_free(props);
1020 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1021 		    "out of memory\n"), zfs_name);
1022 	}
1023 
1024 	if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, props) != 0 ||
1025 	    (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) == NULL) {
1026 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1027 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1028 		nvlist_free(props);
1029 		return;
1030 	}
1031 
1032 	nvlist_free(props);
1033 
1034 	if (zfs_mount(zhp, NULL, 0) != 0) {
1035 		(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
1036 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1037 		(void) zfs_destroy(zhp, B_FALSE);
1038 	} else {
1039 		if (chmod(zonepath, S_IRWXU) != 0) {
1040 			(void) fprintf(stderr, gettext("file system %s "
1041 			    "successfully created, but chmod %o failed: %s\n"),
1042 			    zfs_name, S_IRWXU, strerror(errno));
1043 			(void) destroy_zfs(zonepath);
1044 		} else {
1045 			(void) printf(gettext("A ZFS file system has been "
1046 			    "created for this zone.\n"));
1047 		}
1048 	}
1049 
1050 	zfs_close(zhp);
1051 }
1052 
1053 /*
1054  * If the zonepath is a ZFS file system, attempt to destroy it.  We return Z_OK
1055  * if we were able to zfs_destroy the zonepath, otherwise we return Z_ERR
1056  * which means the caller should clean up the zonepath in the traditional
1057  * way.
1058  */
1059 int
1060 destroy_zfs(char *zonepath)
1061 {
1062 	zfs_handle_t	*zhp;
1063 	boolean_t	is_clone = B_FALSE;
1064 	char		origin[ZFS_MAXPROPLEN];
1065 
1066 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1067 		return (Z_ERR);
1068 
1069 	if (promote_all_clones(zhp) != 0)
1070 		return (Z_ERR);
1071 
1072 	/* Now cleanup any snapshots remaining. */
1073 	if (zfs_iter_snapshots(zhp, rm_snap, NULL) != 0) {
1074 		zfs_close(zhp);
1075 		return (Z_ERR);
1076 	}
1077 
1078 	/*
1079 	 * We can't destroy the file system if it has still has dependents.
1080 	 * There shouldn't be any at this point, but we'll double check.
1081 	 */
1082 	if (zfs_iter_dependents(zhp, B_TRUE, has_dependent, NULL) != 0) {
1083 		(void) fprintf(stderr, gettext("zfs destroy %s failed: the "
1084 		    "dataset still has dependents\n"), zfs_get_name(zhp));
1085 		zfs_close(zhp);
1086 		return (Z_ERR);
1087 	}
1088 
1089 	/*
1090 	 * This might be a clone.  Try to get the snapshot so we can attempt
1091 	 * to destroy that as well.
1092 	 */
1093 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
1094 	    NULL, 0, B_FALSE) == 0)
1095 		is_clone = B_TRUE;
1096 
1097 	if (zfs_unmount(zhp, NULL, 0) != 0) {
1098 		(void) fprintf(stderr, gettext("zfs unmount %s failed: %s\n"),
1099 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1100 		zfs_close(zhp);
1101 		return (Z_ERR);
1102 	}
1103 
1104 	if (zfs_destroy(zhp, B_FALSE) != 0) {
1105 		/*
1106 		 * If the destroy fails for some reason, try to remount
1107 		 * the file system so that we can use "rm -rf" to clean up
1108 		 * instead.
1109 		 */
1110 		(void) fprintf(stderr, gettext("zfs destroy %s failed: %s\n"),
1111 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1112 		(void) zfs_mount(zhp, NULL, 0);
1113 		zfs_close(zhp);
1114 		return (Z_ERR);
1115 	}
1116 
1117 	/*
1118 	 * If the zone has ever been moved then the mountpoint dir will not be
1119 	 * cleaned up by the zfs_destroy().  To handle this case try to clean
1120 	 * it up now but don't worry if it fails, that will be normal.
1121 	 */
1122 	(void) rmdir(zonepath);
1123 
1124 	(void) printf(gettext("The ZFS file system for this zone has been "
1125 	    "destroyed.\n"));
1126 
1127 	if (is_clone) {
1128 		zfs_handle_t	*ohp;
1129 
1130 		/*
1131 		 * Try to clean up the snapshot that the clone was taken from.
1132 		 */
1133 		if ((ohp = zfs_open(g_zfs, origin,
1134 		    ZFS_TYPE_SNAPSHOT)) != NULL) {
1135 			if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
1136 			    NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
1137 				(void) zfs_destroy(ohp, B_FALSE);
1138 			zfs_close(ohp);
1139 		}
1140 	}
1141 
1142 	zfs_close(zhp);
1143 	return (Z_OK);
1144 }
1145 
1146 /*
1147  * Return true if the path is its own zfs file system.  We determine this
1148  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
1149  * if it is a different fs.
1150  */
1151 boolean_t
1152 is_zonepath_zfs(char *zonepath)
1153 {
1154 	int res;
1155 	char *path;
1156 	char *parent;
1157 	struct statvfs64 buf1, buf2;
1158 
1159 	if (statvfs64(zonepath, &buf1) != 0)
1160 		return (B_FALSE);
1161 
1162 	if (strcmp(buf1.f_basetype, "zfs") != 0)
1163 		return (B_FALSE);
1164 
1165 	if ((path = strdup(zonepath)) == NULL)
1166 		return (B_FALSE);
1167 
1168 	parent = dirname(path);
1169 	res = statvfs64(parent, &buf2);
1170 	free(path);
1171 
1172 	if (res != 0)
1173 		return (B_FALSE);
1174 
1175 	if (buf1.f_fsid == buf2.f_fsid)
1176 		return (B_FALSE);
1177 
1178 	return (B_TRUE);
1179 }
1180 
1181 /*
1182  * Implement the fast move of a ZFS file system by simply updating the
1183  * mountpoint.  Since it is file system already, we don't have the
1184  * issue of cross-file system copying.
1185  */
1186 int
1187 move_zfs(char *zonepath, char *new_zonepath)
1188 {
1189 	int		ret = Z_ERR;
1190 	zfs_handle_t	*zhp;
1191 
1192 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1193 		return (Z_ERR);
1194 
1195 	if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1196 	    new_zonepath) == 0) {
1197 		/*
1198 		 * Clean up the old mount point.  We ignore any failure since
1199 		 * the zone is already successfully mounted on the new path.
1200 		 */
1201 		(void) rmdir(zonepath);
1202 		ret = Z_OK;
1203 	}
1204 
1205 	zfs_close(zhp);
1206 
1207 	return (ret);
1208 }
1209 
1210 /*
1211  * Validate that the given dataset exists on the system, and that neither it nor
1212  * its children are zvols.
1213  *
1214  * Note that we don't do anything with the 'zoned' property here.  All
1215  * management is done in zoneadmd when the zone is actually rebooted.  This
1216  * allows us to automatically set the zoned property even when a zone is
1217  * rebooted by the administrator.
1218  */
1219 int
1220 verify_datasets(zone_dochandle_t handle)
1221 {
1222 	int return_code = Z_OK;
1223 	struct zone_dstab dstab;
1224 	zfs_handle_t *zhp;
1225 	char propbuf[ZFS_MAXPROPLEN];
1226 	char source[ZFS_MAXNAMELEN];
1227 	zprop_source_t srctype;
1228 
1229 	if (zonecfg_setdsent(handle) != Z_OK) {
1230 		/*
1231 		 * TRANSLATION_NOTE
1232 		 * zfs and dataset are literals that should not be translated.
1233 		 */
1234 		(void) fprintf(stderr, gettext("could not verify zfs datasets: "
1235 		    "unable to enumerate datasets\n"));
1236 		return (Z_ERR);
1237 	}
1238 
1239 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
1240 
1241 		if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
1242 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1243 			(void) fprintf(stderr, gettext("could not verify zfs "
1244 			    "dataset %s: %s\n"), dstab.zone_dataset_name,
1245 			    libzfs_error_description(g_zfs));
1246 			return_code = Z_ERR;
1247 			continue;
1248 		}
1249 
1250 		if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf,
1251 		    sizeof (propbuf), &srctype, source,
1252 		    sizeof (source), 0) == 0 &&
1253 		    (srctype == ZPROP_SRC_INHERITED)) {
1254 			(void) fprintf(stderr, gettext("could not verify zfs "
1255 			    "dataset %s: mountpoint cannot be inherited\n"),
1256 			    dstab.zone_dataset_name);
1257 			return_code = Z_ERR;
1258 			zfs_close(zhp);
1259 			continue;
1260 		}
1261 
1262 		if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) {
1263 			(void) fprintf(stderr, gettext("cannot verify zfs "
1264 			    "dataset %s: volumes cannot be specified as a "
1265 			    "zone dataset resource\n"),
1266 			    dstab.zone_dataset_name);
1267 			return_code = Z_ERR;
1268 		}
1269 
1270 		if (zfs_iter_children(zhp, check_zvol, NULL) != 0)
1271 			return_code = Z_ERR;
1272 
1273 		zfs_close(zhp);
1274 	}
1275 	(void) zonecfg_enddsent(handle);
1276 
1277 	return (return_code);
1278 }
1279 
1280 /*
1281  * Verify that the ZFS dataset exists, and its mountpoint
1282  * property is set to "legacy".
1283  */
1284 int
1285 verify_fs_zfs(struct zone_fstab *fstab)
1286 {
1287 	zfs_handle_t *zhp;
1288 	char propbuf[ZFS_MAXPROPLEN];
1289 
1290 	if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
1291 	    ZFS_TYPE_DATASET)) == NULL) {
1292 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1293 		    "could not access zfs dataset '%s'\n"),
1294 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1295 		return (Z_ERR);
1296 	}
1297 
1298 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
1299 		(void) fprintf(stderr, gettext("cannot verify fs %s: "
1300 		    "'%s' is not a file system\n"),
1301 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1302 		zfs_close(zhp);
1303 		return (Z_ERR);
1304 	}
1305 
1306 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1307 	    NULL, NULL, 0, 0) != 0 || strcmp(propbuf, "legacy") != 0) {
1308 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1309 		    "zfs '%s' mountpoint is not \"legacy\"\n"),
1310 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1311 		zfs_close(zhp);
1312 		return (Z_ERR);
1313 	}
1314 
1315 	zfs_close(zhp);
1316 	return (Z_OK);
1317 }
1318 
1319 /*
1320  * Destroy the specified mnttab structure that was created by mnttab_dup().
1321  * NOTE: The structure's mnt_time field isn't freed.
1322  */
1323 static void
1324 mnttab_destroy(struct mnttab *tabp)
1325 {
1326 	assert(tabp != NULL);
1327 
1328 	free(tabp->mnt_mountp);
1329 	free(tabp->mnt_special);
1330 	free(tabp->mnt_fstype);
1331 	free(tabp->mnt_mntopts);
1332 	free(tabp);
1333 }
1334 
1335 /*
1336  * Duplicate the specified mnttab structure.  The mnt_mountp and mnt_time
1337  * fields aren't duplicated.  This function returns a pointer to the new mnttab
1338  * structure or NULL if an error occurred.  If an error occurs, then this
1339  * function sets errno to reflect the error.  mnttab structures created by
1340  * this function should be destroyed via mnttab_destroy().
1341  */
1342 static struct mnttab *
1343 mnttab_dup(const struct mnttab *srcp)
1344 {
1345 	struct mnttab *retval;
1346 
1347 	assert(srcp != NULL);
1348 
1349 	retval = (struct mnttab *)calloc(1, sizeof (*retval));
1350 	if (retval == NULL) {
1351 		errno = ENOMEM;
1352 		return (NULL);
1353 	}
1354 	if (srcp->mnt_special != NULL) {
1355 		retval->mnt_special = strdup(srcp->mnt_special);
1356 		if (retval->mnt_special == NULL)
1357 			goto err;
1358 	}
1359 	if (srcp->mnt_fstype != NULL) {
1360 		retval->mnt_fstype = strdup(srcp->mnt_fstype);
1361 		if (retval->mnt_fstype == NULL)
1362 			goto err;
1363 	}
1364 	retval->mnt_mntopts = (char *)malloc(MAX_MNTOPT_STR * sizeof (char));
1365 	if (retval->mnt_mntopts == NULL)
1366 		goto err;
1367 	if (srcp->mnt_mntopts != NULL) {
1368 		if (strlcpy(retval->mnt_mntopts, srcp->mnt_mntopts,
1369 		    MAX_MNTOPT_STR * sizeof (char)) >= MAX_MNTOPT_STR *
1370 		    sizeof (char)) {
1371 			mnttab_destroy(retval);
1372 			errno = EOVERFLOW; /* similar to mount(2) behavior */
1373 			return (NULL);
1374 		}
1375 	} else {
1376 		retval->mnt_mntopts[0] = '\0';
1377 	}
1378 	return (retval);
1379 
1380 err:
1381 	mnttab_destroy(retval);
1382 	errno = ENOMEM;
1383 	return (NULL);
1384 }
1385 
1386 /*
1387  * Determine whether the specified ZFS dataset's mountpoint property is set
1388  * to "legacy".  If the specified dataset does not have a legacy mountpoint,
1389  * then the string pointer to which the mountpoint argument points is assigned
1390  * a dynamically-allocated string containing the dataset's mountpoint
1391  * property.  If the dataset's mountpoint property is "legacy" or a libzfs
1392  * error occurs, then the string pointer to which the mountpoint argument
1393  * points isn't modified.
1394  *
1395  * This function returns B_TRUE if it doesn't encounter any fatal errors.
1396  * It returns B_FALSE if it encounters a fatal error and sets errno to the
1397  * appropriate error code.
1398  */
1399 static boolean_t
1400 get_zfs_non_legacy_mountpoint(const char *dataset_name, char **mountpoint)
1401 {
1402 	zfs_handle_t *zhp;
1403 	char propbuf[ZFS_MAXPROPLEN];
1404 
1405 	assert(dataset_name != NULL);
1406 	assert(mountpoint != NULL);
1407 
1408 	if ((zhp = zfs_open(g_zfs, dataset_name, ZFS_TYPE_DATASET)) == NULL) {
1409 		errno = EINVAL;
1410 		return (B_FALSE);
1411 	}
1412 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1413 	    NULL, NULL, 0, 0) != 0) {
1414 		zfs_close(zhp);
1415 		errno = EINVAL;
1416 		return (B_FALSE);
1417 	}
1418 	zfs_close(zhp);
1419 	if (strcmp(propbuf, "legacy") != 0) {
1420 		if ((*mountpoint = strdup(propbuf)) == NULL) {
1421 			errno = ENOMEM;
1422 			return (B_FALSE);
1423 		}
1424 	}
1425 	return (B_TRUE);
1426 }
1427 
1428 
1429 /*
1430  * This zonecfg_find_mounts() callback records information about mounts of
1431  * interest in a zonepath.  It also tallies the number of zone
1432  * root overlay mounts and the number of unexpected mounts found.
1433  * This function outputs errors using zerror() if it finds unexpected
1434  * mounts.  cookiep should point to an initialized zone_mounts_t structure.
1435  *
1436  * This function returns zero on success and a nonzero value on failure.
1437  */
1438 static int
1439 zone_mounts_cb(const struct mnttab *mountp, void *cookiep)
1440 {
1441 	zone_mounts_t *mounts;
1442 	const char *zone_mount_dir;
1443 
1444 	assert(mountp != NULL);
1445 	assert(cookiep != NULL);
1446 
1447 	mounts = (zone_mounts_t *)cookiep;
1448 	zone_mount_dir = mountp->mnt_mountp + mounts->zonepath_len;
1449 	if (strcmp(zone_mount_dir, "/root") == 0) {
1450 		/*
1451 		 * Check for an overlay mount.  If we already detected a /root
1452 		 * mount, then the current mount must be an overlay mount.
1453 		 */
1454 		if (mounts->root_mnttab != NULL) {
1455 			mounts->num_root_overlay_mounts++;
1456 			return (0);
1457 		}
1458 
1459 		/*
1460 		 * Store the root mount's mnttab information in the
1461 		 * zone_mounts_t structure for future use.
1462 		 */
1463 		if ((mounts->root_mnttab = mnttab_dup(mountp)) == NULL) {
1464 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1465 			return (-1);
1466 		}
1467 
1468 		/*
1469 		 * Determine if the filesystem is a ZFS filesystem with a
1470 		 * non-legacy mountpoint.  If it is, then set the root
1471 		 * filesystem's mnttab's mnt_mountp field to a non-NULL
1472 		 * value, which will serve as a flag to indicate this special
1473 		 * condition.
1474 		 */
1475 		if (strcmp(mountp->mnt_fstype, MNTTYPE_ZFS) == 0 &&
1476 		    get_zfs_non_legacy_mountpoint(mountp->mnt_special,
1477 		    &mounts->root_mnttab->mnt_mountp) != B_TRUE) {
1478 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1479 			return (-1);
1480 		}
1481 	} else {
1482 		/*
1483 		 * An unexpected mount was found.  Notify the user.
1484 		 */
1485 		if (mounts->num_unexpected_mounts == 0)
1486 			zerror(gettext("These file systems are mounted on "
1487 			    "subdirectories of %s.\n"), mounts->zonepath);
1488 		mounts->num_unexpected_mounts++;
1489 		(void) zfm_print(mountp, NULL);
1490 	}
1491 	return (0);
1492 }
1493 
1494 /*
1495  * Initialize the specified zone_mounts_t structure for the given zonepath.
1496  * If this function succeeds, it returns zero and the specified zone_mounts_t
1497  * structure contains information about mounts in the specified zonepath.
1498  * The function returns a nonzero value if it fails.  The zone_mounts_t
1499  * structure doesn't need be destroyed via zone_mounts_destroy() if this
1500  * function fails.
1501  */
1502 int
1503 zone_mounts_init(zone_mounts_t *mounts, const char *zonepath)
1504 {
1505 	assert(mounts != NULL);
1506 	assert(zonepath != NULL);
1507 
1508 	bzero(mounts, sizeof (*mounts));
1509 	if ((mounts->zonepath = strdup(zonepath)) == NULL) {
1510 		zerror(gettext("the process ran out of memory while checking "
1511 		    "for mounts in zonepath %s."), zonepath);
1512 		return (-1);
1513 	}
1514 	mounts->zonepath_len = strlen(zonepath);
1515 	if (zonecfg_find_mounts((char *)zonepath, zone_mounts_cb, mounts) ==
1516 	    -1) {
1517 		zerror(gettext("an error occurred while checking for mounts "
1518 		    "in zonepath %s."), zonepath);
1519 		zone_mounts_destroy(mounts);
1520 		return (-1);
1521 	}
1522 	return (0);
1523 }
1524 
1525 /*
1526  * Destroy the memory used by the specified zone_mounts_t structure's fields.
1527  * This function doesn't free the memory occupied by the structure itself
1528  * (i.e., it doesn't free the parameter).
1529  */
1530 void
1531 zone_mounts_destroy(zone_mounts_t *mounts)
1532 {
1533 	assert(mounts != NULL);
1534 
1535 	free(mounts->zonepath);
1536 	if (mounts->root_mnttab != NULL)
1537 		mnttab_destroy(mounts->root_mnttab);
1538 }
1539 
1540 /*
1541  * Mount a moving zone's root filesystem (if it had a root filesystem mount
1542  * prior to the move) using the specified zonepath.  mounts should refer to
1543  * the zone_mounts_t structure describing the zone's mount information.
1544  *
1545  * This function returns zero if the mount succeeds and a nonzero value
1546  * if it doesn't.
1547  */
1548 int
1549 zone_mount_rootfs(zone_mounts_t *mounts, const char *zonepath)
1550 {
1551 	char zoneroot[MAXPATHLEN];
1552 	struct mnttab *mtab;
1553 	int flags;
1554 
1555 	assert(mounts != NULL);
1556 	assert(zonepath != NULL);
1557 
1558 	/*
1559 	 * If there isn't a root filesystem, then don't do anything.
1560 	 */
1561 	mtab = mounts->root_mnttab;
1562 	if (mtab == NULL)
1563 		return (0);
1564 
1565 	/*
1566 	 * Determine the root filesystem's new mountpoint.
1567 	 */
1568 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1569 	    sizeof (zoneroot)) {
1570 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1571 		return (-1);
1572 	}
1573 
1574 	/*
1575 	 * If the root filesystem is a non-legacy ZFS filesystem (i.e., if it's
1576 	 * mnt_mountp field is non-NULL), then make the filesystem's new
1577 	 * mount point its mountpoint property and mount the filesystem.
1578 	 */
1579 	if (mtab->mnt_mountp != NULL) {
1580 		zfs_handle_t *zhp;
1581 
1582 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1583 		    ZFS_TYPE_DATASET)) == NULL) {
1584 			zerror(gettext("could not get ZFS handle for the zone's"
1585 			    " root filesystem"));
1586 			return (-1);
1587 		}
1588 		if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1589 		    zoneroot) != 0) {
1590 			zerror(gettext("could not modify zone's root "
1591 			    "filesystem's mountpoint property"));
1592 			zfs_close(zhp);
1593 			return (-1);
1594 		}
1595 		if (zfs_mount(zhp, mtab->mnt_mntopts, 0) != 0) {
1596 			zerror(gettext("unable to mount zone root %s: %s"),
1597 			    zoneroot, libzfs_error_description(g_zfs));
1598 			if (zfs_prop_set(zhp,
1599 			    zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1600 			    mtab->mnt_mountp) != 0)
1601 				zerror(gettext("unable to restore zone's root "
1602 				    "filesystem's mountpoint property"));
1603 			zfs_close(zhp);
1604 			return (-1);
1605 		}
1606 		zfs_close(zhp);
1607 		return (0);
1608 	}
1609 
1610 	/*
1611 	 * The root filesystem is either a legacy-mounted ZFS filesystem or
1612 	 * a non-ZFS filesystem.  Use mount(2) to mount the root filesystem.
1613 	 */
1614 	if (mtab->mnt_mntopts != NULL)
1615 		flags = MS_OPTIONSTR;
1616 	else
1617 		flags = 0;
1618 	if (mount(mtab->mnt_special, zoneroot, flags, mtab->mnt_fstype, NULL, 0,
1619 	    mtab->mnt_mntopts, MAX_MNTOPT_STR * sizeof (char)) != 0) {
1620 		flags = errno;
1621 		zerror(gettext("unable to mount zone root %s: %s"), zoneroot,
1622 		    strerror(flags));
1623 		return (-1);
1624 	}
1625 	return (0);
1626 }
1627 
1628 /*
1629  * Unmount a moving zone's root filesystem (if such a mount exists) using the
1630  * specified zonepath.  mounts should refer to the zone_mounts_t structure
1631  * describing the zone's mount information.  If force is B_TRUE, then if the
1632  * unmount fails, then the function will try to forcibly unmount the zone's root
1633  * filesystem.
1634  *
1635  * This function returns zero if the unmount (forced or otherwise) succeeds;
1636  * otherwise, it returns a nonzero value.
1637  */
1638 int
1639 zone_unmount_rootfs(zone_mounts_t *mounts, const char *zonepath,
1640     boolean_t force)
1641 {
1642 	char zoneroot[MAXPATHLEN];
1643 	struct mnttab *mtab;
1644 	int err;
1645 
1646 	assert(mounts != NULL);
1647 	assert(zonepath != NULL);
1648 
1649 	/*
1650 	 * If there isn't a root filesystem, then don't do anything.
1651 	 */
1652 	mtab = mounts->root_mnttab;
1653 	if (mtab == NULL)
1654 		return (0);
1655 
1656 	/*
1657 	 * Determine the root filesystem's mountpoint.
1658 	 */
1659 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1660 	    sizeof (zoneroot)) {
1661 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1662 		return (-1);
1663 	}
1664 
1665 	/*
1666 	 * If the root filesystem is a non-legacy ZFS fileystem, then unmount
1667 	 * the filesystem via libzfs.
1668 	 */
1669 	if (mtab->mnt_mountp != NULL) {
1670 		zfs_handle_t *zhp;
1671 
1672 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1673 		    ZFS_TYPE_DATASET)) == NULL) {
1674 			zerror(gettext("could not get ZFS handle for the zone's"
1675 			    " root filesystem"));
1676 			return (-1);
1677 		}
1678 		if (zfs_unmount(zhp, zoneroot, 0) != 0) {
1679 			if (force && zfs_unmount(zhp, zoneroot, MS_FORCE) ==
1680 			    0) {
1681 				zfs_close(zhp);
1682 				return (0);
1683 			}
1684 			zerror(gettext("unable to unmount zone root %s: %s"),
1685 			    zoneroot, libzfs_error_description(g_zfs));
1686 			zfs_close(zhp);
1687 			return (-1);
1688 		}
1689 		zfs_close(zhp);
1690 		return (0);
1691 	}
1692 
1693 	/*
1694 	 * Use umount(2) to unmount the root filesystem.  If this fails, then
1695 	 * forcibly unmount it if the force flag is set.
1696 	 */
1697 	if (umount(zoneroot) != 0) {
1698 		if (force && umount2(zoneroot, MS_FORCE) == 0)
1699 			return (0);
1700 		err = errno;
1701 		zerror(gettext("unable to unmount zone root %s: %s"), zoneroot,
1702 		    strerror(err));
1703 		return (-1);
1704 	}
1705 	return (0);
1706 }
1707 
1708 int
1709 init_zfs(void)
1710 {
1711 	if ((g_zfs = libzfs_init()) == NULL) {
1712 		(void) fprintf(stderr, gettext("failed to initialize ZFS "
1713 		    "library\n"));
1714 		return (Z_ERR);
1715 	}
1716 
1717 	return (Z_OK);
1718 }
1719