xref: /titanic_50/usr/src/cmd/zoneadm/zfs.c (revision 4b3b7fc6e1f62f5e2bee41aafc52e9234c484bc0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26  */
27 
28 /*
29  * This file contains the functions used to support the ZFS integration
30  * with zones.  This includes validation (e.g. zonecfg dataset), cloning,
31  * file system creation and destruction.
32  */
33 
34 #include <stdio.h>
35 #include <errno.h>
36 #include <unistd.h>
37 #include <string.h>
38 #include <locale.h>
39 #include <libintl.h>
40 #include <sys/stat.h>
41 #include <sys/statvfs.h>
42 #include <libgen.h>
43 #include <libzonecfg.h>
44 #include <sys/mnttab.h>
45 #include <libzfs.h>
46 #include <sys/mntent.h>
47 #include <values.h>
48 #include <strings.h>
49 #include <assert.h>
50 
51 #include "zoneadm.h"
52 
/* libzfs library handle handed to the libzfs calls made throughout this file */
libzfs_handle_t *g_zfs;
54 
55 typedef struct zfs_mount_data {
56 	char		*match_name;
57 	zfs_handle_t	*match_handle;
58 } zfs_mount_data_t;
59 
/*
 * Call-back state shared by the snapshot iterators get_snap_max() and
 * rename_snap().
 */
typedef struct zfs_snapshot_data {
	/* snapshot name prefix to match: zonename@SUNWzone */
	char	*match_name;
	/* strlen of match_name */
	int	len;
	/* highest digit appended to snap name */
	int	max;
	/* number of snapshots to rename */
	int	num;
	/* counter for renaming snapshots */
	int	cntr;
} zfs_snapshot_data_t;
67 
68 typedef struct clone_data {
69 	zfs_handle_t	*clone_zhp;	/* clone dataset to promote */
70 	time_t		origin_creation; /* snapshot creation time of clone */
71 	const char	*snapshot;	/* snapshot of dataset being demoted */
72 } clone_data_t;
73 
74 /*
75  * A ZFS file system iterator call-back function which returns the
76  * zfs_handle_t for a ZFS file system on the specified mount point.
77  */
78 static int
79 match_mountpoint(zfs_handle_t *zhp, void *data)
80 {
81 	int			res;
82 	zfs_mount_data_t	*cbp;
83 	char			mp[ZFS_MAXPROPLEN];
84 
85 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
86 		zfs_close(zhp);
87 		return (0);
88 	}
89 
90 	/* First check if the dataset is mounted. */
91 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
92 	    0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
93 		zfs_close(zhp);
94 		return (0);
95 	}
96 
97 	/* Now check mount point. */
98 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
99 	    0, B_FALSE) != 0) {
100 		zfs_close(zhp);
101 		return (0);
102 	}
103 
104 	cbp = (zfs_mount_data_t *)data;
105 
106 	if (strcmp(mp, "legacy") == 0) {
107 		/* If legacy, must look in mnttab for mountpoint. */
108 		FILE		*fp;
109 		struct mnttab	entry;
110 		const char	*nm;
111 
112 		nm = zfs_get_name(zhp);
113 		if ((fp = fopen(MNTTAB, "r")) == NULL) {
114 			zfs_close(zhp);
115 			return (0);
116 		}
117 
118 		while (getmntent(fp, &entry) == 0) {
119 			if (strcmp(nm, entry.mnt_special) == 0) {
120 				if (strcmp(entry.mnt_mountp, cbp->match_name)
121 				    == 0) {
122 					(void) fclose(fp);
123 					cbp->match_handle = zhp;
124 					return (1);
125 				}
126 				break;
127 			}
128 		}
129 		(void) fclose(fp);
130 
131 	} else if (strcmp(mp, cbp->match_name) == 0) {
132 		cbp->match_handle = zhp;
133 		return (1);
134 	}
135 
136 	/* Iterate over any nested datasets. */
137 	res = zfs_iter_filesystems(zhp, match_mountpoint, data);
138 	zfs_close(zhp);
139 	return (res);
140 }
141 
142 /*
143  * Get ZFS handle for the specified mount point.
144  */
145 static zfs_handle_t *
146 mount2zhandle(char *mountpoint)
147 {
148 	zfs_mount_data_t	cb;
149 
150 	cb.match_name = mountpoint;
151 	cb.match_handle = NULL;
152 	(void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
153 	return (cb.match_handle);
154 }
155 
156 /*
157  * Check if there is already a file system (zfs or any other type) mounted on
158  * path.
159  */
160 static boolean_t
161 is_mountpnt(char *path)
162 {
163 	FILE		*fp;
164 	struct mnttab	entry;
165 
166 	if ((fp = fopen(MNTTAB, "r")) == NULL)
167 		return (B_FALSE);
168 
169 	while (getmntent(fp, &entry) == 0) {
170 		if (strcmp(path, entry.mnt_mountp) == 0) {
171 			(void) fclose(fp);
172 			return (B_TRUE);
173 		}
174 	}
175 
176 	(void) fclose(fp);
177 	return (B_FALSE);
178 }
179 
180 /*
181  * Run the brand's pre-snapshot hook before we take a ZFS snapshot of the zone.
182  */
183 static int
184 pre_snapshot(char *presnapbuf)
185 {
186 	int status;
187 
188 	/* No brand-specific handler */
189 	if (presnapbuf[0] == '\0')
190 		return (Z_OK);
191 
192 	/* Run the hook */
193 	status = do_subproc(presnapbuf);
194 	if ((status = subproc_status(gettext("brand-specific presnapshot"),
195 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
196 		return (Z_ERR);
197 
198 	return (Z_OK);
199 }
200 
201 /*
202  * Run the brand's post-snapshot hook after we take a ZFS snapshot of the zone.
203  */
204 static int
205 post_snapshot(char *postsnapbuf)
206 {
207 	int status;
208 
209 	/* No brand-specific handler */
210 	if (postsnapbuf[0] == '\0')
211 		return (Z_OK);
212 
213 	/* Run the hook */
214 	status = do_subproc(postsnapbuf);
215 	if ((status = subproc_status(gettext("brand-specific postsnapshot"),
216 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
217 		return (Z_ERR);
218 
219 	return (Z_OK);
220 }
221 
222 /*
223  * This is a ZFS snapshot iterator call-back function which returns the
224  * highest number of SUNWzone snapshots that have been taken.
225  */
226 static int
227 get_snap_max(zfs_handle_t *zhp, void *data)
228 {
229 	int			res;
230 	zfs_snapshot_data_t	*cbp;
231 
232 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
233 		zfs_close(zhp);
234 		return (0);
235 	}
236 
237 	cbp = (zfs_snapshot_data_t *)data;
238 
239 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) == 0) {
240 		char	*nump;
241 		int	num;
242 
243 		cbp->num++;
244 		nump = (char *)(zfs_get_name(zhp) + cbp->len);
245 		num = atoi(nump);
246 		if (num > cbp->max)
247 			cbp->max = num;
248 	}
249 
250 	res = zfs_iter_snapshots(zhp, get_snap_max, data);
251 	zfs_close(zhp);
252 	return (res);
253 }
254 
255 /*
256  * Take a ZFS snapshot to be used for cloning the zone.
257  */
258 static int
259 take_snapshot(zfs_handle_t *zhp, char *snapshot_name, int snap_size,
260     char *presnapbuf, char *postsnapbuf)
261 {
262 	int			res;
263 	char			template[ZFS_MAX_DATASET_NAME_LEN];
264 	zfs_snapshot_data_t	cb;
265 
266 	/*
267 	 * First we need to figure out the next available name for the
268 	 * zone snapshot.  Look through the list of zones snapshots for
269 	 * this file system to determine the maximum snapshot name.
270 	 */
271 	if (snprintf(template, sizeof (template), "%s@SUNWzone",
272 	    zfs_get_name(zhp)) >=  sizeof (template))
273 		return (Z_ERR);
274 
275 	cb.match_name = template;
276 	cb.len = strlen(template);
277 	cb.max = 0;
278 
279 	if (zfs_iter_snapshots(zhp, get_snap_max, &cb) != 0)
280 		return (Z_ERR);
281 
282 	cb.max++;
283 
284 	if (snprintf(snapshot_name, snap_size, "%s@SUNWzone%d",
285 	    zfs_get_name(zhp), cb.max) >= snap_size)
286 		return (Z_ERR);
287 
288 	if (pre_snapshot(presnapbuf) != Z_OK)
289 		return (Z_ERR);
290 	res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE, NULL);
291 	if (post_snapshot(postsnapbuf) != Z_OK)
292 		return (Z_ERR);
293 
294 	if (res != 0)
295 		return (Z_ERR);
296 	return (Z_OK);
297 }
298 
299 /*
300  * We are using an explicit snapshot from some earlier point in time so
301  * we need to validate it.  Run the brand specific hook.
302  */
303 static int
304 validate_snapshot(char *snapshot_name, char *snap_path, char *validsnapbuf)
305 {
306 	int status;
307 	char cmdbuf[MAXPATHLEN];
308 
309 	/* No brand-specific handler */
310 	if (validsnapbuf[0] == '\0')
311 		return (Z_OK);
312 
313 	/* pass args - snapshot_name & snap_path */
314 	if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %s %s", validsnapbuf,
315 	    snapshot_name, snap_path) >= sizeof (cmdbuf)) {
316 		zerror("Command line too long");
317 		return (Z_ERR);
318 	}
319 
320 	/* Run the hook */
321 	status = do_subproc(cmdbuf);
322 	if ((status = subproc_status(gettext("brand-specific validatesnapshot"),
323 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
324 		return (Z_ERR);
325 
326 	return (Z_OK);
327 }
328 
329 /*
330  * Remove the sw inventory file from inside this zonepath that we picked up out
331  * of the snapshot.
332  */
333 static int
334 clean_out_clone()
335 {
336 	int err;
337 	zone_dochandle_t handle;
338 
339 	if ((handle = zonecfg_init_handle()) == NULL) {
340 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
341 		return (Z_ERR);
342 	}
343 
344 	if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
345 		errno = err;
346 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
347 		zonecfg_fini_handle(handle);
348 		return (Z_ERR);
349 	}
350 
351 	zonecfg_rm_detached(handle, B_FALSE);
352 	zonecfg_fini_handle(handle);
353 
354 	return (Z_OK);
355 }
356 
357 /*
358  * Make a ZFS clone on zonepath from snapshot_name.
359  */
360 static int
361 clone_snap(char *snapshot_name, char *zonepath)
362 {
363 	int		res = Z_OK;
364 	int		err;
365 	zfs_handle_t	*zhp;
366 	zfs_handle_t	*clone;
367 	nvlist_t	*props = NULL;
368 
369 	if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
370 		return (Z_NO_ENTRY);
371 
372 	(void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
373 
374 	/*
375 	 * We turn off zfs SHARENFS and SHARESMB properties on the
376 	 * zoneroot dataset in order to prevent the GZ from sharing
377 	 * NGZ data by accident.
378 	 */
379 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
380 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
381 	    "off") != 0) ||
382 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
383 	    "off") != 0)) {
384 		nvlist_free(props);
385 		(void) fprintf(stderr, gettext("could not create ZFS clone "
386 		    "%s: out of memory\n"), zonepath);
387 		return (Z_ERR);
388 	}
389 
390 	err = zfs_clone(zhp, zonepath, props);
391 	zfs_close(zhp);
392 
393 	nvlist_free(props);
394 
395 	if (err != 0)
396 		return (Z_ERR);
397 
398 	/* create the mountpoint if necessary */
399 	if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_DATASET)) == NULL)
400 		return (Z_ERR);
401 
402 	/*
403 	 * The clone has been created so we need to print a diagnostic
404 	 * message if one of the following steps fails for some reason.
405 	 */
406 	if (zfs_mount(clone, NULL, 0) != 0) {
407 		(void) fprintf(stderr, gettext("could not mount ZFS clone "
408 		    "%s\n"), zfs_get_name(clone));
409 		res = Z_ERR;
410 
411 	} else if (clean_out_clone() != Z_OK) {
412 		(void) fprintf(stderr, gettext("could not remove the "
413 		    "software inventory from ZFS clone %s\n"),
414 		    zfs_get_name(clone));
415 		res = Z_ERR;
416 	}
417 
418 	zfs_close(clone);
419 	return (res);
420 }
421 
422 /*
423  * This function takes a zonepath and attempts to determine what the ZFS
424  * file system name (not mountpoint) should be for that path.  We do not
425  * assume that zonepath is an existing directory or ZFS fs since we use
426  * this function as part of the process of creating a new ZFS fs or clone.
427  *
428  * The way this works is that we look at the parent directory of the zonepath
429  * to see if it is a ZFS fs.  If it is, we get the name of that ZFS fs and
430  * append the last component of the zonepath to generate the ZFS name for the
431  * zonepath.  This matches the algorithm that ZFS uses for automatically
432  * mounting a new fs after it is created.
433  *
434  * Although a ZFS fs can be mounted anywhere, we don't worry about handling
435  * all of the complexity that a user could possibly configure with arbitrary
436  * mounts since there is no way to generate a ZFS name from a random path in
437  * the file system.  We only try to handle the automatic mounts that ZFS does
438  * for each file system.  ZFS restricts this so that a new fs must be created
439  * in an existing parent ZFS fs.  It then automatically mounts the new fs
440  * directly under the mountpoint for the parent fs using the last component
441  * of the name as the mountpoint directory.
442  *
443  * For example:
444  *    Name			Mountpoint
445  *    space/eng/dev/test/zone1	/project1/eng/dev/test/zone1
446  *
447  * Return Z_OK if the path mapped to a ZFS file system name, otherwise return
448  * Z_ERR.
449  */
450 static int
451 path2name(char *zonepath, char *zfs_name, int len)
452 {
453 	int		res;
454 	char		*bnm, *dnm, *dname, *bname;
455 	zfs_handle_t	*zhp;
456 	struct stat	stbuf;
457 
458 	/*
459 	 * We need two tmp strings to handle paths directly in / (e.g. /foo)
460 	 * since dirname will overwrite the first char after "/" in this case.
461 	 */
462 	if ((bnm = strdup(zonepath)) == NULL)
463 		return (Z_ERR);
464 
465 	if ((dnm = strdup(zonepath)) == NULL) {
466 		free(bnm);
467 		return (Z_ERR);
468 	}
469 
470 	bname = basename(bnm);
471 	dname = dirname(dnm);
472 
473 	/*
474 	 * This is a quick test to save iterating over all of the zfs datasets
475 	 * on the system (which can be a lot).  If the parent dir is not in a
476 	 * ZFS fs, then we're done.
477 	 */
478 	if (stat(dname, &stbuf) != 0 || !S_ISDIR(stbuf.st_mode) ||
479 	    strcmp(stbuf.st_fstype, MNTTYPE_ZFS) != 0) {
480 		free(bnm);
481 		free(dnm);
482 		return (Z_ERR);
483 	}
484 
485 	/* See if the parent directory is its own ZFS dataset. */
486 	if ((zhp = mount2zhandle(dname)) == NULL) {
487 		/*
488 		 * The parent is not a ZFS dataset so we can't automatically
489 		 * create a dataset on the given path.
490 		 */
491 		free(bnm);
492 		free(dnm);
493 		return (Z_ERR);
494 	}
495 
496 	res = snprintf(zfs_name, len, "%s/%s", zfs_get_name(zhp), bname);
497 
498 	free(bnm);
499 	free(dnm);
500 	zfs_close(zhp);
501 	if (res >= len)
502 		return (Z_ERR);
503 
504 	return (Z_OK);
505 }
506 
507 /*
508  * A ZFS file system iterator call-back function used to determine if the
509  * file system has dependents (snapshots & clones).
510  */
511 /* ARGSUSED */
512 static int
513 has_dependent(zfs_handle_t *zhp, void *data)
514 {
515 	zfs_close(zhp);
516 	return (1);
517 }
518 
519 /*
520  * Given a snapshot name, get the file system path where the snapshot lives.
521  * A snapshot name is of the form fs_name@snap_name.  For example, snapshot
522  * pl/zones/z1@SUNWzone1 would have a path of
523  * /pl/zones/z1/.zfs/snapshot/SUNWzone1.
524  */
525 static int
526 snap2path(char *snap_name, char *path, int len)
527 {
528 	char		*p;
529 	zfs_handle_t	*zhp;
530 	char		mp[ZFS_MAXPROPLEN];
531 
532 	if ((p = strrchr(snap_name, '@')) == NULL)
533 		return (Z_ERR);
534 
535 	/* Get the file system name from the snap_name. */
536 	*p = '\0';
537 	zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_DATASET);
538 	*p = '@';
539 	if (zhp == NULL)
540 		return (Z_ERR);
541 
542 	/* Get the file system mount point. */
543 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
544 	    0, B_FALSE) != 0) {
545 		zfs_close(zhp);
546 		return (Z_ERR);
547 	}
548 	zfs_close(zhp);
549 
550 	p++;
551 	if (snprintf(path, len, "%s/.zfs/snapshot/%s", mp, p) >= len)
552 		return (Z_ERR);
553 
554 	return (Z_OK);
555 }
556 
557 /*
558  * This callback function is used to iterate through a snapshot's dependencies
559  * to find a filesystem that is a direct clone of the snapshot being iterated.
560  */
561 static int
562 get_direct_clone(zfs_handle_t *zhp, void *data)
563 {
564 	clone_data_t	*cd = data;
565 	char		origin[ZFS_MAX_DATASET_NAME_LEN];
566 	char		ds_path[ZFS_MAX_DATASET_NAME_LEN];
567 
568 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
569 		zfs_close(zhp);
570 		return (0);
571 	}
572 
573 	(void) strlcpy(ds_path, zfs_get_name(zhp), sizeof (ds_path));
574 
575 	/* Make sure this is a direct clone of the snapshot we're iterating. */
576 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
577 	    NULL, 0, B_FALSE) != 0 || strcmp(origin, cd->snapshot) != 0) {
578 		zfs_close(zhp);
579 		return (0);
580 	}
581 
582 	if (cd->clone_zhp != NULL)
583 		zfs_close(cd->clone_zhp);
584 
585 	cd->clone_zhp = zhp;
586 	return (1);
587 }
588 
589 /*
590  * A ZFS file system iterator call-back function used to determine the clone
591  * to promote.  This function finds the youngest (i.e. last one taken) snapshot
592  * that has a clone.  If found, it returns a reference to that clone in the
593  * callback data.
594  */
595 static int
596 find_clone(zfs_handle_t *zhp, void *data)
597 {
598 	clone_data_t	*cd = data;
599 	time_t		snap_creation;
600 	int		zret = 0;
601 
602 	/* If snapshot has no clones, skip it */
603 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) == 0) {
604 		zfs_close(zhp);
605 		return (0);
606 	}
607 
608 	cd->snapshot = zfs_get_name(zhp);
609 
610 	/* Get the creation time of this snapshot */
611 	snap_creation = (time_t)zfs_prop_get_int(zhp, ZFS_PROP_CREATION);
612 
613 	/*
614 	 * If this snapshot's creation time is greater than (i.e. younger than)
615 	 * the current youngest snapshot found, iterate this snapshot to
616 	 * get the right clone.
617 	 */
618 	if (snap_creation >= cd->origin_creation) {
619 		/*
620 		 * Iterate the dependents of this snapshot to find a clone
621 		 * that's a direct dependent.
622 		 */
623 		if ((zret = zfs_iter_dependents(zhp, B_FALSE, get_direct_clone,
624 		    cd)) == -1) {
625 			zfs_close(zhp);
626 			return (1);
627 		} else if (zret == 1) {
628 			/*
629 			 * Found a clone, update the origin_creation time
630 			 * in the callback data.
631 			 */
632 			cd->origin_creation = snap_creation;
633 		}
634 	}
635 
636 	zfs_close(zhp);
637 	return (0);
638 }
639 
640 /*
641  * A ZFS file system iterator call-back function used to remove standalone
642  * snapshots.
643  */
644 /* ARGSUSED */
645 static int
646 rm_snap(zfs_handle_t *zhp, void *data)
647 {
648 	/* If snapshot has clones, something is wrong */
649 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) != 0) {
650 		zfs_close(zhp);
651 		return (1);
652 	}
653 
654 	if (zfs_unmount(zhp, NULL, 0) == 0) {
655 		(void) zfs_destroy(zhp, B_FALSE);
656 	}
657 
658 	zfs_close(zhp);
659 	return (0);
660 }
661 
662 /*
663  * A ZFS snapshot iterator call-back function which renames snapshots.
664  */
665 static int
666 rename_snap(zfs_handle_t *zhp, void *data)
667 {
668 	int			res;
669 	zfs_snapshot_data_t	*cbp;
670 	char			template[ZFS_MAX_DATASET_NAME_LEN];
671 
672 	cbp = (zfs_snapshot_data_t *)data;
673 
674 	/*
675 	 * When renaming snapshots with the iterator, the iterator can see
676 	 * the same snapshot after we've renamed up in the namespace.  To
677 	 * prevent this we check the count for the number of snapshots we have
678 	 * to rename and stop at that point.
679 	 */
680 	if (cbp->cntr >= cbp->num) {
681 		zfs_close(zhp);
682 		return (0);
683 	}
684 
685 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
686 		zfs_close(zhp);
687 		return (0);
688 	}
689 
690 	/* Only rename the snapshots we automatically generate when we clone. */
691 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) != 0) {
692 		zfs_close(zhp);
693 		return (0);
694 	}
695 
696 	(void) snprintf(template, sizeof (template), "%s%d", cbp->match_name,
697 	    cbp->max++);
698 
699 	res = (zfs_rename(zhp, template, B_FALSE, B_FALSE) != 0);
700 	if (res != 0)
701 		(void) fprintf(stderr, gettext("failed to rename snapshot %s "
702 		    "to %s: %s\n"), zfs_get_name(zhp), template,
703 		    libzfs_error_description(g_zfs));
704 
705 	cbp->cntr++;
706 
707 	zfs_close(zhp);
708 	return (res);
709 }
710 
711 /*
712  * Rename the source dataset's snapshots that are automatically generated when
713  * we clone a zone so that there won't be a name collision when we promote the
714  * cloned dataset.  Once the snapshots have been renamed, then promote the
715  * clone.
716  *
717  * The snapshot rename process gets the highest number on the snapshot names
718  * (the format is zonename@SUNWzoneXX where XX are digits) on both the source
719  * and clone datasets, then renames the source dataset snapshots starting at
720  * the next number.
721  */
722 static int
723 promote_clone(zfs_handle_t *src_zhp, zfs_handle_t *cln_zhp)
724 {
725 	zfs_snapshot_data_t	sd;
726 	char			nm[ZFS_MAX_DATASET_NAME_LEN];
727 	char			template[ZFS_MAX_DATASET_NAME_LEN];
728 
729 	(void) strlcpy(nm, zfs_get_name(cln_zhp), sizeof (nm));
730 	/*
731 	 * Start by getting the clone's snapshot max which we use
732 	 * during the rename of the original dataset's snapshots.
733 	 */
734 	(void) snprintf(template, sizeof (template), "%s@SUNWzone", nm);
735 	sd.match_name = template;
736 	sd.len = strlen(template);
737 	sd.max = 0;
738 
739 	if (zfs_iter_snapshots(cln_zhp, get_snap_max, &sd) != 0)
740 		return (Z_ERR);
741 
742 	/*
743 	 * Now make sure the source's snapshot max is at least as high as
744 	 * the clone's snapshot max.
745 	 */
746 	(void) snprintf(template, sizeof (template), "%s@SUNWzone",
747 	    zfs_get_name(src_zhp));
748 	sd.match_name = template;
749 	sd.len = strlen(template);
750 	sd.num = 0;
751 
752 	if (zfs_iter_snapshots(src_zhp, get_snap_max, &sd) != 0)
753 		return (Z_ERR);
754 
755 	/*
756 	 * Now rename the source dataset's snapshots so there's no
757 	 * conflict when we promote the clone.
758 	 */
759 	sd.max++;
760 	sd.cntr = 0;
761 	if (zfs_iter_snapshots(src_zhp, rename_snap, &sd) != 0)
762 		return (Z_ERR);
763 
764 	/* close and reopen the clone dataset to get the latest info */
765 	zfs_close(cln_zhp);
766 	if ((cln_zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
767 		return (Z_ERR);
768 
769 	if (zfs_promote(cln_zhp) != 0) {
770 		(void) fprintf(stderr, gettext("failed to promote %s: %s\n"),
771 		    nm, libzfs_error_description(g_zfs));
772 		return (Z_ERR);
773 	}
774 
775 	zfs_close(cln_zhp);
776 	return (Z_OK);
777 }
778 
779 /*
780  * Promote the youngest clone.  That clone will then become the origin of all
781  * of the other clones that were hanging off of the source dataset.
782  */
783 int
784 promote_all_clones(zfs_handle_t *zhp)
785 {
786 	clone_data_t	cd;
787 	char		nm[ZFS_MAX_DATASET_NAME_LEN];
788 
789 	cd.clone_zhp = NULL;
790 	cd.origin_creation = 0;
791 	cd.snapshot = NULL;
792 
793 	if (zfs_iter_snapshots(zhp, find_clone, &cd) != 0) {
794 		zfs_close(zhp);
795 		return (Z_ERR);
796 	}
797 
798 	/* Nothing to promote. */
799 	if (cd.clone_zhp == NULL)
800 		return (Z_OK);
801 
802 	/* Found the youngest clone to promote.  Promote it. */
803 	if (promote_clone(zhp, cd.clone_zhp) != 0) {
804 		zfs_close(cd.clone_zhp);
805 		zfs_close(zhp);
806 		return (Z_ERR);
807 	}
808 
809 	/* close and reopen the main dataset to get the latest info */
810 	(void) strlcpy(nm, zfs_get_name(zhp), sizeof (nm));
811 	zfs_close(zhp);
812 	if ((zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
813 		return (Z_ERR);
814 
815 	return (Z_OK);
816 }
817 
818 /*
819  * Clone a pre-existing ZFS snapshot, either by making a direct ZFS clone, if
820  * possible, or by copying the data from the snapshot to the zonepath.
821  */
822 int
823 clone_snapshot_zfs(char *snap_name, char *zonepath, char *validatesnap)
824 {
825 	int	err = Z_OK;
826 	char	clone_name[MAXPATHLEN];
827 	char	snap_path[MAXPATHLEN];
828 
829 	if (snap2path(snap_name, snap_path, sizeof (snap_path)) != Z_OK) {
830 		(void) fprintf(stderr, gettext("unable to find path for %s.\n"),
831 		    snap_name);
832 		return (Z_ERR);
833 	}
834 
835 	if (validate_snapshot(snap_name, snap_path, validatesnap) != Z_OK)
836 		return (Z_NO_ENTRY);
837 
838 	/*
839 	 * The zonepath cannot be ZFS cloned, try to copy the data from
840 	 * within the snapshot to the zonepath.
841 	 */
842 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
843 		if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
844 			if (clean_out_clone() != Z_OK)
845 				(void) fprintf(stderr,
846 				    gettext("could not remove the "
847 				    "software inventory from %s\n"), zonepath);
848 
849 		return (err);
850 	}
851 
852 	if ((err = clone_snap(snap_name, clone_name)) != Z_OK) {
853 		if (err != Z_NO_ENTRY) {
854 			/*
855 			 * Cloning the snapshot failed.  Fall back to trying
856 			 * to install the zone by copying from the snapshot.
857 			 */
858 			if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
859 				if (clean_out_clone() != Z_OK)
860 					(void) fprintf(stderr,
861 					    gettext("could not remove the "
862 					    "software inventory from %s\n"),
863 					    zonepath);
864 		} else {
865 			/*
866 			 * The snapshot is unusable for some reason so restore
867 			 * the zone state to configured since we were unable to
868 			 * actually do anything about getting the zone
869 			 * installed.
870 			 */
871 			int tmp;
872 
873 			if ((tmp = zone_set_state(target_zone,
874 			    ZONE_STATE_CONFIGURED)) != Z_OK) {
875 				errno = tmp;
876 				zperror2(target_zone,
877 				    gettext("could not set state"));
878 			}
879 		}
880 	}
881 
882 	return (err);
883 }
884 
885 /*
886  * Attempt to clone a source_zone to a target zonepath by using a ZFS clone.
887  */
888 int
889 clone_zfs(char *source_zonepath, char *zonepath, char *presnapbuf,
890     char *postsnapbuf)
891 {
892 	zfs_handle_t	*zhp;
893 	char		clone_name[MAXPATHLEN];
894 	char		snap_name[MAXPATHLEN];
895 
896 	/*
897 	 * Try to get a zfs handle for the source_zonepath.  If this fails
898 	 * the source_zonepath is not ZFS so return an error.
899 	 */
900 	if ((zhp = mount2zhandle(source_zonepath)) == NULL)
901 		return (Z_ERR);
902 
903 	/*
904 	 * Check if there is a file system already mounted on zonepath.  If so,
905 	 * we can't clone to the path so we should fall back to copying.
906 	 */
907 	if (is_mountpnt(zonepath)) {
908 		zfs_close(zhp);
909 		(void) fprintf(stderr,
910 		    gettext("A file system is already mounted on %s,\n"
911 		    "preventing use of a ZFS clone.\n"), zonepath);
912 		return (Z_ERR);
913 	}
914 
915 	/*
916 	 * Instead of using path2name to get the clone name from the zonepath,
917 	 * we could generate a name from the source zone ZFS name.  However,
918 	 * this would mean we would create the clone under the ZFS fs of the
919 	 * source instead of what the zonepath says.  For example,
920 	 *
921 	 * source_zonepath		zonepath
922 	 * /pl/zones/dev/z1		/pl/zones/deploy/z2
923 	 *
924 	 * We don't want the clone to be under "dev", we want it under
925 	 * "deploy", so that we can leverage the normal attribute inheritance
926 	 * that ZFS provides in the fs hierarchy.
927 	 */
928 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
929 		zfs_close(zhp);
930 		return (Z_ERR);
931 	}
932 
933 	if (take_snapshot(zhp, snap_name, sizeof (snap_name), presnapbuf,
934 	    postsnapbuf) != Z_OK) {
935 		zfs_close(zhp);
936 		return (Z_ERR);
937 	}
938 	zfs_close(zhp);
939 
940 	if (clone_snap(snap_name, clone_name) != Z_OK) {
941 		/* Clean up the snapshot we just took. */
942 		if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
943 		    != NULL) {
944 			if (zfs_unmount(zhp, NULL, 0) == 0)
945 				(void) zfs_destroy(zhp, B_FALSE);
946 			zfs_close(zhp);
947 		}
948 
949 		return (Z_ERR);
950 	}
951 
952 	(void) printf(gettext("Instead of copying, a ZFS clone has been "
953 	    "created for this zone.\n"));
954 
955 	return (Z_OK);
956 }
957 
958 /*
959  * Attempt to create a ZFS file system for the specified zonepath.
960  * We either will successfully create a ZFS file system and get it mounted
961  * on the zonepath or we don't.  The caller doesn't care since a regular
962  * directory is used for the zonepath if no ZFS file system is mounted there.
963  */
964 void
965 create_zfs_zonepath(char *zonepath)
966 {
967 	zfs_handle_t	*zhp;
968 	char		zfs_name[MAXPATHLEN];
969 	nvlist_t	*props = NULL;
970 
971 	if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
972 		return;
973 
974 	/* Check if the dataset already exists. */
975 	if ((zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) != NULL) {
976 		zfs_close(zhp);
977 		return;
978 	}
979 
980 	/*
981 	 * We turn off zfs SHARENFS and SHARESMB properties on the
982 	 * zoneroot dataset in order to prevent the GZ from sharing
983 	 * NGZ data by accident.
984 	 */
985 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
986 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
987 	    "off") != 0) ||
988 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
989 	    "off") != 0)) {
990 		nvlist_free(props);
991 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
992 		    "out of memory\n"), zfs_name);
993 	}
994 
995 	if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, props) != 0 ||
996 	    (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) == NULL) {
997 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
998 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
999 		nvlist_free(props);
1000 		return;
1001 	}
1002 
1003 	nvlist_free(props);
1004 
1005 	if (zfs_mount(zhp, NULL, 0) != 0) {
1006 		(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
1007 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1008 		(void) zfs_destroy(zhp, B_FALSE);
1009 	} else {
1010 		if (chmod(zonepath, S_IRWXU) != 0) {
1011 			(void) fprintf(stderr, gettext("file system %s "
1012 			    "successfully created, but chmod %o failed: %s\n"),
1013 			    zfs_name, S_IRWXU, strerror(errno));
1014 			(void) destroy_zfs(zonepath);
1015 		} else {
1016 			(void) printf(gettext("A ZFS file system has been "
1017 			    "created for this zone.\n"));
1018 		}
1019 	}
1020 
1021 	zfs_close(zhp);
1022 }
1023 
1024 /*
1025  * If the zonepath is a ZFS file system, attempt to destroy it.  We return Z_OK
1026  * if we were able to zfs_destroy the zonepath, otherwise we return Z_ERR
1027  * which means the caller should clean up the zonepath in the traditional
1028  * way.
1029  */
1030 int
1031 destroy_zfs(char *zonepath)
1032 {
1033 	zfs_handle_t	*zhp;
1034 	boolean_t	is_clone = B_FALSE;
1035 	char		origin[ZFS_MAXPROPLEN];
1036 
1037 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1038 		return (Z_ERR);
1039 
1040 	if (promote_all_clones(zhp) != 0)
1041 		return (Z_ERR);
1042 
1043 	/* Now cleanup any snapshots remaining. */
1044 	if (zfs_iter_snapshots(zhp, rm_snap, NULL) != 0) {
1045 		zfs_close(zhp);
1046 		return (Z_ERR);
1047 	}
1048 
1049 	/*
1050 	 * We can't destroy the file system if it has still has dependents.
1051 	 * There shouldn't be any at this point, but we'll double check.
1052 	 */
1053 	if (zfs_iter_dependents(zhp, B_TRUE, has_dependent, NULL) != 0) {
1054 		(void) fprintf(stderr, gettext("zfs destroy %s failed: the "
1055 		    "dataset still has dependents\n"), zfs_get_name(zhp));
1056 		zfs_close(zhp);
1057 		return (Z_ERR);
1058 	}
1059 
1060 	/*
1061 	 * This might be a clone.  Try to get the snapshot so we can attempt
1062 	 * to destroy that as well.
1063 	 */
1064 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
1065 	    NULL, 0, B_FALSE) == 0)
1066 		is_clone = B_TRUE;
1067 
1068 	if (zfs_unmount(zhp, NULL, 0) != 0) {
1069 		(void) fprintf(stderr, gettext("zfs unmount %s failed: %s\n"),
1070 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1071 		zfs_close(zhp);
1072 		return (Z_ERR);
1073 	}
1074 
1075 	if (zfs_destroy(zhp, B_FALSE) != 0) {
1076 		/*
1077 		 * If the destroy fails for some reason, try to remount
1078 		 * the file system so that we can use "rm -rf" to clean up
1079 		 * instead.
1080 		 */
1081 		(void) fprintf(stderr, gettext("zfs destroy %s failed: %s\n"),
1082 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1083 		(void) zfs_mount(zhp, NULL, 0);
1084 		zfs_close(zhp);
1085 		return (Z_ERR);
1086 	}
1087 
1088 	/*
1089 	 * If the zone has ever been moved then the mountpoint dir will not be
1090 	 * cleaned up by the zfs_destroy().  To handle this case try to clean
1091 	 * it up now but don't worry if it fails, that will be normal.
1092 	 */
1093 	(void) rmdir(zonepath);
1094 
1095 	(void) printf(gettext("The ZFS file system for this zone has been "
1096 	    "destroyed.\n"));
1097 
1098 	if (is_clone) {
1099 		zfs_handle_t	*ohp;
1100 
1101 		/*
1102 		 * Try to clean up the snapshot that the clone was taken from.
1103 		 */
1104 		if ((ohp = zfs_open(g_zfs, origin,
1105 		    ZFS_TYPE_SNAPSHOT)) != NULL) {
1106 			if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
1107 			    NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
1108 				(void) zfs_destroy(ohp, B_FALSE);
1109 			zfs_close(ohp);
1110 		}
1111 	}
1112 
1113 	zfs_close(zhp);
1114 	return (Z_OK);
1115 }
1116 
1117 /*
1118  * Return true if the path is its own zfs file system.  We determine this
1119  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
1120  * if it is a different fs.
1121  */
1122 boolean_t
1123 is_zonepath_zfs(char *zonepath)
1124 {
1125 	int res;
1126 	char *path;
1127 	char *parent;
1128 	struct statvfs64 buf1, buf2;
1129 
1130 	if (statvfs64(zonepath, &buf1) != 0)
1131 		return (B_FALSE);
1132 
1133 	if (strcmp(buf1.f_basetype, "zfs") != 0)
1134 		return (B_FALSE);
1135 
1136 	if ((path = strdup(zonepath)) == NULL)
1137 		return (B_FALSE);
1138 
1139 	parent = dirname(path);
1140 	res = statvfs64(parent, &buf2);
1141 	free(path);
1142 
1143 	if (res != 0)
1144 		return (B_FALSE);
1145 
1146 	if (buf1.f_fsid == buf2.f_fsid)
1147 		return (B_FALSE);
1148 
1149 	return (B_TRUE);
1150 }
1151 
1152 /*
1153  * Implement the fast move of a ZFS file system by simply updating the
1154  * mountpoint.  Since it is file system already, we don't have the
1155  * issue of cross-file system copying.
1156  */
1157 int
1158 move_zfs(char *zonepath, char *new_zonepath)
1159 {
1160 	int		ret = Z_ERR;
1161 	zfs_handle_t	*zhp;
1162 
1163 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1164 		return (Z_ERR);
1165 
1166 	if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1167 	    new_zonepath) == 0) {
1168 		/*
1169 		 * Clean up the old mount point.  We ignore any failure since
1170 		 * the zone is already successfully mounted on the new path.
1171 		 */
1172 		(void) rmdir(zonepath);
1173 		ret = Z_OK;
1174 	}
1175 
1176 	zfs_close(zhp);
1177 
1178 	return (ret);
1179 }
1180 
1181 /*
1182  * Validate that the given dataset exists on the system, and that neither it nor
1183  * its children are zvols.
1184  *
1185  * Note that we don't do anything with the 'zoned' property here.  All
1186  * management is done in zoneadmd when the zone is actually rebooted.  This
1187  * allows us to automatically set the zoned property even when a zone is
1188  * rebooted by the administrator.
1189  */
1190 int
1191 verify_datasets(zone_dochandle_t handle)
1192 {
1193 	int return_code = Z_OK;
1194 	struct zone_dstab dstab;
1195 	zfs_handle_t *zhp;
1196 	char propbuf[ZFS_MAXPROPLEN];
1197 	char source[ZFS_MAX_DATASET_NAME_LEN];
1198 	zprop_source_t srctype;
1199 
1200 	if (zonecfg_setdsent(handle) != Z_OK) {
1201 		/*
1202 		 * TRANSLATION_NOTE
1203 		 * zfs and dataset are literals that should not be translated.
1204 		 */
1205 		(void) fprintf(stderr, gettext("could not verify zfs datasets: "
1206 		    "unable to enumerate datasets\n"));
1207 		return (Z_ERR);
1208 	}
1209 
1210 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
1211 
1212 		if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
1213 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1214 			(void) fprintf(stderr, gettext("could not verify zfs "
1215 			    "dataset %s: %s\n"), dstab.zone_dataset_name,
1216 			    libzfs_error_description(g_zfs));
1217 			return_code = Z_ERR;
1218 			continue;
1219 		}
1220 
1221 		if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf,
1222 		    sizeof (propbuf), &srctype, source,
1223 		    sizeof (source), 0) == 0 &&
1224 		    (srctype == ZPROP_SRC_INHERITED)) {
1225 			(void) fprintf(stderr, gettext("could not verify zfs "
1226 			    "dataset %s: mountpoint cannot be inherited\n"),
1227 			    dstab.zone_dataset_name);
1228 			return_code = Z_ERR;
1229 			zfs_close(zhp);
1230 			continue;
1231 		}
1232 
1233 		zfs_close(zhp);
1234 	}
1235 	(void) zonecfg_enddsent(handle);
1236 
1237 	return (return_code);
1238 }
1239 
1240 /*
1241  * Verify that the ZFS dataset exists, and its mountpoint
1242  * property is set to "legacy".
1243  */
1244 int
1245 verify_fs_zfs(struct zone_fstab *fstab)
1246 {
1247 	zfs_handle_t *zhp;
1248 	char propbuf[ZFS_MAXPROPLEN];
1249 
1250 	if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
1251 	    ZFS_TYPE_DATASET)) == NULL) {
1252 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1253 		    "could not access zfs dataset '%s'\n"),
1254 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1255 		return (Z_ERR);
1256 	}
1257 
1258 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
1259 		(void) fprintf(stderr, gettext("cannot verify fs %s: "
1260 		    "'%s' is not a file system\n"),
1261 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1262 		zfs_close(zhp);
1263 		return (Z_ERR);
1264 	}
1265 
1266 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1267 	    NULL, NULL, 0, 0) != 0 || strcmp(propbuf, "legacy") != 0) {
1268 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1269 		    "zfs '%s' mountpoint is not \"legacy\"\n"),
1270 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1271 		zfs_close(zhp);
1272 		return (Z_ERR);
1273 	}
1274 
1275 	zfs_close(zhp);
1276 	return (Z_OK);
1277 }
1278 
1279 /*
1280  * Destroy the specified mnttab structure that was created by mnttab_dup().
1281  * NOTE: The structure's mnt_time field isn't freed.
1282  */
1283 static void
1284 mnttab_destroy(struct mnttab *tabp)
1285 {
1286 	assert(tabp != NULL);
1287 
1288 	free(tabp->mnt_mountp);
1289 	free(tabp->mnt_special);
1290 	free(tabp->mnt_fstype);
1291 	free(tabp->mnt_mntopts);
1292 	free(tabp);
1293 }
1294 
1295 /*
1296  * Duplicate the specified mnttab structure.  The mnt_mountp and mnt_time
1297  * fields aren't duplicated.  This function returns a pointer to the new mnttab
1298  * structure or NULL if an error occurred.  If an error occurs, then this
1299  * function sets errno to reflect the error.  mnttab structures created by
1300  * this function should be destroyed via mnttab_destroy().
1301  */
1302 static struct mnttab *
1303 mnttab_dup(const struct mnttab *srcp)
1304 {
1305 	struct mnttab *retval;
1306 
1307 	assert(srcp != NULL);
1308 
1309 	retval = (struct mnttab *)calloc(1, sizeof (*retval));
1310 	if (retval == NULL) {
1311 		errno = ENOMEM;
1312 		return (NULL);
1313 	}
1314 	if (srcp->mnt_special != NULL) {
1315 		retval->mnt_special = strdup(srcp->mnt_special);
1316 		if (retval->mnt_special == NULL)
1317 			goto err;
1318 	}
1319 	if (srcp->mnt_fstype != NULL) {
1320 		retval->mnt_fstype = strdup(srcp->mnt_fstype);
1321 		if (retval->mnt_fstype == NULL)
1322 			goto err;
1323 	}
1324 	retval->mnt_mntopts = (char *)malloc(MAX_MNTOPT_STR * sizeof (char));
1325 	if (retval->mnt_mntopts == NULL)
1326 		goto err;
1327 	if (srcp->mnt_mntopts != NULL) {
1328 		if (strlcpy(retval->mnt_mntopts, srcp->mnt_mntopts,
1329 		    MAX_MNTOPT_STR * sizeof (char)) >= MAX_MNTOPT_STR *
1330 		    sizeof (char)) {
1331 			mnttab_destroy(retval);
1332 			errno = EOVERFLOW; /* similar to mount(2) behavior */
1333 			return (NULL);
1334 		}
1335 	} else {
1336 		retval->mnt_mntopts[0] = '\0';
1337 	}
1338 	return (retval);
1339 
1340 err:
1341 	mnttab_destroy(retval);
1342 	errno = ENOMEM;
1343 	return (NULL);
1344 }
1345 
1346 /*
1347  * Determine whether the specified ZFS dataset's mountpoint property is set
1348  * to "legacy".  If the specified dataset does not have a legacy mountpoint,
1349  * then the string pointer to which the mountpoint argument points is assigned
1350  * a dynamically-allocated string containing the dataset's mountpoint
1351  * property.  If the dataset's mountpoint property is "legacy" or a libzfs
1352  * error occurs, then the string pointer to which the mountpoint argument
1353  * points isn't modified.
1354  *
1355  * This function returns B_TRUE if it doesn't encounter any fatal errors.
1356  * It returns B_FALSE if it encounters a fatal error and sets errno to the
1357  * appropriate error code.
1358  */
1359 static boolean_t
1360 get_zfs_non_legacy_mountpoint(const char *dataset_name, char **mountpoint)
1361 {
1362 	zfs_handle_t *zhp;
1363 	char propbuf[ZFS_MAXPROPLEN];
1364 
1365 	assert(dataset_name != NULL);
1366 	assert(mountpoint != NULL);
1367 
1368 	if ((zhp = zfs_open(g_zfs, dataset_name, ZFS_TYPE_DATASET)) == NULL) {
1369 		errno = EINVAL;
1370 		return (B_FALSE);
1371 	}
1372 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1373 	    NULL, NULL, 0, 0) != 0) {
1374 		zfs_close(zhp);
1375 		errno = EINVAL;
1376 		return (B_FALSE);
1377 	}
1378 	zfs_close(zhp);
1379 	if (strcmp(propbuf, "legacy") != 0) {
1380 		if ((*mountpoint = strdup(propbuf)) == NULL) {
1381 			errno = ENOMEM;
1382 			return (B_FALSE);
1383 		}
1384 	}
1385 	return (B_TRUE);
1386 }
1387 
1388 
1389 /*
1390  * This zonecfg_find_mounts() callback records information about mounts of
1391  * interest in a zonepath.  It also tallies the number of zone
1392  * root overlay mounts and the number of unexpected mounts found.
1393  * This function outputs errors using zerror() if it finds unexpected
1394  * mounts.  cookiep should point to an initialized zone_mounts_t structure.
1395  *
1396  * This function returns zero on success and a nonzero value on failure.
1397  */
1398 static int
1399 zone_mounts_cb(const struct mnttab *mountp, void *cookiep)
1400 {
1401 	zone_mounts_t *mounts;
1402 	const char *zone_mount_dir;
1403 
1404 	assert(mountp != NULL);
1405 	assert(cookiep != NULL);
1406 
1407 	mounts = (zone_mounts_t *)cookiep;
1408 	zone_mount_dir = mountp->mnt_mountp + mounts->zonepath_len;
1409 	if (strcmp(zone_mount_dir, "/root") == 0) {
1410 		/*
1411 		 * Check for an overlay mount.  If we already detected a /root
1412 		 * mount, then the current mount must be an overlay mount.
1413 		 */
1414 		if (mounts->root_mnttab != NULL) {
1415 			mounts->num_root_overlay_mounts++;
1416 			return (0);
1417 		}
1418 
1419 		/*
1420 		 * Store the root mount's mnttab information in the
1421 		 * zone_mounts_t structure for future use.
1422 		 */
1423 		if ((mounts->root_mnttab = mnttab_dup(mountp)) == NULL) {
1424 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1425 			return (-1);
1426 		}
1427 
1428 		/*
1429 		 * Determine if the filesystem is a ZFS filesystem with a
1430 		 * non-legacy mountpoint.  If it is, then set the root
1431 		 * filesystem's mnttab's mnt_mountp field to a non-NULL
1432 		 * value, which will serve as a flag to indicate this special
1433 		 * condition.
1434 		 */
1435 		if (strcmp(mountp->mnt_fstype, MNTTYPE_ZFS) == 0 &&
1436 		    get_zfs_non_legacy_mountpoint(mountp->mnt_special,
1437 		    &mounts->root_mnttab->mnt_mountp) != B_TRUE) {
1438 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1439 			return (-1);
1440 		}
1441 	} else {
1442 		/*
1443 		 * An unexpected mount was found.  Notify the user.
1444 		 */
1445 		if (mounts->num_unexpected_mounts == 0)
1446 			zerror(gettext("These file systems are mounted on "
1447 			    "subdirectories of %s.\n"), mounts->zonepath);
1448 		mounts->num_unexpected_mounts++;
1449 		(void) zfm_print(mountp, NULL);
1450 	}
1451 	return (0);
1452 }
1453 
1454 /*
1455  * Initialize the specified zone_mounts_t structure for the given zonepath.
1456  * If this function succeeds, it returns zero and the specified zone_mounts_t
1457  * structure contains information about mounts in the specified zonepath.
1458  * The function returns a nonzero value if it fails.  The zone_mounts_t
1459  * structure doesn't need be destroyed via zone_mounts_destroy() if this
1460  * function fails.
1461  */
1462 int
1463 zone_mounts_init(zone_mounts_t *mounts, const char *zonepath)
1464 {
1465 	assert(mounts != NULL);
1466 	assert(zonepath != NULL);
1467 
1468 	bzero(mounts, sizeof (*mounts));
1469 	if ((mounts->zonepath = strdup(zonepath)) == NULL) {
1470 		zerror(gettext("the process ran out of memory while checking "
1471 		    "for mounts in zonepath %s."), zonepath);
1472 		return (-1);
1473 	}
1474 	mounts->zonepath_len = strlen(zonepath);
1475 	if (zonecfg_find_mounts((char *)zonepath, zone_mounts_cb, mounts) ==
1476 	    -1) {
1477 		zerror(gettext("an error occurred while checking for mounts "
1478 		    "in zonepath %s."), zonepath);
1479 		zone_mounts_destroy(mounts);
1480 		return (-1);
1481 	}
1482 	return (0);
1483 }
1484 
1485 /*
1486  * Destroy the memory used by the specified zone_mounts_t structure's fields.
1487  * This function doesn't free the memory occupied by the structure itself
1488  * (i.e., it doesn't free the parameter).
1489  */
1490 void
1491 zone_mounts_destroy(zone_mounts_t *mounts)
1492 {
1493 	assert(mounts != NULL);
1494 
1495 	free(mounts->zonepath);
1496 	if (mounts->root_mnttab != NULL)
1497 		mnttab_destroy(mounts->root_mnttab);
1498 }
1499 
1500 /*
1501  * Mount a moving zone's root filesystem (if it had a root filesystem mount
1502  * prior to the move) using the specified zonepath.  mounts should refer to
1503  * the zone_mounts_t structure describing the zone's mount information.
1504  *
1505  * This function returns zero if the mount succeeds and a nonzero value
1506  * if it doesn't.
1507  */
1508 int
1509 zone_mount_rootfs(zone_mounts_t *mounts, const char *zonepath)
1510 {
1511 	char zoneroot[MAXPATHLEN];
1512 	struct mnttab *mtab;
1513 	int flags;
1514 
1515 	assert(mounts != NULL);
1516 	assert(zonepath != NULL);
1517 
1518 	/*
1519 	 * If there isn't a root filesystem, then don't do anything.
1520 	 */
1521 	mtab = mounts->root_mnttab;
1522 	if (mtab == NULL)
1523 		return (0);
1524 
1525 	/*
1526 	 * Determine the root filesystem's new mountpoint.
1527 	 */
1528 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1529 	    sizeof (zoneroot)) {
1530 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1531 		return (-1);
1532 	}
1533 
1534 	/*
1535 	 * If the root filesystem is a non-legacy ZFS filesystem (i.e., if it's
1536 	 * mnt_mountp field is non-NULL), then make the filesystem's new
1537 	 * mount point its mountpoint property and mount the filesystem.
1538 	 */
1539 	if (mtab->mnt_mountp != NULL) {
1540 		zfs_handle_t *zhp;
1541 
1542 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1543 		    ZFS_TYPE_DATASET)) == NULL) {
1544 			zerror(gettext("could not get ZFS handle for the zone's"
1545 			    " root filesystem"));
1546 			return (-1);
1547 		}
1548 		if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1549 		    zoneroot) != 0) {
1550 			zerror(gettext("could not modify zone's root "
1551 			    "filesystem's mountpoint property"));
1552 			zfs_close(zhp);
1553 			return (-1);
1554 		}
1555 		if (zfs_mount(zhp, mtab->mnt_mntopts, 0) != 0) {
1556 			zerror(gettext("unable to mount zone root %s: %s"),
1557 			    zoneroot, libzfs_error_description(g_zfs));
1558 			if (zfs_prop_set(zhp,
1559 			    zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1560 			    mtab->mnt_mountp) != 0)
1561 				zerror(gettext("unable to restore zone's root "
1562 				    "filesystem's mountpoint property"));
1563 			zfs_close(zhp);
1564 			return (-1);
1565 		}
1566 		zfs_close(zhp);
1567 		return (0);
1568 	}
1569 
1570 	/*
1571 	 * The root filesystem is either a legacy-mounted ZFS filesystem or
1572 	 * a non-ZFS filesystem.  Use mount(2) to mount the root filesystem.
1573 	 */
1574 	if (mtab->mnt_mntopts != NULL)
1575 		flags = MS_OPTIONSTR;
1576 	else
1577 		flags = 0;
1578 	if (mount(mtab->mnt_special, zoneroot, flags, mtab->mnt_fstype, NULL, 0,
1579 	    mtab->mnt_mntopts, MAX_MNTOPT_STR * sizeof (char)) != 0) {
1580 		flags = errno;
1581 		zerror(gettext("unable to mount zone root %s: %s"), zoneroot,
1582 		    strerror(flags));
1583 		return (-1);
1584 	}
1585 	return (0);
1586 }
1587 
1588 /*
1589  * Unmount a moving zone's root filesystem (if such a mount exists) using the
1590  * specified zonepath.  mounts should refer to the zone_mounts_t structure
1591  * describing the zone's mount information.  If force is B_TRUE, then if the
1592  * unmount fails, then the function will try to forcibly unmount the zone's root
1593  * filesystem.
1594  *
1595  * This function returns zero if the unmount (forced or otherwise) succeeds;
1596  * otherwise, it returns a nonzero value.
1597  */
1598 int
1599 zone_unmount_rootfs(zone_mounts_t *mounts, const char *zonepath,
1600     boolean_t force)
1601 {
1602 	char zoneroot[MAXPATHLEN];
1603 	struct mnttab *mtab;
1604 	int err;
1605 
1606 	assert(mounts != NULL);
1607 	assert(zonepath != NULL);
1608 
1609 	/*
1610 	 * If there isn't a root filesystem, then don't do anything.
1611 	 */
1612 	mtab = mounts->root_mnttab;
1613 	if (mtab == NULL)
1614 		return (0);
1615 
1616 	/*
1617 	 * Determine the root filesystem's mountpoint.
1618 	 */
1619 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1620 	    sizeof (zoneroot)) {
1621 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1622 		return (-1);
1623 	}
1624 
1625 	/*
1626 	 * If the root filesystem is a non-legacy ZFS fileystem, then unmount
1627 	 * the filesystem via libzfs.
1628 	 */
1629 	if (mtab->mnt_mountp != NULL) {
1630 		zfs_handle_t *zhp;
1631 
1632 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1633 		    ZFS_TYPE_DATASET)) == NULL) {
1634 			zerror(gettext("could not get ZFS handle for the zone's"
1635 			    " root filesystem"));
1636 			return (-1);
1637 		}
1638 		if (zfs_unmount(zhp, zoneroot, 0) != 0) {
1639 			if (force && zfs_unmount(zhp, zoneroot, MS_FORCE) ==
1640 			    0) {
1641 				zfs_close(zhp);
1642 				return (0);
1643 			}
1644 			zerror(gettext("unable to unmount zone root %s: %s"),
1645 			    zoneroot, libzfs_error_description(g_zfs));
1646 			zfs_close(zhp);
1647 			return (-1);
1648 		}
1649 		zfs_close(zhp);
1650 		return (0);
1651 	}
1652 
1653 	/*
1654 	 * Use umount(2) to unmount the root filesystem.  If this fails, then
1655 	 * forcibly unmount it if the force flag is set.
1656 	 */
1657 	if (umount(zoneroot) != 0) {
1658 		if (force && umount2(zoneroot, MS_FORCE) == 0)
1659 			return (0);
1660 		err = errno;
1661 		zerror(gettext("unable to unmount zone root %s: %s"), zoneroot,
1662 		    strerror(err));
1663 		return (-1);
1664 	}
1665 	return (0);
1666 }
1667 
1668 int
1669 init_zfs(void)
1670 {
1671 	if ((g_zfs = libzfs_init()) == NULL) {
1672 		(void) fprintf(stderr, gettext("failed to initialize ZFS "
1673 		    "library\n"));
1674 		return (Z_ERR);
1675 	}
1676 
1677 	return (Z_OK);
1678 }
1679