xref: /illumos-gate/usr/src/cmd/zoneadm/zfs.c (revision 4c28a617e3922d92a58e813a5b955eb526b9c386)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26  * Copyright (c) 2016 Martin Matuska. All rights reserved.
27  */
28 
29 /*
30  * This file contains the functions used to support the ZFS integration
31  * with zones.  This includes validation (e.g. zonecfg dataset), cloning,
32  * file system creation and destruction.
33  */
34 
35 #include <stdio.h>
36 #include <errno.h>
37 #include <unistd.h>
38 #include <string.h>
39 #include <locale.h>
40 #include <libintl.h>
41 #include <sys/stat.h>
42 #include <sys/statvfs.h>
43 #include <libgen.h>
44 #include <libzonecfg.h>
45 #include <sys/mnttab.h>
46 #include <libzfs.h>
47 #include <sys/mntent.h>
48 #include <values.h>
49 #include <strings.h>
50 #include <assert.h>
51 
52 #include "zoneadm.h"
53 
/*
 * libzfs library handle passed to the libzfs calls made in this file
 * (zfs_open, zfs_iter_root, zfs_snapshot, zfs_create, ...).  It is not
 * initialized here; presumably the zoneadm start-up code sets it up --
 * confirm against the caller.
 */
libzfs_handle_t *g_zfs;
55 
/*
 * Call-back data for match_mountpoint(): the mount point being searched for
 * and, once a match is found, the open handle of the file system mounted
 * there.
 */
typedef struct zfs_mount_data {
	char		*match_name;	/* mount point path to look for */
	zfs_handle_t	*match_handle;	/* matching dataset; NULL if none */
} zfs_mount_data_t;
60 
/*
 * Call-back data shared by the get_snap_max() and rename_snap() snapshot
 * iterators.
 */
typedef struct zfs_snapshot_data {
	char	*match_name;	/* snapshot name prefix: <dataset>@SUNWzone */
	int	len;		/* strlen of match_name */
	int	max;		/* highest number found after the prefix */
	int	num;		/* matching snapshots seen (i.e. to rename) */
	int	cntr;		/* rename attempts made so far */
} zfs_snapshot_data_t;
68 
/*
 * Call-back data for find_clone() and get_direct_clone(), used to pick the
 * clone that should be promoted.
 */
typedef struct clone_data {
	zfs_handle_t	*clone_zhp;	/* clone dataset to promote */
	time_t		origin_creation; /* creation time of chosen snapshot */
	const char	*snapshot;	/* name of snapshot being examined */
} clone_data_t;
74 
75 /*
76  * A ZFS file system iterator call-back function which returns the
77  * zfs_handle_t for a ZFS file system on the specified mount point.
78  */
79 static int
80 match_mountpoint(zfs_handle_t *zhp, void *data)
81 {
82 	int			res;
83 	zfs_mount_data_t	*cbp;
84 	char			mp[ZFS_MAXPROPLEN];
85 
86 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
87 		zfs_close(zhp);
88 		return (0);
89 	}
90 
91 	/* First check if the dataset is mounted. */
92 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
93 	    0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
94 		zfs_close(zhp);
95 		return (0);
96 	}
97 
98 	/* Now check mount point. */
99 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
100 	    0, B_FALSE) != 0) {
101 		zfs_close(zhp);
102 		return (0);
103 	}
104 
105 	cbp = (zfs_mount_data_t *)data;
106 
107 	if (strcmp(mp, "legacy") == 0) {
108 		/* If legacy, must look in mnttab for mountpoint. */
109 		FILE		*fp;
110 		struct mnttab	entry;
111 		const char	*nm;
112 
113 		nm = zfs_get_name(zhp);
114 		if ((fp = fopen(MNTTAB, "r")) == NULL) {
115 			zfs_close(zhp);
116 			return (0);
117 		}
118 
119 		while (getmntent(fp, &entry) == 0) {
120 			if (strcmp(nm, entry.mnt_special) == 0) {
121 				if (strcmp(entry.mnt_mountp, cbp->match_name)
122 				    == 0) {
123 					(void) fclose(fp);
124 					cbp->match_handle = zhp;
125 					return (1);
126 				}
127 				break;
128 			}
129 		}
130 		(void) fclose(fp);
131 
132 	} else if (strcmp(mp, cbp->match_name) == 0) {
133 		cbp->match_handle = zhp;
134 		return (1);
135 	}
136 
137 	/* Iterate over any nested datasets. */
138 	res = zfs_iter_filesystems(zhp, match_mountpoint, data);
139 	zfs_close(zhp);
140 	return (res);
141 }
142 
143 /*
144  * Get ZFS handle for the specified mount point.
145  */
146 static zfs_handle_t *
147 mount2zhandle(char *mountpoint)
148 {
149 	zfs_mount_data_t	cb;
150 
151 	cb.match_name = mountpoint;
152 	cb.match_handle = NULL;
153 	(void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
154 	return (cb.match_handle);
155 }
156 
157 /*
158  * Check if there is already a file system (zfs or any other type) mounted on
159  * path.
160  */
161 static boolean_t
162 is_mountpnt(char *path)
163 {
164 	FILE		*fp;
165 	struct mnttab	entry;
166 
167 	if ((fp = fopen(MNTTAB, "r")) == NULL)
168 		return (B_FALSE);
169 
170 	while (getmntent(fp, &entry) == 0) {
171 		if (strcmp(path, entry.mnt_mountp) == 0) {
172 			(void) fclose(fp);
173 			return (B_TRUE);
174 		}
175 	}
176 
177 	(void) fclose(fp);
178 	return (B_FALSE);
179 }
180 
181 /*
182  * Run the brand's pre-snapshot hook before we take a ZFS snapshot of the zone.
183  */
184 static int
185 pre_snapshot(char *presnapbuf)
186 {
187 	int status;
188 
189 	/* No brand-specific handler */
190 	if (presnapbuf[0] == '\0')
191 		return (Z_OK);
192 
193 	/* Run the hook */
194 	status = do_subproc(presnapbuf);
195 	if ((status = subproc_status(gettext("brand-specific presnapshot"),
196 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
197 		return (Z_ERR);
198 
199 	return (Z_OK);
200 }
201 
202 /*
203  * Run the brand's post-snapshot hook after we take a ZFS snapshot of the zone.
204  */
205 static int
206 post_snapshot(char *postsnapbuf)
207 {
208 	int status;
209 
210 	/* No brand-specific handler */
211 	if (postsnapbuf[0] == '\0')
212 		return (Z_OK);
213 
214 	/* Run the hook */
215 	status = do_subproc(postsnapbuf);
216 	if ((status = subproc_status(gettext("brand-specific postsnapshot"),
217 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
218 		return (Z_ERR);
219 
220 	return (Z_OK);
221 }
222 
223 /*
224  * This is a ZFS snapshot iterator call-back function which returns the
225  * highest number of SUNWzone snapshots that have been taken.
226  */
227 static int
228 get_snap_max(zfs_handle_t *zhp, void *data)
229 {
230 	int			res;
231 	zfs_snapshot_data_t	*cbp;
232 
233 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
234 		zfs_close(zhp);
235 		return (0);
236 	}
237 
238 	cbp = (zfs_snapshot_data_t *)data;
239 
240 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) == 0) {
241 		char	*nump;
242 		int	num;
243 
244 		cbp->num++;
245 		nump = (char *)(zfs_get_name(zhp) + cbp->len);
246 		num = atoi(nump);
247 		if (num > cbp->max)
248 			cbp->max = num;
249 	}
250 
251 	res = zfs_iter_snapshots(zhp, B_FALSE, get_snap_max, data);
252 	zfs_close(zhp);
253 	return (res);
254 }
255 
256 /*
257  * Take a ZFS snapshot to be used for cloning the zone.
258  */
259 static int
260 take_snapshot(zfs_handle_t *zhp, char *snapshot_name, int snap_size,
261     char *presnapbuf, char *postsnapbuf)
262 {
263 	int			res;
264 	char			template[ZFS_MAX_DATASET_NAME_LEN];
265 	zfs_snapshot_data_t	cb;
266 
267 	/*
268 	 * First we need to figure out the next available name for the
269 	 * zone snapshot.  Look through the list of zones snapshots for
270 	 * this file system to determine the maximum snapshot name.
271 	 */
272 	if (snprintf(template, sizeof (template), "%s@SUNWzone",
273 	    zfs_get_name(zhp)) >=  sizeof (template))
274 		return (Z_ERR);
275 
276 	cb.match_name = template;
277 	cb.len = strlen(template);
278 	cb.max = 0;
279 
280 	if (zfs_iter_snapshots(zhp, B_FALSE, get_snap_max, &cb) != 0)
281 		return (Z_ERR);
282 
283 	cb.max++;
284 
285 	if (snprintf(snapshot_name, snap_size, "%s@SUNWzone%d",
286 	    zfs_get_name(zhp), cb.max) >= snap_size)
287 		return (Z_ERR);
288 
289 	if (pre_snapshot(presnapbuf) != Z_OK)
290 		return (Z_ERR);
291 	res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE, NULL);
292 	if (post_snapshot(postsnapbuf) != Z_OK)
293 		return (Z_ERR);
294 
295 	if (res != 0)
296 		return (Z_ERR);
297 	return (Z_OK);
298 }
299 
300 /*
301  * We are using an explicit snapshot from some earlier point in time so
302  * we need to validate it.  Run the brand specific hook.
303  */
304 static int
305 validate_snapshot(char *snapshot_name, char *snap_path, char *validsnapbuf)
306 {
307 	int status;
308 	char cmdbuf[MAXPATHLEN];
309 
310 	/* No brand-specific handler */
311 	if (validsnapbuf[0] == '\0')
312 		return (Z_OK);
313 
314 	/* pass args - snapshot_name & snap_path */
315 	if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %s %s", validsnapbuf,
316 	    snapshot_name, snap_path) >= sizeof (cmdbuf)) {
317 		zerror("Command line too long");
318 		return (Z_ERR);
319 	}
320 
321 	/* Run the hook */
322 	status = do_subproc(cmdbuf);
323 	if ((status = subproc_status(gettext("brand-specific validatesnapshot"),
324 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
325 		return (Z_ERR);
326 
327 	return (Z_OK);
328 }
329 
330 /*
331  * Remove the sw inventory file from inside this zonepath that we picked up out
332  * of the snapshot.
333  */
334 static int
335 clean_out_clone()
336 {
337 	int err;
338 	zone_dochandle_t handle;
339 
340 	if ((handle = zonecfg_init_handle()) == NULL) {
341 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
342 		return (Z_ERR);
343 	}
344 
345 	if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
346 		errno = err;
347 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
348 		zonecfg_fini_handle(handle);
349 		return (Z_ERR);
350 	}
351 
352 	zonecfg_rm_detached(handle, B_FALSE);
353 	zonecfg_fini_handle(handle);
354 
355 	return (Z_OK);
356 }
357 
358 /*
359  * Make a ZFS clone on zonepath from snapshot_name.
360  */
361 static int
362 clone_snap(char *snapshot_name, char *zonepath)
363 {
364 	int		res = Z_OK;
365 	int		err;
366 	zfs_handle_t	*zhp;
367 	zfs_handle_t	*clone;
368 	nvlist_t	*props = NULL;
369 
370 	if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
371 		return (Z_NO_ENTRY);
372 
373 	(void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
374 
375 	/*
376 	 * We turn off zfs SHARENFS and SHARESMB properties on the
377 	 * zoneroot dataset in order to prevent the GZ from sharing
378 	 * NGZ data by accident.
379 	 */
380 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
381 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
382 	    "off") != 0) ||
383 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
384 	    "off") != 0)) {
385 		nvlist_free(props);
386 		(void) fprintf(stderr, gettext("could not create ZFS clone "
387 		    "%s: out of memory\n"), zonepath);
388 		return (Z_ERR);
389 	}
390 
391 	err = zfs_clone(zhp, zonepath, props);
392 	zfs_close(zhp);
393 
394 	nvlist_free(props);
395 
396 	if (err != 0)
397 		return (Z_ERR);
398 
399 	/* create the mountpoint if necessary */
400 	if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_DATASET)) == NULL)
401 		return (Z_ERR);
402 
403 	/*
404 	 * The clone has been created so we need to print a diagnostic
405 	 * message if one of the following steps fails for some reason.
406 	 */
407 	if (zfs_mount(clone, NULL, 0) != 0) {
408 		(void) fprintf(stderr, gettext("could not mount ZFS clone "
409 		    "%s\n"), zfs_get_name(clone));
410 		res = Z_ERR;
411 
412 	} else if (clean_out_clone() != Z_OK) {
413 		(void) fprintf(stderr, gettext("could not remove the "
414 		    "software inventory from ZFS clone %s\n"),
415 		    zfs_get_name(clone));
416 		res = Z_ERR;
417 	}
418 
419 	zfs_close(clone);
420 	return (res);
421 }
422 
423 /*
424  * This function takes a zonepath and attempts to determine what the ZFS
425  * file system name (not mountpoint) should be for that path.  We do not
426  * assume that zonepath is an existing directory or ZFS fs since we use
427  * this function as part of the process of creating a new ZFS fs or clone.
428  *
429  * The way this works is that we look at the parent directory of the zonepath
430  * to see if it is a ZFS fs.  If it is, we get the name of that ZFS fs and
431  * append the last component of the zonepath to generate the ZFS name for the
432  * zonepath.  This matches the algorithm that ZFS uses for automatically
433  * mounting a new fs after it is created.
434  *
435  * Although a ZFS fs can be mounted anywhere, we don't worry about handling
436  * all of the complexity that a user could possibly configure with arbitrary
437  * mounts since there is no way to generate a ZFS name from a random path in
438  * the file system.  We only try to handle the automatic mounts that ZFS does
439  * for each file system.  ZFS restricts this so that a new fs must be created
440  * in an existing parent ZFS fs.  It then automatically mounts the new fs
441  * directly under the mountpoint for the parent fs using the last component
442  * of the name as the mountpoint directory.
443  *
444  * For example:
445  *    Name			Mountpoint
446  *    space/eng/dev/test/zone1	/project1/eng/dev/test/zone1
447  *
448  * Return Z_OK if the path mapped to a ZFS file system name, otherwise return
449  * Z_ERR.
450  */
451 static int
452 path2name(char *zonepath, char *zfs_name, int len)
453 {
454 	int		res;
455 	char		*bnm, *dnm, *dname, *bname;
456 	zfs_handle_t	*zhp;
457 	struct stat	stbuf;
458 
459 	/*
460 	 * We need two tmp strings to handle paths directly in / (e.g. /foo)
461 	 * since dirname will overwrite the first char after "/" in this case.
462 	 */
463 	if ((bnm = strdup(zonepath)) == NULL)
464 		return (Z_ERR);
465 
466 	if ((dnm = strdup(zonepath)) == NULL) {
467 		free(bnm);
468 		return (Z_ERR);
469 	}
470 
471 	bname = basename(bnm);
472 	dname = dirname(dnm);
473 
474 	/*
475 	 * This is a quick test to save iterating over all of the zfs datasets
476 	 * on the system (which can be a lot).  If the parent dir is not in a
477 	 * ZFS fs, then we're done.
478 	 */
479 	if (stat(dname, &stbuf) != 0 || !S_ISDIR(stbuf.st_mode) ||
480 	    strcmp(stbuf.st_fstype, MNTTYPE_ZFS) != 0) {
481 		free(bnm);
482 		free(dnm);
483 		return (Z_ERR);
484 	}
485 
486 	/* See if the parent directory is its own ZFS dataset. */
487 	if ((zhp = mount2zhandle(dname)) == NULL) {
488 		/*
489 		 * The parent is not a ZFS dataset so we can't automatically
490 		 * create a dataset on the given path.
491 		 */
492 		free(bnm);
493 		free(dnm);
494 		return (Z_ERR);
495 	}
496 
497 	res = snprintf(zfs_name, len, "%s/%s", zfs_get_name(zhp), bname);
498 
499 	free(bnm);
500 	free(dnm);
501 	zfs_close(zhp);
502 	if (res >= len)
503 		return (Z_ERR);
504 
505 	return (Z_OK);
506 }
507 
508 /*
509  * A ZFS file system iterator call-back function used to determine if the
510  * file system has dependents (snapshots & clones).
511  */
512 /* ARGSUSED */
513 static int
514 has_dependent(zfs_handle_t *zhp, void *data)
515 {
516 	zfs_close(zhp);
517 	return (1);
518 }
519 
520 /*
521  * Given a snapshot name, get the file system path where the snapshot lives.
522  * A snapshot name is of the form fs_name@snap_name.  For example, snapshot
523  * pl/zones/z1@SUNWzone1 would have a path of
524  * /pl/zones/z1/.zfs/snapshot/SUNWzone1.
525  */
526 static int
527 snap2path(char *snap_name, char *path, int len)
528 {
529 	char		*p;
530 	zfs_handle_t	*zhp;
531 	char		mp[ZFS_MAXPROPLEN];
532 
533 	if ((p = strrchr(snap_name, '@')) == NULL)
534 		return (Z_ERR);
535 
536 	/* Get the file system name from the snap_name. */
537 	*p = '\0';
538 	zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_DATASET);
539 	*p = '@';
540 	if (zhp == NULL)
541 		return (Z_ERR);
542 
543 	/* Get the file system mount point. */
544 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
545 	    0, B_FALSE) != 0) {
546 		zfs_close(zhp);
547 		return (Z_ERR);
548 	}
549 	zfs_close(zhp);
550 
551 	p++;
552 	if (snprintf(path, len, "%s/.zfs/snapshot/%s", mp, p) >= len)
553 		return (Z_ERR);
554 
555 	return (Z_OK);
556 }
557 
558 /*
559  * This callback function is used to iterate through a snapshot's dependencies
560  * to find a filesystem that is a direct clone of the snapshot being iterated.
561  */
562 static int
563 get_direct_clone(zfs_handle_t *zhp, void *data)
564 {
565 	clone_data_t	*cd = data;
566 	char		origin[ZFS_MAX_DATASET_NAME_LEN];
567 	char		ds_path[ZFS_MAX_DATASET_NAME_LEN];
568 
569 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
570 		zfs_close(zhp);
571 		return (0);
572 	}
573 
574 	(void) strlcpy(ds_path, zfs_get_name(zhp), sizeof (ds_path));
575 
576 	/* Make sure this is a direct clone of the snapshot we're iterating. */
577 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
578 	    NULL, 0, B_FALSE) != 0 || strcmp(origin, cd->snapshot) != 0) {
579 		zfs_close(zhp);
580 		return (0);
581 	}
582 
583 	if (cd->clone_zhp != NULL)
584 		zfs_close(cd->clone_zhp);
585 
586 	cd->clone_zhp = zhp;
587 	return (1);
588 }
589 
590 /*
591  * A ZFS file system iterator call-back function used to determine the clone
592  * to promote.  This function finds the youngest (i.e. last one taken) snapshot
593  * that has a clone.  If found, it returns a reference to that clone in the
594  * callback data.
595  */
596 static int
597 find_clone(zfs_handle_t *zhp, void *data)
598 {
599 	clone_data_t	*cd = data;
600 	time_t		snap_creation;
601 	int		zret = 0;
602 
603 	/* If snapshot has no clones, skip it */
604 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) == 0) {
605 		zfs_close(zhp);
606 		return (0);
607 	}
608 
609 	cd->snapshot = zfs_get_name(zhp);
610 
611 	/* Get the creation time of this snapshot */
612 	snap_creation = (time_t)zfs_prop_get_int(zhp, ZFS_PROP_CREATION);
613 
614 	/*
615 	 * If this snapshot's creation time is greater than (i.e. younger than)
616 	 * the current youngest snapshot found, iterate this snapshot to
617 	 * get the right clone.
618 	 */
619 	if (snap_creation >= cd->origin_creation) {
620 		/*
621 		 * Iterate the dependents of this snapshot to find a clone
622 		 * that's a direct dependent.
623 		 */
624 		if ((zret = zfs_iter_dependents(zhp, B_FALSE, get_direct_clone,
625 		    cd)) == -1) {
626 			zfs_close(zhp);
627 			return (1);
628 		} else if (zret == 1) {
629 			/*
630 			 * Found a clone, update the origin_creation time
631 			 * in the callback data.
632 			 */
633 			cd->origin_creation = snap_creation;
634 		}
635 	}
636 
637 	zfs_close(zhp);
638 	return (0);
639 }
640 
641 /*
642  * A ZFS file system iterator call-back function used to remove standalone
643  * snapshots.
644  */
645 /* ARGSUSED */
646 static int
647 rm_snap(zfs_handle_t *zhp, void *data)
648 {
649 	/* If snapshot has clones, something is wrong */
650 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) != 0) {
651 		zfs_close(zhp);
652 		return (1);
653 	}
654 
655 	if (zfs_unmount(zhp, NULL, 0) == 0) {
656 		(void) zfs_destroy(zhp, B_FALSE);
657 	}
658 
659 	zfs_close(zhp);
660 	return (0);
661 }
662 
663 /*
664  * A ZFS snapshot iterator call-back function which renames snapshots.
665  */
666 static int
667 rename_snap(zfs_handle_t *zhp, void *data)
668 {
669 	int			res;
670 	zfs_snapshot_data_t	*cbp;
671 	char			template[ZFS_MAX_DATASET_NAME_LEN];
672 
673 	cbp = (zfs_snapshot_data_t *)data;
674 
675 	/*
676 	 * When renaming snapshots with the iterator, the iterator can see
677 	 * the same snapshot after we've renamed up in the namespace.  To
678 	 * prevent this we check the count for the number of snapshots we have
679 	 * to rename and stop at that point.
680 	 */
681 	if (cbp->cntr >= cbp->num) {
682 		zfs_close(zhp);
683 		return (0);
684 	}
685 
686 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
687 		zfs_close(zhp);
688 		return (0);
689 	}
690 
691 	/* Only rename the snapshots we automatically generate when we clone. */
692 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) != 0) {
693 		zfs_close(zhp);
694 		return (0);
695 	}
696 
697 	(void) snprintf(template, sizeof (template), "%s%d", cbp->match_name,
698 	    cbp->max++);
699 
700 	res = (zfs_rename(zhp, template, B_FALSE, B_FALSE) != 0);
701 	if (res != 0)
702 		(void) fprintf(stderr, gettext("failed to rename snapshot %s "
703 		    "to %s: %s\n"), zfs_get_name(zhp), template,
704 		    libzfs_error_description(g_zfs));
705 
706 	cbp->cntr++;
707 
708 	zfs_close(zhp);
709 	return (res);
710 }
711 
712 /*
713  * Rename the source dataset's snapshots that are automatically generated when
714  * we clone a zone so that there won't be a name collision when we promote the
715  * cloned dataset.  Once the snapshots have been renamed, then promote the
716  * clone.
717  *
718  * The snapshot rename process gets the highest number on the snapshot names
719  * (the format is zonename@SUNWzoneXX where XX are digits) on both the source
720  * and clone datasets, then renames the source dataset snapshots starting at
721  * the next number.
722  */
723 static int
724 promote_clone(zfs_handle_t *src_zhp, zfs_handle_t *cln_zhp)
725 {
726 	zfs_snapshot_data_t	sd;
727 	char			nm[ZFS_MAX_DATASET_NAME_LEN];
728 	char			template[ZFS_MAX_DATASET_NAME_LEN];
729 
730 	(void) strlcpy(nm, zfs_get_name(cln_zhp), sizeof (nm));
731 	/*
732 	 * Start by getting the clone's snapshot max which we use
733 	 * during the rename of the original dataset's snapshots.
734 	 */
735 	(void) snprintf(template, sizeof (template), "%s@SUNWzone", nm);
736 	sd.match_name = template;
737 	sd.len = strlen(template);
738 	sd.max = 0;
739 
740 	if (zfs_iter_snapshots(cln_zhp, B_FALSE, get_snap_max, &sd) != 0)
741 		return (Z_ERR);
742 
743 	/*
744 	 * Now make sure the source's snapshot max is at least as high as
745 	 * the clone's snapshot max.
746 	 */
747 	(void) snprintf(template, sizeof (template), "%s@SUNWzone",
748 	    zfs_get_name(src_zhp));
749 	sd.match_name = template;
750 	sd.len = strlen(template);
751 	sd.num = 0;
752 
753 	if (zfs_iter_snapshots(src_zhp, B_FALSE, get_snap_max, &sd) != 0)
754 		return (Z_ERR);
755 
756 	/*
757 	 * Now rename the source dataset's snapshots so there's no
758 	 * conflict when we promote the clone.
759 	 */
760 	sd.max++;
761 	sd.cntr = 0;
762 	if (zfs_iter_snapshots(src_zhp, B_FALSE, rename_snap, &sd) != 0)
763 		return (Z_ERR);
764 
765 	/* close and reopen the clone dataset to get the latest info */
766 	zfs_close(cln_zhp);
767 	if ((cln_zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
768 		return (Z_ERR);
769 
770 	if (zfs_promote(cln_zhp) != 0) {
771 		(void) fprintf(stderr, gettext("failed to promote %s: %s\n"),
772 		    nm, libzfs_error_description(g_zfs));
773 		return (Z_ERR);
774 	}
775 
776 	zfs_close(cln_zhp);
777 	return (Z_OK);
778 }
779 
780 /*
781  * Promote the youngest clone.  That clone will then become the origin of all
782  * of the other clones that were hanging off of the source dataset.
783  */
784 int
785 promote_all_clones(zfs_handle_t *zhp)
786 {
787 	clone_data_t	cd;
788 	char		nm[ZFS_MAX_DATASET_NAME_LEN];
789 
790 	cd.clone_zhp = NULL;
791 	cd.origin_creation = 0;
792 	cd.snapshot = NULL;
793 
794 	if (zfs_iter_snapshots(zhp, B_FALSE, find_clone, &cd) != 0) {
795 		zfs_close(zhp);
796 		return (Z_ERR);
797 	}
798 
799 	/* Nothing to promote. */
800 	if (cd.clone_zhp == NULL)
801 		return (Z_OK);
802 
803 	/* Found the youngest clone to promote.  Promote it. */
804 	if (promote_clone(zhp, cd.clone_zhp) != 0) {
805 		zfs_close(cd.clone_zhp);
806 		zfs_close(zhp);
807 		return (Z_ERR);
808 	}
809 
810 	/* close and reopen the main dataset to get the latest info */
811 	(void) strlcpy(nm, zfs_get_name(zhp), sizeof (nm));
812 	zfs_close(zhp);
813 	if ((zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
814 		return (Z_ERR);
815 
816 	return (Z_OK);
817 }
818 
819 /*
820  * Clone a pre-existing ZFS snapshot, either by making a direct ZFS clone, if
821  * possible, or by copying the data from the snapshot to the zonepath.
822  */
823 int
824 clone_snapshot_zfs(char *snap_name, char *zonepath, char *validatesnap)
825 {
826 	int	err = Z_OK;
827 	char	clone_name[MAXPATHLEN];
828 	char	snap_path[MAXPATHLEN];
829 
830 	if (snap2path(snap_name, snap_path, sizeof (snap_path)) != Z_OK) {
831 		(void) fprintf(stderr, gettext("unable to find path for %s.\n"),
832 		    snap_name);
833 		return (Z_ERR);
834 	}
835 
836 	if (validate_snapshot(snap_name, snap_path, validatesnap) != Z_OK)
837 		return (Z_NO_ENTRY);
838 
839 	/*
840 	 * The zonepath cannot be ZFS cloned, try to copy the data from
841 	 * within the snapshot to the zonepath.
842 	 */
843 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
844 		if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
845 			if (clean_out_clone() != Z_OK)
846 				(void) fprintf(stderr,
847 				    gettext("could not remove the "
848 				    "software inventory from %s\n"), zonepath);
849 
850 		return (err);
851 	}
852 
853 	if ((err = clone_snap(snap_name, clone_name)) != Z_OK) {
854 		if (err != Z_NO_ENTRY) {
855 			/*
856 			 * Cloning the snapshot failed.  Fall back to trying
857 			 * to install the zone by copying from the snapshot.
858 			 */
859 			if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
860 				if (clean_out_clone() != Z_OK)
861 					(void) fprintf(stderr,
862 					    gettext("could not remove the "
863 					    "software inventory from %s\n"),
864 					    zonepath);
865 		} else {
866 			/*
867 			 * The snapshot is unusable for some reason so restore
868 			 * the zone state to configured since we were unable to
869 			 * actually do anything about getting the zone
870 			 * installed.
871 			 */
872 			int tmp;
873 
874 			if ((tmp = zone_set_state(target_zone,
875 			    ZONE_STATE_CONFIGURED)) != Z_OK) {
876 				errno = tmp;
877 				zperror2(target_zone,
878 				    gettext("could not set state"));
879 			}
880 		}
881 	}
882 
883 	return (err);
884 }
885 
886 /*
887  * Attempt to clone a source_zone to a target zonepath by using a ZFS clone.
888  */
889 int
890 clone_zfs(char *source_zonepath, char *zonepath, char *presnapbuf,
891     char *postsnapbuf)
892 {
893 	zfs_handle_t	*zhp;
894 	char		clone_name[MAXPATHLEN];
895 	char		snap_name[MAXPATHLEN];
896 
897 	/*
898 	 * Try to get a zfs handle for the source_zonepath.  If this fails
899 	 * the source_zonepath is not ZFS so return an error.
900 	 */
901 	if ((zhp = mount2zhandle(source_zonepath)) == NULL)
902 		return (Z_ERR);
903 
904 	/*
905 	 * Check if there is a file system already mounted on zonepath.  If so,
906 	 * we can't clone to the path so we should fall back to copying.
907 	 */
908 	if (is_mountpnt(zonepath)) {
909 		zfs_close(zhp);
910 		(void) fprintf(stderr,
911 		    gettext("A file system is already mounted on %s,\n"
912 		    "preventing use of a ZFS clone.\n"), zonepath);
913 		return (Z_ERR);
914 	}
915 
916 	/*
917 	 * Instead of using path2name to get the clone name from the zonepath,
918 	 * we could generate a name from the source zone ZFS name.  However,
919 	 * this would mean we would create the clone under the ZFS fs of the
920 	 * source instead of what the zonepath says.  For example,
921 	 *
922 	 * source_zonepath		zonepath
923 	 * /pl/zones/dev/z1		/pl/zones/deploy/z2
924 	 *
925 	 * We don't want the clone to be under "dev", we want it under
926 	 * "deploy", so that we can leverage the normal attribute inheritance
927 	 * that ZFS provides in the fs hierarchy.
928 	 */
929 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
930 		zfs_close(zhp);
931 		return (Z_ERR);
932 	}
933 
934 	if (take_snapshot(zhp, snap_name, sizeof (snap_name), presnapbuf,
935 	    postsnapbuf) != Z_OK) {
936 		zfs_close(zhp);
937 		return (Z_ERR);
938 	}
939 	zfs_close(zhp);
940 
941 	if (clone_snap(snap_name, clone_name) != Z_OK) {
942 		/* Clean up the snapshot we just took. */
943 		if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
944 		    != NULL) {
945 			if (zfs_unmount(zhp, NULL, 0) == 0)
946 				(void) zfs_destroy(zhp, B_FALSE);
947 			zfs_close(zhp);
948 		}
949 
950 		return (Z_ERR);
951 	}
952 
953 	(void) printf(gettext("Instead of copying, a ZFS clone has been "
954 	    "created for this zone.\n"));
955 
956 	return (Z_OK);
957 }
958 
959 /*
960  * Attempt to create a ZFS file system for the specified zonepath.
961  * We either will successfully create a ZFS file system and get it mounted
962  * on the zonepath or we don't.  The caller doesn't care since a regular
963  * directory is used for the zonepath if no ZFS file system is mounted there.
964  */
965 void
966 create_zfs_zonepath(char *zonepath)
967 {
968 	zfs_handle_t	*zhp;
969 	char		zfs_name[MAXPATHLEN];
970 	nvlist_t	*props = NULL;
971 
972 	if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
973 		return;
974 
975 	/* Check if the dataset already exists. */
976 	if ((zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) != NULL) {
977 		zfs_close(zhp);
978 		return;
979 	}
980 
981 	/*
982 	 * We turn off zfs SHARENFS and SHARESMB properties on the
983 	 * zoneroot dataset in order to prevent the GZ from sharing
984 	 * NGZ data by accident.
985 	 */
986 	if ((nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) ||
987 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
988 	    "off") != 0) ||
989 	    (nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARESMB),
990 	    "off") != 0)) {
991 		nvlist_free(props);
992 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
993 		    "out of memory\n"), zfs_name);
994 	}
995 
996 	if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, props) != 0 ||
997 	    (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) == NULL) {
998 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
999 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1000 		nvlist_free(props);
1001 		return;
1002 	}
1003 
1004 	nvlist_free(props);
1005 
1006 	if (zfs_mount(zhp, NULL, 0) != 0) {
1007 		(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
1008 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1009 		(void) zfs_destroy(zhp, B_FALSE);
1010 	} else {
1011 		if (chmod(zonepath, S_IRWXU) != 0) {
1012 			(void) fprintf(stderr, gettext("file system %s "
1013 			    "successfully created, but chmod %o failed: %s\n"),
1014 			    zfs_name, S_IRWXU, strerror(errno));
1015 			(void) destroy_zfs(zonepath);
1016 		} else {
1017 			(void) printf(gettext("A ZFS file system has been "
1018 			    "created for this zone.\n"));
1019 		}
1020 	}
1021 
1022 	zfs_close(zhp);
1023 }
1024 
1025 /*
1026  * If the zonepath is a ZFS file system, attempt to destroy it.  We return Z_OK
1027  * if we were able to zfs_destroy the zonepath, otherwise we return Z_ERR
1028  * which means the caller should clean up the zonepath in the traditional
1029  * way.
1030  */
1031 int
1032 destroy_zfs(char *zonepath)
1033 {
1034 	zfs_handle_t	*zhp;
1035 	boolean_t	is_clone = B_FALSE;
1036 	char		origin[ZFS_MAXPROPLEN];
1037 
1038 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1039 		return (Z_ERR);
1040 
1041 	if (promote_all_clones(zhp) != 0)
1042 		return (Z_ERR);
1043 
1044 	/* Now cleanup any snapshots remaining. */
1045 	if (zfs_iter_snapshots(zhp, B_FALSE, rm_snap, NULL) != 0) {
1046 		zfs_close(zhp);
1047 		return (Z_ERR);
1048 	}
1049 
1050 	/*
1051 	 * We can't destroy the file system if it has still has dependents.
1052 	 * There shouldn't be any at this point, but we'll double check.
1053 	 */
1054 	if (zfs_iter_dependents(zhp, B_TRUE, has_dependent, NULL) != 0) {
1055 		(void) fprintf(stderr, gettext("zfs destroy %s failed: the "
1056 		    "dataset still has dependents\n"), zfs_get_name(zhp));
1057 		zfs_close(zhp);
1058 		return (Z_ERR);
1059 	}
1060 
1061 	/*
1062 	 * This might be a clone.  Try to get the snapshot so we can attempt
1063 	 * to destroy that as well.
1064 	 */
1065 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
1066 	    NULL, 0, B_FALSE) == 0)
1067 		is_clone = B_TRUE;
1068 
1069 	if (zfs_unmount(zhp, NULL, 0) != 0) {
1070 		(void) fprintf(stderr, gettext("zfs unmount %s failed: %s\n"),
1071 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1072 		zfs_close(zhp);
1073 		return (Z_ERR);
1074 	}
1075 
1076 	if (zfs_destroy(zhp, B_FALSE) != 0) {
1077 		/*
1078 		 * If the destroy fails for some reason, try to remount
1079 		 * the file system so that we can use "rm -rf" to clean up
1080 		 * instead.
1081 		 */
1082 		(void) fprintf(stderr, gettext("zfs destroy %s failed: %s\n"),
1083 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1084 		(void) zfs_mount(zhp, NULL, 0);
1085 		zfs_close(zhp);
1086 		return (Z_ERR);
1087 	}
1088 
1089 	/*
1090 	 * If the zone has ever been moved then the mountpoint dir will not be
1091 	 * cleaned up by the zfs_destroy().  To handle this case try to clean
1092 	 * it up now but don't worry if it fails, that will be normal.
1093 	 */
1094 	(void) rmdir(zonepath);
1095 
1096 	(void) printf(gettext("The ZFS file system for this zone has been "
1097 	    "destroyed.\n"));
1098 
1099 	if (is_clone) {
1100 		zfs_handle_t	*ohp;
1101 
1102 		/*
1103 		 * Try to clean up the snapshot that the clone was taken from.
1104 		 */
1105 		if ((ohp = zfs_open(g_zfs, origin,
1106 		    ZFS_TYPE_SNAPSHOT)) != NULL) {
1107 			if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
1108 			    NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
1109 				(void) zfs_destroy(ohp, B_FALSE);
1110 			zfs_close(ohp);
1111 		}
1112 	}
1113 
1114 	zfs_close(zhp);
1115 	return (Z_OK);
1116 }
1117 
1118 /*
1119  * Return true if the path is its own zfs file system.  We determine this
1120  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
1121  * if it is a different fs.
1122  */
1123 boolean_t
1124 is_zonepath_zfs(char *zonepath)
1125 {
1126 	int res;
1127 	char *path;
1128 	char *parent;
1129 	struct statvfs64 buf1, buf2;
1130 
1131 	if (statvfs64(zonepath, &buf1) != 0)
1132 		return (B_FALSE);
1133 
1134 	if (strcmp(buf1.f_basetype, "zfs") != 0)
1135 		return (B_FALSE);
1136 
1137 	if ((path = strdup(zonepath)) == NULL)
1138 		return (B_FALSE);
1139 
1140 	parent = dirname(path);
1141 	res = statvfs64(parent, &buf2);
1142 	free(path);
1143 
1144 	if (res != 0)
1145 		return (B_FALSE);
1146 
1147 	if (buf1.f_fsid == buf2.f_fsid)
1148 		return (B_FALSE);
1149 
1150 	return (B_TRUE);
1151 }
1152 
1153 /*
1154  * Implement the fast move of a ZFS file system by simply updating the
1155  * mountpoint.  Since it is file system already, we don't have the
1156  * issue of cross-file system copying.
1157  */
1158 int
1159 move_zfs(char *zonepath, char *new_zonepath)
1160 {
1161 	int		ret = Z_ERR;
1162 	zfs_handle_t	*zhp;
1163 
1164 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1165 		return (Z_ERR);
1166 
1167 	if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1168 	    new_zonepath) == 0) {
1169 		/*
1170 		 * Clean up the old mount point.  We ignore any failure since
1171 		 * the zone is already successfully mounted on the new path.
1172 		 */
1173 		(void) rmdir(zonepath);
1174 		ret = Z_OK;
1175 	}
1176 
1177 	zfs_close(zhp);
1178 
1179 	return (ret);
1180 }
1181 
1182 /*
1183  * Validate that the given dataset exists on the system, and that neither it nor
1184  * its children are zvols.
1185  *
1186  * Note that we don't do anything with the 'zoned' property here.  All
1187  * management is done in zoneadmd when the zone is actually rebooted.  This
1188  * allows us to automatically set the zoned property even when a zone is
1189  * rebooted by the administrator.
1190  */
1191 int
1192 verify_datasets(zone_dochandle_t handle)
1193 {
1194 	int return_code = Z_OK;
1195 	struct zone_dstab dstab;
1196 	zfs_handle_t *zhp;
1197 	char propbuf[ZFS_MAXPROPLEN];
1198 	char source[ZFS_MAX_DATASET_NAME_LEN];
1199 	zprop_source_t srctype;
1200 
1201 	if (zonecfg_setdsent(handle) != Z_OK) {
1202 		/*
1203 		 * TRANSLATION_NOTE
1204 		 * zfs and dataset are literals that should not be translated.
1205 		 */
1206 		(void) fprintf(stderr, gettext("could not verify zfs datasets: "
1207 		    "unable to enumerate datasets\n"));
1208 		return (Z_ERR);
1209 	}
1210 
1211 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
1212 
1213 		if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
1214 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1215 			(void) fprintf(stderr, gettext("could not verify zfs "
1216 			    "dataset %s: %s\n"), dstab.zone_dataset_name,
1217 			    libzfs_error_description(g_zfs));
1218 			return_code = Z_ERR;
1219 			continue;
1220 		}
1221 
1222 		if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf,
1223 		    sizeof (propbuf), &srctype, source,
1224 		    sizeof (source), 0) == 0 &&
1225 		    (srctype == ZPROP_SRC_INHERITED)) {
1226 			(void) fprintf(stderr, gettext("could not verify zfs "
1227 			    "dataset %s: mountpoint cannot be inherited\n"),
1228 			    dstab.zone_dataset_name);
1229 			return_code = Z_ERR;
1230 			zfs_close(zhp);
1231 			continue;
1232 		}
1233 
1234 		zfs_close(zhp);
1235 	}
1236 	(void) zonecfg_enddsent(handle);
1237 
1238 	return (return_code);
1239 }
1240 
1241 /*
1242  * Verify that the ZFS dataset exists, and its mountpoint
1243  * property is set to "legacy".
1244  */
1245 int
1246 verify_fs_zfs(struct zone_fstab *fstab)
1247 {
1248 	zfs_handle_t *zhp;
1249 	char propbuf[ZFS_MAXPROPLEN];
1250 
1251 	if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
1252 	    ZFS_TYPE_DATASET)) == NULL) {
1253 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1254 		    "could not access zfs dataset '%s'\n"),
1255 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1256 		return (Z_ERR);
1257 	}
1258 
1259 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
1260 		(void) fprintf(stderr, gettext("cannot verify fs %s: "
1261 		    "'%s' is not a file system\n"),
1262 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1263 		zfs_close(zhp);
1264 		return (Z_ERR);
1265 	}
1266 
1267 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1268 	    NULL, NULL, 0, 0) != 0 || strcmp(propbuf, "legacy") != 0) {
1269 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1270 		    "zfs '%s' mountpoint is not \"legacy\"\n"),
1271 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1272 		zfs_close(zhp);
1273 		return (Z_ERR);
1274 	}
1275 
1276 	zfs_close(zhp);
1277 	return (Z_OK);
1278 }
1279 
1280 /*
1281  * Destroy the specified mnttab structure that was created by mnttab_dup().
1282  * NOTE: The structure's mnt_time field isn't freed.
1283  */
1284 static void
1285 mnttab_destroy(struct mnttab *tabp)
1286 {
1287 	assert(tabp != NULL);
1288 
1289 	free(tabp->mnt_mountp);
1290 	free(tabp->mnt_special);
1291 	free(tabp->mnt_fstype);
1292 	free(tabp->mnt_mntopts);
1293 	free(tabp);
1294 }
1295 
1296 /*
1297  * Duplicate the specified mnttab structure.  The mnt_mountp and mnt_time
1298  * fields aren't duplicated.  This function returns a pointer to the new mnttab
1299  * structure or NULL if an error occurred.  If an error occurs, then this
1300  * function sets errno to reflect the error.  mnttab structures created by
1301  * this function should be destroyed via mnttab_destroy().
1302  */
1303 static struct mnttab *
1304 mnttab_dup(const struct mnttab *srcp)
1305 {
1306 	struct mnttab *retval;
1307 
1308 	assert(srcp != NULL);
1309 
1310 	retval = (struct mnttab *)calloc(1, sizeof (*retval));
1311 	if (retval == NULL) {
1312 		errno = ENOMEM;
1313 		return (NULL);
1314 	}
1315 	if (srcp->mnt_special != NULL) {
1316 		retval->mnt_special = strdup(srcp->mnt_special);
1317 		if (retval->mnt_special == NULL)
1318 			goto err;
1319 	}
1320 	if (srcp->mnt_fstype != NULL) {
1321 		retval->mnt_fstype = strdup(srcp->mnt_fstype);
1322 		if (retval->mnt_fstype == NULL)
1323 			goto err;
1324 	}
1325 	retval->mnt_mntopts = (char *)malloc(MAX_MNTOPT_STR * sizeof (char));
1326 	if (retval->mnt_mntopts == NULL)
1327 		goto err;
1328 	if (srcp->mnt_mntopts != NULL) {
1329 		if (strlcpy(retval->mnt_mntopts, srcp->mnt_mntopts,
1330 		    MAX_MNTOPT_STR * sizeof (char)) >= MAX_MNTOPT_STR *
1331 		    sizeof (char)) {
1332 			mnttab_destroy(retval);
1333 			errno = EOVERFLOW; /* similar to mount(2) behavior */
1334 			return (NULL);
1335 		}
1336 	} else {
1337 		retval->mnt_mntopts[0] = '\0';
1338 	}
1339 	return (retval);
1340 
1341 err:
1342 	mnttab_destroy(retval);
1343 	errno = ENOMEM;
1344 	return (NULL);
1345 }
1346 
1347 /*
1348  * Determine whether the specified ZFS dataset's mountpoint property is set
1349  * to "legacy".  If the specified dataset does not have a legacy mountpoint,
1350  * then the string pointer to which the mountpoint argument points is assigned
1351  * a dynamically-allocated string containing the dataset's mountpoint
1352  * property.  If the dataset's mountpoint property is "legacy" or a libzfs
1353  * error occurs, then the string pointer to which the mountpoint argument
1354  * points isn't modified.
1355  *
1356  * This function returns B_TRUE if it doesn't encounter any fatal errors.
1357  * It returns B_FALSE if it encounters a fatal error and sets errno to the
1358  * appropriate error code.
1359  */
1360 static boolean_t
1361 get_zfs_non_legacy_mountpoint(const char *dataset_name, char **mountpoint)
1362 {
1363 	zfs_handle_t *zhp;
1364 	char propbuf[ZFS_MAXPROPLEN];
1365 
1366 	assert(dataset_name != NULL);
1367 	assert(mountpoint != NULL);
1368 
1369 	if ((zhp = zfs_open(g_zfs, dataset_name, ZFS_TYPE_DATASET)) == NULL) {
1370 		errno = EINVAL;
1371 		return (B_FALSE);
1372 	}
1373 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1374 	    NULL, NULL, 0, 0) != 0) {
1375 		zfs_close(zhp);
1376 		errno = EINVAL;
1377 		return (B_FALSE);
1378 	}
1379 	zfs_close(zhp);
1380 	if (strcmp(propbuf, "legacy") != 0) {
1381 		if ((*mountpoint = strdup(propbuf)) == NULL) {
1382 			errno = ENOMEM;
1383 			return (B_FALSE);
1384 		}
1385 	}
1386 	return (B_TRUE);
1387 }
1388 
1389 
1390 /*
1391  * This zonecfg_find_mounts() callback records information about mounts of
1392  * interest in a zonepath.  It also tallies the number of zone
1393  * root overlay mounts and the number of unexpected mounts found.
1394  * This function outputs errors using zerror() if it finds unexpected
1395  * mounts.  cookiep should point to an initialized zone_mounts_t structure.
1396  *
1397  * This function returns zero on success and a nonzero value on failure.
1398  */
1399 static int
1400 zone_mounts_cb(const struct mnttab *mountp, void *cookiep)
1401 {
1402 	zone_mounts_t *mounts;
1403 	const char *zone_mount_dir;
1404 
1405 	assert(mountp != NULL);
1406 	assert(cookiep != NULL);
1407 
1408 	mounts = (zone_mounts_t *)cookiep;
1409 	zone_mount_dir = mountp->mnt_mountp + mounts->zonepath_len;
1410 	if (strcmp(zone_mount_dir, "/root") == 0) {
1411 		/*
1412 		 * Check for an overlay mount.  If we already detected a /root
1413 		 * mount, then the current mount must be an overlay mount.
1414 		 */
1415 		if (mounts->root_mnttab != NULL) {
1416 			mounts->num_root_overlay_mounts++;
1417 			return (0);
1418 		}
1419 
1420 		/*
1421 		 * Store the root mount's mnttab information in the
1422 		 * zone_mounts_t structure for future use.
1423 		 */
1424 		if ((mounts->root_mnttab = mnttab_dup(mountp)) == NULL) {
1425 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1426 			return (-1);
1427 		}
1428 
1429 		/*
1430 		 * Determine if the filesystem is a ZFS filesystem with a
1431 		 * non-legacy mountpoint.  If it is, then set the root
1432 		 * filesystem's mnttab's mnt_mountp field to a non-NULL
1433 		 * value, which will serve as a flag to indicate this special
1434 		 * condition.
1435 		 */
1436 		if (strcmp(mountp->mnt_fstype, MNTTYPE_ZFS) == 0 &&
1437 		    get_zfs_non_legacy_mountpoint(mountp->mnt_special,
1438 		    &mounts->root_mnttab->mnt_mountp) != B_TRUE) {
1439 			zperror(cmd_to_str(CMD_MOVE), B_FALSE);
1440 			return (-1);
1441 		}
1442 	} else {
1443 		/*
1444 		 * An unexpected mount was found.  Notify the user.
1445 		 */
1446 		if (mounts->num_unexpected_mounts == 0)
1447 			zerror(gettext("These file systems are mounted on "
1448 			    "subdirectories of %s.\n"), mounts->zonepath);
1449 		mounts->num_unexpected_mounts++;
1450 		(void) zfm_print(mountp, NULL);
1451 	}
1452 	return (0);
1453 }
1454 
1455 /*
1456  * Initialize the specified zone_mounts_t structure for the given zonepath.
1457  * If this function succeeds, it returns zero and the specified zone_mounts_t
1458  * structure contains information about mounts in the specified zonepath.
1459  * The function returns a nonzero value if it fails.  The zone_mounts_t
1460  * structure doesn't need be destroyed via zone_mounts_destroy() if this
1461  * function fails.
1462  */
1463 int
1464 zone_mounts_init(zone_mounts_t *mounts, const char *zonepath)
1465 {
1466 	assert(mounts != NULL);
1467 	assert(zonepath != NULL);
1468 
1469 	bzero(mounts, sizeof (*mounts));
1470 	if ((mounts->zonepath = strdup(zonepath)) == NULL) {
1471 		zerror(gettext("the process ran out of memory while checking "
1472 		    "for mounts in zonepath %s."), zonepath);
1473 		return (-1);
1474 	}
1475 	mounts->zonepath_len = strlen(zonepath);
1476 	if (zonecfg_find_mounts((char *)zonepath, zone_mounts_cb, mounts) ==
1477 	    -1) {
1478 		zerror(gettext("an error occurred while checking for mounts "
1479 		    "in zonepath %s."), zonepath);
1480 		zone_mounts_destroy(mounts);
1481 		return (-1);
1482 	}
1483 	return (0);
1484 }
1485 
1486 /*
1487  * Destroy the memory used by the specified zone_mounts_t structure's fields.
1488  * This function doesn't free the memory occupied by the structure itself
1489  * (i.e., it doesn't free the parameter).
1490  */
1491 void
1492 zone_mounts_destroy(zone_mounts_t *mounts)
1493 {
1494 	assert(mounts != NULL);
1495 
1496 	free(mounts->zonepath);
1497 	if (mounts->root_mnttab != NULL)
1498 		mnttab_destroy(mounts->root_mnttab);
1499 }
1500 
1501 /*
1502  * Mount a moving zone's root filesystem (if it had a root filesystem mount
1503  * prior to the move) using the specified zonepath.  mounts should refer to
1504  * the zone_mounts_t structure describing the zone's mount information.
1505  *
1506  * This function returns zero if the mount succeeds and a nonzero value
1507  * if it doesn't.
1508  */
1509 int
1510 zone_mount_rootfs(zone_mounts_t *mounts, const char *zonepath)
1511 {
1512 	char zoneroot[MAXPATHLEN];
1513 	struct mnttab *mtab;
1514 	int flags;
1515 
1516 	assert(mounts != NULL);
1517 	assert(zonepath != NULL);
1518 
1519 	/*
1520 	 * If there isn't a root filesystem, then don't do anything.
1521 	 */
1522 	mtab = mounts->root_mnttab;
1523 	if (mtab == NULL)
1524 		return (0);
1525 
1526 	/*
1527 	 * Determine the root filesystem's new mountpoint.
1528 	 */
1529 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1530 	    sizeof (zoneroot)) {
1531 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1532 		return (-1);
1533 	}
1534 
1535 	/*
1536 	 * If the root filesystem is a non-legacy ZFS filesystem (i.e., if it's
1537 	 * mnt_mountp field is non-NULL), then make the filesystem's new
1538 	 * mount point its mountpoint property and mount the filesystem.
1539 	 */
1540 	if (mtab->mnt_mountp != NULL) {
1541 		zfs_handle_t *zhp;
1542 
1543 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1544 		    ZFS_TYPE_DATASET)) == NULL) {
1545 			zerror(gettext("could not get ZFS handle for the zone's"
1546 			    " root filesystem"));
1547 			return (-1);
1548 		}
1549 		if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1550 		    zoneroot) != 0) {
1551 			zerror(gettext("could not modify zone's root "
1552 			    "filesystem's mountpoint property"));
1553 			zfs_close(zhp);
1554 			return (-1);
1555 		}
1556 		if (zfs_mount(zhp, mtab->mnt_mntopts, 0) != 0) {
1557 			zerror(gettext("unable to mount zone root %s: %s"),
1558 			    zoneroot, libzfs_error_description(g_zfs));
1559 			if (zfs_prop_set(zhp,
1560 			    zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1561 			    mtab->mnt_mountp) != 0)
1562 				zerror(gettext("unable to restore zone's root "
1563 				    "filesystem's mountpoint property"));
1564 			zfs_close(zhp);
1565 			return (-1);
1566 		}
1567 		zfs_close(zhp);
1568 		return (0);
1569 	}
1570 
1571 	/*
1572 	 * The root filesystem is either a legacy-mounted ZFS filesystem or
1573 	 * a non-ZFS filesystem.  Use mount(2) to mount the root filesystem.
1574 	 */
1575 	if (mtab->mnt_mntopts != NULL)
1576 		flags = MS_OPTIONSTR;
1577 	else
1578 		flags = 0;
1579 	if (mount(mtab->mnt_special, zoneroot, flags, mtab->mnt_fstype, NULL, 0,
1580 	    mtab->mnt_mntopts, MAX_MNTOPT_STR * sizeof (char)) != 0) {
1581 		flags = errno;
1582 		zerror(gettext("unable to mount zone root %s: %s"), zoneroot,
1583 		    strerror(flags));
1584 		return (-1);
1585 	}
1586 	return (0);
1587 }
1588 
1589 /*
1590  * Unmount a moving zone's root filesystem (if such a mount exists) using the
1591  * specified zonepath.  mounts should refer to the zone_mounts_t structure
1592  * describing the zone's mount information.  If force is B_TRUE, then if the
1593  * unmount fails, then the function will try to forcibly unmount the zone's root
1594  * filesystem.
1595  *
1596  * This function returns zero if the unmount (forced or otherwise) succeeds;
1597  * otherwise, it returns a nonzero value.
1598  */
1599 int
1600 zone_unmount_rootfs(zone_mounts_t *mounts, const char *zonepath,
1601     boolean_t force)
1602 {
1603 	char zoneroot[MAXPATHLEN];
1604 	struct mnttab *mtab;
1605 	int err;
1606 
1607 	assert(mounts != NULL);
1608 	assert(zonepath != NULL);
1609 
1610 	/*
1611 	 * If there isn't a root filesystem, then don't do anything.
1612 	 */
1613 	mtab = mounts->root_mnttab;
1614 	if (mtab == NULL)
1615 		return (0);
1616 
1617 	/*
1618 	 * Determine the root filesystem's mountpoint.
1619 	 */
1620 	if (snprintf(zoneroot, sizeof (zoneroot), "%s/root", zonepath) >=
1621 	    sizeof (zoneroot)) {
1622 		zerror(gettext("Zonepath %s is too long.\n"), zonepath);
1623 		return (-1);
1624 	}
1625 
1626 	/*
1627 	 * If the root filesystem is a non-legacy ZFS fileystem, then unmount
1628 	 * the filesystem via libzfs.
1629 	 */
1630 	if (mtab->mnt_mountp != NULL) {
1631 		zfs_handle_t *zhp;
1632 
1633 		if ((zhp = zfs_open(g_zfs, mtab->mnt_special,
1634 		    ZFS_TYPE_DATASET)) == NULL) {
1635 			zerror(gettext("could not get ZFS handle for the zone's"
1636 			    " root filesystem"));
1637 			return (-1);
1638 		}
1639 		if (zfs_unmount(zhp, zoneroot, 0) != 0) {
1640 			if (force && zfs_unmount(zhp, zoneroot, MS_FORCE) ==
1641 			    0) {
1642 				zfs_close(zhp);
1643 				return (0);
1644 			}
1645 			zerror(gettext("unable to unmount zone root %s: %s"),
1646 			    zoneroot, libzfs_error_description(g_zfs));
1647 			zfs_close(zhp);
1648 			return (-1);
1649 		}
1650 		zfs_close(zhp);
1651 		return (0);
1652 	}
1653 
1654 	/*
1655 	 * Use umount(2) to unmount the root filesystem.  If this fails, then
1656 	 * forcibly unmount it if the force flag is set.
1657 	 */
1658 	if (umount(zoneroot) != 0) {
1659 		if (force && umount2(zoneroot, MS_FORCE) == 0)
1660 			return (0);
1661 		err = errno;
1662 		zerror(gettext("unable to unmount zone root %s: %s"), zoneroot,
1663 		    strerror(err));
1664 		return (-1);
1665 	}
1666 	return (0);
1667 }
1668 
1669 int
1670 init_zfs(void)
1671 {
1672 	if ((g_zfs = libzfs_init()) == NULL) {
1673 		(void) fprintf(stderr, gettext("failed to initialize ZFS "
1674 		    "library\n"));
1675 		return (Z_ERR);
1676 	}
1677 
1678 	return (Z_OK);
1679 }
1680