xref: /titanic_50/usr/src/cmd/zoneadm/zfs.c (revision e23c41c9edb2294649cde3d370ae755701f3f140)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This file contains the functions used to support the ZFS integration
29  * with zones.  This includes validation (e.g. zonecfg dataset), cloning,
30  * file system creation and destruction.
31  */
32 
33 #include <stdio.h>
34 #include <errno.h>
35 #include <unistd.h>
36 #include <string.h>
37 #include <locale.h>
38 #include <libintl.h>
39 #include <sys/stat.h>
40 #include <sys/statvfs.h>
41 #include <libgen.h>
42 #include <libzonecfg.h>
43 #include <sys/mnttab.h>
44 #include <libzfs.h>
45 #include <sys/mntent.h>
46 #include <values.h>
47 
48 #include "zoneadm.h"
49 
50 libzfs_handle_t *g_zfs;
51 
52 typedef struct zfs_mount_data {
53 	char		*match_name;
54 	zfs_handle_t	*match_handle;
55 } zfs_mount_data_t;
56 
/*
 * State shared by the snapshot iterator call-backs get_snap_max() and
 * rename_snap().
 */
typedef struct zfs_snapshot_data {
	char	*match_name;	/* name prefix to match: <dataset>@SUNWzone */
	int	len;		/* strlen(match_name) */
	int	max;		/* largest numeric suffix seen so far */
	int	num;		/* count of snapshots matching the prefix */
	int	cntr;		/* snapshots processed so far by rename_snap */
} zfs_snapshot_data_t;
64 
65 typedef struct clone_data {
66 	zfs_handle_t	*clone_zhp;	/* clone dataset to promote */
67 	time_t		origin_creation; /* snapshot creation time of clone */
68 	const char	*snapshot;	/* snapshot of dataset being demoted */
69 } clone_data_t;
70 
71 /*
72  * A ZFS file system iterator call-back function which is used to validate
73  * datasets imported into the zone.
74  */
75 /* ARGSUSED */
76 static int
77 check_zvol(zfs_handle_t *zhp, void *unused)
78 {
79 	int ret;
80 
81 	if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) {
82 		/*
83 		 * TRANSLATION_NOTE
84 		 * zfs and dataset are literals that should not be translated.
85 		 */
86 		(void) fprintf(stderr, gettext("cannot verify zfs dataset %s: "
87 		    "volumes cannot be specified as a zone dataset resource\n"),
88 		    zfs_get_name(zhp));
89 		ret = -1;
90 	} else {
91 		ret = zfs_iter_children(zhp, check_zvol, NULL);
92 	}
93 
94 	zfs_close(zhp);
95 
96 	return (ret);
97 }
98 
99 /*
100  * A ZFS file system iterator call-back function which returns the
101  * zfs_handle_t for a ZFS file system on the specified mount point.
102  */
103 static int
104 match_mountpoint(zfs_handle_t *zhp, void *data)
105 {
106 	int			res;
107 	zfs_mount_data_t	*cbp;
108 	char			mp[ZFS_MAXPROPLEN];
109 
110 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
111 		zfs_close(zhp);
112 		return (0);
113 	}
114 
115 	/* First check if the dataset is mounted. */
116 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTED, mp, sizeof (mp), NULL, NULL,
117 	    0, B_FALSE) != 0 || strcmp(mp, "no") == 0) {
118 		zfs_close(zhp);
119 		return (0);
120 	}
121 
122 	/* Now check mount point. */
123 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
124 	    0, B_FALSE) != 0) {
125 		zfs_close(zhp);
126 		return (0);
127 	}
128 
129 	cbp = (zfs_mount_data_t *)data;
130 
131 	if (strcmp(mp, "legacy") == 0) {
132 		/* If legacy, must look in mnttab for mountpoint. */
133 		FILE		*fp;
134 		struct mnttab	entry;
135 		const char	*nm;
136 
137 		nm = zfs_get_name(zhp);
138 		if ((fp = fopen(MNTTAB, "r")) == NULL) {
139 			zfs_close(zhp);
140 			return (0);
141 		}
142 
143 		while (getmntent(fp, &entry) == 0) {
144 			if (strcmp(nm, entry.mnt_special) == 0) {
145 				if (strcmp(entry.mnt_mountp, cbp->match_name)
146 				    == 0) {
147 					(void) fclose(fp);
148 					cbp->match_handle = zhp;
149 					return (1);
150 				}
151 				break;
152 			}
153 		}
154 		(void) fclose(fp);
155 
156 	} else if (strcmp(mp, cbp->match_name) == 0) {
157 		cbp->match_handle = zhp;
158 		return (1);
159 	}
160 
161 	/* Iterate over any nested datasets. */
162 	res = zfs_iter_filesystems(zhp, match_mountpoint, data);
163 	zfs_close(zhp);
164 	return (res);
165 }
166 
167 /*
168  * Get ZFS handle for the specified mount point.
169  */
170 static zfs_handle_t *
171 mount2zhandle(char *mountpoint)
172 {
173 	zfs_mount_data_t	cb;
174 
175 	cb.match_name = mountpoint;
176 	cb.match_handle = NULL;
177 	(void) zfs_iter_root(g_zfs, match_mountpoint, &cb);
178 	return (cb.match_handle);
179 }
180 
181 /*
182  * Check if there is already a file system (zfs or any other type) mounted on
183  * path.
184  */
185 static boolean_t
186 is_mountpnt(char *path)
187 {
188 	FILE		*fp;
189 	struct mnttab	entry;
190 
191 	if ((fp = fopen(MNTTAB, "r")) == NULL)
192 		return (B_FALSE);
193 
194 	while (getmntent(fp, &entry) == 0) {
195 		if (strcmp(path, entry.mnt_mountp) == 0) {
196 			(void) fclose(fp);
197 			return (B_TRUE);
198 		}
199 	}
200 
201 	(void) fclose(fp);
202 	return (B_FALSE);
203 }
204 
205 /*
206  * Run the brand's pre-snapshot hook before we take a ZFS snapshot of the zone.
207  */
208 static int
209 pre_snapshot(char *presnapbuf)
210 {
211 	int status;
212 
213 	/* No brand-specific handler */
214 	if (presnapbuf[0] == '\0')
215 		return (Z_OK);
216 
217 	/* Run the hook */
218 	status = do_subproc(presnapbuf);
219 	if ((status = subproc_status(gettext("brand-specific presnapshot"),
220 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
221 		return (Z_ERR);
222 
223 	return (Z_OK);
224 }
225 
226 /*
227  * Run the brand's post-snapshot hook after we take a ZFS snapshot of the zone.
228  */
229 static int
230 post_snapshot(char *postsnapbuf)
231 {
232 	int status;
233 
234 	/* No brand-specific handler */
235 	if (postsnapbuf[0] == '\0')
236 		return (Z_OK);
237 
238 	/* Run the hook */
239 	status = do_subproc(postsnapbuf);
240 	if ((status = subproc_status(gettext("brand-specific postsnapshot"),
241 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
242 		return (Z_ERR);
243 
244 	return (Z_OK);
245 }
246 
247 /*
248  * This is a ZFS snapshot iterator call-back function which returns the
249  * highest number of SUNWzone snapshots that have been taken.
250  */
251 static int
252 get_snap_max(zfs_handle_t *zhp, void *data)
253 {
254 	int			res;
255 	zfs_snapshot_data_t	*cbp;
256 
257 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
258 		zfs_close(zhp);
259 		return (0);
260 	}
261 
262 	cbp = (zfs_snapshot_data_t *)data;
263 
264 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) == 0) {
265 		char	*nump;
266 		int	num;
267 
268 		cbp->num++;
269 		nump = (char *)(zfs_get_name(zhp) + cbp->len);
270 		num = atoi(nump);
271 		if (num > cbp->max)
272 			cbp->max = num;
273 	}
274 
275 	res = zfs_iter_snapshots(zhp, get_snap_max, data);
276 	zfs_close(zhp);
277 	return (res);
278 }
279 
280 /*
281  * Take a ZFS snapshot to be used for cloning the zone.
282  */
283 static int
284 take_snapshot(zfs_handle_t *zhp, char *snapshot_name, int snap_size,
285     char *presnapbuf, char *postsnapbuf)
286 {
287 	int			res;
288 	char			template[ZFS_MAXNAMELEN];
289 	zfs_snapshot_data_t	cb;
290 
291 	/*
292 	 * First we need to figure out the next available name for the
293 	 * zone snapshot.  Look through the list of zones snapshots for
294 	 * this file system to determine the maximum snapshot name.
295 	 */
296 	if (snprintf(template, sizeof (template), "%s@SUNWzone",
297 	    zfs_get_name(zhp)) >=  sizeof (template))
298 		return (Z_ERR);
299 
300 	cb.match_name = template;
301 	cb.len = strlen(template);
302 	cb.max = 0;
303 
304 	if (zfs_iter_snapshots(zhp, get_snap_max, &cb) != 0)
305 		return (Z_ERR);
306 
307 	cb.max++;
308 
309 	if (snprintf(snapshot_name, snap_size, "%s@SUNWzone%d",
310 	    zfs_get_name(zhp), cb.max) >= snap_size)
311 		return (Z_ERR);
312 
313 	if (pre_snapshot(presnapbuf) != Z_OK)
314 		return (Z_ERR);
315 	res = zfs_snapshot(g_zfs, snapshot_name, B_FALSE, NULL);
316 	if (post_snapshot(postsnapbuf) != Z_OK)
317 		return (Z_ERR);
318 
319 	if (res != 0)
320 		return (Z_ERR);
321 	return (Z_OK);
322 }
323 
324 /*
325  * We are using an explicit snapshot from some earlier point in time so
326  * we need to validate it.  Run the brand specific hook.
327  */
328 static int
329 validate_snapshot(char *snapshot_name, char *snap_path, char *validsnapbuf)
330 {
331 	int status;
332 	char cmdbuf[MAXPATHLEN];
333 
334 	/* No brand-specific handler */
335 	if (validsnapbuf[0] == '\0')
336 		return (Z_OK);
337 
338 	/* pass args - snapshot_name & snap_path */
339 	if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %s %s", validsnapbuf,
340 	    snapshot_name, snap_path) >= sizeof (cmdbuf)) {
341 		zerror("Command line too long");
342 		return (Z_ERR);
343 	}
344 
345 	/* Run the hook */
346 	status = do_subproc(cmdbuf);
347 	if ((status = subproc_status(gettext("brand-specific validatesnapshot"),
348 	    status, B_FALSE)) != ZONE_SUBPROC_OK)
349 		return (Z_ERR);
350 
351 	return (Z_OK);
352 }
353 
354 /*
355  * Remove the sw inventory file from inside this zonepath that we picked up out
356  * of the snapshot.
357  */
358 static int
359 clean_out_clone()
360 {
361 	int err;
362 	zone_dochandle_t handle;
363 
364 	if ((handle = zonecfg_init_handle()) == NULL) {
365 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
366 		return (Z_ERR);
367 	}
368 
369 	if ((err = zonecfg_get_handle(target_zone, handle)) != Z_OK) {
370 		errno = err;
371 		zperror(cmd_to_str(CMD_CLONE), B_TRUE);
372 		zonecfg_fini_handle(handle);
373 		return (Z_ERR);
374 	}
375 
376 	zonecfg_rm_detached(handle, B_FALSE);
377 	zonecfg_fini_handle(handle);
378 
379 	return (Z_OK);
380 }
381 
382 /*
383  * Make a ZFS clone on zonepath from snapshot_name.
384  */
385 static int
386 clone_snap(char *snapshot_name, char *zonepath)
387 {
388 	int		res = Z_OK;
389 	int		err;
390 	zfs_handle_t	*zhp;
391 	zfs_handle_t	*clone;
392 	nvlist_t	*props = NULL;
393 
394 	if ((zhp = zfs_open(g_zfs, snapshot_name, ZFS_TYPE_SNAPSHOT)) == NULL)
395 		return (Z_NO_ENTRY);
396 
397 	(void) printf(gettext("Cloning snapshot %s\n"), snapshot_name);
398 
399 	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0 ||
400 	    nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
401 	    "off") != 0) {
402 		if (props != NULL)
403 			nvlist_free(props);
404 		(void) fprintf(stderr, gettext("could not create ZFS clone "
405 		    "%s: out of memory\n"), zonepath);
406 		return (Z_ERR);
407 	}
408 
409 	err = zfs_clone(zhp, zonepath, props);
410 	zfs_close(zhp);
411 
412 	nvlist_free(props);
413 
414 	if (err != 0)
415 		return (Z_ERR);
416 
417 	/* create the mountpoint if necessary */
418 	if ((clone = zfs_open(g_zfs, zonepath, ZFS_TYPE_DATASET)) == NULL)
419 		return (Z_ERR);
420 
421 	/*
422 	 * The clone has been created so we need to print a diagnostic
423 	 * message if one of the following steps fails for some reason.
424 	 */
425 	if (zfs_mount(clone, NULL, 0) != 0) {
426 		(void) fprintf(stderr, gettext("could not mount ZFS clone "
427 		    "%s\n"), zfs_get_name(clone));
428 		res = Z_ERR;
429 
430 	} else if (clean_out_clone() != Z_OK) {
431 		(void) fprintf(stderr, gettext("could not remove the "
432 		    "software inventory from ZFS clone %s\n"),
433 		    zfs_get_name(clone));
434 		res = Z_ERR;
435 	}
436 
437 	zfs_close(clone);
438 	return (res);
439 }
440 
441 /*
442  * This function takes a zonepath and attempts to determine what the ZFS
443  * file system name (not mountpoint) should be for that path.  We do not
444  * assume that zonepath is an existing directory or ZFS fs since we use
445  * this function as part of the process of creating a new ZFS fs or clone.
446  *
447  * The way this works is that we look at the parent directory of the zonepath
448  * to see if it is a ZFS fs.  If it is, we get the name of that ZFS fs and
449  * append the last component of the zonepath to generate the ZFS name for the
450  * zonepath.  This matches the algorithm that ZFS uses for automatically
451  * mounting a new fs after it is created.
452  *
453  * Although a ZFS fs can be mounted anywhere, we don't worry about handling
454  * all of the complexity that a user could possibly configure with arbitrary
455  * mounts since there is no way to generate a ZFS name from a random path in
456  * the file system.  We only try to handle the automatic mounts that ZFS does
457  * for each file system.  ZFS restricts this so that a new fs must be created
458  * in an existing parent ZFS fs.  It then automatically mounts the new fs
459  * directly under the mountpoint for the parent fs using the last component
460  * of the name as the mountpoint directory.
461  *
462  * For example:
463  *    Name			Mountpoint
464  *    space/eng/dev/test/zone1	/project1/eng/dev/test/zone1
465  *
466  * Return Z_OK if the path mapped to a ZFS file system name, otherwise return
467  * Z_ERR.
468  */
469 static int
470 path2name(char *zonepath, char *zfs_name, int len)
471 {
472 	int		res;
473 	char		*bnm, *dnm, *dname, *bname;
474 	zfs_handle_t	*zhp;
475 	struct stat	stbuf;
476 
477 	/*
478 	 * We need two tmp strings to handle paths directly in / (e.g. /foo)
479 	 * since dirname will overwrite the first char after "/" in this case.
480 	 */
481 	if ((bnm = strdup(zonepath)) == NULL)
482 		return (Z_ERR);
483 
484 	if ((dnm = strdup(zonepath)) == NULL) {
485 		free(bnm);
486 		return (Z_ERR);
487 	}
488 
489 	bname = basename(bnm);
490 	dname = dirname(dnm);
491 
492 	/*
493 	 * This is a quick test to save iterating over all of the zfs datasets
494 	 * on the system (which can be a lot).  If the parent dir is not in a
495 	 * ZFS fs, then we're done.
496 	 */
497 	if (stat(dname, &stbuf) != 0 || !S_ISDIR(stbuf.st_mode) ||
498 	    strcmp(stbuf.st_fstype, MNTTYPE_ZFS) != 0) {
499 		free(bnm);
500 		free(dnm);
501 		return (Z_ERR);
502 	}
503 
504 	/* See if the parent directory is its own ZFS dataset. */
505 	if ((zhp = mount2zhandle(dname)) == NULL) {
506 		/*
507 		 * The parent is not a ZFS dataset so we can't automatically
508 		 * create a dataset on the given path.
509 		 */
510 		free(bnm);
511 		free(dnm);
512 		return (Z_ERR);
513 	}
514 
515 	res = snprintf(zfs_name, len, "%s/%s", zfs_get_name(zhp), bname);
516 
517 	free(bnm);
518 	free(dnm);
519 	zfs_close(zhp);
520 	if (res >= len)
521 		return (Z_ERR);
522 
523 	return (Z_OK);
524 }
525 
526 /*
527  * A ZFS file system iterator call-back function used to determine if the
528  * file system has dependents (snapshots & clones).
529  */
530 /* ARGSUSED */
531 static int
532 has_dependent(zfs_handle_t *zhp, void *data)
533 {
534 	zfs_close(zhp);
535 	return (1);
536 }
537 
538 /*
539  * Given a snapshot name, get the file system path where the snapshot lives.
540  * A snapshot name is of the form fs_name@snap_name.  For example, snapshot
541  * pl/zones/z1@SUNWzone1 would have a path of
542  * /pl/zones/z1/.zfs/snapshot/SUNWzone1.
543  */
544 static int
545 snap2path(char *snap_name, char *path, int len)
546 {
547 	char		*p;
548 	zfs_handle_t	*zhp;
549 	char		mp[ZFS_MAXPROPLEN];
550 
551 	if ((p = strrchr(snap_name, '@')) == NULL)
552 		return (Z_ERR);
553 
554 	/* Get the file system name from the snap_name. */
555 	*p = '\0';
556 	zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_DATASET);
557 	*p = '@';
558 	if (zhp == NULL)
559 		return (Z_ERR);
560 
561 	/* Get the file system mount point. */
562 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, mp, sizeof (mp), NULL, NULL,
563 	    0, B_FALSE) != 0) {
564 		zfs_close(zhp);
565 		return (Z_ERR);
566 	}
567 	zfs_close(zhp);
568 
569 	p++;
570 	if (snprintf(path, len, "%s/.zfs/snapshot/%s", mp, p) >= len)
571 		return (Z_ERR);
572 
573 	return (Z_OK);
574 }
575 
576 /*
577  * This callback function is used to iterate through a snapshot's dependencies
578  * to find a filesystem that is a direct clone of the snapshot being iterated.
579  */
580 static int
581 get_direct_clone(zfs_handle_t *zhp, void *data)
582 {
583 	clone_data_t	*cd = data;
584 	char		origin[ZFS_MAXNAMELEN];
585 	char		ds_path[ZFS_MAXNAMELEN];
586 
587 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
588 		zfs_close(zhp);
589 		return (0);
590 	}
591 
592 	(void) strlcpy(ds_path, zfs_get_name(zhp), sizeof (ds_path));
593 
594 	/* Make sure this is a direct clone of the snapshot we're iterating. */
595 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
596 	    NULL, 0, B_FALSE) != 0 || strcmp(origin, cd->snapshot) != 0) {
597 		zfs_close(zhp);
598 		return (0);
599 	}
600 
601 	if (cd->clone_zhp != NULL)
602 		zfs_close(cd->clone_zhp);
603 
604 	cd->clone_zhp = zhp;
605 	return (1);
606 }
607 
608 /*
609  * A ZFS file system iterator call-back function used to determine the clone
610  * to promote.  This function finds the youngest (i.e. last one taken) snapshot
611  * that has a clone.  If found, it returns a reference to that clone in the
612  * callback data.
613  */
614 static int
615 find_clone(zfs_handle_t *zhp, void *data)
616 {
617 	clone_data_t	*cd = data;
618 	time_t		snap_creation;
619 	int		zret = 0;
620 
621 	/* If snapshot has no clones, skip it */
622 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) == 0) {
623 		zfs_close(zhp);
624 		return (0);
625 	}
626 
627 	cd->snapshot = zfs_get_name(zhp);
628 
629 	/* Get the creation time of this snapshot */
630 	snap_creation = (time_t)zfs_prop_get_int(zhp, ZFS_PROP_CREATION);
631 
632 	/*
633 	 * If this snapshot's creation time is greater than (i.e. younger than)
634 	 * the current youngest snapshot found, iterate this snapshot to
635 	 * get the right clone.
636 	 */
637 	if (snap_creation >= cd->origin_creation) {
638 		/*
639 		 * Iterate the dependents of this snapshot to find a clone
640 		 * that's a direct dependent.
641 		 */
642 		if ((zret = zfs_iter_dependents(zhp, B_FALSE, get_direct_clone,
643 		    cd)) == -1) {
644 			zfs_close(zhp);
645 			return (1);
646 		} else if (zret == 1) {
647 			/*
648 			 * Found a clone, update the origin_creation time
649 			 * in the callback data.
650 			 */
651 			cd->origin_creation = snap_creation;
652 		}
653 	}
654 
655 	zfs_close(zhp);
656 	return (0);
657 }
658 
659 /*
660  * A ZFS file system iterator call-back function used to remove standalone
661  * snapshots.
662  */
663 /* ARGSUSED */
664 static int
665 rm_snap(zfs_handle_t *zhp, void *data)
666 {
667 	/* If snapshot has clones, something is wrong */
668 	if (zfs_prop_get_int(zhp, ZFS_PROP_NUMCLONES) != 0) {
669 		zfs_close(zhp);
670 		return (1);
671 	}
672 
673 	if (zfs_unmount(zhp, NULL, 0) == 0) {
674 		(void) zfs_destroy(zhp);
675 	}
676 
677 	zfs_close(zhp);
678 	return (0);
679 }
680 
681 /*
682  * A ZFS snapshot iterator call-back function which renames snapshots.
683  */
684 static int
685 rename_snap(zfs_handle_t *zhp, void *data)
686 {
687 	int			res;
688 	zfs_snapshot_data_t	*cbp;
689 	char			template[ZFS_MAXNAMELEN];
690 
691 	cbp = (zfs_snapshot_data_t *)data;
692 
693 	/*
694 	 * When renaming snapshots with the iterator, the iterator can see
695 	 * the same snapshot after we've renamed up in the namespace.  To
696 	 * prevent this we check the count for the number of snapshots we have
697 	 * to rename and stop at that point.
698 	 */
699 	if (cbp->cntr >= cbp->num) {
700 		zfs_close(zhp);
701 		return (0);
702 	}
703 
704 	if (zfs_get_type(zhp) != ZFS_TYPE_SNAPSHOT) {
705 		zfs_close(zhp);
706 		return (0);
707 	}
708 
709 	/* Only rename the snapshots we automatically generate when we clone. */
710 	if (strncmp(zfs_get_name(zhp), cbp->match_name, cbp->len) != 0) {
711 		zfs_close(zhp);
712 		return (0);
713 	}
714 
715 	(void) snprintf(template, sizeof (template), "%s%d", cbp->match_name,
716 	    cbp->max++);
717 
718 	res = (zfs_rename(zhp, template, B_FALSE) != 0);
719 	if (res != 0)
720 		(void) fprintf(stderr, gettext("failed to rename snapshot %s "
721 		    "to %s: %s\n"), zfs_get_name(zhp), template,
722 		    libzfs_error_description(g_zfs));
723 
724 	cbp->cntr++;
725 
726 	zfs_close(zhp);
727 	return (res);
728 }
729 
730 /*
731  * Rename the source dataset's snapshots that are automatically generated when
732  * we clone a zone so that there won't be a name collision when we promote the
733  * cloned dataset.  Once the snapshots have been renamed, then promote the
734  * clone.
735  *
736  * The snapshot rename process gets the highest number on the snapshot names
737  * (the format is zonename@SUNWzoneXX where XX are digits) on both the source
738  * and clone datasets, then renames the source dataset snapshots starting at
739  * the next number.
740  */
741 static int
742 promote_clone(zfs_handle_t *src_zhp, zfs_handle_t *cln_zhp)
743 {
744 	zfs_snapshot_data_t	sd;
745 	char			nm[ZFS_MAXNAMELEN];
746 	char			template[ZFS_MAXNAMELEN];
747 
748 	(void) strlcpy(nm, zfs_get_name(cln_zhp), sizeof (nm));
749 	/*
750 	 * Start by getting the clone's snapshot max which we use
751 	 * during the rename of the original dataset's snapshots.
752 	 */
753 	(void) snprintf(template, sizeof (template), "%s@SUNWzone", nm);
754 	sd.match_name = template;
755 	sd.len = strlen(template);
756 	sd.max = 0;
757 
758 	if (zfs_iter_snapshots(cln_zhp, get_snap_max, &sd) != 0)
759 		return (Z_ERR);
760 
761 	/*
762 	 * Now make sure the source's snapshot max is at least as high as
763 	 * the clone's snapshot max.
764 	 */
765 	(void) snprintf(template, sizeof (template), "%s@SUNWzone",
766 	    zfs_get_name(src_zhp));
767 	sd.match_name = template;
768 	sd.len = strlen(template);
769 	sd.num = 0;
770 
771 	if (zfs_iter_snapshots(src_zhp, get_snap_max, &sd) != 0)
772 		return (Z_ERR);
773 
774 	/*
775 	 * Now rename the source dataset's snapshots so there's no
776 	 * conflict when we promote the clone.
777 	 */
778 	sd.max++;
779 	sd.cntr = 0;
780 	if (zfs_iter_snapshots(src_zhp, rename_snap, &sd) != 0)
781 		return (Z_ERR);
782 
783 	/* close and reopen the clone dataset to get the latest info */
784 	zfs_close(cln_zhp);
785 	if ((cln_zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
786 		return (Z_ERR);
787 
788 	if (zfs_promote(cln_zhp) != 0) {
789 		(void) fprintf(stderr, gettext("failed to promote %s: %s\n"),
790 		    nm, libzfs_error_description(g_zfs));
791 		return (Z_ERR);
792 	}
793 
794 	zfs_close(cln_zhp);
795 	return (Z_OK);
796 }
797 
798 /*
799  * Promote the youngest clone.  That clone will then become the origin of all
800  * of the other clones that were hanging off of the source dataset.
801  */
802 int
803 promote_all_clones(zfs_handle_t *zhp)
804 {
805 	clone_data_t	cd;
806 	char		nm[ZFS_MAXNAMELEN];
807 
808 	cd.clone_zhp = NULL;
809 	cd.origin_creation = 0;
810 	cd.snapshot = NULL;
811 
812 	if (zfs_iter_snapshots(zhp, find_clone, &cd) != 0) {
813 		zfs_close(zhp);
814 		return (Z_ERR);
815 	}
816 
817 	/* Nothing to promote. */
818 	if (cd.clone_zhp == NULL)
819 		return (Z_OK);
820 
821 	/* Found the youngest clone to promote.  Promote it. */
822 	if (promote_clone(zhp, cd.clone_zhp) != 0) {
823 		zfs_close(cd.clone_zhp);
824 		zfs_close(zhp);
825 		return (Z_ERR);
826 	}
827 
828 	/* close and reopen the main dataset to get the latest info */
829 	(void) strlcpy(nm, zfs_get_name(zhp), sizeof (nm));
830 	zfs_close(zhp);
831 	if ((zhp = zfs_open(g_zfs, nm, ZFS_TYPE_FILESYSTEM)) == NULL)
832 		return (Z_ERR);
833 
834 	return (Z_OK);
835 }
836 
837 /*
838  * Clone a pre-existing ZFS snapshot, either by making a direct ZFS clone, if
839  * possible, or by copying the data from the snapshot to the zonepath.
840  */
841 int
842 clone_snapshot_zfs(char *snap_name, char *zonepath, char *validatesnap)
843 {
844 	int	err = Z_OK;
845 	char	clone_name[MAXPATHLEN];
846 	char	snap_path[MAXPATHLEN];
847 
848 	if (snap2path(snap_name, snap_path, sizeof (snap_path)) != Z_OK) {
849 		(void) fprintf(stderr, gettext("unable to find path for %s.\n"),
850 		    snap_name);
851 		return (Z_ERR);
852 	}
853 
854 	if (validate_snapshot(snap_name, snap_path, validatesnap) != Z_OK)
855 		return (Z_NO_ENTRY);
856 
857 	/*
858 	 * The zonepath cannot be ZFS cloned, try to copy the data from
859 	 * within the snapshot to the zonepath.
860 	 */
861 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
862 		if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
863 			if (clean_out_clone() != Z_OK)
864 				(void) fprintf(stderr,
865 				    gettext("could not remove the "
866 				    "software inventory from %s\n"), zonepath);
867 
868 		return (err);
869 	}
870 
871 	if ((err = clone_snap(snap_name, clone_name)) != Z_OK) {
872 		if (err != Z_NO_ENTRY) {
873 			/*
874 			 * Cloning the snapshot failed.  Fall back to trying
875 			 * to install the zone by copying from the snapshot.
876 			 */
877 			if ((err = clone_copy(snap_path, zonepath)) == Z_OK)
878 				if (clean_out_clone() != Z_OK)
879 					(void) fprintf(stderr,
880 					    gettext("could not remove the "
881 					    "software inventory from %s\n"),
882 					    zonepath);
883 		} else {
884 			/*
885 			 * The snapshot is unusable for some reason so restore
886 			 * the zone state to configured since we were unable to
887 			 * actually do anything about getting the zone
888 			 * installed.
889 			 */
890 			int tmp;
891 
892 			if ((tmp = zone_set_state(target_zone,
893 			    ZONE_STATE_CONFIGURED)) != Z_OK) {
894 				errno = tmp;
895 				zperror2(target_zone,
896 				    gettext("could not set state"));
897 			}
898 		}
899 	}
900 
901 	return (err);
902 }
903 
904 /*
905  * Attempt to clone a source_zone to a target zonepath by using a ZFS clone.
906  */
907 int
908 clone_zfs(char *source_zonepath, char *zonepath, char *presnapbuf,
909     char *postsnapbuf)
910 {
911 	zfs_handle_t	*zhp;
912 	char		clone_name[MAXPATHLEN];
913 	char		snap_name[MAXPATHLEN];
914 
915 	/*
916 	 * Try to get a zfs handle for the source_zonepath.  If this fails
917 	 * the source_zonepath is not ZFS so return an error.
918 	 */
919 	if ((zhp = mount2zhandle(source_zonepath)) == NULL)
920 		return (Z_ERR);
921 
922 	/*
923 	 * Check if there is a file system already mounted on zonepath.  If so,
924 	 * we can't clone to the path so we should fall back to copying.
925 	 */
926 	if (is_mountpnt(zonepath)) {
927 		zfs_close(zhp);
928 		(void) fprintf(stderr,
929 		    gettext("A file system is already mounted on %s,\n"
930 		    "preventing use of a ZFS clone.\n"), zonepath);
931 		return (Z_ERR);
932 	}
933 
934 	/*
935 	 * Instead of using path2name to get the clone name from the zonepath,
936 	 * we could generate a name from the source zone ZFS name.  However,
937 	 * this would mean we would create the clone under the ZFS fs of the
938 	 * source instead of what the zonepath says.  For example,
939 	 *
940 	 * source_zonepath		zonepath
941 	 * /pl/zones/dev/z1		/pl/zones/deploy/z2
942 	 *
943 	 * We don't want the clone to be under "dev", we want it under
944 	 * "deploy", so that we can leverage the normal attribute inheritance
945 	 * that ZFS provides in the fs hierarchy.
946 	 */
947 	if (path2name(zonepath, clone_name, sizeof (clone_name)) != Z_OK) {
948 		zfs_close(zhp);
949 		return (Z_ERR);
950 	}
951 
952 	if (take_snapshot(zhp, snap_name, sizeof (snap_name), presnapbuf,
953 	    postsnapbuf) != Z_OK) {
954 		zfs_close(zhp);
955 		return (Z_ERR);
956 	}
957 	zfs_close(zhp);
958 
959 	if (clone_snap(snap_name, clone_name) != Z_OK) {
960 		/* Clean up the snapshot we just took. */
961 		if ((zhp = zfs_open(g_zfs, snap_name, ZFS_TYPE_SNAPSHOT))
962 		    != NULL) {
963 			if (zfs_unmount(zhp, NULL, 0) == 0)
964 				(void) zfs_destroy(zhp);
965 			zfs_close(zhp);
966 		}
967 
968 		return (Z_ERR);
969 	}
970 
971 	(void) printf(gettext("Instead of copying, a ZFS clone has been "
972 	    "created for this zone.\n"));
973 
974 	return (Z_OK);
975 }
976 
977 /*
978  * Attempt to create a ZFS file system for the specified zonepath.
979  * We either will successfully create a ZFS file system and get it mounted
980  * on the zonepath or we don't.  The caller doesn't care since a regular
981  * directory is used for the zonepath if no ZFS file system is mounted there.
982  */
983 void
984 create_zfs_zonepath(char *zonepath)
985 {
986 	zfs_handle_t	*zhp;
987 	char		zfs_name[MAXPATHLEN];
988 	nvlist_t	*props = NULL;
989 
990 	if (path2name(zonepath, zfs_name, sizeof (zfs_name)) != Z_OK)
991 		return;
992 
993 	/* Check if the dataset already exists. */
994 	if ((zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) != NULL) {
995 		zfs_close(zhp);
996 		return;
997 	}
998 
999 	if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0 ||
1000 	    nvlist_add_string(props, zfs_prop_to_name(ZFS_PROP_SHARENFS),
1001 	    "off") != 0) {
1002 		if (props != NULL)
1003 			nvlist_free(props);
1004 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1005 		    "out of memory\n"), zfs_name);
1006 	}
1007 
1008 	if (zfs_create(g_zfs, zfs_name, ZFS_TYPE_FILESYSTEM, props) != 0 ||
1009 	    (zhp = zfs_open(g_zfs, zfs_name, ZFS_TYPE_DATASET)) == NULL) {
1010 		(void) fprintf(stderr, gettext("cannot create ZFS dataset %s: "
1011 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1012 		nvlist_free(props);
1013 		return;
1014 	}
1015 
1016 	nvlist_free(props);
1017 
1018 	if (zfs_mount(zhp, NULL, 0) != 0) {
1019 		(void) fprintf(stderr, gettext("cannot mount ZFS dataset %s: "
1020 		    "%s\n"), zfs_name, libzfs_error_description(g_zfs));
1021 		(void) zfs_destroy(zhp);
1022 	} else {
1023 		if (chmod(zonepath, S_IRWXU) != 0) {
1024 			(void) fprintf(stderr, gettext("file system %s "
1025 			    "successfully created, but chmod %o failed: %s\n"),
1026 			    zfs_name, S_IRWXU, strerror(errno));
1027 			(void) destroy_zfs(zonepath);
1028 		} else {
1029 			(void) printf(gettext("A ZFS file system has been "
1030 			    "created for this zone.\n"));
1031 		}
1032 	}
1033 
1034 	zfs_close(zhp);
1035 }
1036 
1037 /*
1038  * If the zonepath is a ZFS file system, attempt to destroy it.  We return Z_OK
1039  * if we were able to zfs_destroy the zonepath, otherwise we return Z_ERR
1040  * which means the caller should clean up the zonepath in the traditional
1041  * way.
1042  */
1043 int
1044 destroy_zfs(char *zonepath)
1045 {
1046 	zfs_handle_t	*zhp;
1047 	boolean_t	is_clone = B_FALSE;
1048 	char		origin[ZFS_MAXPROPLEN];
1049 
1050 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1051 		return (Z_ERR);
1052 
1053 	if (promote_all_clones(zhp) != 0)
1054 		return (Z_ERR);
1055 
1056 	/* Now cleanup any snapshots remaining. */
1057 	if (zfs_iter_snapshots(zhp, rm_snap, NULL) != 0) {
1058 		zfs_close(zhp);
1059 		return (Z_ERR);
1060 	}
1061 
1062 	/*
1063 	 * We can't destroy the file system if it has still has dependents.
1064 	 * There shouldn't be any at this point, but we'll double check.
1065 	 */
1066 	if (zfs_iter_dependents(zhp, B_TRUE, has_dependent, NULL) != 0) {
1067 		(void) fprintf(stderr, gettext("zfs destroy %s failed: the "
1068 		    "dataset still has dependents\n"), zfs_get_name(zhp));
1069 		zfs_close(zhp);
1070 		return (Z_ERR);
1071 	}
1072 
1073 	/*
1074 	 * This might be a clone.  Try to get the snapshot so we can attempt
1075 	 * to destroy that as well.
1076 	 */
1077 	if (zfs_prop_get(zhp, ZFS_PROP_ORIGIN, origin, sizeof (origin), NULL,
1078 	    NULL, 0, B_FALSE) == 0)
1079 		is_clone = B_TRUE;
1080 
1081 	if (zfs_unmount(zhp, NULL, 0) != 0) {
1082 		(void) fprintf(stderr, gettext("zfs unmount %s failed: %s\n"),
1083 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1084 		zfs_close(zhp);
1085 		return (Z_ERR);
1086 	}
1087 
1088 	if (zfs_destroy(zhp) != 0) {
1089 		/*
1090 		 * If the destroy fails for some reason, try to remount
1091 		 * the file system so that we can use "rm -rf" to clean up
1092 		 * instead.
1093 		 */
1094 		(void) fprintf(stderr, gettext("zfs destroy %s failed: %s\n"),
1095 		    zfs_get_name(zhp), libzfs_error_description(g_zfs));
1096 		(void) zfs_mount(zhp, NULL, 0);
1097 		zfs_close(zhp);
1098 		return (Z_ERR);
1099 	}
1100 
1101 	/*
1102 	 * If the zone has ever been moved then the mountpoint dir will not be
1103 	 * cleaned up by the zfs_destroy().  To handle this case try to clean
1104 	 * it up now but don't worry if it fails, that will be normal.
1105 	 */
1106 	(void) rmdir(zonepath);
1107 
1108 	(void) printf(gettext("The ZFS file system for this zone has been "
1109 	    "destroyed.\n"));
1110 
1111 	if (is_clone) {
1112 		zfs_handle_t	*ohp;
1113 
1114 		/*
1115 		 * Try to clean up the snapshot that the clone was taken from.
1116 		 */
1117 		if ((ohp = zfs_open(g_zfs, origin,
1118 		    ZFS_TYPE_SNAPSHOT)) != NULL) {
1119 			if (zfs_iter_dependents(ohp, B_TRUE, has_dependent,
1120 			    NULL) == 0 && zfs_unmount(ohp, NULL, 0) == 0)
1121 				(void) zfs_destroy(ohp);
1122 			zfs_close(ohp);
1123 		}
1124 	}
1125 
1126 	zfs_close(zhp);
1127 	return (Z_OK);
1128 }
1129 
1130 /*
1131  * Return true if the path is its own zfs file system.  We determine this
1132  * by stat-ing the path to see if it is zfs and stat-ing the parent to see
1133  * if it is a different fs.
1134  */
1135 boolean_t
1136 is_zonepath_zfs(char *zonepath)
1137 {
1138 	int res;
1139 	char *path;
1140 	char *parent;
1141 	struct statvfs64 buf1, buf2;
1142 
1143 	if (statvfs64(zonepath, &buf1) != 0)
1144 		return (B_FALSE);
1145 
1146 	if (strcmp(buf1.f_basetype, "zfs") != 0)
1147 		return (B_FALSE);
1148 
1149 	if ((path = strdup(zonepath)) == NULL)
1150 		return (B_FALSE);
1151 
1152 	parent = dirname(path);
1153 	res = statvfs64(parent, &buf2);
1154 	free(path);
1155 
1156 	if (res != 0)
1157 		return (B_FALSE);
1158 
1159 	if (buf1.f_fsid == buf2.f_fsid)
1160 		return (B_FALSE);
1161 
1162 	return (B_TRUE);
1163 }
1164 
1165 /*
1166  * Implement the fast move of a ZFS file system by simply updating the
1167  * mountpoint.  Since it is file system already, we don't have the
1168  * issue of cross-file system copying.
1169  */
1170 int
1171 move_zfs(char *zonepath, char *new_zonepath)
1172 {
1173 	int		ret = Z_ERR;
1174 	zfs_handle_t	*zhp;
1175 
1176 	if ((zhp = mount2zhandle(zonepath)) == NULL)
1177 		return (Z_ERR);
1178 
1179 	if (zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
1180 	    new_zonepath) == 0) {
1181 		/*
1182 		 * Clean up the old mount point.  We ignore any failure since
1183 		 * the zone is already successfully mounted on the new path.
1184 		 */
1185 		(void) rmdir(zonepath);
1186 		ret = Z_OK;
1187 	}
1188 
1189 	zfs_close(zhp);
1190 
1191 	return (ret);
1192 }
1193 
1194 /*
1195  * Validate that the given dataset exists on the system, and that neither it nor
1196  * its children are zvols.
1197  *
1198  * Note that we don't do anything with the 'zoned' property here.  All
1199  * management is done in zoneadmd when the zone is actually rebooted.  This
1200  * allows us to automatically set the zoned property even when a zone is
1201  * rebooted by the administrator.
1202  */
1203 int
1204 verify_datasets(zone_dochandle_t handle)
1205 {
1206 	int return_code = Z_OK;
1207 	struct zone_dstab dstab;
1208 	zfs_handle_t *zhp;
1209 	char propbuf[ZFS_MAXPROPLEN];
1210 	char source[ZFS_MAXNAMELEN];
1211 	zprop_source_t srctype;
1212 
1213 	if (zonecfg_setdsent(handle) != Z_OK) {
1214 		/*
1215 		 * TRANSLATION_NOTE
1216 		 * zfs and dataset are literals that should not be translated.
1217 		 */
1218 		(void) fprintf(stderr, gettext("could not verify zfs datasets: "
1219 		    "unable to enumerate datasets\n"));
1220 		return (Z_ERR);
1221 	}
1222 
1223 	while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
1224 
1225 		if ((zhp = zfs_open(g_zfs, dstab.zone_dataset_name,
1226 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1227 			(void) fprintf(stderr, gettext("could not verify zfs "
1228 			    "dataset %s: %s\n"), dstab.zone_dataset_name,
1229 			    libzfs_error_description(g_zfs));
1230 			return_code = Z_ERR;
1231 			continue;
1232 		}
1233 
1234 		if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf,
1235 		    sizeof (propbuf), &srctype, source,
1236 		    sizeof (source), 0) == 0 &&
1237 		    (srctype == ZPROP_SRC_INHERITED)) {
1238 			(void) fprintf(stderr, gettext("could not verify zfs "
1239 			    "dataset %s: mountpoint cannot be inherited\n"),
1240 			    dstab.zone_dataset_name);
1241 			return_code = Z_ERR;
1242 			zfs_close(zhp);
1243 			continue;
1244 		}
1245 
1246 		if (zfs_get_type(zhp) == ZFS_TYPE_VOLUME) {
1247 			(void) fprintf(stderr, gettext("cannot verify zfs "
1248 			    "dataset %s: volumes cannot be specified as a "
1249 			    "zone dataset resource\n"),
1250 			    dstab.zone_dataset_name);
1251 			return_code = Z_ERR;
1252 		}
1253 
1254 		if (zfs_iter_children(zhp, check_zvol, NULL) != 0)
1255 			return_code = Z_ERR;
1256 
1257 		zfs_close(zhp);
1258 	}
1259 	(void) zonecfg_enddsent(handle);
1260 
1261 	return (return_code);
1262 }
1263 
1264 /*
1265  * Verify that the ZFS dataset exists, and its mountpoint
1266  * property is set to "legacy".
1267  */
1268 int
1269 verify_fs_zfs(struct zone_fstab *fstab)
1270 {
1271 	zfs_handle_t *zhp;
1272 	char propbuf[ZFS_MAXPROPLEN];
1273 
1274 	if ((zhp = zfs_open(g_zfs, fstab->zone_fs_special,
1275 	    ZFS_TYPE_DATASET)) == NULL) {
1276 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1277 		    "could not access zfs dataset '%s'\n"),
1278 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1279 		return (Z_ERR);
1280 	}
1281 
1282 	if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
1283 		(void) fprintf(stderr, gettext("cannot verify fs %s: "
1284 		    "'%s' is not a file system\n"),
1285 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1286 		zfs_close(zhp);
1287 		return (Z_ERR);
1288 	}
1289 
1290 	if (zfs_prop_get(zhp, ZFS_PROP_MOUNTPOINT, propbuf, sizeof (propbuf),
1291 	    NULL, NULL, 0, 0) != 0 || strcmp(propbuf, "legacy") != 0) {
1292 		(void) fprintf(stderr, gettext("could not verify fs %s: "
1293 		    "zfs '%s' mountpoint is not \"legacy\"\n"),
1294 		    fstab->zone_fs_dir, fstab->zone_fs_special);
1295 		zfs_close(zhp);
1296 		return (Z_ERR);
1297 	}
1298 
1299 	zfs_close(zhp);
1300 	return (Z_OK);
1301 }
1302 
1303 int
1304 init_zfs(void)
1305 {
1306 	if ((g_zfs = libzfs_init()) == NULL) {
1307 		(void) fprintf(stderr, gettext("failed to initialize ZFS "
1308 		    "library\n"));
1309 		return (Z_ERR);
1310 	}
1311 
1312 	return (Z_OK);
1313 }
1314