xref: /titanic_41/usr/src/lib/libzfs/common/libzfs_sendrecv.c (revision 7be238fce69ba74b2163fc0ea898dfdc01a4aa22)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <assert.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <libdevinfo.h>
31 #include <libintl.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <strings.h>
35 #include <unistd.h>
36 #include <stddef.h>
37 #include <fcntl.h>
38 #include <sys/mount.h>
39 #include <sys/mntent.h>
40 #include <sys/mnttab.h>
41 #include <sys/avl.h>
42 #include <stddef.h>
43 
44 #include <libzfs.h>
45 
46 #include "zfs_namecheck.h"
47 #include "zfs_prop.h"
48 #include "libzfs_impl.h"
49 
50 #include <fletcher.c> /* XXX */
51 
52 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
53     int, avl_tree_t *, char **);
54 
55 /*
56  * Routines for dealing with the AVL tree of fs-nvlists
57  */
/*
 * Node of the guid-ordered AVL tree built by fsavl_create().  Each node
 * describes one snapshot taken from the "snaps" list of an fs-nvlist.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage (offset given to avl_create) */
	nvlist_t *fn_nvfs;	/* nvlist of the fs holding this snapshot */
	char *fn_snapname;	/* nvpair name of the snapshot in "snaps" */
	uint64_t fn_guid;	/* snapshot guid; the comparison key */
} fsavl_node_t;
64 
65 static int
66 fsavl_compare(const void *arg1, const void *arg2)
67 {
68 	const fsavl_node_t *fn1 = arg1;
69 	const fsavl_node_t *fn2 = arg2;
70 
71 	if (fn1->fn_guid > fn2->fn_guid)
72 		return (+1);
73 	else if (fn1->fn_guid < fn2->fn_guid)
74 		return (-1);
75 	else
76 		return (0);
77 }
78 
79 /*
80  * Given the GUID of a snapshot, find its containing filesystem and
81  * (optionally) name.
82  */
83 static nvlist_t *
84 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
85 {
86 	fsavl_node_t fn_find;
87 	fsavl_node_t *fn;
88 
89 	fn_find.fn_guid = snapguid;
90 
91 	fn = avl_find(avl, &fn_find, NULL);
92 	if (fn) {
93 		if (snapname)
94 			*snapname = fn->fn_snapname;
95 		return (fn->fn_nvfs);
96 	}
97 	return (NULL);
98 }
99 
100 static void
101 fsavl_destroy(avl_tree_t *avl)
102 {
103 	fsavl_node_t *fn;
104 	void *cookie;
105 
106 	if (avl == NULL)
107 		return;
108 
109 	cookie = NULL;
110 	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
111 		free(fn);
112 	avl_destroy(avl);
113 	free(avl);
114 }
115 
116 /*
117  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
118  */
119 static avl_tree_t *
120 fsavl_create(nvlist_t *fss)
121 {
122 	avl_tree_t *fsavl;
123 	nvpair_t *fselem = NULL;
124 
125 	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
126 		return (NULL);
127 
128 	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
129 	    offsetof(fsavl_node_t, fn_node));
130 
131 	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
132 		nvlist_t *nvfs, *snaps;
133 		nvpair_t *snapelem = NULL;
134 
135 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
136 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
137 
138 		while ((snapelem =
139 		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
140 			fsavl_node_t *fn;
141 			uint64_t guid;
142 
143 			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
144 			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
145 				fsavl_destroy(fsavl);
146 				return (NULL);
147 			}
148 			fn->fn_nvfs = nvfs;
149 			fn->fn_snapname = nvpair_name(snapelem);
150 			fn->fn_guid = guid;
151 
152 			/*
153 			 * Note: if there are multiple snaps with the
154 			 * same GUID, we ignore all but one.
155 			 */
156 			if (avl_find(fsavl, fn, NULL) == NULL)
157 				avl_add(fsavl, fn);
158 			else
159 				free(fn);
160 		}
161 	}
162 
163 	return (fsavl);
164 }
165 
166 /*
167  * Routines for dealing with the giant nvlist of fs-nvlists, etc.
168  */
/*
 * State threaded through send_iterate_fs() / send_iterate_snap() while the
 * nvlist describing a dataset hierarchy is being built.
 */
typedef struct send_data {
	/* guid of fromsnap (or tosnap as a substitute) in the fs last walked */
	uint64_t parent_fromsnap_guid;
	/* "snaps" list being filled for the fs currently walked */
	nvlist_t *parent_snaps;
	/* result: one nvlist per filesystem, keyed by "0x<guid>" */
	nvlist_t *fss;
	/* "snapprops" list being filled for the fs currently walked */
	nvlist_t *snapprops;
	/* snapshot names compared against the part after the '@' */
	const char *fromsnap;
	const char *tosnap;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
200 
201 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
202 
203 static int
204 send_iterate_snap(zfs_handle_t *zhp, void *arg)
205 {
206 	send_data_t *sd = arg;
207 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
208 	char *snapname;
209 	nvlist_t *nv;
210 
211 	snapname = strrchr(zhp->zfs_name, '@')+1;
212 
213 	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
214 	/*
215 	 * NB: if there is no fromsnap here (it's a newly created fs in
216 	 * an incremental replication), we will substitute the tosnap.
217 	 */
218 	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
219 	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
220 	    strcmp(snapname, sd->tosnap) == 0)) {
221 		sd->parent_fromsnap_guid = guid;
222 	}
223 
224 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
225 	send_iterate_prop(zhp, nv);
226 	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
227 	nvlist_free(nv);
228 
229 	zfs_close(zhp);
230 	return (0);
231 }
232 
233 static void
234 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
235 {
236 	nvpair_t *elem = NULL;
237 
238 	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
239 		char *propname = nvpair_name(elem);
240 		zfs_prop_t prop = zfs_name_to_prop(propname);
241 		nvlist_t *propnv;
242 
243 		assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
244 
245 		if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
246 			continue;
247 
248 		verify(nvpair_value_nvlist(elem, &propnv) == 0);
249 		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
250 		    prop == ZFS_PROP_REFQUOTA ||
251 		    prop == ZFS_PROP_REFRESERVATION) {
252 			/* these guys are modifyable, but have no source */
253 			uint64_t value;
254 			verify(nvlist_lookup_uint64(propnv,
255 			    ZPROP_VALUE, &value) == 0);
256 			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
257 				continue;
258 		} else {
259 			char *source;
260 			if (nvlist_lookup_string(propnv,
261 			    ZPROP_SOURCE, &source) != 0)
262 				continue;
263 			if (strcmp(source, zhp->zfs_name) != 0)
264 				continue;
265 		}
266 
267 		if (zfs_prop_user(propname) ||
268 		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
269 			char *value;
270 			verify(nvlist_lookup_string(propnv,
271 			    ZPROP_VALUE, &value) == 0);
272 			VERIFY(0 == nvlist_add_string(nv, propname, value));
273 		} else {
274 			uint64_t value;
275 			verify(nvlist_lookup_uint64(propnv,
276 			    ZPROP_VALUE, &value) == 0);
277 			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
278 		}
279 	}
280 }
281 
282 /*
283  * recursively generate nvlists describing datasets.  See comment
284  * for the data structure send_data_t above for description of contents
285  * of the nvlist.
286  */
287 static int
288 send_iterate_fs(zfs_handle_t *zhp, void *arg)
289 {
290 	send_data_t *sd = arg;
291 	nvlist_t *nvfs, *nv;
292 	int rv;
293 	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
294 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
295 	char guidstring[64];
296 
297 	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
298 	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
299 	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
300 	    sd->parent_fromsnap_guid));
301 
302 	if (zhp->zfs_dmustats.dds_origin[0]) {
303 		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
304 		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
305 		if (origin == NULL)
306 			return (-1);
307 		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
308 		    origin->zfs_dmustats.dds_guid));
309 	}
310 
311 	/* iterate over props */
312 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
313 	send_iterate_prop(zhp, nv);
314 	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
315 	nvlist_free(nv);
316 
317 	/* iterate over snaps, and set sd->parent_fromsnap_guid */
318 	sd->parent_fromsnap_guid = 0;
319 	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
320 	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
321 	(void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
322 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
323 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
324 	nvlist_free(sd->parent_snaps);
325 	nvlist_free(sd->snapprops);
326 
327 	/* add this fs to nvlist */
328 	(void) snprintf(guidstring, sizeof (guidstring),
329 	    "0x%llx", (longlong_t)guid);
330 	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
331 	nvlist_free(nvfs);
332 
333 	/* iterate over children */
334 	rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
335 
336 	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
337 
338 	zfs_close(zhp);
339 	return (rv);
340 }
341 
342 static int
343 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
344     const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
345 {
346 	zfs_handle_t *zhp;
347 	send_data_t sd = { 0 };
348 	int error;
349 
350 	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
351 	if (zhp == NULL)
352 		return (EZFS_BADTYPE);
353 
354 	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
355 	sd.fromsnap = fromsnap;
356 	sd.tosnap = tosnap;
357 
358 	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
359 		nvlist_free(sd.fss);
360 		if (avlp != NULL)
361 			*avlp = NULL;
362 		*nvlp = NULL;
363 		return (error);
364 	}
365 
366 	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
367 		nvlist_free(sd.fss);
368 		*nvlp = NULL;
369 		return (EZFS_NOMEM);
370 	}
371 
372 	*nvlp = sd.fss;
373 	return (0);
374 }
375 
376 /*
377  * Routines for dealing with the sorted snapshot functionality
378  */
/*
 * AVL node wrapping one snapshot handle; trees of these are ordered by
 * zfs_snapshot_compare().
 */
typedef struct zfs_node {
	zfs_handle_t	*zn_handle;	/* the snapshot handle */
	avl_node_t	zn_avlnode;	/* AVL linkage */
} zfs_node_t;
383 
384 static int
385 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
386 {
387 	avl_tree_t *avl = data;
388 	zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
389 
390 	node->zn_handle = zhp;
391 	avl_add(avl, node);
392 	return (0);
393 }
394 
395 /* ARGSUSED */
396 static int
397 zfs_snapshot_compare(const void *larg, const void *rarg)
398 {
399 	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
400 	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
401 	uint64_t lcreate, rcreate;
402 
403 	/*
404 	 * Sort them according to creation time.  We use the hidden
405 	 * CREATETXG property to get an absolute ordering of snapshots.
406 	 */
407 	lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
408 	rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
409 
410 	if (lcreate < rcreate)
411 		return (-1);
412 	else if (lcreate > rcreate)
413 		return (+1);
414 	else
415 		return (0);
416 }
417 
418 static int
419 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
420 {
421 	int ret = 0;
422 	zfs_node_t *node;
423 	avl_tree_t avl;
424 	void *cookie = NULL;
425 
426 	avl_create(&avl, zfs_snapshot_compare,
427 	    sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
428 
429 	ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
430 
431 	for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
432 		ret |= callback(node->zn_handle, data);
433 
434 	while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
435 		free(node);
436 
437 	avl_destroy(&avl);
438 
439 	return (ret);
440 }
441 
442 /*
443  * Routines specific to "zfs send"
444  */
/*
 * State shared by dump_filesystems(), dump_filesystem() and
 * dump_snapshot() while a send stream is being written.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	/* snapshot visited last; incremental source for the next one sent */
	char lastsnap[ZFS_MAXNAMELEN];
	/*
	 * seenfrom/seento track progress through one filesystem's snapshots;
	 * the other flags are copies of the zfs_send() arguments.
	 */
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t verbose;
	int outfd;		/* descriptor the stream is written to */
	boolean_t err;		/* set after a "could not send" warning */
	nvlist_t *fss;		/* per-fs nvlists from gather_nvlist() */
	avl_tree_t *fsavl;	/* snapshot-guid index into fss */
} send_dump_data_t;
457 
458 /*
459  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
460  * NULL) to the file descriptor specified by outfd.
461  */
462 static int
463 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
464     int outfd)
465 {
466 	zfs_cmd_t zc = { 0 };
467 	libzfs_handle_t *hdl = zhp->zfs_hdl;
468 
469 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
470 	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
471 
472 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
473 	if (fromsnap)
474 		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
475 	zc.zc_cookie = outfd;
476 	zc.zc_obj = fromorigin;
477 
478 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
479 		char errbuf[1024];
480 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
481 		    "warning: cannot send '%s'"), zhp->zfs_name);
482 
483 		switch (errno) {
484 
485 		case EXDEV:
486 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
487 			    "not an earlier snapshot from the same fs"));
488 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
489 
490 		case ENOENT:
491 			if (zfs_dataset_exists(hdl, zc.zc_name,
492 			    ZFS_TYPE_SNAPSHOT)) {
493 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
494 				    "incremental source (@%s) does not exist"),
495 				    zc.zc_value);
496 			}
497 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
498 
499 		case EDQUOT:
500 		case EFBIG:
501 		case EIO:
502 		case ENOLINK:
503 		case ENOSPC:
504 		case ENOSTR:
505 		case ENXIO:
506 		case EPIPE:
507 		case ERANGE:
508 		case EFAULT:
509 		case EROFS:
510 			zfs_error_aux(hdl, strerror(errno));
511 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
512 
513 		default:
514 			return (zfs_standard_error(hdl, errno, errbuf));
515 		}
516 	}
517 
518 	return (0);
519 }
520 
521 static int
522 dump_snapshot(zfs_handle_t *zhp, void *arg)
523 {
524 	send_dump_data_t *sdd = arg;
525 	const char *thissnap;
526 	int err;
527 
528 	thissnap = strchr(zhp->zfs_name, '@') + 1;
529 
530 	if (sdd->fromsnap && !sdd->seenfrom &&
531 	    strcmp(sdd->fromsnap, thissnap) == 0) {
532 		sdd->seenfrom = B_TRUE;
533 		(void) strcpy(sdd->lastsnap, thissnap);
534 		zfs_close(zhp);
535 		return (0);
536 	}
537 
538 	if (sdd->seento || !sdd->seenfrom) {
539 		zfs_close(zhp);
540 		return (0);
541 	}
542 
543 	/* send it */
544 	if (sdd->verbose) {
545 		(void) fprintf(stderr, "sending from @%s to %s\n",
546 		    sdd->lastsnap, zhp->zfs_name);
547 	}
548 
549 	err = dump_ioctl(zhp, sdd->lastsnap,
550 	    sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
551 	    sdd->outfd);
552 
553 	if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0)
554 		sdd->seento = B_TRUE;
555 
556 	(void) strcpy(sdd->lastsnap, thissnap);
557 	zfs_close(zhp);
558 	return (err);
559 }
560 
561 static int
562 dump_filesystem(zfs_handle_t *zhp, void *arg)
563 {
564 	int rv = 0;
565 	send_dump_data_t *sdd = arg;
566 	boolean_t missingfrom = B_FALSE;
567 	zfs_cmd_t zc = { 0 };
568 
569 	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
570 	    zhp->zfs_name, sdd->tosnap);
571 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
572 		(void) fprintf(stderr, "WARNING: "
573 		    "could not send %s@%s: does not exist\n",
574 		    zhp->zfs_name, sdd->tosnap);
575 		sdd->err = B_TRUE;
576 		return (0);
577 	}
578 
579 	if (sdd->replicate && sdd->fromsnap) {
580 		/*
581 		 * If this fs does not have fromsnap, and we're doing
582 		 * recursive, we need to send a full stream from the
583 		 * beginning (or an incremental from the origin if this
584 		 * is a clone).  If we're doing non-recursive, then let
585 		 * them get the error.
586 		 */
587 		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
588 		    zhp->zfs_name, sdd->fromsnap);
589 		if (ioctl(zhp->zfs_hdl->libzfs_fd,
590 		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
591 			missingfrom = B_TRUE;
592 		}
593 	}
594 
595 	if (sdd->doall) {
596 		sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0;
597 		if (sdd->fromsnap == NULL || missingfrom)
598 			sdd->seenfrom = B_TRUE;
599 
600 		rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
601 		if (!sdd->seenfrom) {
602 			(void) fprintf(stderr,
603 			    "WARNING: could not send %s@%s:\n"
604 			    "incremental source (%s@%s) does not exist\n",
605 			    zhp->zfs_name, sdd->tosnap,
606 			    zhp->zfs_name, sdd->fromsnap);
607 			sdd->err = B_TRUE;
608 		} else if (!sdd->seento) {
609 			if (sdd->fromsnap) {
610 				(void) fprintf(stderr,
611 				    "WARNING: could not send %s@%s:\n"
612 				    "incremental source (%s@%s) "
613 				    "is not earlier than it\n",
614 				    zhp->zfs_name, sdd->tosnap,
615 				    zhp->zfs_name, sdd->fromsnap);
616 			} else {
617 				(void) fprintf(stderr, "WARNING: "
618 				    "could not send %s@%s: does not exist\n",
619 				    zhp->zfs_name, sdd->tosnap);
620 			}
621 			sdd->err = B_TRUE;
622 		}
623 	} else {
624 		zfs_handle_t *snapzhp;
625 		char snapname[ZFS_MAXNAMELEN];
626 
627 		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
628 		    zfs_get_name(zhp), sdd->tosnap);
629 		snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
630 		if (snapzhp == NULL) {
631 			rv = -1;
632 		} else {
633 			rv = dump_ioctl(snapzhp,
634 			    missingfrom ? NULL : sdd->fromsnap,
635 			    sdd->fromorigin || missingfrom,
636 			    sdd->outfd);
637 			sdd->seento = B_TRUE;
638 			zfs_close(snapzhp);
639 		}
640 	}
641 
642 	return (rv);
643 }
644 
645 static int
646 dump_filesystems(zfs_handle_t *rzhp, void *arg)
647 {
648 	send_dump_data_t *sdd = arg;
649 	nvpair_t *fspair;
650 	boolean_t needagain, progress;
651 
652 	if (!sdd->replicate)
653 		return (dump_filesystem(rzhp, sdd));
654 
655 again:
656 	needagain = progress = B_FALSE;
657 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
658 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
659 		nvlist_t *fslist;
660 		char *fsname;
661 		zfs_handle_t *zhp;
662 		int err;
663 		uint64_t origin_guid = 0;
664 		nvlist_t *origin_nv;
665 
666 		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
667 		if (nvlist_lookup_boolean(fslist, "sent") == 0)
668 			continue;
669 
670 		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
671 		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
672 
673 		origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
674 		if (origin_nv &&
675 		    nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) {
676 			/*
677 			 * origin has not been sent yet;
678 			 * skip this clone.
679 			 */
680 			needagain = B_TRUE;
681 			continue;
682 		}
683 
684 		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
685 		if (zhp == NULL)
686 			return (-1);
687 		err = dump_filesystem(zhp, sdd);
688 		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
689 		progress = B_TRUE;
690 		zfs_close(zhp);
691 		if (err)
692 			return (err);
693 	}
694 	if (needagain) {
695 		assert(progress);
696 		goto again;
697 	}
698 	return (0);
699 }
700 
701 /*
702  * Generate a send stream for the dataset identified by the argument zhp.
703  *
704  * The content of the send stream is the snapshot identified by
705  * 'tosnap'.  Incremental streams are requested in two ways:
706  *     - from the snapshot identified by "fromsnap" (if non-null) or
707  *     - from the origin of the dataset identified by zhp, which must
708  *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
709  *	 is TRUE.
710  *
711  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
712  * uses a special header (with a version field of DMU_BACKUP_HEADER_VERSION)
713  * if "replicate" is set.  If "doall" is set, dump all the intermediate
714  * snapshots. The DMU_BACKUP_HEADER_VERSION header is used in the "doall"
715  * case too.
716  */
717 int
718 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
719     boolean_t replicate, boolean_t doall, boolean_t fromorigin,
720     boolean_t verbose, int outfd)
721 {
722 	char errbuf[1024];
723 	send_dump_data_t sdd = { 0 };
724 	int err;
725 	nvlist_t *fss = NULL;
726 	avl_tree_t *fsavl = NULL;
727 
728 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
729 	    "cannot send '%s'"), zhp->zfs_name);
730 
731 	if (fromsnap && fromsnap[0] == '\0') {
732 		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
733 		    "zero-length incremental source"));
734 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
735 	}
736 
737 	if (replicate || doall) {
738 		dmu_replay_record_t drr = { 0 };
739 		char *packbuf = NULL;
740 		size_t buflen = 0;
741 		zio_cksum_t zc = { 0 };
742 
743 		assert(fromsnap || doall);
744 
745 		if (replicate) {
746 			nvlist_t *hdrnv;
747 
748 			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
749 			if (fromsnap) {
750 				VERIFY(0 == nvlist_add_string(hdrnv,
751 				    "fromsnap", fromsnap));
752 			}
753 			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
754 
755 			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
756 			    fromsnap, tosnap, &fss, &fsavl);
757 			if (err)
758 				return (err);
759 			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
760 			err = nvlist_pack(hdrnv, &packbuf, &buflen,
761 			    NV_ENCODE_XDR, 0);
762 			nvlist_free(hdrnv);
763 			if (err) {
764 				fsavl_destroy(fsavl);
765 				nvlist_free(fss);
766 				return (zfs_standard_error(zhp->zfs_hdl,
767 				    err, errbuf));
768 			}
769 		}
770 
771 		/* write first begin record */
772 		drr.drr_type = DRR_BEGIN;
773 		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
774 		drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
775 		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
776 		    sizeof (drr.drr_u.drr_begin.drr_toname),
777 		    "%s@%s", zhp->zfs_name, tosnap);
778 		drr.drr_payloadlen = buflen;
779 		fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
780 		err = write(outfd, &drr, sizeof (drr));
781 
782 		/* write header nvlist */
783 		if (err != -1) {
784 			fletcher_4_incremental_native(packbuf, buflen, &zc);
785 			err = write(outfd, packbuf, buflen);
786 		}
787 		free(packbuf);
788 		if (err == -1) {
789 			fsavl_destroy(fsavl);
790 			nvlist_free(fss);
791 			return (zfs_standard_error(zhp->zfs_hdl,
792 			    errno, errbuf));
793 		}
794 
795 		/* write end record */
796 		if (err != -1) {
797 			bzero(&drr, sizeof (drr));
798 			drr.drr_type = DRR_END;
799 			drr.drr_u.drr_end.drr_checksum = zc;
800 			err = write(outfd, &drr, sizeof (drr));
801 			if (err == -1) {
802 				fsavl_destroy(fsavl);
803 				nvlist_free(fss);
804 				return (zfs_standard_error(zhp->zfs_hdl,
805 				    errno, errbuf));
806 			}
807 		}
808 	}
809 
810 	/* dump each stream */
811 	sdd.fromsnap = fromsnap;
812 	sdd.tosnap = tosnap;
813 	sdd.outfd = outfd;
814 	sdd.replicate = replicate;
815 	sdd.doall = doall;
816 	sdd.fromorigin = fromorigin;
817 	sdd.fss = fss;
818 	sdd.fsavl = fsavl;
819 	sdd.verbose = verbose;
820 	err = dump_filesystems(zhp, &sdd);
821 	fsavl_destroy(fsavl);
822 	nvlist_free(fss);
823 
824 	if (replicate || doall) {
825 		/*
826 		 * write final end record.  NB: want to do this even if
827 		 * there was some error, because it might not be totally
828 		 * failed.
829 		 */
830 		dmu_replay_record_t drr = { 0 };
831 		drr.drr_type = DRR_END;
832 		if (write(outfd, &drr, sizeof (drr)) == -1) {
833 			return (zfs_standard_error(zhp->zfs_hdl,
834 			    errno, errbuf));
835 		}
836 	}
837 
838 	return (err || sdd.err);
839 }
840 
841 /*
842  * Routines specific to "zfs recv"
843  */
844 
845 static int
846 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
847     boolean_t byteswap, zio_cksum_t *zc)
848 {
849 	char *cp = buf;
850 	int rv;
851 	int len = ilen;
852 
853 	do {
854 		rv = read(fd, cp, len);
855 		cp += rv;
856 		len -= rv;
857 	} while (rv > 0);
858 
859 	if (rv < 0 || len != 0) {
860 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
861 		    "failed to read from stream"));
862 		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
863 		    "cannot receive")));
864 	}
865 
866 	if (zc) {
867 		if (byteswap)
868 			fletcher_4_incremental_byteswap(buf, ilen, zc);
869 		else
870 			fletcher_4_incremental_native(buf, ilen, zc);
871 	}
872 	return (0);
873 }
874 
875 static int
876 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
877     boolean_t byteswap, zio_cksum_t *zc)
878 {
879 	char *buf;
880 	int err;
881 
882 	buf = zfs_alloc(hdl, len);
883 	if (buf == NULL)
884 		return (ENOMEM);
885 
886 	err = recv_read(hdl, fd, buf, len, byteswap, zc);
887 	if (err != 0) {
888 		free(buf);
889 		return (err);
890 	}
891 
892 	err = nvlist_unpack(buf, len, nvp, 0);
893 	free(buf);
894 	if (err != 0) {
895 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
896 		    "stream (malformed nvlist)"));
897 		return (EINVAL);
898 	}
899 	return (0);
900 }
901 
902 static int
903 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
904     int baselen, char *newname, recvflags_t flags)
905 {
906 	static int seq;
907 	zfs_cmd_t zc = { 0 };
908 	int err;
909 	prop_changelist_t *clp;
910 	zfs_handle_t *zhp;
911 
912 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
913 	if (zhp == NULL)
914 		return (-1);
915 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
916 	    flags.force ? MS_FORCE : 0);
917 	zfs_close(zhp);
918 	if (clp == NULL)
919 		return (-1);
920 	err = changelist_prefix(clp);
921 	if (err)
922 		return (err);
923 
924 	zc.zc_objset_type = DMU_OST_ZFS;
925 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
926 
927 	if (tryname) {
928 		(void) strcpy(newname, tryname);
929 
930 		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
931 
932 		if (flags.verbose) {
933 			(void) printf("attempting rename %s to %s\n",
934 			    zc.zc_name, zc.zc_value);
935 		}
936 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
937 		if (err == 0)
938 			changelist_rename(clp, name, tryname);
939 	} else {
940 		err = ENOENT;
941 	}
942 
943 	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
944 		seq++;
945 
946 		(void) strncpy(newname, name, baselen);
947 		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
948 		    "recv-%u-%u", getpid(), seq);
949 		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
950 
951 		if (flags.verbose) {
952 			(void) printf("failed - trying rename %s to %s\n",
953 			    zc.zc_name, zc.zc_value);
954 		}
955 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
956 		if (err == 0)
957 			changelist_rename(clp, name, newname);
958 		if (err && flags.verbose) {
959 			(void) printf("failed (%u) - "
960 			    "will try again on next pass\n", errno);
961 		}
962 		err = EAGAIN;
963 	} else if (flags.verbose) {
964 		if (err == 0)
965 			(void) printf("success\n");
966 		else
967 			(void) printf("failed (%u)\n", errno);
968 	}
969 
970 	(void) changelist_postfix(clp);
971 	changelist_free(clp);
972 
973 	return (err);
974 }
975 
976 static int
977 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
978     char *newname, recvflags_t flags)
979 {
980 	zfs_cmd_t zc = { 0 };
981 	int err = 0;
982 	prop_changelist_t *clp;
983 	zfs_handle_t *zhp;
984 	boolean_t defer = B_FALSE;
985 	int spa_version;
986 
987 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
988 	if (zhp == NULL)
989 		return (-1);
990 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
991 	    flags.force ? MS_FORCE : 0);
992 	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
993 	    zfs_spa_version(zhp, &spa_version) == 0 &&
994 	    spa_version >= SPA_VERSION_USERREFS)
995 		defer = B_TRUE;
996 	zfs_close(zhp);
997 	if (clp == NULL)
998 		return (-1);
999 	err = changelist_prefix(clp);
1000 	if (err)
1001 		return (err);
1002 
1003 	zc.zc_objset_type = DMU_OST_ZFS;
1004 	zc.zc_defer_destroy = defer;
1005 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1006 
1007 	if (flags.verbose)
1008 		(void) printf("attempting destroy %s\n", zc.zc_name);
1009 	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1010 	if (err == 0) {
1011 		if (flags.verbose)
1012 			(void) printf("success\n");
1013 		changelist_remove(clp, zc.zc_name);
1014 	}
1015 
1016 	(void) changelist_postfix(clp);
1017 	changelist_free(clp);
1018 
1019 	/*
1020 	 * Deferred destroy should always succeed. Since we can't tell
1021 	 * if it destroyed the dataset or just marked it for deferred
1022 	 * destroy, always do the rename just in case.
1023 	 */
1024 	if (err != 0 || defer)
1025 		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1026 
1027 	return (err);
1028 }
1029 
/* Search state handed to guid_to_name_cb(). */
typedef struct guid_to_name_data {
	uint64_t guid;	/* guid being searched for */
	char *name;	/* caller's buffer; receives the matching name */
} guid_to_name_data_t;
1034 
1035 static int
1036 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1037 {
1038 	guid_to_name_data_t *gtnd = arg;
1039 	int err;
1040 
1041 	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1042 		(void) strcpy(gtnd->name, zhp->zfs_name);
1043 		return (EEXIST);
1044 	}
1045 	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1046 	zfs_close(zhp);
1047 	return (err);
1048 }
1049 
1050 static int
1051 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1052     char *name)
1053 {
1054 	/* exhaustive search all local snapshots */
1055 	guid_to_name_data_t gtnd;
1056 	int err = 0;
1057 	zfs_handle_t *zhp;
1058 	char *cp;
1059 
1060 	gtnd.guid = guid;
1061 	gtnd.name = name;
1062 
1063 	if (strchr(parent, '@') == NULL) {
1064 		zhp = make_dataset_handle(hdl, parent);
1065 		if (zhp != NULL) {
1066 			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1067 			zfs_close(zhp);
1068 			if (err == EEXIST)
1069 				return (0);
1070 		}
1071 	}
1072 
1073 	cp = strchr(parent, '/');
1074 	if (cp)
1075 		*cp = '\0';
1076 	zhp = make_dataset_handle(hdl, parent);
1077 	if (cp)
1078 		*cp = '/';
1079 
1080 	if (zhp) {
1081 		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1082 		zfs_close(zhp);
1083 	}
1084 
1085 	return (err == EEXIST ? 0 : ENOENT);
1086 
1087 }
1088 
1089 /*
1090  * Return true if dataset guid1 is created before guid2.
1091  */
1092 static int
1093 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1094     uint64_t guid1, uint64_t guid2)
1095 {
1096 	nvlist_t *nvfs;
1097 	char *fsname, *snapname;
1098 	char buf[ZFS_MAXNAMELEN];
1099 	int rv;
1100 	zfs_node_t zn1, zn2;
1101 
1102 	if (guid2 == 0)
1103 		return (0);
1104 	if (guid1 == 0)
1105 		return (1);
1106 
1107 	nvfs = fsavl_find(avl, guid1, &snapname);
1108 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1109 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1110 	zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1111 	if (zn1.zn_handle == NULL)
1112 		return (-1);
1113 
1114 	nvfs = fsavl_find(avl, guid2, &snapname);
1115 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1116 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1117 	zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1118 	if (zn2.zn_handle == NULL) {
1119 		zfs_close(zn2.zn_handle);
1120 		return (-1);
1121 	}
1122 
1123 	rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
1124 
1125 	zfs_close(zn1.zn_handle);
1126 	zfs_close(zn2.zn_handle);
1127 
1128 	return (rv);
1129 }
1130 
1131 static int
1132 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1133     recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
1134 {
1135 	nvlist_t *local_nv;
1136 	avl_tree_t *local_avl;
1137 	nvpair_t *fselem, *nextfselem;
1138 	char *tosnap, *fromsnap;
1139 	char newname[ZFS_MAXNAMELEN];
1140 	int error;
1141 	boolean_t needagain, progress;
1142 	char *s1, *s2;
1143 
1144 	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1145 	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
1146 
1147 	if (flags.dryrun)
1148 		return (0);
1149 
1150 again:
1151 	needagain = progress = B_FALSE;
1152 
1153 	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1154 	    &local_nv, &local_avl)) != 0)
1155 		return (error);
1156 
1157 	/*
1158 	 * Process deletes and renames
1159 	 */
1160 	for (fselem = nvlist_next_nvpair(local_nv, NULL);
1161 	    fselem; fselem = nextfselem) {
1162 		nvlist_t *nvfs, *snaps;
1163 		nvlist_t *stream_nvfs = NULL;
1164 		nvpair_t *snapelem, *nextsnapelem;
1165 		uint64_t fromguid = 0;
1166 		uint64_t originguid = 0;
1167 		uint64_t stream_originguid = 0;
1168 		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1169 		char *fsname, *stream_fsname;
1170 
1171 		nextfselem = nvlist_next_nvpair(local_nv, fselem);
1172 
1173 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1174 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1175 		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1176 		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1177 		    &parent_fromsnap_guid));
1178 		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1179 
1180 		/*
1181 		 * First find the stream's fs, so we can check for
1182 		 * a different origin (due to "zfs promote")
1183 		 */
1184 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
1185 		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1186 			uint64_t thisguid;
1187 
1188 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1189 			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1190 
1191 			if (stream_nvfs != NULL)
1192 				break;
1193 		}
1194 
1195 		/* check for promote */
1196 		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
1197 		    &stream_originguid);
1198 		if (stream_nvfs && originguid != stream_originguid) {
1199 			switch (created_before(hdl, local_avl,
1200 			    stream_originguid, originguid)) {
1201 			case 1: {
1202 				/* promote it! */
1203 				zfs_cmd_t zc = { 0 };
1204 				nvlist_t *origin_nvfs;
1205 				char *origin_fsname;
1206 
1207 				if (flags.verbose)
1208 					(void) printf("promoting %s\n", fsname);
1209 
1210 				origin_nvfs = fsavl_find(local_avl, originguid,
1211 				    NULL);
1212 				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
1213 				    "name", &origin_fsname));
1214 				(void) strlcpy(zc.zc_value, origin_fsname,
1215 				    sizeof (zc.zc_value));
1216 				(void) strlcpy(zc.zc_name, fsname,
1217 				    sizeof (zc.zc_name));
1218 				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
1219 				if (error == 0)
1220 					progress = B_TRUE;
1221 				break;
1222 			}
1223 			default:
1224 				break;
1225 			case -1:
1226 				fsavl_destroy(local_avl);
1227 				nvlist_free(local_nv);
1228 				return (-1);
1229 			}
1230 			/*
1231 			 * We had/have the wrong origin, therefore our
1232 			 * list of snapshots is wrong.  Need to handle
1233 			 * them on the next pass.
1234 			 */
1235 			needagain = B_TRUE;
1236 			continue;
1237 		}
1238 
1239 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
1240 		    snapelem; snapelem = nextsnapelem) {
1241 			uint64_t thisguid;
1242 			char *stream_snapname;
1243 			nvlist_t *found, *props;
1244 
1245 			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
1246 
1247 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1248 			found = fsavl_find(stream_avl, thisguid,
1249 			    &stream_snapname);
1250 
1251 			/* check for delete */
1252 			if (found == NULL) {
1253 				char name[ZFS_MAXNAMELEN];
1254 
1255 				if (!flags.force)
1256 					continue;
1257 
1258 				(void) snprintf(name, sizeof (name), "%s@%s",
1259 				    fsname, nvpair_name(snapelem));
1260 
1261 				error = recv_destroy(hdl, name,
1262 				    strlen(fsname)+1, newname, flags);
1263 				if (error)
1264 					needagain = B_TRUE;
1265 				else
1266 					progress = B_TRUE;
1267 				continue;
1268 			}
1269 
1270 			stream_nvfs = found;
1271 
1272 			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
1273 			    &props) && 0 == nvlist_lookup_nvlist(props,
1274 			    stream_snapname, &props)) {
1275 				zfs_cmd_t zc = { 0 };
1276 
1277 				zc.zc_cookie = B_TRUE; /* clear current props */
1278 				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
1279 				    "%s@%s", fsname, nvpair_name(snapelem));
1280 				if (zcmd_write_src_nvlist(hdl, &zc,
1281 				    props) == 0) {
1282 					(void) zfs_ioctl(hdl,
1283 					    ZFS_IOC_SET_PROP, &zc);
1284 					zcmd_free_nvlists(&zc);
1285 				}
1286 			}
1287 
1288 			/* check for different snapname */
1289 			if (strcmp(nvpair_name(snapelem),
1290 			    stream_snapname) != 0) {
1291 				char name[ZFS_MAXNAMELEN];
1292 				char tryname[ZFS_MAXNAMELEN];
1293 
1294 				(void) snprintf(name, sizeof (name), "%s@%s",
1295 				    fsname, nvpair_name(snapelem));
1296 				(void) snprintf(tryname, sizeof (name), "%s@%s",
1297 				    fsname, stream_snapname);
1298 
1299 				error = recv_rename(hdl, name, tryname,
1300 				    strlen(fsname)+1, newname, flags);
1301 				if (error)
1302 					needagain = B_TRUE;
1303 				else
1304 					progress = B_TRUE;
1305 			}
1306 
1307 			if (strcmp(stream_snapname, fromsnap) == 0)
1308 				fromguid = thisguid;
1309 		}
1310 
1311 		/* check for delete */
1312 		if (stream_nvfs == NULL) {
1313 			if (!flags.force)
1314 				continue;
1315 
1316 			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
1317 			    newname, flags);
1318 			if (error)
1319 				needagain = B_TRUE;
1320 			else
1321 				progress = B_TRUE;
1322 			continue;
1323 		}
1324 
1325 		if (fromguid == 0 && flags.verbose) {
1326 			(void) printf("local fs %s does not have fromsnap "
1327 			    "(%s in stream); must have been deleted locally; "
1328 			    "ignoring\n", fsname, fromsnap);
1329 			continue;
1330 		}
1331 
1332 		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
1333 		    "name", &stream_fsname));
1334 		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
1335 		    "parentfromsnap", &stream_parent_fromsnap_guid));
1336 
1337 		s1 = strrchr(fsname, '/');
1338 		s2 = strrchr(stream_fsname, '/');
1339 
1340 		/* check for rename */
1341 		if ((stream_parent_fromsnap_guid != 0 &&
1342 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
1343 		    ((s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
1344 			nvlist_t *parent;
1345 			char tryname[ZFS_MAXNAMELEN];
1346 
1347 			parent = fsavl_find(local_avl,
1348 			    stream_parent_fromsnap_guid, NULL);
1349 			/*
1350 			 * NB: parent might not be found if we used the
1351 			 * tosnap for stream_parent_fromsnap_guid,
1352 			 * because the parent is a newly-created fs;
1353 			 * we'll be able to rename it after we recv the
1354 			 * new fs.
1355 			 */
1356 			if (parent != NULL) {
1357 				char *pname;
1358 
1359 				VERIFY(0 == nvlist_lookup_string(parent, "name",
1360 				    &pname));
1361 				(void) snprintf(tryname, sizeof (tryname),
1362 				    "%s%s", pname, strrchr(stream_fsname, '/'));
1363 			} else {
1364 				tryname[0] = '\0';
1365 				if (flags.verbose) {
1366 					(void) printf("local fs %s new parent "
1367 					    "not found\n", fsname);
1368 				}
1369 			}
1370 
1371 			error = recv_rename(hdl, fsname, tryname,
1372 			    strlen(tofs)+1, newname, flags);
1373 			if (error)
1374 				needagain = B_TRUE;
1375 			else
1376 				progress = B_TRUE;
1377 		}
1378 	}
1379 
1380 	fsavl_destroy(local_avl);
1381 	nvlist_free(local_nv);
1382 
1383 	if (needagain && progress) {
1384 		/* do another pass to fix up temporary names */
1385 		if (flags.verbose)
1386 			(void) printf("another pass:\n");
1387 		goto again;
1388 	}
1389 
1390 	return (needagain);
1391 }
1392 
1393 static int
1394 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
1395     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
1396     char **top_zfs)
1397 {
1398 	nvlist_t *stream_nv = NULL;
1399 	avl_tree_t *stream_avl = NULL;
1400 	char *fromsnap = NULL;
1401 	char tofs[ZFS_MAXNAMELEN];
1402 	char errbuf[1024];
1403 	dmu_replay_record_t drre;
1404 	int error;
1405 	boolean_t anyerr = B_FALSE;
1406 	boolean_t softerr = B_FALSE;
1407 
1408 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1409 	    "cannot receive"));
1410 
1411 	if (strchr(destname, '@')) {
1412 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1413 		    "can not specify snapshot name for multi-snapshot stream"));
1414 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
1415 	}
1416 
1417 	assert(drr->drr_type == DRR_BEGIN);
1418 	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
1419 	assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
1420 
1421 	/*
1422 	 * Read in the nvlist from the stream.
1423 	 */
1424 	if (drr->drr_payloadlen != 0) {
1425 		if (!flags.isprefix) {
1426 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1427 			    "must use -d to receive replication "
1428 			    "(send -R) stream"));
1429 			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
1430 		}
1431 
1432 		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
1433 		    &stream_nv, flags.byteswap, zc);
1434 		if (error) {
1435 			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1436 			goto out;
1437 		}
1438 	}
1439 
1440 	/*
1441 	 * Read in the end record and verify checksum.
1442 	 */
1443 	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
1444 	    flags.byteswap, NULL)))
1445 		goto out;
1446 	if (flags.byteswap) {
1447 		drre.drr_type = BSWAP_32(drre.drr_type);
1448 		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
1449 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
1450 		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
1451 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
1452 		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
1453 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
1454 		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
1455 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
1456 	}
1457 	if (drre.drr_type != DRR_END) {
1458 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1459 		goto out;
1460 	}
1461 	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
1462 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1463 		    "incorrect header checksum"));
1464 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1465 		goto out;
1466 	}
1467 
1468 	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
1469 
1470 	if (drr->drr_payloadlen != 0) {
1471 		nvlist_t *stream_fss;
1472 
1473 		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
1474 		    &stream_fss));
1475 		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
1476 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1477 			    "couldn't allocate avl tree"));
1478 			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
1479 			goto out;
1480 		}
1481 
1482 		if (fromsnap != NULL) {
1483 			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
1484 			if (flags.isprefix) {
1485 				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
1486 				    "/@");
1487 				/* zfs_receive_one() will create_parents() */
1488 				(void) strlcat(tofs,
1489 				    &drr->drr_u.drr_begin.drr_toname[i],
1490 				    ZFS_MAXNAMELEN);
1491 				*strchr(tofs, '@') = '\0';
1492 			}
1493 			softerr = recv_incremental_replication(hdl, tofs,
1494 			    flags, stream_nv, stream_avl);
1495 		}
1496 	}
1497 
1498 
1499 	/* Finally, receive each contained stream */
1500 	do {
1501 		/*
1502 		 * we should figure out if it has a recoverable
1503 		 * error, in which case do a recv_skip() and drive on.
1504 		 * Note, if we fail due to already having this guid,
1505 		 * zfs_receive_one() will take care of it (ie,
1506 		 * recv_skip() and return 0).
1507 		 */
1508 		error = zfs_receive_impl(hdl, destname, flags, fd,
1509 		    stream_avl, top_zfs);
1510 		if (error == ENODATA) {
1511 			error = 0;
1512 			break;
1513 		}
1514 		anyerr |= error;
1515 	} while (error == 0);
1516 
1517 	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
1518 		/*
1519 		 * Now that we have the fs's they sent us, try the
1520 		 * renames again.
1521 		 */
1522 		softerr = recv_incremental_replication(hdl, tofs, flags,
1523 		    stream_nv, stream_avl);
1524 	}
1525 
1526 out:
1527 	fsavl_destroy(stream_avl);
1528 	if (stream_nv)
1529 		nvlist_free(stream_nv);
1530 	if (softerr)
1531 		error = -2;
1532 	if (anyerr)
1533 		error = -1;
1534 	return (error);
1535 }
1536 
1537 static int
1538 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
1539 {
1540 	dmu_replay_record_t *drr;
1541 	void *buf = malloc(1<<20);
1542 
1543 	/* XXX would be great to use lseek if possible... */
1544 	drr = buf;
1545 
1546 	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
1547 	    byteswap, NULL) == 0) {
1548 		if (byteswap)
1549 			drr->drr_type = BSWAP_32(drr->drr_type);
1550 
1551 		switch (drr->drr_type) {
1552 		case DRR_BEGIN:
1553 			/* NB: not to be used on v2 stream packages */
1554 			assert(drr->drr_payloadlen == 0);
1555 			break;
1556 
1557 		case DRR_END:
1558 			free(buf);
1559 			return (0);
1560 
1561 		case DRR_OBJECT:
1562 			if (byteswap) {
1563 				drr->drr_u.drr_object.drr_bonuslen =
1564 				    BSWAP_32(drr->drr_u.drr_object.
1565 				    drr_bonuslen);
1566 			}
1567 			(void) recv_read(hdl, fd, buf,
1568 			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
1569 			    B_FALSE, NULL);
1570 			break;
1571 
1572 		case DRR_WRITE:
1573 			if (byteswap) {
1574 				drr->drr_u.drr_write.drr_length =
1575 				    BSWAP_64(drr->drr_u.drr_write.drr_length);
1576 			}
1577 			(void) recv_read(hdl, fd, buf,
1578 			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
1579 			break;
1580 
1581 		case DRR_FREEOBJECTS:
1582 		case DRR_FREE:
1583 			break;
1584 
1585 		default:
1586 			assert(!"invalid record type");
1587 		}
1588 	}
1589 
1590 	free(buf);
1591 	return (-1);
1592 }
1593 
1594 /*
1595  * Restores a backup of tosnap from the file descriptor specified by infd.
1596  */
1597 static int
1598 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
1599     recvflags_t flags, dmu_replay_record_t *drr,
1600     dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
1601     char **top_zfs)
1602 {
1603 	zfs_cmd_t zc = { 0 };
1604 	time_t begin_time;
1605 	int ioctl_err, ioctl_errno, err, choplen;
1606 	char *cp;
1607 	struct drr_begin *drrb = &drr->drr_u.drr_begin;
1608 	char errbuf[1024];
1609 	char chopprefix[ZFS_MAXNAMELEN];
1610 	boolean_t newfs = B_FALSE;
1611 	boolean_t stream_wantsnewfs;
1612 	uint64_t parent_snapguid = 0;
1613 	prop_changelist_t *clp = NULL;
1614 	nvlist_t *snapprops_nvlist = NULL;
1615 
1616 	begin_time = time(NULL);
1617 
1618 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1619 	    "cannot receive"));
1620 
1621 	if (stream_avl != NULL) {
1622 		char *snapname;
1623 		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
1624 		    &snapname);
1625 		nvlist_t *props;
1626 		int ret;
1627 
1628 		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
1629 		    &parent_snapguid);
1630 		err = nvlist_lookup_nvlist(fs, "props", &props);
1631 		if (err)
1632 			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
1633 
1634 		if (flags.canmountoff) {
1635 			VERIFY(0 == nvlist_add_uint64(props,
1636 			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
1637 		}
1638 		ret = zcmd_write_src_nvlist(hdl, &zc, props);
1639 		if (err)
1640 			nvlist_free(props);
1641 
1642 		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
1643 			VERIFY(0 == nvlist_lookup_nvlist(props,
1644 			    snapname, &snapprops_nvlist));
1645 		}
1646 
1647 		if (ret != 0)
1648 			return (-1);
1649 	}
1650 
1651 	/*
1652 	 * Determine how much of the snapshot name stored in the stream
1653 	 * we are going to tack on to the name they specified on the
1654 	 * command line, and how much we are going to chop off.
1655 	 *
1656 	 * If they specified a snapshot, chop the entire name stored in
1657 	 * the stream.
1658 	 */
1659 	(void) strcpy(chopprefix, drrb->drr_toname);
1660 	if (flags.isprefix) {
1661 		/*
1662 		 * They specified a fs with -d, we want to tack on
1663 		 * everything but the pool name stored in the stream
1664 		 */
1665 		if (strchr(tosnap, '@')) {
1666 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1667 			    "argument - snapshot not allowed with -d"));
1668 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
1669 		}
1670 		cp = strchr(chopprefix, '/');
1671 		if (cp == NULL)
1672 			cp = strchr(chopprefix, '@');
1673 		*cp = '\0';
1674 	} else if (strchr(tosnap, '@') == NULL) {
1675 		/*
1676 		 * If they specified a filesystem without -d, we want to
1677 		 * tack on everything after the fs specified in the
1678 		 * first name from the stream.
1679 		 */
1680 		cp = strchr(chopprefix, '@');
1681 		*cp = '\0';
1682 	}
1683 	choplen = strlen(chopprefix);
1684 
1685 	/*
1686 	 * Determine name of destination snapshot, store in zc_value.
1687 	 */
1688 	(void) strcpy(zc.zc_value, tosnap);
1689 	(void) strncat(zc.zc_value, drrb->drr_toname+choplen,
1690 	    sizeof (zc.zc_value));
1691 	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
1692 		zcmd_free_nvlists(&zc);
1693 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
1694 	}
1695 
1696 	/*
1697 	 * Determine the name of the origin snapshot, store in zc_string.
1698 	 */
1699 	if (drrb->drr_flags & DRR_FLAG_CLONE) {
1700 		if (guid_to_name(hdl, tosnap,
1701 		    drrb->drr_fromguid, zc.zc_string) != 0) {
1702 			zcmd_free_nvlists(&zc);
1703 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1704 			    "local origin for clone %s does not exist"),
1705 			    zc.zc_value);
1706 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1707 		}
1708 		if (flags.verbose)
1709 			(void) printf("found clone origin %s\n", zc.zc_string);
1710 	}
1711 
1712 	stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
1713 	    (drrb->drr_flags & DRR_FLAG_CLONE));
1714 
1715 	if (stream_wantsnewfs) {
1716 		/*
1717 		 * if the parent fs does not exist, look for it based on
1718 		 * the parent snap GUID
1719 		 */
1720 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1721 		    "cannot receive new filesystem stream"));
1722 
1723 		(void) strcpy(zc.zc_name, zc.zc_value);
1724 		cp = strrchr(zc.zc_name, '/');
1725 		if (cp)
1726 			*cp = '\0';
1727 		if (cp &&
1728 		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1729 			char suffix[ZFS_MAXNAMELEN];
1730 			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
1731 			if (guid_to_name(hdl, tosnap, parent_snapguid,
1732 			    zc.zc_value) == 0) {
1733 				*strchr(zc.zc_value, '@') = '\0';
1734 				(void) strcat(zc.zc_value, suffix);
1735 			}
1736 		}
1737 	} else {
1738 		/*
1739 		 * if the fs does not exist, look for it based on the
1740 		 * fromsnap GUID
1741 		 */
1742 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1743 		    "cannot receive incremental stream"));
1744 
1745 		(void) strcpy(zc.zc_name, zc.zc_value);
1746 		*strchr(zc.zc_name, '@') = '\0';
1747 
1748 		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1749 			char snap[ZFS_MAXNAMELEN];
1750 			(void) strcpy(snap, strchr(zc.zc_value, '@'));
1751 			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
1752 			    zc.zc_value) == 0) {
1753 				*strchr(zc.zc_value, '@') = '\0';
1754 				(void) strcat(zc.zc_value, snap);
1755 			}
1756 		}
1757 	}
1758 
1759 	(void) strcpy(zc.zc_name, zc.zc_value);
1760 	*strchr(zc.zc_name, '@') = '\0';
1761 
1762 	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1763 		zfs_handle_t *zhp;
1764 		/*
1765 		 * Destination fs exists.  Therefore this should either
1766 		 * be an incremental, or the stream specifies a new fs
1767 		 * (full stream or clone) and they want us to blow it
1768 		 * away (and have therefore specified -F and removed any
1769 		 * snapshots).
1770 		 */
1771 
1772 		if (stream_wantsnewfs) {
1773 			if (!flags.force) {
1774 				zcmd_free_nvlists(&zc);
1775 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1776 				    "destination '%s' exists\n"
1777 				    "must specify -F to overwrite it"),
1778 				    zc.zc_name);
1779 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1780 			}
1781 			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
1782 			    &zc) == 0) {
1783 				zcmd_free_nvlists(&zc);
1784 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1785 				    "destination has snapshots (eg. %s)\n"
1786 				    "must destroy them to overwrite it"),
1787 				    zc.zc_name);
1788 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1789 			}
1790 		}
1791 
1792 		if ((zhp = zfs_open(hdl, zc.zc_name,
1793 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1794 			zcmd_free_nvlists(&zc);
1795 			return (-1);
1796 		}
1797 
1798 		if (stream_wantsnewfs &&
1799 		    zhp->zfs_dmustats.dds_origin[0]) {
1800 			zcmd_free_nvlists(&zc);
1801 			zfs_close(zhp);
1802 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1803 			    "destination '%s' is a clone\n"
1804 			    "must destroy it to overwrite it"),
1805 			    zc.zc_name);
1806 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1807 		}
1808 
1809 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
1810 		    stream_wantsnewfs) {
1811 			/* We can't do online recv in this case */
1812 			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
1813 			if (clp == NULL) {
1814 				zfs_close(zhp);
1815 				zcmd_free_nvlists(&zc);
1816 				return (-1);
1817 			}
1818 			if (changelist_prefix(clp) != 0) {
1819 				changelist_free(clp);
1820 				zfs_close(zhp);
1821 				zcmd_free_nvlists(&zc);
1822 				return (-1);
1823 			}
1824 		}
1825 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
1826 		    zvol_remove_link(hdl, zhp->zfs_name) != 0) {
1827 			zfs_close(zhp);
1828 			zcmd_free_nvlists(&zc);
1829 			return (-1);
1830 		}
1831 		zfs_close(zhp);
1832 	} else {
1833 		/*
1834 		 * Destination filesystem does not exist.  Therefore we better
1835 		 * be creating a new filesystem (either from a full backup, or
1836 		 * a clone).  It would therefore be invalid if the user
1837 		 * specified only the pool name (i.e. if the destination name
1838 		 * contained no slash character).
1839 		 */
1840 		if (!stream_wantsnewfs ||
1841 		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
1842 			zcmd_free_nvlists(&zc);
1843 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1844 			    "destination '%s' does not exist"), zc.zc_name);
1845 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1846 		}
1847 
1848 		/*
1849 		 * Trim off the final dataset component so we perform the
1850 		 * recvbackup ioctl to the filesystems's parent.
1851 		 */
1852 		*cp = '\0';
1853 
1854 		if (flags.isprefix && !flags.dryrun &&
1855 		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
1856 			zcmd_free_nvlists(&zc);
1857 			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
1858 		}
1859 
1860 		newfs = B_TRUE;
1861 	}
1862 
1863 	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
1864 	zc.zc_cookie = infd;
1865 	zc.zc_guid = flags.force;
1866 	if (flags.verbose) {
1867 		(void) printf("%s %s stream of %s into %s\n",
1868 		    flags.dryrun ? "would receive" : "receiving",
1869 		    drrb->drr_fromguid ? "incremental" : "full",
1870 		    drrb->drr_toname, zc.zc_value);
1871 		(void) fflush(stdout);
1872 	}
1873 
1874 	if (flags.dryrun) {
1875 		zcmd_free_nvlists(&zc);
1876 		return (recv_skip(hdl, infd, flags.byteswap));
1877 	}
1878 
1879 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
1880 	ioctl_errno = errno;
1881 	zcmd_free_nvlists(&zc);
1882 
1883 	if (err == 0 && snapprops_nvlist) {
1884 		zfs_cmd_t zc2 = { 0 };
1885 
1886 		(void) strcpy(zc2.zc_name, zc.zc_value);
1887 		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
1888 			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
1889 			zcmd_free_nvlists(&zc2);
1890 		}
1891 	}
1892 
1893 	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
1894 		/*
1895 		 * It may be that this snapshot already exists,
1896 		 * in which case we want to consume & ignore it
1897 		 * rather than failing.
1898 		 */
1899 		avl_tree_t *local_avl;
1900 		nvlist_t *local_nv, *fs;
1901 		char *cp = strchr(zc.zc_value, '@');
1902 
1903 		/*
1904 		 * XXX Do this faster by just iterating over snaps in
1905 		 * this fs.  Also if zc_value does not exist, we will
1906 		 * get a strange "does not exist" error message.
1907 		 */
1908 		*cp = '\0';
1909 		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
1910 		    &local_nv, &local_avl) == 0) {
1911 			*cp = '@';
1912 			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
1913 			fsavl_destroy(local_avl);
1914 			nvlist_free(local_nv);
1915 
1916 			if (fs != NULL) {
1917 				if (flags.verbose) {
1918 					(void) printf("snap %s already exists; "
1919 					    "ignoring\n", zc.zc_value);
1920 				}
1921 				ioctl_err = recv_skip(hdl, infd,
1922 				    flags.byteswap);
1923 			}
1924 		}
1925 		*cp = '@';
1926 	}
1927 
1928 
1929 	if (ioctl_err != 0) {
1930 		switch (ioctl_errno) {
1931 		case ENODEV:
1932 			cp = strchr(zc.zc_value, '@');
1933 			*cp = '\0';
1934 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1935 			    "most recent snapshot of %s does not\n"
1936 			    "match incremental source"), zc.zc_value);
1937 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
1938 			*cp = '@';
1939 			break;
1940 		case ETXTBSY:
1941 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1942 			    "destination %s has been modified\n"
1943 			    "since most recent snapshot"), zc.zc_name);
1944 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
1945 			break;
1946 		case EEXIST:
1947 			cp = strchr(zc.zc_value, '@');
1948 			if (newfs) {
1949 				/* it's the containing fs that exists */
1950 				*cp = '\0';
1951 			}
1952 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1953 			    "destination already exists"));
1954 			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
1955 			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
1956 			    zc.zc_value);
1957 			*cp = '@';
1958 			break;
1959 		case EINVAL:
1960 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1961 			break;
1962 		case ECKSUM:
1963 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1964 			    "invalid stream (checksum mismatch)"));
1965 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1966 			break;
1967 		default:
1968 			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
1969 		}
1970 	}
1971 
1972 	/*
1973 	 * Mount or recreate the /dev links for the target filesystem
1974 	 * (if created, or if we tore them down to do an incremental
1975 	 * restore), and the /dev links for the new snapshot (if
1976 	 * created). Also mount any children of the target filesystem
1977 	 * if we did a replication receive (indicated by stream_avl
1978 	 * being non-NULL).
1979 	 */
1980 	cp = strchr(zc.zc_value, '@');
1981 	if (cp && (ioctl_err == 0 || !newfs)) {
1982 		zfs_handle_t *h;
1983 
1984 		*cp = '\0';
1985 		h = zfs_open(hdl, zc.zc_value,
1986 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
1987 		if (h != NULL) {
1988 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
1989 				*cp = '@';
1990 				err = zvol_create_link(hdl, h->zfs_name);
1991 				if (err == 0 && ioctl_err == 0)
1992 					err = zvol_create_link(hdl,
1993 					    zc.zc_value);
1994 			} else if (newfs || stream_avl) {
1995 				/*
1996 				 * Track the first/top of hierarchy fs,
1997 				 * for mounting and sharing later.
1998 				 */
1999 				if (top_zfs && *top_zfs == NULL)
2000 					*top_zfs = zfs_strdup(hdl, zc.zc_value);
2001 			}
2002 			zfs_close(h);
2003 		}
2004 		*cp = '@';
2005 	}
2006 
2007 	if (clp) {
2008 		err |= changelist_postfix(clp);
2009 		changelist_free(clp);
2010 	}
2011 
2012 	if (err || ioctl_err)
2013 		return (-1);
2014 
2015 	if (flags.verbose) {
2016 		char buf1[64];
2017 		char buf2[64];
2018 		uint64_t bytes = zc.zc_cookie;
2019 		time_t delta = time(NULL) - begin_time;
2020 		if (delta == 0)
2021 			delta = 1;
2022 		zfs_nicenum(bytes, buf1, sizeof (buf1));
2023 		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
2024 
2025 		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
2026 		    buf1, delta, buf2);
2027 	}
2028 
2029 	return (0);
2030 }
2031 
2032 static int
2033 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2034     int infd, avl_tree_t *stream_avl, char **top_zfs)
2035 {
2036 	int err;
2037 	dmu_replay_record_t drr, drr_noswap;
2038 	struct drr_begin *drrb = &drr.drr_u.drr_begin;
2039 	char errbuf[1024];
2040 	zio_cksum_t zcksum = { 0 };
2041 
2042 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2043 	    "cannot receive"));
2044 
2045 	if (flags.isprefix &&
2046 	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
2047 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
2048 		    "(%s) does not exist"), tosnap);
2049 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
2050 	}
2051 
2052 	/* read in the BEGIN record */
2053 	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
2054 	    &zcksum)))
2055 		return (err);
2056 
2057 	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
2058 		/* It's the double end record at the end of a package */
2059 		return (ENODATA);
2060 	}
2061 
2062 	/* the kernel needs the non-byteswapped begin record */
2063 	drr_noswap = drr;
2064 
2065 	flags.byteswap = B_FALSE;
2066 	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
2067 		/*
2068 		 * We computed the checksum in the wrong byteorder in
2069 		 * recv_read() above; do it again correctly.
2070 		 */
2071 		bzero(&zcksum, sizeof (zio_cksum_t));
2072 		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
2073 		flags.byteswap = B_TRUE;
2074 
2075 		drr.drr_type = BSWAP_32(drr.drr_type);
2076 		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
2077 		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
2078 		drrb->drr_version = BSWAP_64(drrb->drr_version);
2079 		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
2080 		drrb->drr_type = BSWAP_32(drrb->drr_type);
2081 		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
2082 		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
2083 		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
2084 	}
2085 
2086 	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
2087 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2088 		    "stream (bad magic number)"));
2089 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2090 	}
2091 
2092 	if (strchr(drrb->drr_toname, '@') == NULL) {
2093 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2094 		    "stream (bad snapshot name)"));
2095 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2096 	}
2097 
2098 	if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
2099 		return (zfs_receive_one(hdl, infd, tosnap, flags,
2100 		    &drr, &drr_noswap, stream_avl, top_zfs));
2101 	} else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
2102 		return (zfs_receive_package(hdl, infd, tosnap, flags,
2103 		    &drr, &zcksum, top_zfs));
2104 	} else {
2105 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2106 		    "stream is unsupported version %llu"),
2107 		    drrb->drr_version);
2108 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2109 	}
2110 }
2111 
2112 /*
2113  * Restores a backup of tosnap from the file descriptor specified by infd.
2114  * Return 0 on total success, -2 if some things couldn't be
2115  * destroyed/renamed/promoted, -1 if some things couldn't be received.
2116  * (-1 will override -2).
2117  */
2118 int
2119 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2120     int infd, avl_tree_t *stream_avl)
2121 {
2122 	char *top_zfs = NULL;
2123 	int err;
2124 
2125 	err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
2126 
2127 	if (err == 0 && !flags.nomount && top_zfs) {
2128 		zfs_handle_t *zhp;
2129 		prop_changelist_t *clp;
2130 
2131 		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
2132 		if (zhp != NULL) {
2133 			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
2134 			    CL_GATHER_MOUNT_ALWAYS, 0);
2135 			zfs_close(zhp);
2136 			if (clp != NULL) {
2137 				/* mount and share received datasets */
2138 				err = changelist_postfix(clp);
2139 				changelist_free(clp);
2140 			}
2141 		}
2142 		if (zhp == NULL || clp == NULL || err)
2143 			err = -1;
2144 	}
2145 	if (top_zfs)
2146 		free(top_zfs);
2147 
2148 	return (err);
2149 }
2150