xref: /titanic_44/usr/src/lib/libzfs/common/libzfs_sendrecv.c (revision 9a4611f412a6b1f7a0bc7d53d2bb046a95daa4bc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <assert.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <libdevinfo.h>
31 #include <libintl.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <strings.h>
35 #include <unistd.h>
36 #include <stddef.h>
37 #include <fcntl.h>
38 #include <sys/mount.h>
39 #include <sys/mntent.h>
40 #include <sys/mnttab.h>
41 #include <sys/avl.h>
42 #include <stddef.h>
43 
44 #include <libzfs.h>
45 
46 #include "zfs_namecheck.h"
47 #include "zfs_prop.h"
48 #include "libzfs_impl.h"
49 
50 #include <fletcher.c> /* XXX */
51 
/*
 * Forward declaration so that code earlier in the file can call
 * zfs_receive_impl() before its definition (presumably further down in
 * this file -- confirm).
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
    int, avl_tree_t *, char **);
54 
55 /*
56  * Routines for dealing with the AVL tree of fs-nvlists
57  */
/*
 * One entry of the guid-ordered AVL tree built by fsavl_create().  It maps
 * a snapshot guid to the fs-nvlist that lists the snapshot and to the
 * snapshot's name.  Neither pointer is freed by fsavl_destroy(); only the
 * node itself is.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage (offset given to avl_create) */
	nvlist_t *fn_nvfs;	/* nvlist of the fs that lists this snapshot */
	char *fn_snapname;	/* snapshot name, as returned by nvpair_name */
	uint64_t fn_guid;	/* snapshot guid; the tree's sort key */
} fsavl_node_t;
64 
65 static int
66 fsavl_compare(const void *arg1, const void *arg2)
67 {
68 	const fsavl_node_t *fn1 = arg1;
69 	const fsavl_node_t *fn2 = arg2;
70 
71 	if (fn1->fn_guid > fn2->fn_guid)
72 		return (+1);
73 	else if (fn1->fn_guid < fn2->fn_guid)
74 		return (-1);
75 	else
76 		return (0);
77 }
78 
79 /*
80  * Given the GUID of a snapshot, find its containing filesystem and
81  * (optionally) name.
82  */
83 static nvlist_t *
84 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
85 {
86 	fsavl_node_t fn_find;
87 	fsavl_node_t *fn;
88 
89 	fn_find.fn_guid = snapguid;
90 
91 	fn = avl_find(avl, &fn_find, NULL);
92 	if (fn) {
93 		if (snapname)
94 			*snapname = fn->fn_snapname;
95 		return (fn->fn_nvfs);
96 	}
97 	return (NULL);
98 }
99 
100 static void
101 fsavl_destroy(avl_tree_t *avl)
102 {
103 	fsavl_node_t *fn;
104 	void *cookie;
105 
106 	if (avl == NULL)
107 		return;
108 
109 	cookie = NULL;
110 	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
111 		free(fn);
112 	avl_destroy(avl);
113 	free(avl);
114 }
115 
116 /*
117  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
118  */
119 static avl_tree_t *
120 fsavl_create(nvlist_t *fss)
121 {
122 	avl_tree_t *fsavl;
123 	nvpair_t *fselem = NULL;
124 
125 	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
126 		return (NULL);
127 
128 	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
129 	    offsetof(fsavl_node_t, fn_node));
130 
131 	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
132 		nvlist_t *nvfs, *snaps;
133 		nvpair_t *snapelem = NULL;
134 
135 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
136 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
137 
138 		while ((snapelem =
139 		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
140 			fsavl_node_t *fn;
141 			uint64_t guid;
142 
143 			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
144 			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
145 				fsavl_destroy(fsavl);
146 				return (NULL);
147 			}
148 			fn->fn_nvfs = nvfs;
149 			fn->fn_snapname = nvpair_name(snapelem);
150 			fn->fn_guid = guid;
151 
152 			/*
153 			 * Note: if there are multiple snaps with the
154 			 * same GUID, we ignore all but one.
155 			 */
156 			if (avl_find(fsavl, fn, NULL) == NULL)
157 				avl_add(fsavl, fn);
158 			else
159 				free(fn);
160 		}
161 	}
162 
163 	return (fsavl);
164 }
165 
166 /*
167  * Routines for dealing with the giant nvlist of fs-nvlists, etc.
168  */
/*
 * State threaded through send_iterate_fs() / send_iterate_snap() while the
 * "fss" nvlist is being assembled by gather_nvlist().
 */
typedef struct send_data {
	/*
	 * guid of the "from" snapshot in the filesystem being visited; set
	 * by send_iterate_snap(), saved and restored around each filesystem
	 * by send_iterate_fs() so children see their parent's value.
	 */
	uint64_t parent_fromsnap_guid;
	/* snapshot name -> guid for the filesystem being visited */
	nvlist_t *parent_snaps;
	/* accumulated result: one nvlist per filesystem, keyed by guid */
	nvlist_t *fss;
	/* snapshot name -> property nvlist for the filesystem being visited */
	nvlist_t *snapprops;
	/* short name of the incremental source snapshot, or NULL */
	const char *fromsnap;
	/* short name of the target snapshot, or NULL */
	const char *tosnap;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
200 
/* Forward declaration: send_iterate_snap() calls this before its definition. */
static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
202 
203 static int
204 send_iterate_snap(zfs_handle_t *zhp, void *arg)
205 {
206 	send_data_t *sd = arg;
207 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
208 	char *snapname;
209 	nvlist_t *nv;
210 
211 	snapname = strrchr(zhp->zfs_name, '@')+1;
212 
213 	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
214 	/*
215 	 * NB: if there is no fromsnap here (it's a newly created fs in
216 	 * an incremental replication), we will substitute the tosnap.
217 	 */
218 	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
219 	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
220 	    strcmp(snapname, sd->tosnap) == 0)) {
221 		sd->parent_fromsnap_guid = guid;
222 	}
223 
224 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
225 	send_iterate_prop(zhp, nv);
226 	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
227 	nvlist_free(nv);
228 
229 	zfs_close(zhp);
230 	return (0);
231 }
232 
233 static void
234 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
235 {
236 	nvpair_t *elem = NULL;
237 
238 	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
239 		char *propname = nvpair_name(elem);
240 		zfs_prop_t prop = zfs_name_to_prop(propname);
241 		nvlist_t *propnv;
242 
243 		assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
244 
245 		if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
246 			continue;
247 
248 		verify(nvpair_value_nvlist(elem, &propnv) == 0);
249 		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
250 		    prop == ZFS_PROP_REFQUOTA ||
251 		    prop == ZFS_PROP_REFRESERVATION) {
252 			/* these guys are modifyable, but have no source */
253 			uint64_t value;
254 			verify(nvlist_lookup_uint64(propnv,
255 			    ZPROP_VALUE, &value) == 0);
256 			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
257 				continue;
258 		} else {
259 			char *source;
260 			if (nvlist_lookup_string(propnv,
261 			    ZPROP_SOURCE, &source) != 0)
262 				continue;
263 			if (strcmp(source, zhp->zfs_name) != 0)
264 				continue;
265 		}
266 
267 		if (zfs_prop_user(propname) ||
268 		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
269 			char *value;
270 			verify(nvlist_lookup_string(propnv,
271 			    ZPROP_VALUE, &value) == 0);
272 			VERIFY(0 == nvlist_add_string(nv, propname, value));
273 		} else {
274 			uint64_t value;
275 			verify(nvlist_lookup_uint64(propnv,
276 			    ZPROP_VALUE, &value) == 0);
277 			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
278 		}
279 	}
280 }
281 
282 /*
283  * recursively generate nvlists describing datasets.  See comment
284  * for the data structure send_data_t above for description of contents
285  * of the nvlist.
286  */
287 static int
288 send_iterate_fs(zfs_handle_t *zhp, void *arg)
289 {
290 	send_data_t *sd = arg;
291 	nvlist_t *nvfs, *nv;
292 	int rv;
293 	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
294 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
295 	char guidstring[64];
296 
297 	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
298 	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
299 	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
300 	    sd->parent_fromsnap_guid));
301 
302 	if (zhp->zfs_dmustats.dds_origin[0]) {
303 		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
304 		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
305 		if (origin == NULL)
306 			return (-1);
307 		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
308 		    origin->zfs_dmustats.dds_guid));
309 	}
310 
311 	/* iterate over props */
312 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
313 	send_iterate_prop(zhp, nv);
314 	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
315 	nvlist_free(nv);
316 
317 	/* iterate over snaps, and set sd->parent_fromsnap_guid */
318 	sd->parent_fromsnap_guid = 0;
319 	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
320 	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
321 	(void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
322 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
323 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
324 	nvlist_free(sd->parent_snaps);
325 	nvlist_free(sd->snapprops);
326 
327 	/* add this fs to nvlist */
328 	(void) snprintf(guidstring, sizeof (guidstring),
329 	    "0x%llx", (longlong_t)guid);
330 	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
331 	nvlist_free(nvfs);
332 
333 	/* iterate over children */
334 	rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
335 
336 	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
337 
338 	zfs_close(zhp);
339 	return (rv);
340 }
341 
342 static int
343 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
344     const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
345 {
346 	zfs_handle_t *zhp;
347 	send_data_t sd = { 0 };
348 	int error;
349 
350 	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
351 	if (zhp == NULL)
352 		return (EZFS_BADTYPE);
353 
354 	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
355 	sd.fromsnap = fromsnap;
356 	sd.tosnap = tosnap;
357 
358 	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
359 		nvlist_free(sd.fss);
360 		if (avlp != NULL)
361 			*avlp = NULL;
362 		*nvlp = NULL;
363 		return (error);
364 	}
365 
366 	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
367 		nvlist_free(sd.fss);
368 		*nvlp = NULL;
369 		return (EZFS_NOMEM);
370 	}
371 
372 	*nvlp = sd.fss;
373 	return (0);
374 }
375 
376 /*
377  * Routines for dealing with the sorted snapshot functionality
378  */
/*
 * AVL node wrapping a snapshot handle; used by zfs_iter_snapshots_sorted()
 * to order snapshots with zfs_snapshot_compare().
 */
typedef struct zfs_node {
	zfs_handle_t	*zn_handle;	/* the snapshot's open handle */
	avl_node_t	zn_avlnode;	/* AVL linkage */
} zfs_node_t;
383 
384 static int
385 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
386 {
387 	avl_tree_t *avl = data;
388 	zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
389 
390 	node->zn_handle = zhp;
391 	avl_add(avl, node);
392 	return (0);
393 }
394 
395 /* ARGSUSED */
396 static int
397 zfs_snapshot_compare(const void *larg, const void *rarg)
398 {
399 	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
400 	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
401 	uint64_t lcreate, rcreate;
402 
403 	/*
404 	 * Sort them according to creation time.  We use the hidden
405 	 * CREATETXG property to get an absolute ordering of snapshots.
406 	 */
407 	lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
408 	rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
409 
410 	if (lcreate < rcreate)
411 		return (-1);
412 	else if (lcreate > rcreate)
413 		return (+1);
414 	else
415 		return (0);
416 }
417 
418 static int
419 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
420 {
421 	int ret = 0;
422 	zfs_node_t *node;
423 	avl_tree_t avl;
424 	void *cookie = NULL;
425 
426 	avl_create(&avl, zfs_snapshot_compare,
427 	    sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
428 
429 	ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
430 
431 	for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
432 		ret |= callback(node->zn_handle, data);
433 
434 	while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
435 		free(node);
436 
437 	avl_destroy(&avl);
438 
439 	return (ret);
440 }
441 
442 /*
443  * Routines specific to "zfs send"
444  */
/*
 * State shared by zfs_send(), dump_filesystems(), dump_filesystem() and
 * dump_snapshot() while the per-snapshot streams are being written.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	/* last snapshot seen by dump_snapshot(); source of the next send */
	char lastsnap[ZFS_MAXNAMELEN];
	/*
	 * seenfrom/seento: per-filesystem progress flags used by
	 * dump_snapshot(); replicate/doall/fromorigin: copies of the
	 * corresponding zfs_send() arguments.
	 */
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	/* print "sending from ... to ..." lines on stderr */
	boolean_t verbose;
	/* file descriptor the stream is written to */
	int outfd;
	/* set when a filesystem was skipped with a warning */
	boolean_t err;
	/* "fss" nvlist and its guid index (only set for replication) */
	nvlist_t *fss;
	avl_tree_t *fsavl;
} send_dump_data_t;
457 
458 /*
459  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
460  * NULL) to the file descriptor specified by outfd.
461  */
462 static int
463 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
464     int outfd)
465 {
466 	zfs_cmd_t zc = { 0 };
467 	libzfs_handle_t *hdl = zhp->zfs_hdl;
468 
469 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
470 	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
471 
472 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
473 	if (fromsnap)
474 		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
475 	zc.zc_cookie = outfd;
476 	zc.zc_obj = fromorigin;
477 
478 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
479 		char errbuf[1024];
480 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
481 		    "warning: cannot send '%s'"), zhp->zfs_name);
482 
483 		switch (errno) {
484 
485 		case EXDEV:
486 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
487 			    "not an earlier snapshot from the same fs"));
488 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
489 
490 		case ENOENT:
491 			if (zfs_dataset_exists(hdl, zc.zc_name,
492 			    ZFS_TYPE_SNAPSHOT)) {
493 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
494 				    "incremental source (@%s) does not exist"),
495 				    zc.zc_value);
496 			}
497 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
498 
499 		case EDQUOT:
500 		case EFBIG:
501 		case EIO:
502 		case ENOLINK:
503 		case ENOSPC:
504 		case ENOSTR:
505 		case ENXIO:
506 		case EPIPE:
507 		case ERANGE:
508 		case EFAULT:
509 		case EROFS:
510 			zfs_error_aux(hdl, strerror(errno));
511 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
512 
513 		default:
514 			return (zfs_standard_error(hdl, errno, errbuf));
515 		}
516 	}
517 
518 	return (0);
519 }
520 
521 static int
522 dump_snapshot(zfs_handle_t *zhp, void *arg)
523 {
524 	send_dump_data_t *sdd = arg;
525 	const char *thissnap;
526 	int err;
527 
528 	thissnap = strchr(zhp->zfs_name, '@') + 1;
529 
530 	if (sdd->fromsnap && !sdd->seenfrom &&
531 	    strcmp(sdd->fromsnap, thissnap) == 0) {
532 		sdd->seenfrom = B_TRUE;
533 		(void) strcpy(sdd->lastsnap, thissnap);
534 		zfs_close(zhp);
535 		return (0);
536 	}
537 
538 	if (sdd->seento || !sdd->seenfrom) {
539 		zfs_close(zhp);
540 		return (0);
541 	}
542 
543 	/* send it */
544 	if (sdd->verbose) {
545 		(void) fprintf(stderr, "sending from @%s to %s\n",
546 		    sdd->lastsnap, zhp->zfs_name);
547 	}
548 
549 	err = dump_ioctl(zhp, sdd->lastsnap,
550 	    sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
551 	    sdd->outfd);
552 
553 	if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0)
554 		sdd->seento = B_TRUE;
555 
556 	(void) strcpy(sdd->lastsnap, thissnap);
557 	zfs_close(zhp);
558 	return (err);
559 }
560 
561 static int
562 dump_filesystem(zfs_handle_t *zhp, void *arg)
563 {
564 	int rv = 0;
565 	send_dump_data_t *sdd = arg;
566 	boolean_t missingfrom = B_FALSE;
567 	zfs_cmd_t zc = { 0 };
568 
569 	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
570 	    zhp->zfs_name, sdd->tosnap);
571 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
572 		(void) fprintf(stderr, "WARNING: "
573 		    "could not send %s@%s: does not exist\n",
574 		    zhp->zfs_name, sdd->tosnap);
575 		sdd->err = B_TRUE;
576 		return (0);
577 	}
578 
579 	if (sdd->replicate && sdd->fromsnap) {
580 		/*
581 		 * If this fs does not have fromsnap, and we're doing
582 		 * recursive, we need to send a full stream from the
583 		 * beginning (or an incremental from the origin if this
584 		 * is a clone).  If we're doing non-recursive, then let
585 		 * them get the error.
586 		 */
587 		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
588 		    zhp->zfs_name, sdd->fromsnap);
589 		if (ioctl(zhp->zfs_hdl->libzfs_fd,
590 		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
591 			missingfrom = B_TRUE;
592 		}
593 	}
594 
595 	if (sdd->doall) {
596 		sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0;
597 		if (sdd->fromsnap == NULL || missingfrom)
598 			sdd->seenfrom = B_TRUE;
599 
600 		rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
601 		if (!sdd->seenfrom) {
602 			(void) fprintf(stderr,
603 			    "WARNING: could not send %s@%s:\n"
604 			    "incremental source (%s@%s) does not exist\n",
605 			    zhp->zfs_name, sdd->tosnap,
606 			    zhp->zfs_name, sdd->fromsnap);
607 			sdd->err = B_TRUE;
608 		} else if (!sdd->seento) {
609 			if (sdd->fromsnap) {
610 				(void) fprintf(stderr,
611 				    "WARNING: could not send %s@%s:\n"
612 				    "incremental source (%s@%s) "
613 				    "is not earlier than it\n",
614 				    zhp->zfs_name, sdd->tosnap,
615 				    zhp->zfs_name, sdd->fromsnap);
616 			} else {
617 				(void) fprintf(stderr, "WARNING: "
618 				    "could not send %s@%s: does not exist\n",
619 				    zhp->zfs_name, sdd->tosnap);
620 			}
621 			sdd->err = B_TRUE;
622 		}
623 	} else {
624 		zfs_handle_t *snapzhp;
625 		char snapname[ZFS_MAXNAMELEN];
626 
627 		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
628 		    zfs_get_name(zhp), sdd->tosnap);
629 		snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
630 		if (snapzhp == NULL) {
631 			rv = -1;
632 		} else {
633 			rv = dump_ioctl(snapzhp,
634 			    missingfrom ? NULL : sdd->fromsnap,
635 			    sdd->fromorigin || missingfrom,
636 			    sdd->outfd);
637 			sdd->seento = B_TRUE;
638 			zfs_close(snapzhp);
639 		}
640 	}
641 
642 	return (rv);
643 }
644 
645 static int
646 dump_filesystems(zfs_handle_t *rzhp, void *arg)
647 {
648 	send_dump_data_t *sdd = arg;
649 	nvpair_t *fspair;
650 	boolean_t needagain, progress;
651 
652 	if (!sdd->replicate)
653 		return (dump_filesystem(rzhp, sdd));
654 
655 again:
656 	needagain = progress = B_FALSE;
657 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
658 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
659 		nvlist_t *fslist;
660 		char *fsname;
661 		zfs_handle_t *zhp;
662 		int err;
663 		uint64_t origin_guid = 0;
664 		nvlist_t *origin_nv;
665 
666 		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
667 		if (nvlist_lookup_boolean(fslist, "sent") == 0)
668 			continue;
669 
670 		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
671 		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
672 
673 		origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
674 		if (origin_nv &&
675 		    nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) {
676 			/*
677 			 * origin has not been sent yet;
678 			 * skip this clone.
679 			 */
680 			needagain = B_TRUE;
681 			continue;
682 		}
683 
684 		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
685 		if (zhp == NULL)
686 			return (-1);
687 		err = dump_filesystem(zhp, sdd);
688 		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
689 		progress = B_TRUE;
690 		zfs_close(zhp);
691 		if (err)
692 			return (err);
693 	}
694 	if (needagain) {
695 		assert(progress);
696 		goto again;
697 	}
698 	return (0);
699 }
700 
701 /*
702  * Generate a send stream for the dataset identified by the argument zhp.
703  *
704  * The content of the send stream is the snapshot identified by
705  * 'tosnap'.  Incremental streams are requested in two ways:
706  *     - from the snapshot identified by "fromsnap" (if non-null) or
707  *     - from the origin of the dataset identified by zhp, which must
708  *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
709  *	 is TRUE.
710  *
711  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
712  * uses a special header (with a version field of DMU_BACKUP_HEADER_VERSION)
713  * if "replicate" is set.  If "doall" is set, dump all the intermediate
714  * snapshots. The DMU_BACKUP_HEADER_VERSION header is used in the "doall"
715  * case too.
716  */
717 int
718 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
719     boolean_t replicate, boolean_t doall, boolean_t fromorigin,
720     boolean_t verbose, int outfd)
721 {
722 	char errbuf[1024];
723 	send_dump_data_t sdd = { 0 };
724 	int err;
725 	nvlist_t *fss = NULL;
726 	avl_tree_t *fsavl = NULL;
727 
728 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
729 	    "cannot send '%s'"), zhp->zfs_name);
730 
731 	if (fromsnap && fromsnap[0] == '\0') {
732 		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
733 		    "zero-length incremental source"));
734 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
735 	}
736 
737 	if (replicate || doall) {
738 		dmu_replay_record_t drr = { 0 };
739 		char *packbuf = NULL;
740 		size_t buflen = 0;
741 		zio_cksum_t zc = { 0 };
742 
743 		assert(fromsnap || doall);
744 
745 		if (replicate) {
746 			nvlist_t *hdrnv;
747 
748 			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
749 			if (fromsnap) {
750 				VERIFY(0 == nvlist_add_string(hdrnv,
751 				    "fromsnap", fromsnap));
752 			}
753 			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
754 
755 			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
756 			    fromsnap, tosnap, &fss, &fsavl);
757 			if (err)
758 				return (err);
759 			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
760 			err = nvlist_pack(hdrnv, &packbuf, &buflen,
761 			    NV_ENCODE_XDR, 0);
762 			nvlist_free(hdrnv);
763 			if (err) {
764 				fsavl_destroy(fsavl);
765 				nvlist_free(fss);
766 				return (zfs_standard_error(zhp->zfs_hdl,
767 				    err, errbuf));
768 			}
769 		}
770 
771 		/* write first begin record */
772 		drr.drr_type = DRR_BEGIN;
773 		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
774 		drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
775 		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
776 		    sizeof (drr.drr_u.drr_begin.drr_toname),
777 		    "%s@%s", zhp->zfs_name, tosnap);
778 		drr.drr_payloadlen = buflen;
779 		fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
780 		err = write(outfd, &drr, sizeof (drr));
781 
782 		/* write header nvlist */
783 		if (err != -1) {
784 			fletcher_4_incremental_native(packbuf, buflen, &zc);
785 			err = write(outfd, packbuf, buflen);
786 		}
787 		free(packbuf);
788 		if (err == -1) {
789 			fsavl_destroy(fsavl);
790 			nvlist_free(fss);
791 			return (zfs_standard_error(zhp->zfs_hdl,
792 			    errno, errbuf));
793 		}
794 
795 		/* write end record */
796 		if (err != -1) {
797 			bzero(&drr, sizeof (drr));
798 			drr.drr_type = DRR_END;
799 			drr.drr_u.drr_end.drr_checksum = zc;
800 			err = write(outfd, &drr, sizeof (drr));
801 			if (err == -1) {
802 				fsavl_destroy(fsavl);
803 				nvlist_free(fss);
804 				return (zfs_standard_error(zhp->zfs_hdl,
805 				    errno, errbuf));
806 			}
807 		}
808 	}
809 
810 	/* dump each stream */
811 	sdd.fromsnap = fromsnap;
812 	sdd.tosnap = tosnap;
813 	sdd.outfd = outfd;
814 	sdd.replicate = replicate;
815 	sdd.doall = doall;
816 	sdd.fromorigin = fromorigin;
817 	sdd.fss = fss;
818 	sdd.fsavl = fsavl;
819 	sdd.verbose = verbose;
820 	err = dump_filesystems(zhp, &sdd);
821 	fsavl_destroy(fsavl);
822 	nvlist_free(fss);
823 
824 	if (replicate || doall) {
825 		/*
826 		 * write final end record.  NB: want to do this even if
827 		 * there was some error, because it might not be totally
828 		 * failed.
829 		 */
830 		dmu_replay_record_t drr = { 0 };
831 		drr.drr_type = DRR_END;
832 		if (write(outfd, &drr, sizeof (drr)) == -1) {
833 			return (zfs_standard_error(zhp->zfs_hdl,
834 			    errno, errbuf));
835 		}
836 	}
837 
838 	return (err || sdd.err);
839 }
840 
841 /*
842  * Routines specific to "zfs recv"
843  */
844 
845 static int
846 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
847     boolean_t byteswap, zio_cksum_t *zc)
848 {
849 	char *cp = buf;
850 	int rv;
851 	int len = ilen;
852 
853 	do {
854 		rv = read(fd, cp, len);
855 		cp += rv;
856 		len -= rv;
857 	} while (rv > 0);
858 
859 	if (rv < 0 || len != 0) {
860 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
861 		    "failed to read from stream"));
862 		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
863 		    "cannot receive")));
864 	}
865 
866 	if (zc) {
867 		if (byteswap)
868 			fletcher_4_incremental_byteswap(buf, ilen, zc);
869 		else
870 			fletcher_4_incremental_native(buf, ilen, zc);
871 	}
872 	return (0);
873 }
874 
875 static int
876 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
877     boolean_t byteswap, zio_cksum_t *zc)
878 {
879 	char *buf;
880 	int err;
881 
882 	buf = zfs_alloc(hdl, len);
883 	if (buf == NULL)
884 		return (ENOMEM);
885 
886 	err = recv_read(hdl, fd, buf, len, byteswap, zc);
887 	if (err != 0) {
888 		free(buf);
889 		return (err);
890 	}
891 
892 	err = nvlist_unpack(buf, len, nvp, 0);
893 	free(buf);
894 	if (err != 0) {
895 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
896 		    "stream (malformed nvlist)"));
897 		return (EINVAL);
898 	}
899 	return (0);
900 }
901 
902 static int
903 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
904     int baselen, char *newname, recvflags_t flags)
905 {
906 	static int seq;
907 	zfs_cmd_t zc = { 0 };
908 	int err;
909 	prop_changelist_t *clp;
910 	zfs_handle_t *zhp;
911 
912 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
913 	if (zhp == NULL)
914 		return (-1);
915 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
916 	    flags.force ? MS_FORCE : 0);
917 	zfs_close(zhp);
918 	if (clp == NULL)
919 		return (-1);
920 	err = changelist_prefix(clp);
921 	if (err)
922 		return (err);
923 
924 	zc.zc_objset_type = DMU_OST_ZFS;
925 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
926 
927 	if (tryname) {
928 		(void) strcpy(newname, tryname);
929 
930 		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
931 
932 		if (flags.verbose) {
933 			(void) printf("attempting rename %s to %s\n",
934 			    zc.zc_name, zc.zc_value);
935 		}
936 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
937 		if (err == 0)
938 			changelist_rename(clp, name, tryname);
939 	} else {
940 		err = ENOENT;
941 	}
942 
943 	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
944 		seq++;
945 
946 		(void) strncpy(newname, name, baselen);
947 		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
948 		    "recv-%u-%u", getpid(), seq);
949 		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
950 
951 		if (flags.verbose) {
952 			(void) printf("failed - trying rename %s to %s\n",
953 			    zc.zc_name, zc.zc_value);
954 		}
955 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
956 		if (err == 0)
957 			changelist_rename(clp, name, newname);
958 		if (err && flags.verbose) {
959 			(void) printf("failed (%u) - "
960 			    "will try again on next pass\n", errno);
961 		}
962 		err = EAGAIN;
963 	} else if (flags.verbose) {
964 		if (err == 0)
965 			(void) printf("success\n");
966 		else
967 			(void) printf("failed (%u)\n", errno);
968 	}
969 
970 	(void) changelist_postfix(clp);
971 	changelist_free(clp);
972 
973 	return (err);
974 }
975 
976 static int
977 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
978     char *newname, recvflags_t flags)
979 {
980 	zfs_cmd_t zc = { 0 };
981 	int err = 0;
982 	prop_changelist_t *clp;
983 	zfs_handle_t *zhp;
984 	boolean_t defer = B_FALSE;
985 	int spa_version;
986 
987 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
988 	if (zhp == NULL)
989 		return (-1);
990 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
991 	    flags.force ? MS_FORCE : 0);
992 	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
993 	    zfs_spa_version(zhp, &spa_version) == 0 &&
994 	    spa_version >= SPA_VERSION_USERREFS)
995 		defer = B_TRUE;
996 	zfs_close(zhp);
997 	if (clp == NULL)
998 		return (-1);
999 	err = changelist_prefix(clp);
1000 	if (err)
1001 		return (err);
1002 
1003 	zc.zc_objset_type = DMU_OST_ZFS;
1004 	zc.zc_defer_destroy = defer;
1005 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1006 
1007 	if (flags.verbose)
1008 		(void) printf("attempting destroy %s\n", zc.zc_name);
1009 	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1010 	if (err == 0) {
1011 		if (flags.verbose)
1012 			(void) printf("success\n");
1013 		changelist_remove(clp, zc.zc_name);
1014 	}
1015 
1016 	(void) changelist_postfix(clp);
1017 	changelist_free(clp);
1018 
1019 	/*
1020 	 * Deferred destroy should always succeed. Since we can't tell
1021 	 * if it destroyed the dataset or just marked it for deferred
1022 	 * destroy, always do the rename just in case.
1023 	 */
1024 	if (err != 0 || defer)
1025 		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1026 
1027 	return (err);
1028 }
1029 
/*
 * Search state for guid_to_name(): the guid being looked for and the
 * caller's buffer that receives the matching dataset's name.
 */
typedef struct guid_to_name_data {
	uint64_t guid;	/* guid to find */
	char *name;	/* output buffer; filled by guid_to_name_cb() */
} guid_to_name_data_t;
1034 
1035 static int
1036 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1037 {
1038 	guid_to_name_data_t *gtnd = arg;
1039 	int err;
1040 
1041 	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1042 		(void) strcpy(gtnd->name, zhp->zfs_name);
1043 		return (EEXIST);
1044 	}
1045 	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1046 	zfs_close(zhp);
1047 	return (err);
1048 }
1049 
1050 static int
1051 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1052     char *name)
1053 {
1054 	/* exhaustive search all local snapshots */
1055 	guid_to_name_data_t gtnd;
1056 	int err = 0;
1057 	zfs_handle_t *zhp;
1058 	char *cp;
1059 
1060 	gtnd.guid = guid;
1061 	gtnd.name = name;
1062 
1063 	if (strchr(parent, '@') == NULL) {
1064 		zhp = make_dataset_handle(hdl, parent);
1065 		if (zhp != NULL) {
1066 			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1067 			zfs_close(zhp);
1068 			if (err == EEXIST)
1069 				return (0);
1070 		}
1071 	}
1072 
1073 	cp = strchr(parent, '/');
1074 	if (cp)
1075 		*cp = '\0';
1076 	zhp = make_dataset_handle(hdl, parent);
1077 	if (cp)
1078 		*cp = '/';
1079 
1080 	if (zhp) {
1081 		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1082 		zfs_close(zhp);
1083 	}
1084 
1085 	return (err == EEXIST ? 0 : ENOENT);
1086 
1087 }
1088 
1089 /*
1090  * Return true if dataset guid1 is created before guid2.
1091  */
1092 static int
1093 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1094     uint64_t guid1, uint64_t guid2)
1095 {
1096 	nvlist_t *nvfs;
1097 	char *fsname, *snapname;
1098 	char buf[ZFS_MAXNAMELEN];
1099 	int rv;
1100 	zfs_node_t zn1, zn2;
1101 
1102 	if (guid2 == 0)
1103 		return (0);
1104 	if (guid1 == 0)
1105 		return (1);
1106 
1107 	nvfs = fsavl_find(avl, guid1, &snapname);
1108 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1109 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1110 	zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1111 	if (zn1.zn_handle == NULL)
1112 		return (-1);
1113 
1114 	nvfs = fsavl_find(avl, guid2, &snapname);
1115 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1116 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1117 	zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1118 	if (zn2.zn_handle == NULL) {
1119 		zfs_close(zn2.zn_handle);
1120 		return (-1);
1121 	}
1122 
1123 	rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
1124 
1125 	zfs_close(zn1.zn_handle);
1126 	zfs_close(zn2.zn_handle);
1127 
1128 	return (rv);
1129 }
1130 
1131 static int
1132 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1133     recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
1134 {
1135 	nvlist_t *local_nv;
1136 	avl_tree_t *local_avl;
1137 	nvpair_t *fselem, *nextfselem;
1138 	char *tosnap, *fromsnap;
1139 	char newname[ZFS_MAXNAMELEN];
1140 	int error;
1141 	boolean_t needagain, progress;
1142 
1143 	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1144 	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
1145 
1146 	if (flags.dryrun)
1147 		return (0);
1148 
1149 again:
1150 	needagain = progress = B_FALSE;
1151 
1152 	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1153 	    &local_nv, &local_avl)) != 0)
1154 		return (error);
1155 
1156 	/*
1157 	 * Process deletes and renames
1158 	 */
1159 	for (fselem = nvlist_next_nvpair(local_nv, NULL);
1160 	    fselem; fselem = nextfselem) {
1161 		nvlist_t *nvfs, *snaps;
1162 		nvlist_t *stream_nvfs = NULL;
1163 		nvpair_t *snapelem, *nextsnapelem;
1164 		uint64_t fromguid = 0;
1165 		uint64_t originguid = 0;
1166 		uint64_t stream_originguid = 0;
1167 		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1168 		char *fsname, *stream_fsname;
1169 
1170 		nextfselem = nvlist_next_nvpair(local_nv, fselem);
1171 
1172 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1173 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1174 		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1175 		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1176 		    &parent_fromsnap_guid));
1177 		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1178 
1179 		/*
1180 		 * First find the stream's fs, so we can check for
1181 		 * a different origin (due to "zfs promote")
1182 		 */
1183 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
1184 		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1185 			uint64_t thisguid;
1186 
1187 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1188 			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1189 
1190 			if (stream_nvfs != NULL)
1191 				break;
1192 		}
1193 
1194 		/* check for promote */
1195 		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
1196 		    &stream_originguid);
1197 		if (stream_nvfs && originguid != stream_originguid) {
1198 			switch (created_before(hdl, local_avl,
1199 			    stream_originguid, originguid)) {
1200 			case 1: {
1201 				/* promote it! */
1202 				zfs_cmd_t zc = { 0 };
1203 				nvlist_t *origin_nvfs;
1204 				char *origin_fsname;
1205 
1206 				if (flags.verbose)
1207 					(void) printf("promoting %s\n", fsname);
1208 
1209 				origin_nvfs = fsavl_find(local_avl, originguid,
1210 				    NULL);
1211 				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
1212 				    "name", &origin_fsname));
1213 				(void) strlcpy(zc.zc_value, origin_fsname,
1214 				    sizeof (zc.zc_value));
1215 				(void) strlcpy(zc.zc_name, fsname,
1216 				    sizeof (zc.zc_name));
1217 				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
1218 				if (error == 0)
1219 					progress = B_TRUE;
1220 				break;
1221 			}
1222 			default:
1223 				break;
1224 			case -1:
1225 				fsavl_destroy(local_avl);
1226 				nvlist_free(local_nv);
1227 				return (-1);
1228 			}
1229 			/*
1230 			 * We had/have the wrong origin, therefore our
1231 			 * list of snapshots is wrong.  Need to handle
1232 			 * them on the next pass.
1233 			 */
1234 			needagain = B_TRUE;
1235 			continue;
1236 		}
1237 
1238 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
1239 		    snapelem; snapelem = nextsnapelem) {
1240 			uint64_t thisguid;
1241 			char *stream_snapname;
1242 			nvlist_t *found, *props;
1243 
1244 			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
1245 
1246 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1247 			found = fsavl_find(stream_avl, thisguid,
1248 			    &stream_snapname);
1249 
1250 			/* check for delete */
1251 			if (found == NULL) {
1252 				char name[ZFS_MAXNAMELEN];
1253 
1254 				if (!flags.force)
1255 					continue;
1256 
1257 				(void) snprintf(name, sizeof (name), "%s@%s",
1258 				    fsname, nvpair_name(snapelem));
1259 
1260 				error = recv_destroy(hdl, name,
1261 				    strlen(fsname)+1, newname, flags);
1262 				if (error)
1263 					needagain = B_TRUE;
1264 				else
1265 					progress = B_TRUE;
1266 				continue;
1267 			}
1268 
1269 			stream_nvfs = found;
1270 
1271 			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
1272 			    &props) && 0 == nvlist_lookup_nvlist(props,
1273 			    stream_snapname, &props)) {
1274 				zfs_cmd_t zc = { 0 };
1275 
1276 				zc.zc_cookie = B_TRUE; /* clear current props */
1277 				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
1278 				    "%s@%s", fsname, nvpair_name(snapelem));
1279 				if (zcmd_write_src_nvlist(hdl, &zc,
1280 				    props) == 0) {
1281 					(void) zfs_ioctl(hdl,
1282 					    ZFS_IOC_SET_PROP, &zc);
1283 					zcmd_free_nvlists(&zc);
1284 				}
1285 			}
1286 
1287 			/* check for different snapname */
1288 			if (strcmp(nvpair_name(snapelem),
1289 			    stream_snapname) != 0) {
1290 				char name[ZFS_MAXNAMELEN];
1291 				char tryname[ZFS_MAXNAMELEN];
1292 
1293 				(void) snprintf(name, sizeof (name), "%s@%s",
1294 				    fsname, nvpair_name(snapelem));
1295 				(void) snprintf(tryname, sizeof (name), "%s@%s",
1296 				    fsname, stream_snapname);
1297 
1298 				error = recv_rename(hdl, name, tryname,
1299 				    strlen(fsname)+1, newname, flags);
1300 				if (error)
1301 					needagain = B_TRUE;
1302 				else
1303 					progress = B_TRUE;
1304 			}
1305 
1306 			if (strcmp(stream_snapname, fromsnap) == 0)
1307 				fromguid = thisguid;
1308 		}
1309 
1310 		/* check for delete */
1311 		if (stream_nvfs == NULL) {
1312 			if (!flags.force)
1313 				continue;
1314 
1315 			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
1316 			    newname, flags);
1317 			if (error)
1318 				needagain = B_TRUE;
1319 			else
1320 				progress = B_TRUE;
1321 			continue;
1322 		}
1323 
1324 		if (fromguid == 0 && flags.verbose) {
1325 			(void) printf("local fs %s does not have fromsnap "
1326 			    "(%s in stream); must have been deleted locally; "
1327 			    "ignoring\n", fsname, fromsnap);
1328 			continue;
1329 		}
1330 
1331 		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
1332 		    "name", &stream_fsname));
1333 		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
1334 		    "parentfromsnap", &stream_parent_fromsnap_guid));
1335 
1336 		/* check for rename */
1337 		if ((stream_parent_fromsnap_guid != 0 &&
1338 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
1339 		    strcmp(strrchr(fsname, '/'),
1340 		    strrchr(stream_fsname, '/')) != 0) {
1341 			nvlist_t *parent;
1342 			char tryname[ZFS_MAXNAMELEN];
1343 
1344 			parent = fsavl_find(local_avl,
1345 			    stream_parent_fromsnap_guid, NULL);
1346 			/*
1347 			 * NB: parent might not be found if we used the
1348 			 * tosnap for stream_parent_fromsnap_guid,
1349 			 * because the parent is a newly-created fs;
1350 			 * we'll be able to rename it after we recv the
1351 			 * new fs.
1352 			 */
1353 			if (parent != NULL) {
1354 				char *pname;
1355 
1356 				VERIFY(0 == nvlist_lookup_string(parent, "name",
1357 				    &pname));
1358 				(void) snprintf(tryname, sizeof (tryname),
1359 				    "%s%s", pname, strrchr(stream_fsname, '/'));
1360 			} else {
1361 				tryname[0] = '\0';
1362 				if (flags.verbose) {
1363 					(void) printf("local fs %s new parent "
1364 					    "not found\n", fsname);
1365 				}
1366 			}
1367 
1368 			error = recv_rename(hdl, fsname, tryname,
1369 			    strlen(tofs)+1, newname, flags);
1370 			if (error)
1371 				needagain = B_TRUE;
1372 			else
1373 				progress = B_TRUE;
1374 		}
1375 	}
1376 
1377 	fsavl_destroy(local_avl);
1378 	nvlist_free(local_nv);
1379 
1380 	if (needagain && progress) {
1381 		/* do another pass to fix up temporary names */
1382 		if (flags.verbose)
1383 			(void) printf("another pass:\n");
1384 		goto again;
1385 	}
1386 
1387 	return (needagain);
1388 }
1389 
1390 static int
1391 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
1392     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
1393     char **top_zfs)
1394 {
1395 	nvlist_t *stream_nv = NULL;
1396 	avl_tree_t *stream_avl = NULL;
1397 	char *fromsnap = NULL;
1398 	char tofs[ZFS_MAXNAMELEN];
1399 	char errbuf[1024];
1400 	dmu_replay_record_t drre;
1401 	int error;
1402 	boolean_t anyerr = B_FALSE;
1403 	boolean_t softerr = B_FALSE;
1404 
1405 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1406 	    "cannot receive"));
1407 
1408 	if (strchr(destname, '@')) {
1409 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1410 		    "can not specify snapshot name for multi-snapshot stream"));
1411 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
1412 	}
1413 
1414 	assert(drr->drr_type == DRR_BEGIN);
1415 	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
1416 	assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
1417 
1418 	/*
1419 	 * Read in the nvlist from the stream.
1420 	 */
1421 	if (drr->drr_payloadlen != 0) {
1422 		if (!flags.isprefix) {
1423 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1424 			    "must use -d to receive replication "
1425 			    "(send -R) stream"));
1426 			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
1427 		}
1428 
1429 		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
1430 		    &stream_nv, flags.byteswap, zc);
1431 		if (error) {
1432 			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1433 			goto out;
1434 		}
1435 	}
1436 
1437 	/*
1438 	 * Read in the end record and verify checksum.
1439 	 */
1440 	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
1441 	    flags.byteswap, NULL)))
1442 		goto out;
1443 	if (flags.byteswap) {
1444 		drre.drr_type = BSWAP_32(drre.drr_type);
1445 		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
1446 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
1447 		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
1448 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
1449 		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
1450 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
1451 		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
1452 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
1453 	}
1454 	if (drre.drr_type != DRR_END) {
1455 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1456 		goto out;
1457 	}
1458 	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
1459 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1460 		    "incorrect header checksum"));
1461 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1462 		goto out;
1463 	}
1464 
1465 	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
1466 
1467 	if (drr->drr_payloadlen != 0) {
1468 		nvlist_t *stream_fss;
1469 
1470 		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
1471 		    &stream_fss));
1472 		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
1473 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1474 			    "couldn't allocate avl tree"));
1475 			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
1476 			goto out;
1477 		}
1478 
1479 		if (fromsnap != NULL) {
1480 			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
1481 			if (flags.isprefix) {
1482 				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
1483 				    "/@");
1484 				/* zfs_receive_one() will create_parents() */
1485 				(void) strlcat(tofs,
1486 				    &drr->drr_u.drr_begin.drr_toname[i],
1487 				    ZFS_MAXNAMELEN);
1488 				*strchr(tofs, '@') = '\0';
1489 			}
1490 			softerr = recv_incremental_replication(hdl, tofs,
1491 			    flags, stream_nv, stream_avl);
1492 		}
1493 	}
1494 
1495 
1496 	/* Finally, receive each contained stream */
1497 	do {
1498 		/*
1499 		 * we should figure out if it has a recoverable
1500 		 * error, in which case do a recv_skip() and drive on.
1501 		 * Note, if we fail due to already having this guid,
1502 		 * zfs_receive_one() will take care of it (ie,
1503 		 * recv_skip() and return 0).
1504 		 */
1505 		error = zfs_receive_impl(hdl, destname, flags, fd,
1506 		    stream_avl, top_zfs);
1507 		if (error == ENODATA) {
1508 			error = 0;
1509 			break;
1510 		}
1511 		anyerr |= error;
1512 	} while (error == 0);
1513 
1514 	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
1515 		/*
1516 		 * Now that we have the fs's they sent us, try the
1517 		 * renames again.
1518 		 */
1519 		softerr = recv_incremental_replication(hdl, tofs, flags,
1520 		    stream_nv, stream_avl);
1521 	}
1522 
1523 out:
1524 	fsavl_destroy(stream_avl);
1525 	if (stream_nv)
1526 		nvlist_free(stream_nv);
1527 	if (softerr)
1528 		error = -2;
1529 	if (anyerr)
1530 		error = -1;
1531 	return (error);
1532 }
1533 
1534 static int
1535 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
1536 {
1537 	dmu_replay_record_t *drr;
1538 	void *buf = malloc(1<<20);
1539 
1540 	/* XXX would be great to use lseek if possible... */
1541 	drr = buf;
1542 
1543 	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
1544 	    byteswap, NULL) == 0) {
1545 		if (byteswap)
1546 			drr->drr_type = BSWAP_32(drr->drr_type);
1547 
1548 		switch (drr->drr_type) {
1549 		case DRR_BEGIN:
1550 			/* NB: not to be used on v2 stream packages */
1551 			assert(drr->drr_payloadlen == 0);
1552 			break;
1553 
1554 		case DRR_END:
1555 			free(buf);
1556 			return (0);
1557 
1558 		case DRR_OBJECT:
1559 			if (byteswap) {
1560 				drr->drr_u.drr_object.drr_bonuslen =
1561 				    BSWAP_32(drr->drr_u.drr_object.
1562 				    drr_bonuslen);
1563 			}
1564 			(void) recv_read(hdl, fd, buf,
1565 			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
1566 			    B_FALSE, NULL);
1567 			break;
1568 
1569 		case DRR_WRITE:
1570 			if (byteswap) {
1571 				drr->drr_u.drr_write.drr_length =
1572 				    BSWAP_64(drr->drr_u.drr_write.drr_length);
1573 			}
1574 			(void) recv_read(hdl, fd, buf,
1575 			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
1576 			break;
1577 
1578 		case DRR_FREEOBJECTS:
1579 		case DRR_FREE:
1580 			break;
1581 
1582 		default:
1583 			assert(!"invalid record type");
1584 		}
1585 	}
1586 
1587 	free(buf);
1588 	return (-1);
1589 }
1590 
1591 /*
1592  * Restores a backup of tosnap from the file descriptor specified by infd.
1593  */
1594 static int
1595 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
1596     recvflags_t flags, dmu_replay_record_t *drr,
1597     dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
1598     char **top_zfs)
1599 {
1600 	zfs_cmd_t zc = { 0 };
1601 	time_t begin_time;
1602 	int ioctl_err, ioctl_errno, err, choplen;
1603 	char *cp;
1604 	struct drr_begin *drrb = &drr->drr_u.drr_begin;
1605 	char errbuf[1024];
1606 	char chopprefix[ZFS_MAXNAMELEN];
1607 	boolean_t newfs = B_FALSE;
1608 	boolean_t stream_wantsnewfs;
1609 	uint64_t parent_snapguid = 0;
1610 	prop_changelist_t *clp = NULL;
1611 	nvlist_t *snapprops_nvlist = NULL;
1612 
1613 	begin_time = time(NULL);
1614 
1615 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1616 	    "cannot receive"));
1617 
1618 	if (stream_avl != NULL) {
1619 		char *snapname;
1620 		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
1621 		    &snapname);
1622 		nvlist_t *props;
1623 		int ret;
1624 
1625 		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
1626 		    &parent_snapguid);
1627 		err = nvlist_lookup_nvlist(fs, "props", &props);
1628 		if (err)
1629 			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
1630 
1631 		if (flags.canmountoff) {
1632 			VERIFY(0 == nvlist_add_uint64(props,
1633 			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
1634 		}
1635 		ret = zcmd_write_src_nvlist(hdl, &zc, props);
1636 		if (err)
1637 			nvlist_free(props);
1638 
1639 		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
1640 			VERIFY(0 == nvlist_lookup_nvlist(props,
1641 			    snapname, &snapprops_nvlist));
1642 		}
1643 
1644 		if (ret != 0)
1645 			return (-1);
1646 	}
1647 
1648 	/*
1649 	 * Determine how much of the snapshot name stored in the stream
1650 	 * we are going to tack on to the name they specified on the
1651 	 * command line, and how much we are going to chop off.
1652 	 *
1653 	 * If they specified a snapshot, chop the entire name stored in
1654 	 * the stream.
1655 	 */
1656 	(void) strcpy(chopprefix, drrb->drr_toname);
1657 	if (flags.isprefix) {
1658 		/*
1659 		 * They specified a fs with -d, we want to tack on
1660 		 * everything but the pool name stored in the stream
1661 		 */
1662 		if (strchr(tosnap, '@')) {
1663 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1664 			    "argument - snapshot not allowed with -d"));
1665 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
1666 		}
1667 		cp = strchr(chopprefix, '/');
1668 		if (cp == NULL)
1669 			cp = strchr(chopprefix, '@');
1670 		*cp = '\0';
1671 	} else if (strchr(tosnap, '@') == NULL) {
1672 		/*
1673 		 * If they specified a filesystem without -d, we want to
1674 		 * tack on everything after the fs specified in the
1675 		 * first name from the stream.
1676 		 */
1677 		cp = strchr(chopprefix, '@');
1678 		*cp = '\0';
1679 	}
1680 	choplen = strlen(chopprefix);
1681 
1682 	/*
1683 	 * Determine name of destination snapshot, store in zc_value.
1684 	 */
1685 	(void) strcpy(zc.zc_value, tosnap);
1686 	(void) strncat(zc.zc_value, drrb->drr_toname+choplen,
1687 	    sizeof (zc.zc_value));
1688 	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
1689 		zcmd_free_nvlists(&zc);
1690 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
1691 	}
1692 
1693 	/*
1694 	 * Determine the name of the origin snapshot, store in zc_string.
1695 	 */
1696 	if (drrb->drr_flags & DRR_FLAG_CLONE) {
1697 		if (guid_to_name(hdl, tosnap,
1698 		    drrb->drr_fromguid, zc.zc_string) != 0) {
1699 			zcmd_free_nvlists(&zc);
1700 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1701 			    "local origin for clone %s does not exist"),
1702 			    zc.zc_value);
1703 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1704 		}
1705 		if (flags.verbose)
1706 			(void) printf("found clone origin %s\n", zc.zc_string);
1707 	}
1708 
1709 	stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
1710 	    (drrb->drr_flags & DRR_FLAG_CLONE));
1711 
1712 	if (stream_wantsnewfs) {
1713 		/*
1714 		 * if the parent fs does not exist, look for it based on
1715 		 * the parent snap GUID
1716 		 */
1717 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1718 		    "cannot receive new filesystem stream"));
1719 
1720 		(void) strcpy(zc.zc_name, zc.zc_value);
1721 		cp = strrchr(zc.zc_name, '/');
1722 		if (cp)
1723 			*cp = '\0';
1724 		if (cp &&
1725 		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1726 			char suffix[ZFS_MAXNAMELEN];
1727 			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
1728 			if (guid_to_name(hdl, tosnap, parent_snapguid,
1729 			    zc.zc_value) == 0) {
1730 				*strchr(zc.zc_value, '@') = '\0';
1731 				(void) strcat(zc.zc_value, suffix);
1732 			}
1733 		}
1734 	} else {
1735 		/*
1736 		 * if the fs does not exist, look for it based on the
1737 		 * fromsnap GUID
1738 		 */
1739 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1740 		    "cannot receive incremental stream"));
1741 
1742 		(void) strcpy(zc.zc_name, zc.zc_value);
1743 		*strchr(zc.zc_name, '@') = '\0';
1744 
1745 		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1746 			char snap[ZFS_MAXNAMELEN];
1747 			(void) strcpy(snap, strchr(zc.zc_value, '@'));
1748 			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
1749 			    zc.zc_value) == 0) {
1750 				*strchr(zc.zc_value, '@') = '\0';
1751 				(void) strcat(zc.zc_value, snap);
1752 			}
1753 		}
1754 	}
1755 
1756 	(void) strcpy(zc.zc_name, zc.zc_value);
1757 	*strchr(zc.zc_name, '@') = '\0';
1758 
1759 	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1760 		zfs_handle_t *zhp;
1761 		/*
1762 		 * Destination fs exists.  Therefore this should either
1763 		 * be an incremental, or the stream specifies a new fs
1764 		 * (full stream or clone) and they want us to blow it
1765 		 * away (and have therefore specified -F and removed any
1766 		 * snapshots).
1767 		 */
1768 
1769 		if (stream_wantsnewfs) {
1770 			if (!flags.force) {
1771 				zcmd_free_nvlists(&zc);
1772 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1773 				    "destination '%s' exists\n"
1774 				    "must specify -F to overwrite it"),
1775 				    zc.zc_name);
1776 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1777 			}
1778 			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
1779 			    &zc) == 0) {
1780 				zcmd_free_nvlists(&zc);
1781 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1782 				    "destination has snapshots (eg. %s)\n"
1783 				    "must destroy them to overwrite it"),
1784 				    zc.zc_name);
1785 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1786 			}
1787 		}
1788 
1789 		if ((zhp = zfs_open(hdl, zc.zc_name,
1790 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1791 			zcmd_free_nvlists(&zc);
1792 			return (-1);
1793 		}
1794 
1795 		if (stream_wantsnewfs &&
1796 		    zhp->zfs_dmustats.dds_origin[0]) {
1797 			zcmd_free_nvlists(&zc);
1798 			zfs_close(zhp);
1799 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1800 			    "destination '%s' is a clone\n"
1801 			    "must destroy it to overwrite it"),
1802 			    zc.zc_name);
1803 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1804 		}
1805 
1806 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
1807 		    stream_wantsnewfs) {
1808 			/* We can't do online recv in this case */
1809 			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
1810 			if (clp == NULL) {
1811 				zfs_close(zhp);
1812 				zcmd_free_nvlists(&zc);
1813 				return (-1);
1814 			}
1815 			if (changelist_prefix(clp) != 0) {
1816 				changelist_free(clp);
1817 				zfs_close(zhp);
1818 				zcmd_free_nvlists(&zc);
1819 				return (-1);
1820 			}
1821 		}
1822 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
1823 		    zvol_remove_link(hdl, zhp->zfs_name) != 0) {
1824 			zfs_close(zhp);
1825 			zcmd_free_nvlists(&zc);
1826 			return (-1);
1827 		}
1828 		zfs_close(zhp);
1829 	} else {
1830 		/*
1831 		 * Destination filesystem does not exist.  Therefore we better
1832 		 * be creating a new filesystem (either from a full backup, or
1833 		 * a clone).  It would therefore be invalid if the user
1834 		 * specified only the pool name (i.e. if the destination name
1835 		 * contained no slash character).
1836 		 */
1837 		if (!stream_wantsnewfs ||
1838 		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
1839 			zcmd_free_nvlists(&zc);
1840 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1841 			    "destination '%s' does not exist"), zc.zc_name);
1842 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1843 		}
1844 
1845 		/*
1846 		 * Trim off the final dataset component so we perform the
1847 		 * recvbackup ioctl to the filesystems's parent.
1848 		 */
1849 		*cp = '\0';
1850 
1851 		if (flags.isprefix && !flags.dryrun &&
1852 		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
1853 			zcmd_free_nvlists(&zc);
1854 			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
1855 		}
1856 
1857 		newfs = B_TRUE;
1858 	}
1859 
1860 	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
1861 	zc.zc_cookie = infd;
1862 	zc.zc_guid = flags.force;
1863 	if (flags.verbose) {
1864 		(void) printf("%s %s stream of %s into %s\n",
1865 		    flags.dryrun ? "would receive" : "receiving",
1866 		    drrb->drr_fromguid ? "incremental" : "full",
1867 		    drrb->drr_toname, zc.zc_value);
1868 		(void) fflush(stdout);
1869 	}
1870 
1871 	if (flags.dryrun) {
1872 		zcmd_free_nvlists(&zc);
1873 		return (recv_skip(hdl, infd, flags.byteswap));
1874 	}
1875 
1876 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
1877 	ioctl_errno = errno;
1878 	zcmd_free_nvlists(&zc);
1879 
1880 	if (err == 0 && snapprops_nvlist) {
1881 		zfs_cmd_t zc2 = { 0 };
1882 
1883 		(void) strcpy(zc2.zc_name, zc.zc_value);
1884 		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
1885 			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
1886 			zcmd_free_nvlists(&zc2);
1887 		}
1888 	}
1889 
1890 	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
1891 		/*
1892 		 * It may be that this snapshot already exists,
1893 		 * in which case we want to consume & ignore it
1894 		 * rather than failing.
1895 		 */
1896 		avl_tree_t *local_avl;
1897 		nvlist_t *local_nv, *fs;
1898 		char *cp = strchr(zc.zc_value, '@');
1899 
1900 		/*
1901 		 * XXX Do this faster by just iterating over snaps in
1902 		 * this fs.  Also if zc_value does not exist, we will
1903 		 * get a strange "does not exist" error message.
1904 		 */
1905 		*cp = '\0';
1906 		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
1907 		    &local_nv, &local_avl) == 0) {
1908 			*cp = '@';
1909 			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
1910 			fsavl_destroy(local_avl);
1911 			nvlist_free(local_nv);
1912 
1913 			if (fs != NULL) {
1914 				if (flags.verbose) {
1915 					(void) printf("snap %s already exists; "
1916 					    "ignoring\n", zc.zc_value);
1917 				}
1918 				ioctl_err = recv_skip(hdl, infd,
1919 				    flags.byteswap);
1920 			}
1921 		}
1922 		*cp = '@';
1923 	}
1924 
1925 
1926 	if (ioctl_err != 0) {
1927 		switch (ioctl_errno) {
1928 		case ENODEV:
1929 			cp = strchr(zc.zc_value, '@');
1930 			*cp = '\0';
1931 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1932 			    "most recent snapshot of %s does not\n"
1933 			    "match incremental source"), zc.zc_value);
1934 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
1935 			*cp = '@';
1936 			break;
1937 		case ETXTBSY:
1938 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1939 			    "destination %s has been modified\n"
1940 			    "since most recent snapshot"), zc.zc_name);
1941 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
1942 			break;
1943 		case EEXIST:
1944 			cp = strchr(zc.zc_value, '@');
1945 			if (newfs) {
1946 				/* it's the containing fs that exists */
1947 				*cp = '\0';
1948 			}
1949 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1950 			    "destination already exists"));
1951 			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
1952 			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
1953 			    zc.zc_value);
1954 			*cp = '@';
1955 			break;
1956 		case EINVAL:
1957 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1958 			break;
1959 		case ECKSUM:
1960 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1961 			    "invalid stream (checksum mismatch)"));
1962 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1963 			break;
1964 		default:
1965 			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
1966 		}
1967 	}
1968 
1969 	/*
1970 	 * Mount or recreate the /dev links for the target filesystem
1971 	 * (if created, or if we tore them down to do an incremental
1972 	 * restore), and the /dev links for the new snapshot (if
1973 	 * created). Also mount any children of the target filesystem
1974 	 * if we did a replication receive (indicated by stream_avl
1975 	 * being non-NULL).
1976 	 */
1977 	cp = strchr(zc.zc_value, '@');
1978 	if (cp && (ioctl_err == 0 || !newfs)) {
1979 		zfs_handle_t *h;
1980 
1981 		*cp = '\0';
1982 		h = zfs_open(hdl, zc.zc_value,
1983 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
1984 		if (h != NULL) {
1985 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
1986 				*cp = '@';
1987 				err = zvol_create_link(hdl, h->zfs_name);
1988 				if (err == 0 && ioctl_err == 0)
1989 					err = zvol_create_link(hdl,
1990 					    zc.zc_value);
1991 			} else if (newfs || stream_avl) {
1992 				/*
1993 				 * Track the first/top of hierarchy fs,
1994 				 * for mounting and sharing later.
1995 				 */
1996 				if (top_zfs && *top_zfs == NULL)
1997 					*top_zfs = zfs_strdup(hdl, zc.zc_value);
1998 			}
1999 			zfs_close(h);
2000 		}
2001 		*cp = '@';
2002 	}
2003 
2004 	if (clp) {
2005 		err |= changelist_postfix(clp);
2006 		changelist_free(clp);
2007 	}
2008 
2009 	if (err || ioctl_err)
2010 		return (-1);
2011 
2012 	if (flags.verbose) {
2013 		char buf1[64];
2014 		char buf2[64];
2015 		uint64_t bytes = zc.zc_cookie;
2016 		time_t delta = time(NULL) - begin_time;
2017 		if (delta == 0)
2018 			delta = 1;
2019 		zfs_nicenum(bytes, buf1, sizeof (buf1));
2020 		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
2021 
2022 		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
2023 		    buf1, delta, buf2);
2024 	}
2025 
2026 	return (0);
2027 }
2028 
2029 static int
2030 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2031     int infd, avl_tree_t *stream_avl, char **top_zfs)
2032 {
2033 	int err;
2034 	dmu_replay_record_t drr, drr_noswap;
2035 	struct drr_begin *drrb = &drr.drr_u.drr_begin;
2036 	char errbuf[1024];
2037 	zio_cksum_t zcksum = { 0 };
2038 
2039 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2040 	    "cannot receive"));
2041 
2042 	if (flags.isprefix &&
2043 	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
2044 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
2045 		    "(%s) does not exist"), tosnap);
2046 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
2047 	}
2048 
2049 	/* read in the BEGIN record */
2050 	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
2051 	    &zcksum)))
2052 		return (err);
2053 
2054 	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
2055 		/* It's the double end record at the end of a package */
2056 		return (ENODATA);
2057 	}
2058 
2059 	/* the kernel needs the non-byteswapped begin record */
2060 	drr_noswap = drr;
2061 
2062 	flags.byteswap = B_FALSE;
2063 	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
2064 		/*
2065 		 * We computed the checksum in the wrong byteorder in
2066 		 * recv_read() above; do it again correctly.
2067 		 */
2068 		bzero(&zcksum, sizeof (zio_cksum_t));
2069 		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
2070 		flags.byteswap = B_TRUE;
2071 
2072 		drr.drr_type = BSWAP_32(drr.drr_type);
2073 		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
2074 		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
2075 		drrb->drr_version = BSWAP_64(drrb->drr_version);
2076 		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
2077 		drrb->drr_type = BSWAP_32(drrb->drr_type);
2078 		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
2079 		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
2080 		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
2081 	}
2082 
2083 	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
2084 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2085 		    "stream (bad magic number)"));
2086 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2087 	}
2088 
2089 	if (strchr(drrb->drr_toname, '@') == NULL) {
2090 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2091 		    "stream (bad snapshot name)"));
2092 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2093 	}
2094 
2095 	if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
2096 		return (zfs_receive_one(hdl, infd, tosnap, flags,
2097 		    &drr, &drr_noswap, stream_avl, top_zfs));
2098 	} else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
2099 		return (zfs_receive_package(hdl, infd, tosnap, flags,
2100 		    &drr, &zcksum, top_zfs));
2101 	} else {
2102 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2103 		    "stream is unsupported version %llu"),
2104 		    drrb->drr_version);
2105 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2106 	}
2107 }
2108 
2109 /*
2110  * Restores a backup of tosnap from the file descriptor specified by infd.
2111  * Return 0 on total success, -2 if some things couldn't be
2112  * destroyed/renamed/promoted, -1 if some things couldn't be received.
2113  * (-1 will override -2).
2114  */
2115 int
2116 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2117     int infd, avl_tree_t *stream_avl)
2118 {
2119 	char *top_zfs = NULL;
2120 	int err;
2121 
2122 	err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
2123 
2124 	if (err == 0 && !flags.nomount && top_zfs) {
2125 		zfs_handle_t *zhp;
2126 		prop_changelist_t *clp;
2127 
2128 		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
2129 		if (zhp != NULL) {
2130 			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
2131 			    CL_GATHER_MOUNT_ALWAYS, 0);
2132 			zfs_close(zhp);
2133 			if (clp != NULL) {
2134 				/* mount and share received datasets */
2135 				err = changelist_postfix(clp);
2136 				changelist_free(clp);
2137 			}
2138 		}
2139 		if (zhp == NULL || clp == NULL || err)
2140 			err = -1;
2141 	}
2142 	if (top_zfs)
2143 		free(top_zfs);
2144 
2145 	return (err);
2146 }
2147