xref: /titanic_50/usr/src/lib/libzfs/common/libzfs_sendrecv.c (revision 015a6ef6781cc3ceba8ad3bfbae98449b6002a1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <assert.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <libdevinfo.h>
31 #include <libintl.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <strings.h>
35 #include <unistd.h>
36 #include <stddef.h>
37 #include <fcntl.h>
38 #include <sys/mount.h>
39 #include <sys/mntent.h>
40 #include <sys/mnttab.h>
41 #include <sys/avl.h>
42 #include <stddef.h>
43 
44 #include <libzfs.h>
45 
46 #include "zfs_namecheck.h"
47 #include "zfs_prop.h"
48 #include "libzfs_impl.h"
49 
50 #include <fletcher.c> /* XXX */
51 
/*
 * Forward declaration.  The function is static, so its definition must
 * appear in this translation unit.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t,
    int, avl_tree_t *, char **);
54 
55 /*
56  * Routines for dealing with the AVL tree of fs-nvlists
57  */
/*
 * One entry in the guid-ordered AVL tree of snapshots.  It maps a
 * snapshot guid to the nvlist of the filesystem that contains the
 * snapshot and to the snapshot's name.
 */
typedef struct fsavl_node {
	avl_node_t fn_node;	/* AVL linkage (offset given to avl_create) */
	nvlist_t *fn_nvfs;	/* nvlist of the containing filesystem */
	char *fn_snapname;	/* from nvpair_name(); never freed here */
	uint64_t fn_guid;	/* snapshot guid; the comparison key */
} fsavl_node_t;
64 
65 static int
66 fsavl_compare(const void *arg1, const void *arg2)
67 {
68 	const fsavl_node_t *fn1 = arg1;
69 	const fsavl_node_t *fn2 = arg2;
70 
71 	if (fn1->fn_guid > fn2->fn_guid)
72 		return (+1);
73 	else if (fn1->fn_guid < fn2->fn_guid)
74 		return (-1);
75 	else
76 		return (0);
77 }
78 
79 /*
80  * Given the GUID of a snapshot, find its containing filesystem and
81  * (optionally) name.
82  */
83 static nvlist_t *
84 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
85 {
86 	fsavl_node_t fn_find;
87 	fsavl_node_t *fn;
88 
89 	fn_find.fn_guid = snapguid;
90 
91 	fn = avl_find(avl, &fn_find, NULL);
92 	if (fn) {
93 		if (snapname)
94 			*snapname = fn->fn_snapname;
95 		return (fn->fn_nvfs);
96 	}
97 	return (NULL);
98 }
99 
100 static void
101 fsavl_destroy(avl_tree_t *avl)
102 {
103 	fsavl_node_t *fn;
104 	void *cookie;
105 
106 	if (avl == NULL)
107 		return;
108 
109 	cookie = NULL;
110 	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
111 		free(fn);
112 	avl_destroy(avl);
113 	free(avl);
114 }
115 
116 /*
117  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
118  */
119 static avl_tree_t *
120 fsavl_create(nvlist_t *fss)
121 {
122 	avl_tree_t *fsavl;
123 	nvpair_t *fselem = NULL;
124 
125 	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
126 		return (NULL);
127 
128 	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
129 	    offsetof(fsavl_node_t, fn_node));
130 
131 	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
132 		nvlist_t *nvfs, *snaps;
133 		nvpair_t *snapelem = NULL;
134 
135 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
136 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
137 
138 		while ((snapelem =
139 		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
140 			fsavl_node_t *fn;
141 			uint64_t guid;
142 
143 			VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
144 			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
145 				fsavl_destroy(fsavl);
146 				return (NULL);
147 			}
148 			fn->fn_nvfs = nvfs;
149 			fn->fn_snapname = nvpair_name(snapelem);
150 			fn->fn_guid = guid;
151 
152 			/*
153 			 * Note: if there are multiple snaps with the
154 			 * same GUID, we ignore all but one.
155 			 */
156 			if (avl_find(fsavl, fn, NULL) == NULL)
157 				avl_add(fsavl, fn);
158 			else
159 				free(fn);
160 		}
161 	}
162 
163 	return (fsavl);
164 }
165 
166 /*
167  * Routines for dealing with the giant nvlist of fs-nvlists, etc.
168  */
/*
 * State threaded through send_iterate_fs()/send_iterate_snap() while the
 * description of a dataset hierarchy is being gathered.
 */
typedef struct send_data {
	/*
	 * guid recorded by send_iterate_snap() for the filesystem whose
	 * snapshots were most recently iterated; send_iterate_fs() stores
	 * it as "parentfromsnap" in each child and restores the previous
	 * value when it returns.
	 */
	uint64_t parent_fromsnap_guid;
	nvlist_t *parent_snaps;	/* snapname -> guid for the current fs */
	nvlist_t *fss;		/* result: guid string -> fs-nvlist */
	nvlist_t *snapprops;	/* snapname -> props for the current fs */
	const char *fromsnap;	/* short name of "from" snap; may be NULL */
	const char *tosnap;	/* short name of "to" snap; may be NULL */

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
200 
/* Forward declaration: send_iterate_snap() calls this before its definition */
static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
202 
203 static int
204 send_iterate_snap(zfs_handle_t *zhp, void *arg)
205 {
206 	send_data_t *sd = arg;
207 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
208 	char *snapname;
209 	nvlist_t *nv;
210 
211 	snapname = strrchr(zhp->zfs_name, '@')+1;
212 
213 	VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
214 	/*
215 	 * NB: if there is no fromsnap here (it's a newly created fs in
216 	 * an incremental replication), we will substitute the tosnap.
217 	 */
218 	if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
219 	    (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
220 	    strcmp(snapname, sd->tosnap) == 0)) {
221 		sd->parent_fromsnap_guid = guid;
222 	}
223 
224 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
225 	send_iterate_prop(zhp, nv);
226 	VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
227 	nvlist_free(nv);
228 
229 	zfs_close(zhp);
230 	return (0);
231 }
232 
233 static void
234 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
235 {
236 	nvpair_t *elem = NULL;
237 
238 	while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
239 		char *propname = nvpair_name(elem);
240 		zfs_prop_t prop = zfs_name_to_prop(propname);
241 		nvlist_t *propnv;
242 
243 		assert(zfs_prop_user(propname) || prop != ZPROP_INVAL);
244 
245 		if (!zfs_prop_user(propname) && zfs_prop_readonly(prop))
246 			continue;
247 
248 		verify(nvpair_value_nvlist(elem, &propnv) == 0);
249 		if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
250 		    prop == ZFS_PROP_REFQUOTA ||
251 		    prop == ZFS_PROP_REFRESERVATION) {
252 			/* these guys are modifyable, but have no source */
253 			uint64_t value;
254 			verify(nvlist_lookup_uint64(propnv,
255 			    ZPROP_VALUE, &value) == 0);
256 			if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
257 				continue;
258 		} else {
259 			char *source;
260 			if (nvlist_lookup_string(propnv,
261 			    ZPROP_SOURCE, &source) != 0)
262 				continue;
263 			if (strcmp(source, zhp->zfs_name) != 0)
264 				continue;
265 		}
266 
267 		if (zfs_prop_user(propname) ||
268 		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
269 			char *value;
270 			verify(nvlist_lookup_string(propnv,
271 			    ZPROP_VALUE, &value) == 0);
272 			VERIFY(0 == nvlist_add_string(nv, propname, value));
273 		} else {
274 			uint64_t value;
275 			verify(nvlist_lookup_uint64(propnv,
276 			    ZPROP_VALUE, &value) == 0);
277 			VERIFY(0 == nvlist_add_uint64(nv, propname, value));
278 		}
279 	}
280 }
281 
282 /*
283  * recursively generate nvlists describing datasets.  See comment
284  * for the data structure send_data_t above for description of contents
285  * of the nvlist.
286  */
287 static int
288 send_iterate_fs(zfs_handle_t *zhp, void *arg)
289 {
290 	send_data_t *sd = arg;
291 	nvlist_t *nvfs, *nv;
292 	int rv;
293 	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
294 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
295 	char guidstring[64];
296 
297 	VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
298 	VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
299 	VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
300 	    sd->parent_fromsnap_guid));
301 
302 	if (zhp->zfs_dmustats.dds_origin[0]) {
303 		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
304 		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
305 		if (origin == NULL)
306 			return (-1);
307 		VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
308 		    origin->zfs_dmustats.dds_guid));
309 	}
310 
311 	/* iterate over props */
312 	VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
313 	send_iterate_prop(zhp, nv);
314 	VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
315 	nvlist_free(nv);
316 
317 	/* iterate over snaps, and set sd->parent_fromsnap_guid */
318 	sd->parent_fromsnap_guid = 0;
319 	VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
320 	VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
321 	(void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
322 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
323 	VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
324 	nvlist_free(sd->parent_snaps);
325 	nvlist_free(sd->snapprops);
326 
327 	/* add this fs to nvlist */
328 	(void) snprintf(guidstring, sizeof (guidstring),
329 	    "0x%llx", (longlong_t)guid);
330 	VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
331 	nvlist_free(nvfs);
332 
333 	/* iterate over children */
334 	rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
335 
336 	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
337 
338 	zfs_close(zhp);
339 	return (rv);
340 }
341 
342 static int
343 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
344     const char *tosnap, nvlist_t **nvlp, avl_tree_t **avlp)
345 {
346 	zfs_handle_t *zhp;
347 	send_data_t sd = { 0 };
348 	int error;
349 
350 	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
351 	if (zhp == NULL)
352 		return (EZFS_BADTYPE);
353 
354 	VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
355 	sd.fromsnap = fromsnap;
356 	sd.tosnap = tosnap;
357 
358 	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
359 		nvlist_free(sd.fss);
360 		if (avlp != NULL)
361 			*avlp = NULL;
362 		*nvlp = NULL;
363 		return (error);
364 	}
365 
366 	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
367 		nvlist_free(sd.fss);
368 		*nvlp = NULL;
369 		return (EZFS_NOMEM);
370 	}
371 
372 	*nvlp = sd.fss;
373 	return (0);
374 }
375 
376 /*
377  * Routines for dealing with the sorted snapshot functionality
378  */
/*
 * AVL tree entry that wraps one snapshot handle so that snapshots can
 * be visited in sorted order.
 */
typedef struct zfs_node {
	zfs_handle_t	*zn_handle;	/* handle passed on to the callback */
	avl_node_t	zn_avlnode;	/* AVL linkage */
} zfs_node_t;
383 
384 static int
385 zfs_sort_snaps(zfs_handle_t *zhp, void *data)
386 {
387 	avl_tree_t *avl = data;
388 	zfs_node_t *node = zfs_alloc(zhp->zfs_hdl, sizeof (zfs_node_t));
389 
390 	node->zn_handle = zhp;
391 	avl_add(avl, node);
392 	return (0);
393 }
394 
395 /* ARGSUSED */
396 static int
397 zfs_snapshot_compare(const void *larg, const void *rarg)
398 {
399 	zfs_handle_t *l = ((zfs_node_t *)larg)->zn_handle;
400 	zfs_handle_t *r = ((zfs_node_t *)rarg)->zn_handle;
401 	uint64_t lcreate, rcreate;
402 
403 	/*
404 	 * Sort them according to creation time.  We use the hidden
405 	 * CREATETXG property to get an absolute ordering of snapshots.
406 	 */
407 	lcreate = zfs_prop_get_int(l, ZFS_PROP_CREATETXG);
408 	rcreate = zfs_prop_get_int(r, ZFS_PROP_CREATETXG);
409 
410 	if (lcreate < rcreate)
411 		return (-1);
412 	else if (lcreate > rcreate)
413 		return (+1);
414 	else
415 		return (0);
416 }
417 
418 int
419 zfs_iter_snapshots_sorted(zfs_handle_t *zhp, zfs_iter_f callback, void *data)
420 {
421 	int ret = 0;
422 	zfs_node_t *node;
423 	avl_tree_t avl;
424 	void *cookie = NULL;
425 
426 	avl_create(&avl, zfs_snapshot_compare,
427 	    sizeof (zfs_node_t), offsetof(zfs_node_t, zn_avlnode));
428 
429 	ret = zfs_iter_snapshots(zhp, zfs_sort_snaps, &avl);
430 
431 	for (node = avl_first(&avl); node != NULL; node = AVL_NEXT(&avl, node))
432 		ret |= callback(node->zn_handle, data);
433 
434 	while ((node = avl_destroy_nodes(&avl, &cookie)) != NULL)
435 		free(node);
436 
437 	avl_destroy(&avl);
438 
439 	return (ret);
440 }
441 
442 /*
443  * Routines specific to "zfs send"
444  */
/*
 * State shared by dump_filesystems(), dump_filesystem() and
 * dump_snapshot() while the individual streams are written.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	/* snapshot handled last; used as the next incremental source */
	char lastsnap[ZFS_MAXNAMELEN];
	/* seenfrom/seento track progress through one fs's snapshots */
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t verbose;	/* print "sending from ..." to stderr */
	int outfd;		/* descriptor the stream is written to */
	boolean_t err;		/* set once a warning has been reported */
	nvlist_t *fss;		/* fs-nvlists walked by dump_filesystems() */
	avl_tree_t *fsavl;	/* guid -> fs lookup, for clone origins */
} send_dump_data_t;
457 
458 /*
459  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
460  * NULL) to the file descriptor specified by outfd.
461  */
462 static int
463 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, boolean_t fromorigin,
464     int outfd)
465 {
466 	zfs_cmd_t zc = { 0 };
467 	libzfs_handle_t *hdl = zhp->zfs_hdl;
468 
469 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
470 	assert(fromsnap == NULL || fromsnap[0] == '\0' || !fromorigin);
471 
472 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
473 	if (fromsnap)
474 		(void) strlcpy(zc.zc_value, fromsnap, sizeof (zc.zc_value));
475 	zc.zc_cookie = outfd;
476 	zc.zc_obj = fromorigin;
477 
478 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SEND, &zc) != 0) {
479 		char errbuf[1024];
480 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
481 		    "warning: cannot send '%s'"), zhp->zfs_name);
482 
483 		switch (errno) {
484 
485 		case EXDEV:
486 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
487 			    "not an earlier snapshot from the same fs"));
488 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
489 
490 		case ENOENT:
491 			if (zfs_dataset_exists(hdl, zc.zc_name,
492 			    ZFS_TYPE_SNAPSHOT)) {
493 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
494 				    "incremental source (@%s) does not exist"),
495 				    zc.zc_value);
496 			}
497 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
498 
499 		case EDQUOT:
500 		case EFBIG:
501 		case EIO:
502 		case ENOLINK:
503 		case ENOSPC:
504 		case ENOSTR:
505 		case ENXIO:
506 		case EPIPE:
507 		case ERANGE:
508 		case EFAULT:
509 		case EROFS:
510 			zfs_error_aux(hdl, strerror(errno));
511 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
512 
513 		default:
514 			return (zfs_standard_error(hdl, errno, errbuf));
515 		}
516 	}
517 
518 	return (0);
519 }
520 
521 static int
522 dump_snapshot(zfs_handle_t *zhp, void *arg)
523 {
524 	send_dump_data_t *sdd = arg;
525 	const char *thissnap;
526 	int err;
527 
528 	thissnap = strchr(zhp->zfs_name, '@') + 1;
529 
530 	if (sdd->fromsnap && !sdd->seenfrom &&
531 	    strcmp(sdd->fromsnap, thissnap) == 0) {
532 		sdd->seenfrom = B_TRUE;
533 		(void) strcpy(sdd->lastsnap, thissnap);
534 		zfs_close(zhp);
535 		return (0);
536 	}
537 
538 	if (sdd->seento || !sdd->seenfrom) {
539 		zfs_close(zhp);
540 		return (0);
541 	}
542 
543 	/* send it */
544 	if (sdd->verbose) {
545 		(void) fprintf(stderr, "sending from @%s to %s\n",
546 		    sdd->lastsnap, zhp->zfs_name);
547 	}
548 
549 	err = dump_ioctl(zhp, sdd->lastsnap,
550 	    sdd->lastsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate),
551 	    sdd->outfd);
552 
553 	if (!sdd->seento && strcmp(sdd->tosnap, thissnap) == 0)
554 		sdd->seento = B_TRUE;
555 
556 	(void) strcpy(sdd->lastsnap, thissnap);
557 	zfs_close(zhp);
558 	return (err);
559 }
560 
561 static int
562 dump_filesystem(zfs_handle_t *zhp, void *arg)
563 {
564 	int rv = 0;
565 	send_dump_data_t *sdd = arg;
566 	boolean_t missingfrom = B_FALSE;
567 	zfs_cmd_t zc = { 0 };
568 
569 	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
570 	    zhp->zfs_name, sdd->tosnap);
571 	if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
572 		(void) fprintf(stderr, "WARNING: "
573 		    "could not send %s@%s: does not exist\n",
574 		    zhp->zfs_name, sdd->tosnap);
575 		sdd->err = B_TRUE;
576 		return (0);
577 	}
578 
579 	if (sdd->replicate && sdd->fromsnap) {
580 		/*
581 		 * If this fs does not have fromsnap, and we're doing
582 		 * recursive, we need to send a full stream from the
583 		 * beginning (or an incremental from the origin if this
584 		 * is a clone).  If we're doing non-recursive, then let
585 		 * them get the error.
586 		 */
587 		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
588 		    zhp->zfs_name, sdd->fromsnap);
589 		if (ioctl(zhp->zfs_hdl->libzfs_fd,
590 		    ZFS_IOC_OBJSET_STATS, &zc) != 0) {
591 			missingfrom = B_TRUE;
592 		}
593 	}
594 
595 	if (sdd->doall) {
596 		sdd->seenfrom = sdd->seento = sdd->lastsnap[0] = 0;
597 		if (sdd->fromsnap == NULL || missingfrom)
598 			sdd->seenfrom = B_TRUE;
599 
600 		rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
601 		if (!sdd->seenfrom) {
602 			(void) fprintf(stderr,
603 			    "WARNING: could not send %s@%s:\n"
604 			    "incremental source (%s@%s) does not exist\n",
605 			    zhp->zfs_name, sdd->tosnap,
606 			    zhp->zfs_name, sdd->fromsnap);
607 			sdd->err = B_TRUE;
608 		} else if (!sdd->seento) {
609 			if (sdd->fromsnap) {
610 				(void) fprintf(stderr,
611 				    "WARNING: could not send %s@%s:\n"
612 				    "incremental source (%s@%s) "
613 				    "is not earlier than it\n",
614 				    zhp->zfs_name, sdd->tosnap,
615 				    zhp->zfs_name, sdd->fromsnap);
616 			} else {
617 				(void) fprintf(stderr, "WARNING: "
618 				    "could not send %s@%s: does not exist\n",
619 				    zhp->zfs_name, sdd->tosnap);
620 			}
621 			sdd->err = B_TRUE;
622 		}
623 	} else {
624 		zfs_handle_t *snapzhp;
625 		char snapname[ZFS_MAXNAMELEN];
626 
627 		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
628 		    zfs_get_name(zhp), sdd->tosnap);
629 		snapzhp = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
630 		if (snapzhp == NULL) {
631 			rv = -1;
632 		} else {
633 			rv = dump_ioctl(snapzhp,
634 			    missingfrom ? NULL : sdd->fromsnap,
635 			    sdd->fromorigin || missingfrom,
636 			    sdd->outfd);
637 			sdd->seento = B_TRUE;
638 			zfs_close(snapzhp);
639 		}
640 	}
641 
642 	return (rv);
643 }
644 
645 static int
646 dump_filesystems(zfs_handle_t *rzhp, void *arg)
647 {
648 	send_dump_data_t *sdd = arg;
649 	nvpair_t *fspair;
650 	boolean_t needagain, progress;
651 
652 	if (!sdd->replicate)
653 		return (dump_filesystem(rzhp, sdd));
654 
655 again:
656 	needagain = progress = B_FALSE;
657 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
658 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
659 		nvlist_t *fslist;
660 		char *fsname;
661 		zfs_handle_t *zhp;
662 		int err;
663 		uint64_t origin_guid = 0;
664 		nvlist_t *origin_nv;
665 
666 		VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
667 		if (nvlist_lookup_boolean(fslist, "sent") == 0)
668 			continue;
669 
670 		VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
671 		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
672 
673 		origin_nv = fsavl_find(sdd->fsavl, origin_guid, NULL);
674 		if (origin_nv &&
675 		    nvlist_lookup_boolean(origin_nv, "sent") == ENOENT) {
676 			/*
677 			 * origin has not been sent yet;
678 			 * skip this clone.
679 			 */
680 			needagain = B_TRUE;
681 			continue;
682 		}
683 
684 		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
685 		if (zhp == NULL)
686 			return (-1);
687 		err = dump_filesystem(zhp, sdd);
688 		VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
689 		progress = B_TRUE;
690 		zfs_close(zhp);
691 		if (err)
692 			return (err);
693 	}
694 	if (needagain) {
695 		assert(progress);
696 		goto again;
697 	}
698 	return (0);
699 }
700 
701 /*
702  * Generate a send stream for the dataset identified by the argument zhp.
703  *
704  * The content of the send stream is the snapshot identified by
705  * 'tosnap'.  Incremental streams are requested in two ways:
706  *     - from the snapshot identified by "fromsnap" (if non-null) or
707  *     - from the origin of the dataset identified by zhp, which must
708  *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
709  *	 is TRUE.
710  *
711  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
712  * uses a special header (with a version field of DMU_BACKUP_HEADER_VERSION)
713  * if "replicate" is set.  If "doall" is set, dump all the intermediate
714  * snapshots. The DMU_BACKUP_HEADER_VERSION header is used in the "doall"
715  * case too.
716  */
717 int
718 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
719     boolean_t replicate, boolean_t doall, boolean_t fromorigin,
720     boolean_t verbose, int outfd)
721 {
722 	char errbuf[1024];
723 	send_dump_data_t sdd = { 0 };
724 	int err;
725 	nvlist_t *fss = NULL;
726 	avl_tree_t *fsavl = NULL;
727 	char holdtag[128];
728 	static uint64_t holdseq;
729 
730 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
731 	    "cannot send '%s'"), zhp->zfs_name);
732 
733 	if (fromsnap && fromsnap[0] == '\0') {
734 		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
735 		    "zero-length incremental source"));
736 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
737 	}
738 
739 	if (replicate || doall) {
740 		dmu_replay_record_t drr = { 0 };
741 		char *packbuf = NULL;
742 		size_t buflen = 0;
743 		zio_cksum_t zc = { 0 };
744 
745 		assert(fromsnap || doall);
746 
747 		(void) snprintf(holdtag, sizeof (holdtag), ".send-%d-%llu",
748 		    getpid(), (u_longlong_t)holdseq);
749 		++holdseq;
750 		err = zfs_hold_range(zhp, fromsnap, tosnap, holdtag, B_TRUE);
751 		if (err)
752 			return (err);
753 		if (replicate) {
754 			nvlist_t *hdrnv;
755 
756 			VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
757 			if (fromsnap) {
758 				VERIFY(0 == nvlist_add_string(hdrnv,
759 				    "fromsnap", fromsnap));
760 			}
761 			VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
762 
763 			err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
764 			    fromsnap, tosnap, &fss, &fsavl);
765 			if (err) {
766 				(void) zfs_release_range(zhp, fromsnap, tosnap,
767 				    holdtag);
768 				return (err);
769 			}
770 			VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
771 			err = nvlist_pack(hdrnv, &packbuf, &buflen,
772 			    NV_ENCODE_XDR, 0);
773 			nvlist_free(hdrnv);
774 			if (err) {
775 				fsavl_destroy(fsavl);
776 				nvlist_free(fss);
777 				(void) zfs_release_range(zhp, fromsnap, tosnap,
778 				    holdtag);
779 				return (zfs_standard_error(zhp->zfs_hdl,
780 				    err, errbuf));
781 			}
782 		}
783 
784 		/* write first begin record */
785 		drr.drr_type = DRR_BEGIN;
786 		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
787 		drr.drr_u.drr_begin.drr_version = DMU_BACKUP_HEADER_VERSION;
788 		(void) snprintf(drr.drr_u.drr_begin.drr_toname,
789 		    sizeof (drr.drr_u.drr_begin.drr_toname),
790 		    "%s@%s", zhp->zfs_name, tosnap);
791 		drr.drr_payloadlen = buflen;
792 		fletcher_4_incremental_native(&drr, sizeof (drr), &zc);
793 		err = write(outfd, &drr, sizeof (drr));
794 
795 		/* write header nvlist */
796 		if (err != -1) {
797 			fletcher_4_incremental_native(packbuf, buflen, &zc);
798 			err = write(outfd, packbuf, buflen);
799 		}
800 		free(packbuf);
801 		if (err == -1) {
802 			fsavl_destroy(fsavl);
803 			nvlist_free(fss);
804 			(void) zfs_release_range(zhp, fromsnap, tosnap,
805 			    holdtag);
806 			return (zfs_standard_error(zhp->zfs_hdl,
807 			    errno, errbuf));
808 		}
809 
810 		/* write end record */
811 		if (err != -1) {
812 			bzero(&drr, sizeof (drr));
813 			drr.drr_type = DRR_END;
814 			drr.drr_u.drr_end.drr_checksum = zc;
815 			err = write(outfd, &drr, sizeof (drr));
816 			if (err == -1) {
817 				fsavl_destroy(fsavl);
818 				nvlist_free(fss);
819 				(void) zfs_release_range(zhp, fromsnap, tosnap,
820 				    holdtag);
821 				return (zfs_standard_error(zhp->zfs_hdl,
822 				    errno, errbuf));
823 			}
824 		}
825 	}
826 
827 	/* dump each stream */
828 	sdd.fromsnap = fromsnap;
829 	sdd.tosnap = tosnap;
830 	sdd.outfd = outfd;
831 	sdd.replicate = replicate;
832 	sdd.doall = doall;
833 	sdd.fromorigin = fromorigin;
834 	sdd.fss = fss;
835 	sdd.fsavl = fsavl;
836 	sdd.verbose = verbose;
837 	err = dump_filesystems(zhp, &sdd);
838 	fsavl_destroy(fsavl);
839 	nvlist_free(fss);
840 
841 	if (replicate || doall) {
842 		/*
843 		 * write final end record.  NB: want to do this even if
844 		 * there was some error, because it might not be totally
845 		 * failed.
846 		 */
847 		dmu_replay_record_t drr = { 0 };
848 		drr.drr_type = DRR_END;
849 		if (write(outfd, &drr, sizeof (drr)) == -1) {
850 			return (zfs_standard_error(zhp->zfs_hdl,
851 			    errno, errbuf));
852 		}
853 		(void) zfs_release_range(zhp, fromsnap, tosnap, holdtag);
854 	}
855 
856 	return (err || sdd.err);
857 }
858 
859 /*
860  * Routines specific to "zfs recv"
861  */
862 
863 static int
864 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
865     boolean_t byteswap, zio_cksum_t *zc)
866 {
867 	char *cp = buf;
868 	int rv;
869 	int len = ilen;
870 
871 	do {
872 		rv = read(fd, cp, len);
873 		cp += rv;
874 		len -= rv;
875 	} while (rv > 0);
876 
877 	if (rv < 0 || len != 0) {
878 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
879 		    "failed to read from stream"));
880 		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
881 		    "cannot receive")));
882 	}
883 
884 	if (zc) {
885 		if (byteswap)
886 			fletcher_4_incremental_byteswap(buf, ilen, zc);
887 		else
888 			fletcher_4_incremental_native(buf, ilen, zc);
889 	}
890 	return (0);
891 }
892 
893 static int
894 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
895     boolean_t byteswap, zio_cksum_t *zc)
896 {
897 	char *buf;
898 	int err;
899 
900 	buf = zfs_alloc(hdl, len);
901 	if (buf == NULL)
902 		return (ENOMEM);
903 
904 	err = recv_read(hdl, fd, buf, len, byteswap, zc);
905 	if (err != 0) {
906 		free(buf);
907 		return (err);
908 	}
909 
910 	err = nvlist_unpack(buf, len, nvp, 0);
911 	free(buf);
912 	if (err != 0) {
913 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
914 		    "stream (malformed nvlist)"));
915 		return (EINVAL);
916 	}
917 	return (0);
918 }
919 
920 static int
921 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
922     int baselen, char *newname, recvflags_t flags)
923 {
924 	static int seq;
925 	zfs_cmd_t zc = { 0 };
926 	int err;
927 	prop_changelist_t *clp;
928 	zfs_handle_t *zhp;
929 
930 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
931 	if (zhp == NULL)
932 		return (-1);
933 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
934 	    flags.force ? MS_FORCE : 0);
935 	zfs_close(zhp);
936 	if (clp == NULL)
937 		return (-1);
938 	err = changelist_prefix(clp);
939 	if (err)
940 		return (err);
941 
942 	zc.zc_objset_type = DMU_OST_ZFS;
943 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
944 
945 	if (tryname) {
946 		(void) strcpy(newname, tryname);
947 
948 		(void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
949 
950 		if (flags.verbose) {
951 			(void) printf("attempting rename %s to %s\n",
952 			    zc.zc_name, zc.zc_value);
953 		}
954 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
955 		if (err == 0)
956 			changelist_rename(clp, name, tryname);
957 	} else {
958 		err = ENOENT;
959 	}
960 
961 	if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
962 		seq++;
963 
964 		(void) strncpy(newname, name, baselen);
965 		(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
966 		    "recv-%u-%u", getpid(), seq);
967 		(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
968 
969 		if (flags.verbose) {
970 			(void) printf("failed - trying rename %s to %s\n",
971 			    zc.zc_name, zc.zc_value);
972 		}
973 		err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
974 		if (err == 0)
975 			changelist_rename(clp, name, newname);
976 		if (err && flags.verbose) {
977 			(void) printf("failed (%u) - "
978 			    "will try again on next pass\n", errno);
979 		}
980 		err = EAGAIN;
981 	} else if (flags.verbose) {
982 		if (err == 0)
983 			(void) printf("success\n");
984 		else
985 			(void) printf("failed (%u)\n", errno);
986 	}
987 
988 	(void) changelist_postfix(clp);
989 	changelist_free(clp);
990 
991 	return (err);
992 }
993 
994 static int
995 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
996     char *newname, recvflags_t flags)
997 {
998 	zfs_cmd_t zc = { 0 };
999 	int err = 0;
1000 	prop_changelist_t *clp;
1001 	zfs_handle_t *zhp;
1002 	boolean_t defer = B_FALSE;
1003 	int spa_version;
1004 
1005 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1006 	if (zhp == NULL)
1007 		return (-1);
1008 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1009 	    flags.force ? MS_FORCE : 0);
1010 	if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1011 	    zfs_spa_version(zhp, &spa_version) == 0 &&
1012 	    spa_version >= SPA_VERSION_USERREFS)
1013 		defer = B_TRUE;
1014 	zfs_close(zhp);
1015 	if (clp == NULL)
1016 		return (-1);
1017 	err = changelist_prefix(clp);
1018 	if (err)
1019 		return (err);
1020 
1021 	zc.zc_objset_type = DMU_OST_ZFS;
1022 	zc.zc_defer_destroy = defer;
1023 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1024 
1025 	if (flags.verbose)
1026 		(void) printf("attempting destroy %s\n", zc.zc_name);
1027 	err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1028 	if (err == 0) {
1029 		if (flags.verbose)
1030 			(void) printf("success\n");
1031 		changelist_remove(clp, zc.zc_name);
1032 	}
1033 
1034 	(void) changelist_postfix(clp);
1035 	changelist_free(clp);
1036 
1037 	/*
1038 	 * Deferred destroy should always succeed. Since we can't tell
1039 	 * if it destroyed the dataset or just marked it for deferred
1040 	 * destroy, always do the rename just in case.
1041 	 */
1042 	if (err != 0 || defer)
1043 		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1044 
1045 	return (err);
1046 }
1047 
/* Search state handed to guid_to_name_cb() by guid_to_name() */
typedef struct guid_to_name_data {
	uint64_t guid;	/* guid being searched for */
	char *name;	/* caller's buffer; receives the matching name */
} guid_to_name_data_t;
1052 
1053 static int
1054 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1055 {
1056 	guid_to_name_data_t *gtnd = arg;
1057 	int err;
1058 
1059 	if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1060 		(void) strcpy(gtnd->name, zhp->zfs_name);
1061 		return (EEXIST);
1062 	}
1063 	err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1064 	zfs_close(zhp);
1065 	return (err);
1066 }
1067 
1068 static int
1069 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1070     char *name)
1071 {
1072 	/* exhaustive search all local snapshots */
1073 	guid_to_name_data_t gtnd;
1074 	int err = 0;
1075 	zfs_handle_t *zhp;
1076 	char *cp;
1077 
1078 	gtnd.guid = guid;
1079 	gtnd.name = name;
1080 
1081 	if (strchr(parent, '@') == NULL) {
1082 		zhp = make_dataset_handle(hdl, parent);
1083 		if (zhp != NULL) {
1084 			err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1085 			zfs_close(zhp);
1086 			if (err == EEXIST)
1087 				return (0);
1088 		}
1089 	}
1090 
1091 	cp = strchr(parent, '/');
1092 	if (cp)
1093 		*cp = '\0';
1094 	zhp = make_dataset_handle(hdl, parent);
1095 	if (cp)
1096 		*cp = '/';
1097 
1098 	if (zhp) {
1099 		err = zfs_iter_children(zhp, guid_to_name_cb, &gtnd);
1100 		zfs_close(zhp);
1101 	}
1102 
1103 	return (err == EEXIST ? 0 : ENOENT);
1104 
1105 }
1106 
1107 /*
1108  * Return true if dataset guid1 is created before guid2.
1109  */
1110 static int
1111 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1112     uint64_t guid1, uint64_t guid2)
1113 {
1114 	nvlist_t *nvfs;
1115 	char *fsname, *snapname;
1116 	char buf[ZFS_MAXNAMELEN];
1117 	int rv;
1118 	zfs_node_t zn1, zn2;
1119 
1120 	if (guid2 == 0)
1121 		return (0);
1122 	if (guid1 == 0)
1123 		return (1);
1124 
1125 	nvfs = fsavl_find(avl, guid1, &snapname);
1126 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1127 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1128 	zn1.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1129 	if (zn1.zn_handle == NULL)
1130 		return (-1);
1131 
1132 	nvfs = fsavl_find(avl, guid2, &snapname);
1133 	VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1134 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1135 	zn2.zn_handle = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1136 	if (zn2.zn_handle == NULL) {
1137 		zfs_close(zn2.zn_handle);
1138 		return (-1);
1139 	}
1140 
1141 	rv = (zfs_snapshot_compare(&zn1, &zn2) == -1);
1142 
1143 	zfs_close(zn1.zn_handle);
1144 	zfs_close(zn2.zn_handle);
1145 
1146 	return (rv);
1147 }
1148 
1149 static int
1150 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1151     recvflags_t flags, nvlist_t *stream_nv, avl_tree_t *stream_avl)
1152 {
1153 	nvlist_t *local_nv;
1154 	avl_tree_t *local_avl;
1155 	nvpair_t *fselem, *nextfselem;
1156 	char *tosnap, *fromsnap;
1157 	char newname[ZFS_MAXNAMELEN];
1158 	int error;
1159 	boolean_t needagain, progress;
1160 	char *s1, *s2;
1161 
1162 	VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1163 	VERIFY(0 == nvlist_lookup_string(stream_nv, "tosnap", &tosnap));
1164 
1165 	if (flags.dryrun)
1166 		return (0);
1167 
1168 again:
1169 	needagain = progress = B_FALSE;
1170 
1171 	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1172 	    &local_nv, &local_avl)) != 0)
1173 		return (error);
1174 
1175 	/*
1176 	 * Process deletes and renames
1177 	 */
1178 	for (fselem = nvlist_next_nvpair(local_nv, NULL);
1179 	    fselem; fselem = nextfselem) {
1180 		nvlist_t *nvfs, *snaps;
1181 		nvlist_t *stream_nvfs = NULL;
1182 		nvpair_t *snapelem, *nextsnapelem;
1183 		uint64_t fromguid = 0;
1184 		uint64_t originguid = 0;
1185 		uint64_t stream_originguid = 0;
1186 		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1187 		char *fsname, *stream_fsname;
1188 
1189 		nextfselem = nvlist_next_nvpair(local_nv, fselem);
1190 
1191 		VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1192 		VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1193 		VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1194 		VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1195 		    &parent_fromsnap_guid));
1196 		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1197 
1198 		/*
1199 		 * First find the stream's fs, so we can check for
1200 		 * a different origin (due to "zfs promote")
1201 		 */
1202 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
1203 		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1204 			uint64_t thisguid;
1205 
1206 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1207 			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1208 
1209 			if (stream_nvfs != NULL)
1210 				break;
1211 		}
1212 
1213 		/* check for promote */
1214 		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
1215 		    &stream_originguid);
1216 		if (stream_nvfs && originguid != stream_originguid) {
1217 			switch (created_before(hdl, local_avl,
1218 			    stream_originguid, originguid)) {
1219 			case 1: {
1220 				/* promote it! */
1221 				zfs_cmd_t zc = { 0 };
1222 				nvlist_t *origin_nvfs;
1223 				char *origin_fsname;
1224 
1225 				if (flags.verbose)
1226 					(void) printf("promoting %s\n", fsname);
1227 
1228 				origin_nvfs = fsavl_find(local_avl, originguid,
1229 				    NULL);
1230 				VERIFY(0 == nvlist_lookup_string(origin_nvfs,
1231 				    "name", &origin_fsname));
1232 				(void) strlcpy(zc.zc_value, origin_fsname,
1233 				    sizeof (zc.zc_value));
1234 				(void) strlcpy(zc.zc_name, fsname,
1235 				    sizeof (zc.zc_name));
1236 				error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
1237 				if (error == 0)
1238 					progress = B_TRUE;
1239 				break;
1240 			}
1241 			default:
1242 				break;
1243 			case -1:
1244 				fsavl_destroy(local_avl);
1245 				nvlist_free(local_nv);
1246 				return (-1);
1247 			}
1248 			/*
1249 			 * We had/have the wrong origin, therefore our
1250 			 * list of snapshots is wrong.  Need to handle
1251 			 * them on the next pass.
1252 			 */
1253 			needagain = B_TRUE;
1254 			continue;
1255 		}
1256 
1257 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
1258 		    snapelem; snapelem = nextsnapelem) {
1259 			uint64_t thisguid;
1260 			char *stream_snapname;
1261 			nvlist_t *found, *props;
1262 
1263 			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
1264 
1265 			VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1266 			found = fsavl_find(stream_avl, thisguid,
1267 			    &stream_snapname);
1268 
1269 			/* check for delete */
1270 			if (found == NULL) {
1271 				char name[ZFS_MAXNAMELEN];
1272 
1273 				if (!flags.force)
1274 					continue;
1275 
1276 				(void) snprintf(name, sizeof (name), "%s@%s",
1277 				    fsname, nvpair_name(snapelem));
1278 
1279 				error = recv_destroy(hdl, name,
1280 				    strlen(fsname)+1, newname, flags);
1281 				if (error)
1282 					needagain = B_TRUE;
1283 				else
1284 					progress = B_TRUE;
1285 				continue;
1286 			}
1287 
1288 			stream_nvfs = found;
1289 
1290 			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
1291 			    &props) && 0 == nvlist_lookup_nvlist(props,
1292 			    stream_snapname, &props)) {
1293 				zfs_cmd_t zc = { 0 };
1294 
1295 				zc.zc_cookie = B_TRUE; /* clear current props */
1296 				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
1297 				    "%s@%s", fsname, nvpair_name(snapelem));
1298 				if (zcmd_write_src_nvlist(hdl, &zc,
1299 				    props) == 0) {
1300 					(void) zfs_ioctl(hdl,
1301 					    ZFS_IOC_SET_PROP, &zc);
1302 					zcmd_free_nvlists(&zc);
1303 				}
1304 			}
1305 
1306 			/* check for different snapname */
1307 			if (strcmp(nvpair_name(snapelem),
1308 			    stream_snapname) != 0) {
1309 				char name[ZFS_MAXNAMELEN];
1310 				char tryname[ZFS_MAXNAMELEN];
1311 
1312 				(void) snprintf(name, sizeof (name), "%s@%s",
1313 				    fsname, nvpair_name(snapelem));
1314 				(void) snprintf(tryname, sizeof (name), "%s@%s",
1315 				    fsname, stream_snapname);
1316 
1317 				error = recv_rename(hdl, name, tryname,
1318 				    strlen(fsname)+1, newname, flags);
1319 				if (error)
1320 					needagain = B_TRUE;
1321 				else
1322 					progress = B_TRUE;
1323 			}
1324 
1325 			if (strcmp(stream_snapname, fromsnap) == 0)
1326 				fromguid = thisguid;
1327 		}
1328 
1329 		/* check for delete */
1330 		if (stream_nvfs == NULL) {
1331 			if (!flags.force)
1332 				continue;
1333 
1334 			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
1335 			    newname, flags);
1336 			if (error)
1337 				needagain = B_TRUE;
1338 			else
1339 				progress = B_TRUE;
1340 			continue;
1341 		}
1342 
1343 		if (fromguid == 0 && flags.verbose) {
1344 			(void) printf("local fs %s does not have fromsnap "
1345 			    "(%s in stream); must have been deleted locally; "
1346 			    "ignoring\n", fsname, fromsnap);
1347 			continue;
1348 		}
1349 
1350 		VERIFY(0 == nvlist_lookup_string(stream_nvfs,
1351 		    "name", &stream_fsname));
1352 		VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
1353 		    "parentfromsnap", &stream_parent_fromsnap_guid));
1354 
1355 		s1 = strrchr(fsname, '/');
1356 		s2 = strrchr(stream_fsname, '/');
1357 
1358 		/* check for rename */
1359 		if ((stream_parent_fromsnap_guid != 0 &&
1360 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
1361 		    ((s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
1362 			nvlist_t *parent;
1363 			char tryname[ZFS_MAXNAMELEN];
1364 
1365 			parent = fsavl_find(local_avl,
1366 			    stream_parent_fromsnap_guid, NULL);
1367 			/*
1368 			 * NB: parent might not be found if we used the
1369 			 * tosnap for stream_parent_fromsnap_guid,
1370 			 * because the parent is a newly-created fs;
1371 			 * we'll be able to rename it after we recv the
1372 			 * new fs.
1373 			 */
1374 			if (parent != NULL) {
1375 				char *pname;
1376 
1377 				VERIFY(0 == nvlist_lookup_string(parent, "name",
1378 				    &pname));
1379 				(void) snprintf(tryname, sizeof (tryname),
1380 				    "%s%s", pname, strrchr(stream_fsname, '/'));
1381 			} else {
1382 				tryname[0] = '\0';
1383 				if (flags.verbose) {
1384 					(void) printf("local fs %s new parent "
1385 					    "not found\n", fsname);
1386 				}
1387 			}
1388 
1389 			error = recv_rename(hdl, fsname, tryname,
1390 			    strlen(tofs)+1, newname, flags);
1391 			if (error)
1392 				needagain = B_TRUE;
1393 			else
1394 				progress = B_TRUE;
1395 		}
1396 	}
1397 
1398 	fsavl_destroy(local_avl);
1399 	nvlist_free(local_nv);
1400 
1401 	if (needagain && progress) {
1402 		/* do another pass to fix up temporary names */
1403 		if (flags.verbose)
1404 			(void) printf("another pass:\n");
1405 		goto again;
1406 	}
1407 
1408 	return (needagain);
1409 }
1410 
1411 static int
1412 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
1413     recvflags_t flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
1414     char **top_zfs)
1415 {
1416 	nvlist_t *stream_nv = NULL;
1417 	avl_tree_t *stream_avl = NULL;
1418 	char *fromsnap = NULL;
1419 	char tofs[ZFS_MAXNAMELEN];
1420 	char errbuf[1024];
1421 	dmu_replay_record_t drre;
1422 	int error;
1423 	boolean_t anyerr = B_FALSE;
1424 	boolean_t softerr = B_FALSE;
1425 
1426 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1427 	    "cannot receive"));
1428 
1429 	if (strchr(destname, '@')) {
1430 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1431 		    "can not specify snapshot name for multi-snapshot stream"));
1432 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
1433 	}
1434 
1435 	assert(drr->drr_type == DRR_BEGIN);
1436 	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
1437 	assert(drr->drr_u.drr_begin.drr_version == DMU_BACKUP_HEADER_VERSION);
1438 
1439 	/*
1440 	 * Read in the nvlist from the stream.
1441 	 */
1442 	if (drr->drr_payloadlen != 0) {
1443 		if (!flags.isprefix) {
1444 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1445 			    "must use -d to receive replication "
1446 			    "(send -R) stream"));
1447 			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
1448 		}
1449 
1450 		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
1451 		    &stream_nv, flags.byteswap, zc);
1452 		if (error) {
1453 			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1454 			goto out;
1455 		}
1456 	}
1457 
1458 	/*
1459 	 * Read in the end record and verify checksum.
1460 	 */
1461 	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
1462 	    flags.byteswap, NULL)))
1463 		goto out;
1464 	if (flags.byteswap) {
1465 		drre.drr_type = BSWAP_32(drre.drr_type);
1466 		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
1467 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
1468 		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
1469 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
1470 		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
1471 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
1472 		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
1473 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
1474 	}
1475 	if (drre.drr_type != DRR_END) {
1476 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1477 		goto out;
1478 	}
1479 	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
1480 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1481 		    "incorrect header checksum"));
1482 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1483 		goto out;
1484 	}
1485 
1486 	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
1487 
1488 	if (drr->drr_payloadlen != 0) {
1489 		nvlist_t *stream_fss;
1490 
1491 		VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
1492 		    &stream_fss));
1493 		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
1494 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1495 			    "couldn't allocate avl tree"));
1496 			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
1497 			goto out;
1498 		}
1499 
1500 		if (fromsnap != NULL) {
1501 			(void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
1502 			if (flags.isprefix) {
1503 				int i = strcspn(drr->drr_u.drr_begin.drr_toname,
1504 				    "/@");
1505 				/* zfs_receive_one() will create_parents() */
1506 				(void) strlcat(tofs,
1507 				    &drr->drr_u.drr_begin.drr_toname[i],
1508 				    ZFS_MAXNAMELEN);
1509 				*strchr(tofs, '@') = '\0';
1510 			}
1511 			softerr = recv_incremental_replication(hdl, tofs,
1512 			    flags, stream_nv, stream_avl);
1513 		}
1514 	}
1515 
1516 
1517 	/* Finally, receive each contained stream */
1518 	do {
1519 		/*
1520 		 * we should figure out if it has a recoverable
1521 		 * error, in which case do a recv_skip() and drive on.
1522 		 * Note, if we fail due to already having this guid,
1523 		 * zfs_receive_one() will take care of it (ie,
1524 		 * recv_skip() and return 0).
1525 		 */
1526 		error = zfs_receive_impl(hdl, destname, flags, fd,
1527 		    stream_avl, top_zfs);
1528 		if (error == ENODATA) {
1529 			error = 0;
1530 			break;
1531 		}
1532 		anyerr |= error;
1533 	} while (error == 0);
1534 
1535 	if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
1536 		/*
1537 		 * Now that we have the fs's they sent us, try the
1538 		 * renames again.
1539 		 */
1540 		softerr = recv_incremental_replication(hdl, tofs, flags,
1541 		    stream_nv, stream_avl);
1542 	}
1543 
1544 out:
1545 	fsavl_destroy(stream_avl);
1546 	if (stream_nv)
1547 		nvlist_free(stream_nv);
1548 	if (softerr)
1549 		error = -2;
1550 	if (anyerr)
1551 		error = -1;
1552 	return (error);
1553 }
1554 
1555 static int
1556 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
1557 {
1558 	dmu_replay_record_t *drr;
1559 	void *buf = malloc(1<<20);
1560 
1561 	/* XXX would be great to use lseek if possible... */
1562 	drr = buf;
1563 
1564 	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
1565 	    byteswap, NULL) == 0) {
1566 		if (byteswap)
1567 			drr->drr_type = BSWAP_32(drr->drr_type);
1568 
1569 		switch (drr->drr_type) {
1570 		case DRR_BEGIN:
1571 			/* NB: not to be used on v2 stream packages */
1572 			assert(drr->drr_payloadlen == 0);
1573 			break;
1574 
1575 		case DRR_END:
1576 			free(buf);
1577 			return (0);
1578 
1579 		case DRR_OBJECT:
1580 			if (byteswap) {
1581 				drr->drr_u.drr_object.drr_bonuslen =
1582 				    BSWAP_32(drr->drr_u.drr_object.
1583 				    drr_bonuslen);
1584 			}
1585 			(void) recv_read(hdl, fd, buf,
1586 			    P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
1587 			    B_FALSE, NULL);
1588 			break;
1589 
1590 		case DRR_WRITE:
1591 			if (byteswap) {
1592 				drr->drr_u.drr_write.drr_length =
1593 				    BSWAP_64(drr->drr_u.drr_write.drr_length);
1594 			}
1595 			(void) recv_read(hdl, fd, buf,
1596 			    drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
1597 			break;
1598 
1599 		case DRR_FREEOBJECTS:
1600 		case DRR_FREE:
1601 			break;
1602 
1603 		default:
1604 			assert(!"invalid record type");
1605 		}
1606 	}
1607 
1608 	free(buf);
1609 	return (-1);
1610 }
1611 
1612 /*
1613  * Restores a backup of tosnap from the file descriptor specified by infd.
1614  */
1615 static int
1616 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
1617     recvflags_t flags, dmu_replay_record_t *drr,
1618     dmu_replay_record_t *drr_noswap, avl_tree_t *stream_avl,
1619     char **top_zfs)
1620 {
1621 	zfs_cmd_t zc = { 0 };
1622 	time_t begin_time;
1623 	int ioctl_err, ioctl_errno, err, choplen;
1624 	char *cp;
1625 	struct drr_begin *drrb = &drr->drr_u.drr_begin;
1626 	char errbuf[1024];
1627 	char chopprefix[ZFS_MAXNAMELEN];
1628 	boolean_t newfs = B_FALSE;
1629 	boolean_t stream_wantsnewfs;
1630 	uint64_t parent_snapguid = 0;
1631 	prop_changelist_t *clp = NULL;
1632 	nvlist_t *snapprops_nvlist = NULL;
1633 
1634 	begin_time = time(NULL);
1635 
1636 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1637 	    "cannot receive"));
1638 
1639 	if (stream_avl != NULL) {
1640 		char *snapname;
1641 		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
1642 		    &snapname);
1643 		nvlist_t *props;
1644 		int ret;
1645 
1646 		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
1647 		    &parent_snapguid);
1648 		err = nvlist_lookup_nvlist(fs, "props", &props);
1649 		if (err)
1650 			VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
1651 
1652 		if (flags.canmountoff) {
1653 			VERIFY(0 == nvlist_add_uint64(props,
1654 			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
1655 		}
1656 		ret = zcmd_write_src_nvlist(hdl, &zc, props);
1657 		if (err)
1658 			nvlist_free(props);
1659 
1660 		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
1661 			VERIFY(0 == nvlist_lookup_nvlist(props,
1662 			    snapname, &snapprops_nvlist));
1663 		}
1664 
1665 		if (ret != 0)
1666 			return (-1);
1667 	}
1668 
1669 	/*
1670 	 * Determine how much of the snapshot name stored in the stream
1671 	 * we are going to tack on to the name they specified on the
1672 	 * command line, and how much we are going to chop off.
1673 	 *
1674 	 * If they specified a snapshot, chop the entire name stored in
1675 	 * the stream.
1676 	 */
1677 	(void) strcpy(chopprefix, drrb->drr_toname);
1678 	if (flags.isprefix) {
1679 		/*
1680 		 * They specified a fs with -d, we want to tack on
1681 		 * everything but the pool name stored in the stream
1682 		 */
1683 		if (strchr(tosnap, '@')) {
1684 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1685 			    "argument - snapshot not allowed with -d"));
1686 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
1687 		}
1688 		cp = strchr(chopprefix, '/');
1689 		if (cp == NULL)
1690 			cp = strchr(chopprefix, '@');
1691 		*cp = '\0';
1692 	} else if (strchr(tosnap, '@') == NULL) {
1693 		/*
1694 		 * If they specified a filesystem without -d, we want to
1695 		 * tack on everything after the fs specified in the
1696 		 * first name from the stream.
1697 		 */
1698 		cp = strchr(chopprefix, '@');
1699 		*cp = '\0';
1700 	}
1701 	choplen = strlen(chopprefix);
1702 
1703 	/*
1704 	 * Determine name of destination snapshot, store in zc_value.
1705 	 */
1706 	(void) strcpy(zc.zc_value, tosnap);
1707 	(void) strncat(zc.zc_value, drrb->drr_toname+choplen,
1708 	    sizeof (zc.zc_value));
1709 	if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
1710 		zcmd_free_nvlists(&zc);
1711 		return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
1712 	}
1713 
1714 	/*
1715 	 * Determine the name of the origin snapshot, store in zc_string.
1716 	 */
1717 	if (drrb->drr_flags & DRR_FLAG_CLONE) {
1718 		if (guid_to_name(hdl, tosnap,
1719 		    drrb->drr_fromguid, zc.zc_string) != 0) {
1720 			zcmd_free_nvlists(&zc);
1721 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1722 			    "local origin for clone %s does not exist"),
1723 			    zc.zc_value);
1724 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1725 		}
1726 		if (flags.verbose)
1727 			(void) printf("found clone origin %s\n", zc.zc_string);
1728 	}
1729 
1730 	stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
1731 	    (drrb->drr_flags & DRR_FLAG_CLONE));
1732 
1733 	if (stream_wantsnewfs) {
1734 		/*
1735 		 * if the parent fs does not exist, look for it based on
1736 		 * the parent snap GUID
1737 		 */
1738 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1739 		    "cannot receive new filesystem stream"));
1740 
1741 		(void) strcpy(zc.zc_name, zc.zc_value);
1742 		cp = strrchr(zc.zc_name, '/');
1743 		if (cp)
1744 			*cp = '\0';
1745 		if (cp &&
1746 		    !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1747 			char suffix[ZFS_MAXNAMELEN];
1748 			(void) strcpy(suffix, strrchr(zc.zc_value, '/'));
1749 			if (guid_to_name(hdl, tosnap, parent_snapguid,
1750 			    zc.zc_value) == 0) {
1751 				*strchr(zc.zc_value, '@') = '\0';
1752 				(void) strcat(zc.zc_value, suffix);
1753 			}
1754 		}
1755 	} else {
1756 		/*
1757 		 * if the fs does not exist, look for it based on the
1758 		 * fromsnap GUID
1759 		 */
1760 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1761 		    "cannot receive incremental stream"));
1762 
1763 		(void) strcpy(zc.zc_name, zc.zc_value);
1764 		*strchr(zc.zc_name, '@') = '\0';
1765 
1766 		if (!zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1767 			char snap[ZFS_MAXNAMELEN];
1768 			(void) strcpy(snap, strchr(zc.zc_value, '@'));
1769 			if (guid_to_name(hdl, tosnap, drrb->drr_fromguid,
1770 			    zc.zc_value) == 0) {
1771 				*strchr(zc.zc_value, '@') = '\0';
1772 				(void) strcat(zc.zc_value, snap);
1773 			}
1774 		}
1775 	}
1776 
1777 	(void) strcpy(zc.zc_name, zc.zc_value);
1778 	*strchr(zc.zc_name, '@') = '\0';
1779 
1780 	if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
1781 		zfs_handle_t *zhp;
1782 		/*
1783 		 * Destination fs exists.  Therefore this should either
1784 		 * be an incremental, or the stream specifies a new fs
1785 		 * (full stream or clone) and they want us to blow it
1786 		 * away (and have therefore specified -F and removed any
1787 		 * snapshots).
1788 		 */
1789 
1790 		if (stream_wantsnewfs) {
1791 			if (!flags.force) {
1792 				zcmd_free_nvlists(&zc);
1793 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1794 				    "destination '%s' exists\n"
1795 				    "must specify -F to overwrite it"),
1796 				    zc.zc_name);
1797 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1798 			}
1799 			if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
1800 			    &zc) == 0) {
1801 				zcmd_free_nvlists(&zc);
1802 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1803 				    "destination has snapshots (eg. %s)\n"
1804 				    "must destroy them to overwrite it"),
1805 				    zc.zc_name);
1806 				return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1807 			}
1808 		}
1809 
1810 		if ((zhp = zfs_open(hdl, zc.zc_name,
1811 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
1812 			zcmd_free_nvlists(&zc);
1813 			return (-1);
1814 		}
1815 
1816 		if (stream_wantsnewfs &&
1817 		    zhp->zfs_dmustats.dds_origin[0]) {
1818 			zcmd_free_nvlists(&zc);
1819 			zfs_close(zhp);
1820 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1821 			    "destination '%s' is a clone\n"
1822 			    "must destroy it to overwrite it"),
1823 			    zc.zc_name);
1824 			return (zfs_error(hdl, EZFS_EXISTS, errbuf));
1825 		}
1826 
1827 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
1828 		    stream_wantsnewfs) {
1829 			/* We can't do online recv in this case */
1830 			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
1831 			if (clp == NULL) {
1832 				zfs_close(zhp);
1833 				zcmd_free_nvlists(&zc);
1834 				return (-1);
1835 			}
1836 			if (changelist_prefix(clp) != 0) {
1837 				changelist_free(clp);
1838 				zfs_close(zhp);
1839 				zcmd_free_nvlists(&zc);
1840 				return (-1);
1841 			}
1842 		}
1843 		if (!flags.dryrun && zhp->zfs_type == ZFS_TYPE_VOLUME &&
1844 		    zvol_remove_link(hdl, zhp->zfs_name) != 0) {
1845 			zfs_close(zhp);
1846 			zcmd_free_nvlists(&zc);
1847 			return (-1);
1848 		}
1849 		zfs_close(zhp);
1850 	} else {
1851 		/*
1852 		 * Destination filesystem does not exist.  Therefore we better
1853 		 * be creating a new filesystem (either from a full backup, or
1854 		 * a clone).  It would therefore be invalid if the user
1855 		 * specified only the pool name (i.e. if the destination name
1856 		 * contained no slash character).
1857 		 */
1858 		if (!stream_wantsnewfs ||
1859 		    (cp = strrchr(zc.zc_name, '/')) == NULL) {
1860 			zcmd_free_nvlists(&zc);
1861 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1862 			    "destination '%s' does not exist"), zc.zc_name);
1863 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1864 		}
1865 
1866 		/*
1867 		 * Trim off the final dataset component so we perform the
1868 		 * recvbackup ioctl to the filesystems's parent.
1869 		 */
1870 		*cp = '\0';
1871 
1872 		if (flags.isprefix && !flags.dryrun &&
1873 		    create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
1874 			zcmd_free_nvlists(&zc);
1875 			return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
1876 		}
1877 
1878 		newfs = B_TRUE;
1879 	}
1880 
1881 	zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
1882 	zc.zc_cookie = infd;
1883 	zc.zc_guid = flags.force;
1884 	if (flags.verbose) {
1885 		(void) printf("%s %s stream of %s into %s\n",
1886 		    flags.dryrun ? "would receive" : "receiving",
1887 		    drrb->drr_fromguid ? "incremental" : "full",
1888 		    drrb->drr_toname, zc.zc_value);
1889 		(void) fflush(stdout);
1890 	}
1891 
1892 	if (flags.dryrun) {
1893 		zcmd_free_nvlists(&zc);
1894 		return (recv_skip(hdl, infd, flags.byteswap));
1895 	}
1896 
1897 	err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
1898 	ioctl_errno = errno;
1899 	zcmd_free_nvlists(&zc);
1900 
1901 	if (err == 0 && snapprops_nvlist) {
1902 		zfs_cmd_t zc2 = { 0 };
1903 
1904 		(void) strcpy(zc2.zc_name, zc.zc_value);
1905 		if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
1906 			(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
1907 			zcmd_free_nvlists(&zc2);
1908 		}
1909 	}
1910 
1911 	if (err && (ioctl_errno == ENOENT || ioctl_errno == ENODEV)) {
1912 		/*
1913 		 * It may be that this snapshot already exists,
1914 		 * in which case we want to consume & ignore it
1915 		 * rather than failing.
1916 		 */
1917 		avl_tree_t *local_avl;
1918 		nvlist_t *local_nv, *fs;
1919 		char *cp = strchr(zc.zc_value, '@');
1920 
1921 		/*
1922 		 * XXX Do this faster by just iterating over snaps in
1923 		 * this fs.  Also if zc_value does not exist, we will
1924 		 * get a strange "does not exist" error message.
1925 		 */
1926 		*cp = '\0';
1927 		if (gather_nvlist(hdl, zc.zc_value, NULL, NULL,
1928 		    &local_nv, &local_avl) == 0) {
1929 			*cp = '@';
1930 			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
1931 			fsavl_destroy(local_avl);
1932 			nvlist_free(local_nv);
1933 
1934 			if (fs != NULL) {
1935 				if (flags.verbose) {
1936 					(void) printf("snap %s already exists; "
1937 					    "ignoring\n", zc.zc_value);
1938 				}
1939 				ioctl_err = recv_skip(hdl, infd,
1940 				    flags.byteswap);
1941 			}
1942 		}
1943 		*cp = '@';
1944 	}
1945 
1946 
1947 	if (ioctl_err != 0) {
1948 		switch (ioctl_errno) {
1949 		case ENODEV:
1950 			cp = strchr(zc.zc_value, '@');
1951 			*cp = '\0';
1952 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1953 			    "most recent snapshot of %s does not\n"
1954 			    "match incremental source"), zc.zc_value);
1955 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
1956 			*cp = '@';
1957 			break;
1958 		case ETXTBSY:
1959 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1960 			    "destination %s has been modified\n"
1961 			    "since most recent snapshot"), zc.zc_name);
1962 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
1963 			break;
1964 		case EEXIST:
1965 			cp = strchr(zc.zc_value, '@');
1966 			if (newfs) {
1967 				/* it's the containing fs that exists */
1968 				*cp = '\0';
1969 			}
1970 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1971 			    "destination already exists"));
1972 			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
1973 			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
1974 			    zc.zc_value);
1975 			*cp = '@';
1976 			break;
1977 		case EINVAL:
1978 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1979 			break;
1980 		case ECKSUM:
1981 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1982 			    "invalid stream (checksum mismatch)"));
1983 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
1984 			break;
1985 		default:
1986 			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
1987 		}
1988 	}
1989 
1990 	/*
1991 	 * Mount or recreate the /dev links for the target filesystem
1992 	 * (if created, or if we tore them down to do an incremental
1993 	 * restore), and the /dev links for the new snapshot (if
1994 	 * created). Also mount any children of the target filesystem
1995 	 * if we did a replication receive (indicated by stream_avl
1996 	 * being non-NULL).
1997 	 */
1998 	cp = strchr(zc.zc_value, '@');
1999 	if (cp && (ioctl_err == 0 || !newfs)) {
2000 		zfs_handle_t *h;
2001 
2002 		*cp = '\0';
2003 		h = zfs_open(hdl, zc.zc_value,
2004 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
2005 		if (h != NULL) {
2006 			if (h->zfs_type == ZFS_TYPE_VOLUME) {
2007 				*cp = '@';
2008 				err = zvol_create_link(hdl, h->zfs_name);
2009 				if (err == 0 && ioctl_err == 0)
2010 					err = zvol_create_link(hdl,
2011 					    zc.zc_value);
2012 			} else if (newfs || stream_avl) {
2013 				/*
2014 				 * Track the first/top of hierarchy fs,
2015 				 * for mounting and sharing later.
2016 				 */
2017 				if (top_zfs && *top_zfs == NULL)
2018 					*top_zfs = zfs_strdup(hdl, zc.zc_value);
2019 			}
2020 			zfs_close(h);
2021 		}
2022 		*cp = '@';
2023 	}
2024 
2025 	if (clp) {
2026 		err |= changelist_postfix(clp);
2027 		changelist_free(clp);
2028 	}
2029 
2030 	if (err || ioctl_err)
2031 		return (-1);
2032 
2033 	if (flags.verbose) {
2034 		char buf1[64];
2035 		char buf2[64];
2036 		uint64_t bytes = zc.zc_cookie;
2037 		time_t delta = time(NULL) - begin_time;
2038 		if (delta == 0)
2039 			delta = 1;
2040 		zfs_nicenum(bytes, buf1, sizeof (buf1));
2041 		zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
2042 
2043 		(void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
2044 		    buf1, delta, buf2);
2045 	}
2046 
2047 	return (0);
2048 }
2049 
2050 static int
2051 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2052     int infd, avl_tree_t *stream_avl, char **top_zfs)
2053 {
2054 	int err;
2055 	dmu_replay_record_t drr, drr_noswap;
2056 	struct drr_begin *drrb = &drr.drr_u.drr_begin;
2057 	char errbuf[1024];
2058 	zio_cksum_t zcksum = { 0 };
2059 
2060 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2061 	    "cannot receive"));
2062 
2063 	if (flags.isprefix &&
2064 	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
2065 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
2066 		    "(%s) does not exist"), tosnap);
2067 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
2068 	}
2069 
2070 	/* read in the BEGIN record */
2071 	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
2072 	    &zcksum)))
2073 		return (err);
2074 
2075 	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
2076 		/* It's the double end record at the end of a package */
2077 		return (ENODATA);
2078 	}
2079 
2080 	/* the kernel needs the non-byteswapped begin record */
2081 	drr_noswap = drr;
2082 
2083 	flags.byteswap = B_FALSE;
2084 	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
2085 		/*
2086 		 * We computed the checksum in the wrong byteorder in
2087 		 * recv_read() above; do it again correctly.
2088 		 */
2089 		bzero(&zcksum, sizeof (zio_cksum_t));
2090 		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
2091 		flags.byteswap = B_TRUE;
2092 
2093 		drr.drr_type = BSWAP_32(drr.drr_type);
2094 		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
2095 		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
2096 		drrb->drr_version = BSWAP_64(drrb->drr_version);
2097 		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
2098 		drrb->drr_type = BSWAP_32(drrb->drr_type);
2099 		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
2100 		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
2101 		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
2102 	}
2103 
2104 	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
2105 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2106 		    "stream (bad magic number)"));
2107 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2108 	}
2109 
2110 	if (strchr(drrb->drr_toname, '@') == NULL) {
2111 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2112 		    "stream (bad snapshot name)"));
2113 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2114 	}
2115 
2116 	if (drrb->drr_version == DMU_BACKUP_STREAM_VERSION) {
2117 		return (zfs_receive_one(hdl, infd, tosnap, flags,
2118 		    &drr, &drr_noswap, stream_avl, top_zfs));
2119 	} else if (drrb->drr_version == DMU_BACKUP_HEADER_VERSION) {
2120 		return (zfs_receive_package(hdl, infd, tosnap, flags,
2121 		    &drr, &zcksum, top_zfs));
2122 	} else {
2123 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2124 		    "stream is unsupported version %llu"),
2125 		    drrb->drr_version);
2126 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2127 	}
2128 }
2129 
2130 /*
2131  * Restores a backup of tosnap from the file descriptor specified by infd.
2132  * Return 0 on total success, -2 if some things couldn't be
2133  * destroyed/renamed/promoted, -1 if some things couldn't be received.
2134  * (-1 will override -2).
2135  */
2136 int
2137 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t flags,
2138     int infd, avl_tree_t *stream_avl)
2139 {
2140 	char *top_zfs = NULL;
2141 	int err;
2142 
2143 	err = zfs_receive_impl(hdl, tosnap, flags, infd, stream_avl, &top_zfs);
2144 
2145 	if (err == 0 && !flags.nomount && top_zfs) {
2146 		zfs_handle_t *zhp;
2147 		prop_changelist_t *clp;
2148 
2149 		zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
2150 		if (zhp != NULL) {
2151 			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
2152 			    CL_GATHER_MOUNT_ALWAYS, 0);
2153 			zfs_close(zhp);
2154 			if (clp != NULL) {
2155 				/* mount and share received datasets */
2156 				err = changelist_postfix(clp);
2157 				changelist_free(clp);
2158 			}
2159 		}
2160 		if (zhp == NULL || clp == NULL || err)
2161 			err = -1;
2162 	}
2163 	if (top_zfs)
2164 		free(top_zfs);
2165 
2166 	return (err);
2167 }
2168