xref: /freebsd/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2011, 2020 by Delphix. All rights reserved.
25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26  * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
27  * All rights reserved
28  * Copyright (c) 2013 Steven Hartland. All rights reserved.
29  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
30  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
31  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
32  * Copyright (c) 2019 Datto Inc.
33  * Copyright (c) 2024, Klara, Inc.
34  */
35 
36 #include <assert.h>
37 #include <ctype.h>
38 #include <errno.h>
39 #include <libintl.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <stddef.h>
45 #include <fcntl.h>
46 #include <sys/mount.h>
47 #include <sys/mntent.h>
48 #include <sys/mnttab.h>
49 #include <sys/avl.h>
50 #include <sys/debug.h>
51 #include <sys/stat.h>
52 #include <pthread.h>
53 #include <umem.h>
54 #include <time.h>
55 
56 #include <libzfs.h>
57 #include <libzfs_core.h>
58 #include <libzutil.h>
59 
60 #include "zfs_namecheck.h"
61 #include "zfs_prop.h"
62 #include "zfs_fletcher.h"
63 #include "libzfs_impl.h"
64 #include <cityhash.h>
65 #include <zlib.h>
66 #include <sys/zio_checksum.h>
67 #include <sys/dsl_crypt.h>
68 #include <sys/ddt.h>
69 #include <sys/socket.h>
70 #include <sys/sha2.h>
71 
/*
 * Forward declarations of file-local (static) helpers that are referenced
 * before their definitions.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
    recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **,
    const char *, nvlist_t *);
static int guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
    uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
    uint64_t num_redact_snaps, char *name);
static int guid_to_name(libzfs_handle_t *, const char *,
    uint64_t, boolean_t, char *);
80 
/*
 * Argument block handed to send_progress_thread().
 */
typedef struct progress_arg {
	/* dataset handle whose send progress is polled */
	zfs_handle_t *pa_zhp;
	/* descriptor passed to zfs_send_progress() */
	int pa_fd;
	/* emit tab-separated, machine-readable progress lines */
	boolean_t pa_parsable;
	/* selects the "BYTES" column heading instead of " SENT" */
	boolean_t pa_estimate;
	/* a value >= 2 adds a block-count column */
	int pa_verbosity;
	/* report progress through the process title */
	boolean_t pa_astitle;
	/* print a progress line on every timer tick */
	boolean_t pa_progress;
	/* total size used to compute the percentage in the process title */
	uint64_t pa_size;
} progress_arg_t;
91 
92 static int
93 dump_record(dmu_replay_record_t *drr, void *payload, size_t payload_len,
94     zio_cksum_t *zc, int outfd)
95 {
96 	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
97 	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
98 	fletcher_4_incremental_native(drr,
99 	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
100 	if (drr->drr_type != DRR_BEGIN) {
101 		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
102 		    drr_checksum.drr_checksum));
103 		drr->drr_u.drr_checksum.drr_checksum = *zc;
104 	}
105 	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
106 	    sizeof (zio_cksum_t), zc);
107 	if (write(outfd, drr, sizeof (*drr)) == -1)
108 		return (errno);
109 	if (payload_len != 0) {
110 		fletcher_4_incremental_native(payload, payload_len, zc);
111 		if (write(outfd, payload, payload_len) == -1)
112 			return (errno);
113 	}
114 	return (0);
115 }
116 
/*
 * Routines for dealing with the AVL tree of fs-nvlists
 */
/* One tree entry per snapshot; entries are ordered by fn_guid. */
typedef struct fsavl_node {
	/* AVL linkage */
	avl_node_t fn_node;
	/* nvlist of the filesystem that lists this snapshot */
	nvlist_t *fn_nvfs;
	/* snapshot name; points at the nvpair name, not a private copy */
	const char *fn_snapname;
	/* snapshot guid (the sort key) */
	uint64_t fn_guid;
} fsavl_node_t;
126 
127 static int
128 fsavl_compare(const void *arg1, const void *arg2)
129 {
130 	const fsavl_node_t *fn1 = (const fsavl_node_t *)arg1;
131 	const fsavl_node_t *fn2 = (const fsavl_node_t *)arg2;
132 
133 	return (TREE_CMP(fn1->fn_guid, fn2->fn_guid));
134 }
135 
136 /*
137  * Given the GUID of a snapshot, find its containing filesystem and
138  * (optionally) name.
139  */
140 static nvlist_t *
141 fsavl_find(avl_tree_t *avl, uint64_t snapguid, const char **snapname)
142 {
143 	fsavl_node_t fn_find;
144 	fsavl_node_t *fn;
145 
146 	fn_find.fn_guid = snapguid;
147 
148 	fn = avl_find(avl, &fn_find, NULL);
149 	if (fn) {
150 		if (snapname)
151 			*snapname = fn->fn_snapname;
152 		return (fn->fn_nvfs);
153 	}
154 	return (NULL);
155 }
156 
157 static void
158 fsavl_destroy(avl_tree_t *avl)
159 {
160 	fsavl_node_t *fn;
161 	void *cookie;
162 
163 	if (avl == NULL)
164 		return;
165 
166 	cookie = NULL;
167 	while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
168 		free(fn);
169 	avl_destroy(avl);
170 	free(avl);
171 }
172 
173 /*
174  * Given an nvlist, produce an avl tree of snapshots, ordered by guid
175  */
176 static avl_tree_t *
177 fsavl_create(nvlist_t *fss)
178 {
179 	avl_tree_t *fsavl;
180 	nvpair_t *fselem = NULL;
181 
182 	if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
183 		return (NULL);
184 
185 	avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
186 	    offsetof(fsavl_node_t, fn_node));
187 
188 	while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
189 		nvlist_t *nvfs, *snaps;
190 		nvpair_t *snapelem = NULL;
191 
192 		nvfs = fnvpair_value_nvlist(fselem);
193 		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
194 
195 		while ((snapelem =
196 		    nvlist_next_nvpair(snaps, snapelem)) != NULL) {
197 			fsavl_node_t *fn;
198 
199 			if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
200 				fsavl_destroy(fsavl);
201 				return (NULL);
202 			}
203 			fn->fn_nvfs = nvfs;
204 			fn->fn_snapname = nvpair_name(snapelem);
205 			fn->fn_guid = fnvpair_value_uint64(snapelem);
206 
207 			/*
208 			 * Note: if there are multiple snaps with the
209 			 * same GUID, we ignore all but one.
210 			 */
211 			avl_index_t where = 0;
212 			if (avl_find(fsavl, fn, &where) == NULL)
213 				avl_insert(fsavl, fn, where);
214 			else
215 				free(fn);
216 		}
217 	}
218 
219 	return (fsavl);
220 }
221 
/*
 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 */
/*
 * Traversal state shared by send_iterate_fs() and send_iterate_snap()
 * while the "fss" nvlist is being built.
 */
typedef struct send_data {
	/*
	 * assigned inside every recursive call,
	 * restored from *_save on return:
	 *
	 * guid of fromsnap snapshot in parent dataset
	 * txg of fromsnap snapshot in current dataset
	 * txg of tosnap snapshot in current dataset
	 */

	uint64_t parent_fromsnap_guid;
	uint64_t fromsnap_txg;
	uint64_t tosnap_txg;

	/* the nvlists get accumulated during depth-first traversal */
	nvlist_t *parent_snaps;
	nvlist_t *fss;
	nvlist_t *snapprops;
	nvlist_t *snapholds;	/* user holds */

	/* send-receive configuration, does not change during traversal */
	const char *fsname;
	const char *fromsnap;
	const char *tosnap;
	boolean_t recursive;
	boolean_t raw;
	boolean_t doall;
	boolean_t replicate;
	boolean_t skipmissing;
	boolean_t verbose;
	boolean_t backup;
	/* seenfrom and seento are updated by send_iterate_snap() */
	boolean_t seenfrom;
	boolean_t seento;
	boolean_t holds;	/* were holds requested with send -h */
	boolean_t props;

	/*
	 * The header nvlist is of the following format:
	 * {
	 *   "tosnap" -> string
	 *   "fromsnap" -> string (if incremental)
	 *   "fss" -> {
	 *	id -> {
	 *
	 *	 "name" -> string (full name; for debugging)
	 *	 "parentfromsnap" -> number (guid of fromsnap in parent)
	 *
	 *	 "props" -> { name -> value (only if set here) }
	 *	 "snaps" -> { name (lastname) -> number (guid) }
	 *	 "snapprops" -> { name (lastname) -> { name -> value } }
	 *	 "snapholds" -> { name (lastname) -> { holdname -> crtime } }
	 *
	 *	 "origin" -> number (guid) (if clone)
	 *	 "is_encroot" -> boolean
	 *	 "sent" -> boolean (not on-disk)
	 *	}
	 *   }
	 * }
	 *
	 */
} send_data_t;
286 
/* Forward declaration: send_iterate_snap() calls this before it is defined. */
static void
send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv);
289 
290 /*
291  * Collect guid, valid props, optionally holds, etc. of a snapshot.
292  * This interface is intended for use as a zfs_iter_snapshots_v2_sorted visitor.
293  */
294 static int
295 send_iterate_snap(zfs_handle_t *zhp, void *arg)
296 {
297 	send_data_t *sd = arg;
298 	uint64_t guid = zhp->zfs_dmustats.dds_guid;
299 	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
300 	boolean_t isfromsnap, istosnap, istosnapwithnofrom;
301 	char *snapname;
302 	const char *from = sd->fromsnap;
303 	const char *to = sd->tosnap;
304 
305 	snapname = strrchr(zhp->zfs_name, '@');
306 	assert(snapname != NULL);
307 	++snapname;
308 
309 	isfromsnap = (from != NULL && strcmp(from, snapname) == 0);
310 	istosnap = (to != NULL && strcmp(to, snapname) == 0);
311 	istosnapwithnofrom = (istosnap && from == NULL);
312 
313 	if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
314 		if (sd->verbose) {
315 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
316 			    "skipping snapshot %s because it was created "
317 			    "after the destination snapshot (%s)\n"),
318 			    zhp->zfs_name, to);
319 		}
320 		zfs_close(zhp);
321 		return (0);
322 	}
323 
324 	fnvlist_add_uint64(sd->parent_snaps, snapname, guid);
325 
326 	/*
327 	 * NB: if there is no fromsnap here (it's a newly created fs in
328 	 * an incremental replication), we will substitute the tosnap.
329 	 */
330 	if (isfromsnap || (sd->parent_fromsnap_guid == 0 && istosnap))
331 		sd->parent_fromsnap_guid = guid;
332 
333 	if (!sd->recursive) {
334 		/*
335 		 * To allow a doall stream to work properly
336 		 * with a NULL fromsnap
337 		 */
338 		if (sd->doall && from == NULL && !sd->seenfrom)
339 			sd->seenfrom = B_TRUE;
340 
341 		if (!sd->seenfrom && isfromsnap) {
342 			sd->seenfrom = B_TRUE;
343 			zfs_close(zhp);
344 			return (0);
345 		}
346 
347 		if ((sd->seento || !sd->seenfrom) && !istosnapwithnofrom) {
348 			zfs_close(zhp);
349 			return (0);
350 		}
351 
352 		if (istosnap)
353 			sd->seento = B_TRUE;
354 	}
355 
356 	nvlist_t *nv = fnvlist_alloc();
357 	send_iterate_prop(zhp, sd->backup, nv);
358 	fnvlist_add_nvlist(sd->snapprops, snapname, nv);
359 	fnvlist_free(nv);
360 
361 	if (sd->holds) {
362 		nvlist_t *holds;
363 		if (lzc_get_holds(zhp->zfs_name, &holds) == 0) {
364 			fnvlist_add_nvlist(sd->snapholds, snapname, holds);
365 			fnvlist_free(holds);
366 		}
367 	}
368 
369 	zfs_close(zhp);
370 	return (0);
371 }
372 
373 /*
374  * Collect all valid props from the handle snap into an nvlist.
375  */
376 static void
377 send_iterate_prop(zfs_handle_t *zhp, boolean_t received_only, nvlist_t *nv)
378 {
379 	nvlist_t *props;
380 
381 	if (received_only)
382 		props = zfs_get_recvd_props(zhp);
383 	else
384 		props = zhp->zfs_props;
385 
386 	nvpair_t *elem = NULL;
387 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
388 		const char *propname = nvpair_name(elem);
389 		zfs_prop_t prop = zfs_name_to_prop(propname);
390 
391 		if (!zfs_prop_user(propname)) {
392 			/*
393 			 * Realistically, this should never happen.  However,
394 			 * we want the ability to add DSL properties without
395 			 * needing to make incompatible version changes.  We
396 			 * need to ignore unknown properties to allow older
397 			 * software to still send datasets containing these
398 			 * properties, with the unknown properties elided.
399 			 */
400 			if (prop == ZPROP_INVAL)
401 				continue;
402 
403 			if (zfs_prop_readonly(prop))
404 				continue;
405 		}
406 
407 		nvlist_t *propnv = fnvpair_value_nvlist(elem);
408 
409 		boolean_t isspacelimit = (prop == ZFS_PROP_QUOTA ||
410 		    prop == ZFS_PROP_RESERVATION ||
411 		    prop == ZFS_PROP_REFQUOTA ||
412 		    prop == ZFS_PROP_REFRESERVATION);
413 		if (isspacelimit && zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
414 			continue;
415 
416 		const char *source;
417 		if (nvlist_lookup_string(propnv, ZPROP_SOURCE, &source) == 0) {
418 			if (strcmp(source, zhp->zfs_name) != 0 &&
419 			    strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)
420 				continue;
421 		} else {
422 			/*
423 			 * May have no source before SPA_VERSION_RECVD_PROPS,
424 			 * but is still modifiable.
425 			 */
426 			if (!isspacelimit)
427 				continue;
428 		}
429 
430 		if (zfs_prop_user(propname) ||
431 		    zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
432 			const char *value;
433 			value = fnvlist_lookup_string(propnv, ZPROP_VALUE);
434 			fnvlist_add_string(nv, propname, value);
435 		} else {
436 			uint64_t value;
437 			value = fnvlist_lookup_uint64(propnv, ZPROP_VALUE);
438 			fnvlist_add_uint64(nv, propname, value);
439 		}
440 	}
441 }
442 
443 /*
444  * returns snapshot guid
445  * and returns 0 if the snapshot does not exist
446  */
447 static uint64_t
448 get_snap_guid(libzfs_handle_t *hdl, const char *fs, const char *snap)
449 {
450 	char name[MAXPATHLEN + 1];
451 	uint64_t guid = 0;
452 
453 	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
454 		return (guid);
455 
456 	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
457 	zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
458 	if (zhp != NULL) {
459 		guid = zfs_prop_get_int(zhp, ZFS_PROP_GUID);
460 		zfs_close(zhp);
461 	}
462 
463 	return (guid);
464 }
465 
466 /*
467  * returns snapshot creation txg
468  * and returns 0 if the snapshot does not exist
469  */
470 static uint64_t
471 get_snap_txg(libzfs_handle_t *hdl, const char *fs, const char *snap)
472 {
473 	char name[ZFS_MAX_DATASET_NAME_LEN];
474 	uint64_t txg = 0;
475 
476 	if (fs == NULL || fs[0] == '\0' || snap == NULL || snap[0] == '\0')
477 		return (txg);
478 
479 	(void) snprintf(name, sizeof (name), "%s@%s", fs, snap);
480 	if (zfs_dataset_exists(hdl, name, ZFS_TYPE_SNAPSHOT)) {
481 		zfs_handle_t *zhp = zfs_open(hdl, name, ZFS_TYPE_SNAPSHOT);
482 		if (zhp != NULL) {
483 			txg = zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG);
484 			zfs_close(zhp);
485 		}
486 	}
487 
488 	return (txg);
489 }
490 
/*
 * Recursively generate nvlists describing datasets.  See comment
 * for the data structure send_data_t above for description of contents
 * of the nvlist.
 *
 * zhp is the dataset to describe and arg is the send_data_t carrying the
 * traversal state.  The handle is closed on every return path.
 *
 * Returns 0 on success (including the cases where the dataset is merely
 * skipped), EZFS_NOENT when tosnap is missing and must not be skipped,
 * EZFS_NOSPC when the accumulated nvlist would exceed the handle's limit,
 * -1 for origin/encryption failures, or whatever the iteration over the
 * children returns.
 */
static int
send_iterate_fs(zfs_handle_t *zhp, void *arg)
{
	send_data_t *sd = arg;
	nvlist_t *nvfs = NULL, *nv = NULL;
	int rv = 0;
	uint64_t min_txg = 0, max_txg = 0;
	uint64_t txg = zhp->zfs_dmustats.dds_creation_txg;
	uint64_t guid = zhp->zfs_dmustats.dds_guid;
	uint64_t fromsnap_txg, tosnap_txg;
	char guidstring[64];

	/* These fields are restored on return from a recursive call. */
	uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
	uint64_t fromsnap_txg_save = sd->fromsnap_txg;
	uint64_t tosnap_txg_save = sd->tosnap_txg;

	/*
	 * If this dataset has its own fromsnap/tosnap, their txgs replace
	 * the values inherited from the parent; otherwise the parent's
	 * values stay in sd.
	 */
	fromsnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->fromsnap);
	if (fromsnap_txg != 0)
		sd->fromsnap_txg = fromsnap_txg;

	tosnap_txg = get_snap_txg(zhp->zfs_hdl, zhp->zfs_name, sd->tosnap);
	if (tosnap_txg != 0)
		sd->tosnap_txg = tosnap_txg;

	/*
	 * On the send side, if the current dataset does not have tosnap,
	 * perform two additional checks:
	 *
	 * - Skip sending the current dataset if it was created later than
	 *   the parent tosnap.
	 * - Return error if the current dataset was created earlier than
	 *   the parent tosnap, unless --skip-missing specified. Then
	 *   just print a warning.
	 */
	if (sd->tosnap != NULL && tosnap_txg == 0) {
		if (sd->tosnap_txg != 0 && txg > sd->tosnap_txg) {
			if (sd->verbose) {
				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
				    "skipping dataset %s: snapshot %s does "
				    "not exist\n"), zhp->zfs_name, sd->tosnap);
			}
		} else if (sd->skipmissing) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "WARNING: skipping dataset %s and its children:"
			    " snapshot %s does not exist\n"),
			    zhp->zfs_name, sd->tosnap);
		} else {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "cannot send %s@%s%s: snapshot %s@%s does not "
			    "exist\n"), sd->fsname, sd->tosnap, sd->recursive ?
			    dgettext(TEXT_DOMAIN, " recursively") : "",
			    zhp->zfs_name, sd->tosnap);
			rv = EZFS_NOENT;
		}
		/* In all three cases nothing is added to sd->fss. */
		goto out;
	}

	nvfs = fnvlist_alloc();
	fnvlist_add_string(nvfs, "name", zhp->zfs_name);
	fnvlist_add_uint64(nvfs, "parentfromsnap", sd->parent_fromsnap_guid);

	/* For a clone, record the guid of its origin snapshot. */
	if (zhp->zfs_dmustats.dds_origin[0] != '\0') {
		zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
		    zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
		if (origin == NULL) {
			rv = -1;
			goto out;
		}
		fnvlist_add_uint64(nvfs, "origin",
		    origin->zfs_dmustats.dds_guid);
		zfs_close(origin);
	}

	/* Iterate over props. */
	if (sd->props || sd->backup || sd->recursive) {
		nv = fnvlist_alloc();
		send_iterate_prop(zhp, sd->backup, nv);
		fnvlist_add_nvlist(nvfs, "props", nv);
	}
	if (zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) != ZIO_CRYPT_OFF) {
		boolean_t encroot;

		/* Determine if this dataset is an encryption root. */
		if (zfs_crypto_get_encryption_root(zhp, &encroot, NULL) != 0) {
			rv = -1;
			goto out;
		}

		if (encroot)
			fnvlist_add_boolean(nvfs, "is_encroot");

		/*
		 * Encrypted datasets can only be sent with properties if
		 * the raw flag is specified because the receive side doesn't
		 * currently have a mechanism for recursively asking the user
		 * for new encryption parameters.
		 */
		if (!sd->raw) {
			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
			    "cannot send %s@%s: encrypted dataset %s may not "
			    "be sent with properties without the raw flag\n"),
			    sd->fsname, sd->tosnap, zhp->zfs_name);
			rv = -1;
			goto out;
		}

	}

	/*
	 * Iterate over snaps, and set sd->parent_fromsnap_guid.
	 *
	 * If this is a "doall" send, a replicate send or we're just trying
	 * to gather a list of previous snapshots, iterate through all the
	 * snaps in the txg range. Otherwise just look at the one we're
	 * interested in.
	 */
	sd->parent_fromsnap_guid = 0;
	sd->parent_snaps = fnvlist_alloc();
	sd->snapprops = fnvlist_alloc();
	if (sd->holds)
		sd->snapholds = fnvlist_alloc();
	if (sd->doall || sd->replicate || sd->tosnap == NULL) {
		/* A replicate send leaves the txg range unbounded (0, 0). */
		if (!sd->replicate && fromsnap_txg != 0)
			min_txg = fromsnap_txg;
		if (!sd->replicate && tosnap_txg != 0)
			max_txg = tosnap_txg;
		(void) zfs_iter_snapshots_sorted_v2(zhp, 0, send_iterate_snap,
		    sd, min_txg, max_txg);
	} else {
		char snapname[MAXPATHLEN] = { 0 };
		zfs_handle_t *snap;

		(void) snprintf(snapname, sizeof (snapname), "%s@%s",
		    zhp->zfs_name, sd->tosnap);
		if (sd->fromsnap != NULL)
			sd->seenfrom = B_TRUE;
		snap = zfs_open(zhp->zfs_hdl, snapname, ZFS_TYPE_SNAPSHOT);
		/* send_iterate_snap() closes the snapshot handle. */
		if (snap != NULL)
			(void) send_iterate_snap(snap, sd);
	}

	/*
	 * The per-filesystem lists are copied into nvfs, so the working
	 * copies can be released right away.
	 */
	fnvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps);
	fnvlist_free(sd->parent_snaps);
	fnvlist_add_nvlist(nvfs, "snapprops", sd->snapprops);
	fnvlist_free(sd->snapprops);
	if (sd->holds) {
		fnvlist_add_nvlist(nvfs, "snapholds", sd->snapholds);
		fnvlist_free(sd->snapholds);
	}

	/* Do not allow the size of the properties list to exceed the limit */
	if ((fnvlist_size(nvfs) + fnvlist_size(sd->fss)) >
	    zhp->zfs_hdl->libzfs_max_nvlist) {
		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
		    "warning: cannot send %s@%s: the size of the list of "
		    "snapshots and properties is too large to be received "
		    "successfully.\n"
		    "Select a smaller number of snapshots to send.\n"),
		    zhp->zfs_name, sd->tosnap);
		rv = EZFS_NOSPC;
		goto out;
	}
	/* Add this fs to nvlist. */
	(void) snprintf(guidstring, sizeof (guidstring),
	    "0x%llx", (longlong_t)guid);
	fnvlist_add_nvlist(sd->fss, guidstring, nvfs);

	/* Iterate over children. */
	if (sd->recursive)
		rv = zfs_iter_filesystems_v2(zhp, 0, send_iterate_fs, sd);

out:
	/* Restore saved fields. */
	sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
	sd->fromsnap_txg = fromsnap_txg_save;
	sd->tosnap_txg = tosnap_txg_save;

	/* Either pointer may still be NULL when an early path was taken. */
	fnvlist_free(nv);
	fnvlist_free(nvfs);

	zfs_close(zhp);
	return (rv);
}
680 
681 static int
682 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
683     const char *tosnap, boolean_t recursive, boolean_t raw, boolean_t doall,
684     boolean_t replicate, boolean_t skipmissing, boolean_t verbose,
685     boolean_t backup, boolean_t holds, boolean_t props, nvlist_t **nvlp,
686     avl_tree_t **avlp)
687 {
688 	zfs_handle_t *zhp;
689 	send_data_t sd = { 0 };
690 	int error;
691 
692 	zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
693 	if (zhp == NULL)
694 		return (EZFS_BADTYPE);
695 
696 	sd.fss = fnvlist_alloc();
697 	sd.fsname = fsname;
698 	sd.fromsnap = fromsnap;
699 	sd.tosnap = tosnap;
700 	sd.recursive = recursive;
701 	sd.raw = raw;
702 	sd.doall = doall;
703 	sd.replicate = replicate;
704 	sd.skipmissing = skipmissing;
705 	sd.verbose = verbose;
706 	sd.backup = backup;
707 	sd.holds = holds;
708 	sd.props = props;
709 
710 	if ((error = send_iterate_fs(zhp, &sd)) != 0) {
711 		fnvlist_free(sd.fss);
712 		if (avlp != NULL)
713 			*avlp = NULL;
714 		*nvlp = NULL;
715 		return (error);
716 	}
717 
718 	if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
719 		fnvlist_free(sd.fss);
720 		*nvlp = NULL;
721 		return (EZFS_NOMEM);
722 	}
723 
724 	*nvlp = sd.fss;
725 	return (0);
726 }
727 
/*
 * Routines specific to "zfs send"
 */
/*
 * State for dumping snapshots during "zfs send".
 * NOTE(review): most fields are consumed by code outside this excerpt;
 * only the members annotated below are described from visible usage.
 */
typedef struct send_dump_data {
	/* these are all just the short snapname (the part after the @) */
	const char *fromsnap;
	const char *tosnap;
	char prevsnap[ZFS_MAX_DATASET_NAME_LEN];
	uint64_t prevsnap_obj;
	boolean_t seenfrom, seento, replicate, doall, fromorigin;
	boolean_t dryrun, parsable, progress, embed_data, std_out;
	boolean_t large_block, compress, raw, holds;
	boolean_t progressastitle;
	int outfd;
	boolean_t err;
	nvlist_t *fss;
	/*
	 * snapshot name -> hold tag, filled in by gather_holds();
	 * NULL when no holds are to be gathered
	 */
	nvlist_t *snapholds;
	avl_tree_t *fsavl;
	snapfilter_cb_t *filter_cb;
	void *filter_cb_arg;
	nvlist_t *debugnv;
	/* tag recorded for each snapshot by gather_holds() */
	char holdtag[ZFS_MAX_DATASET_NAME_LEN];
	int cleanup_fd;
	int verbosity;
	uint64_t size;
} send_dump_data_t;
754 
755 static int
756 zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
757     enum lzc_send_flags flags, uint64_t *spacep)
758 {
759 	assert(snapname != NULL);
760 
761 	int error = lzc_send_space(snapname, from, flags, spacep);
762 	if (error == 0)
763 		return (0);
764 
765 	char errbuf[ERRBUFLEN];
766 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
767 	    "warning: cannot estimate space for '%s'"), snapname);
768 
769 	libzfs_handle_t *hdl = zhp->zfs_hdl;
770 	switch (error) {
771 	case EXDEV:
772 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
773 		    "not an earlier snapshot from the same fs"));
774 		return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
775 
776 	case ENOENT:
777 		if (zfs_dataset_exists(hdl, snapname,
778 		    ZFS_TYPE_SNAPSHOT)) {
779 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
780 			    "incremental source (%s) does not exist"),
781 			    snapname);
782 		}
783 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
784 
785 	case EDQUOT:
786 	case EFBIG:
787 	case EIO:
788 	case ENOLINK:
789 	case ENOSPC:
790 	case ENOSTR:
791 	case ENXIO:
792 	case EPIPE:
793 	case ERANGE:
794 	case EFAULT:
795 	case EROFS:
796 	case EINVAL:
797 		zfs_error_aux(hdl, "%s", zfs_strerror(error));
798 		return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
799 
800 	default:
801 		return (zfs_standard_error(hdl, error, errbuf));
802 	}
803 }
804 
805 /*
806  * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
807  * NULL) to the file descriptor specified by outfd.
808  */
809 static int
810 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
811     boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
812     nvlist_t *debugnv)
813 {
814 	zfs_cmd_t zc = {"\0"};
815 	libzfs_handle_t *hdl = zhp->zfs_hdl;
816 	nvlist_t *thisdbg;
817 
818 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
819 	assert(fromsnap_obj == 0 || !fromorigin);
820 
821 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
822 	zc.zc_cookie = outfd;
823 	zc.zc_obj = fromorigin;
824 	zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
825 	zc.zc_fromobj = fromsnap_obj;
826 	zc.zc_flags = flags;
827 
828 	if (debugnv != NULL) {
829 		thisdbg = fnvlist_alloc();
830 		if (fromsnap != NULL && fromsnap[0] != '\0')
831 			fnvlist_add_string(thisdbg, "fromsnap", fromsnap);
832 	}
833 
834 	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
835 		char errbuf[ERRBUFLEN];
836 		int error = errno;
837 
838 		(void) snprintf(errbuf, sizeof (errbuf), "%s '%s'",
839 		    dgettext(TEXT_DOMAIN, "warning: cannot send"),
840 		    zhp->zfs_name);
841 
842 		if (debugnv != NULL) {
843 			fnvlist_add_uint64(thisdbg, "error", error);
844 			fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
845 			fnvlist_free(thisdbg);
846 		}
847 
848 		switch (error) {
849 		case EXDEV:
850 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
851 			    "not an earlier snapshot from the same fs"));
852 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
853 
854 		case EACCES:
855 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
856 			    "source key must be loaded"));
857 			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
858 
859 		case ENOENT:
860 			if (zfs_dataset_exists(hdl, zc.zc_name,
861 			    ZFS_TYPE_SNAPSHOT)) {
862 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
863 				    "incremental source (@%s) does not exist"),
864 				    zc.zc_value);
865 			}
866 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
867 
868 		case EDQUOT:
869 		case EFBIG:
870 		case EIO:
871 		case ENOLINK:
872 		case ENOSPC:
873 		case ENOSTR:
874 		case ENXIO:
875 		case EPIPE:
876 		case ERANGE:
877 		case EFAULT:
878 		case EROFS:
879 		case EINVAL:
880 			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
881 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
882 
883 		default:
884 			return (zfs_standard_error(hdl, errno, errbuf));
885 		}
886 	}
887 
888 	if (debugnv != NULL) {
889 		fnvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg);
890 		fnvlist_free(thisdbg);
891 	}
892 
893 	return (0);
894 }
895 
896 static void
897 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
898 {
899 	assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
900 
901 	/*
902 	 * zfs_send() only sets snapholds for sends that need them,
903 	 * e.g. replication and doall.
904 	 */
905 	if (sdd->snapholds == NULL)
906 		return;
907 
908 	fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
909 }
910 
911 int
912 zfs_send_progress(zfs_handle_t *zhp, int fd, uint64_t *bytes_written,
913     uint64_t *blocks_visited)
914 {
915 	zfs_cmd_t zc = {"\0"};
916 
917 	if (bytes_written != NULL)
918 		*bytes_written = 0;
919 	if (blocks_visited != NULL)
920 		*blocks_visited = 0;
921 	(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
922 	zc.zc_cookie = fd;
923 	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
924 		return (errno);
925 	if (bytes_written != NULL)
926 		*bytes_written = zc.zc_cookie;
927 	if (blocks_visited != NULL)
928 		*blocks_visited = zc.zc_objset_type;
929 	return (0);
930 }
931 
932 static volatile boolean_t send_progress_thread_signal_duetotimer;
933 static void
934 send_progress_thread_act(int sig, siginfo_t *info, void *ucontext)
935 {
936 	(void) sig, (void) ucontext;
937 	send_progress_thread_signal_duetotimer = info->si_code == SI_TIMER;
938 }
939 
940 struct timer_desirability {
941 	timer_t timer;
942 	boolean_t desired;
943 };
944 static void
945 timer_delete_cleanup(void *timer)
946 {
947 	struct timer_desirability *td = timer;
948 	if (td->desired)
949 		timer_delete(td->timer);
950 }
951 
/*
 * SEND_PROGRESS_THREAD_PARENT_BLOCK(old) blocks, in the calling thread, the
 * signals that send_progress_thread() installs handlers for: SIGUSR1 and,
 * on platforms that define it, SIGINFO.  The previous signal mask is stored
 * through "old", which send_progress_thread_exit() later restores.
 *
 * The expansion is a brace-enclosed block that declares a local named
 * "new".
 */
#ifdef SIGINFO
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO sigaddset(&new, SIGINFO)
#else
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO
#endif
#define	SEND_PROGRESS_THREAD_PARENT_BLOCK(old) { \
	sigset_t new; \
	sigemptyset(&new); \
	sigaddset(&new, SIGUSR1); \
	SEND_PROGRESS_THREAD_PARENT_BLOCK_SIGINFO; \
	pthread_sigmask(SIG_BLOCK, &new, old); \
}
964 
/*
 * Thread body that reports send progress.
 *
 * arg is a progress_arg_t.  The thread installs a handler for SIGUSR1 (and
 * SIGINFO where available), optionally arms a one-second periodic timer
 * that raises SIGUSR1, and then, each time a signal wakes it, queries
 * zfs_send_progress() and prints a line and/or updates the process title.
 *
 * The loop never finishes on its own: the thread ends through
 * pthread_exit() when the progress query fails, or by being cancelled
 * (see send_progress_thread_exit()).  The exit value is an errno cast to a
 * pointer; EINTR and ENOENT from the query are reported as 0.
 */
static void *
send_progress_thread(void *arg)
{
	progress_arg_t *pa = arg;
	zfs_handle_t *zhp = pa->pa_zhp;
	uint64_t bytes;
	uint64_t blocks;
	/* One percent of the total size; divisor for the percentage. */
	uint64_t total = pa->pa_size / 100;
	char buf[16];
	time_t t;
	struct tm tm;
	int err;

	const struct sigaction signal_action =
	    {.sa_sigaction = send_progress_thread_act, .sa_flags = SA_SIGINFO};
	struct sigevent timer_cfg =
	    {.sigev_notify = SIGEV_SIGNAL, .sigev_signo = SIGUSR1};
	/* First expiry after one second, then once every second. */
	const struct itimerspec timer_time =
	    {.it_value = {.tv_sec = 1}, .it_interval = {.tv_sec = 1}};
	struct timer_desirability timer = {};

	sigaction(SIGUSR1, &signal_action, NULL);
#ifdef SIGINFO
	sigaction(SIGINFO, &signal_action, NULL);
#endif

	/* The periodic timer is only needed for -progress / title updates. */
	if ((timer.desired = pa->pa_progress || pa->pa_astitle)) {
		if (timer_create(CLOCK_MONOTONIC, &timer_cfg, &timer.timer))
			return ((void *)(uintptr_t)errno);
		(void) timer_settime(timer.timer, 0, &timer_time, NULL);
	}
	/* Ensures the timer is deleted on pthread_exit() or cancellation. */
	pthread_cleanup_push(timer_delete_cleanup, &timer);

	if (!pa->pa_parsable && pa->pa_progress) {
		(void) fprintf(stderr,
		    "TIME       %s   %sSNAPSHOT %s\n",
		    pa->pa_estimate ? "BYTES" : " SENT",
		    pa->pa_verbosity >= 2 ? "   BLOCKS    " : "",
		    zhp->zfs_name);
	}

	/*
	 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
	 */
	for (;;) {
		/* Sleep until a signal handler has run. */
		pause();
		if ((err = zfs_send_progress(zhp, pa->pa_fd, &bytes,
		    &blocks)) != 0) {
			/* These two are not reported as failures. */
			if (err == EINTR || err == ENOENT)
				err = 0;
			pthread_exit(((void *)(uintptr_t)err));
		}

		(void) time(&t);
		localtime_r(&t, &tm);

		if (pa->pa_astitle) {
			char buf_bytes[16];
			char buf_size[16];
			int pct;
			zfs_nicenum(bytes, buf_bytes, sizeof (buf_bytes));
			zfs_nicenum(pa->pa_size, buf_size, sizeof (buf_size));
			/* total == 0 would divide by zero; show 100%. */
			pct = (total > 0) ? bytes / total : 100;
			zfs_setproctitle("sending %s (%d%%: %s/%s)",
			    zhp->zfs_name, MIN(pct, 100), buf_bytes, buf_size);
		}

		if (pa->pa_verbosity >= 2 && pa->pa_parsable) {
			(void) fprintf(stderr,
			    "%02d:%02d:%02d\t%llu\t%llu\t%s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    (u_longlong_t)bytes, (u_longlong_t)blocks,
			    zhp->zfs_name);
		} else if (pa->pa_verbosity >= 2) {
			zfs_nicenum(bytes, buf, sizeof (buf));
			(void) fprintf(stderr,
			    "%02d:%02d:%02d   %5s    %8llu    %s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    buf, (u_longlong_t)blocks, zhp->zfs_name);
		} else if (pa->pa_parsable) {
			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    (u_longlong_t)bytes, zhp->zfs_name);
		} else if (pa->pa_progress ||
		    !send_progress_thread_signal_duetotimer) {
			/*
			 * Printed on every wakeup when progress output was
			 * requested; otherwise only when the wakeup was
			 * not caused by the timer.
			 */
			zfs_nicebytes(bytes, buf, sizeof (buf));
			(void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
			    tm.tm_hour, tm.tm_min, tm.tm_sec,
			    buf, zhp->zfs_name);
		}
	}
	/*
	 * Not reached, but pthread_cleanup_push() must be lexically paired
	 * with pthread_cleanup_pop().
	 */
	pthread_cleanup_pop(B_TRUE);
	return (NULL);
}
1059 
1060 static boolean_t
1061 send_progress_thread_exit(
1062     libzfs_handle_t *hdl, pthread_t ptid, sigset_t *oldmask)
1063 {
1064 	void *status = NULL;
1065 	(void) pthread_cancel(ptid);
1066 	(void) pthread_join(ptid, &status);
1067 	pthread_sigmask(SIG_SETMASK, oldmask, NULL);
1068 	int error = (int)(uintptr_t)status;
1069 	if (error != 0 && status != PTHREAD_CANCELED)
1070 		return (zfs_standard_error(hdl, error,
1071 		    dgettext(TEXT_DOMAIN, "progress thread exited nonzero")));
1072 	else
1073 		return (B_FALSE);
1074 }
1075 
1076 static void
1077 send_print_verbose(FILE *fout, const char *tosnap, const char *fromsnap,
1078     uint64_t size, boolean_t parsable)
1079 {
1080 	if (parsable) {
1081 		if (fromsnap != NULL) {
1082 			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1083 			    "incremental\t%s\t%s"), fromsnap, tosnap);
1084 		} else {
1085 /*
1086  * Workaround for GCC 12+ with UBSan enabled deficencies.
1087  *
1088  * GCC 12+ invoked with -fsanitize=undefined incorrectly reports the code
1089  * below as violating -Wformat-overflow.
1090  */
1091 #if defined(__GNUC__) && !defined(__clang__) && \
1092 	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1093 #pragma GCC diagnostic push
1094 #pragma GCC diagnostic ignored "-Wformat-overflow"
1095 #endif
1096 			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1097 			    "full\t%s"), tosnap);
1098 #if defined(__GNUC__) && !defined(__clang__) && \
1099 	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1100 #pragma GCC diagnostic pop
1101 #endif
1102 		}
1103 		(void) fprintf(fout, "\t%llu", (longlong_t)size);
1104 	} else {
1105 		if (fromsnap != NULL) {
1106 			if (strchr(fromsnap, '@') == NULL &&
1107 			    strchr(fromsnap, '#') == NULL) {
1108 				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1109 				    "send from @%s to %s"), fromsnap, tosnap);
1110 			} else {
1111 				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1112 				    "send from %s to %s"), fromsnap, tosnap);
1113 			}
1114 		} else {
1115 			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1116 			    "full send of %s"), tosnap);
1117 		}
1118 		if (size != 0) {
1119 			char buf[16];
1120 			zfs_nicebytes(size, buf, sizeof (buf));
1121 /*
1122  * Workaround for GCC 12+ with UBSan enabled deficencies.
1123  *
1124  * GCC 12+ invoked with -fsanitize=undefined incorrectly reports the code
1125  * below as violating -Wformat-overflow.
1126  */
1127 #if defined(__GNUC__) && !defined(__clang__) && \
1128 	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1129 #pragma GCC diagnostic push
1130 #pragma GCC diagnostic ignored "-Wformat-overflow"
1131 #endif
1132 			(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1133 			    " estimated size is %s"), buf);
1134 #if defined(__GNUC__) && !defined(__clang__) && \
1135 	defined(ZFS_UBSAN_ENABLED) && defined(HAVE_FORMAT_OVERFLOW)
1136 #pragma GCC diagnostic pop
1137 #endif
1138 		}
1139 	}
1140 	(void) fprintf(fout, "\n");
1141 }
1142 
1143 /*
1144  * Send a single filesystem snapshot, updating the send dump data.
1145  * This interface is intended for use as a zfs_iter_snapshots_v2_sorted visitor.
1146  */
1147 static int
1148 dump_snapshot(zfs_handle_t *zhp, void *arg)
1149 {
1150 	send_dump_data_t *sdd = arg;
1151 	progress_arg_t pa = { 0 };
1152 	pthread_t tid;
1153 	char *thissnap;
1154 	enum lzc_send_flags flags = 0;
1155 	int err;
1156 	boolean_t isfromsnap, istosnap, fromorigin;
1157 	boolean_t exclude = B_FALSE;
1158 	FILE *fout = sdd->std_out ? stdout : stderr;
1159 
1160 	err = 0;
1161 	thissnap = strchr(zhp->zfs_name, '@') + 1;
1162 	isfromsnap = (sdd->fromsnap != NULL &&
1163 	    strcmp(sdd->fromsnap, thissnap) == 0);
1164 
1165 	if (!sdd->seenfrom && isfromsnap) {
1166 		gather_holds(zhp, sdd);
1167 		sdd->seenfrom = B_TRUE;
1168 		(void) strlcpy(sdd->prevsnap, thissnap, sizeof (sdd->prevsnap));
1169 		sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1170 		zfs_close(zhp);
1171 		return (0);
1172 	}
1173 
1174 	if (sdd->seento || !sdd->seenfrom) {
1175 		zfs_close(zhp);
1176 		return (0);
1177 	}
1178 
1179 	istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1180 	if (istosnap)
1181 		sdd->seento = B_TRUE;
1182 
1183 	if (sdd->large_block)
1184 		flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1185 	if (sdd->embed_data)
1186 		flags |= LZC_SEND_FLAG_EMBED_DATA;
1187 	if (sdd->compress)
1188 		flags |= LZC_SEND_FLAG_COMPRESS;
1189 	if (sdd->raw)
1190 		flags |= LZC_SEND_FLAG_RAW;
1191 
1192 	if (!sdd->doall && !isfromsnap && !istosnap) {
1193 		if (sdd->replicate) {
1194 			const char *snapname;
1195 			nvlist_t *snapprops;
1196 			/*
1197 			 * Filter out all intermediate snapshots except origin
1198 			 * snapshots needed to replicate clones.
1199 			 */
1200 			nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1201 			    zhp->zfs_dmustats.dds_guid, &snapname);
1202 
1203 			if (nvfs != NULL) {
1204 				snapprops = fnvlist_lookup_nvlist(nvfs,
1205 				    "snapprops");
1206 				snapprops = fnvlist_lookup_nvlist(snapprops,
1207 				    thissnap);
1208 				exclude = !nvlist_exists(snapprops,
1209 				    "is_clone_origin");
1210 			}
1211 		} else {
1212 			exclude = B_TRUE;
1213 		}
1214 	}
1215 
1216 	/*
1217 	 * If a filter function exists, call it to determine whether
1218 	 * this snapshot will be sent.
1219 	 */
1220 	if (exclude || (sdd->filter_cb != NULL &&
1221 	    sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1222 		/*
1223 		 * This snapshot is filtered out.  Don't send it, and don't
1224 		 * set prevsnap_obj, so it will be as if this snapshot didn't
1225 		 * exist, and the next accepted snapshot will be sent as
1226 		 * an incremental from the last accepted one, or as the
1227 		 * first (and full) snapshot in the case of a replication,
1228 		 * non-incremental send.
1229 		 */
1230 		zfs_close(zhp);
1231 		return (0);
1232 	}
1233 
1234 	gather_holds(zhp, sdd);
1235 	fromorigin = sdd->prevsnap[0] == '\0' &&
1236 	    (sdd->fromorigin || sdd->replicate);
1237 
1238 	if (sdd->verbosity != 0) {
1239 		uint64_t size = 0;
1240 		char fromds[ZFS_MAX_DATASET_NAME_LEN];
1241 
1242 		if (sdd->prevsnap[0] != '\0') {
1243 			(void) strlcpy(fromds, zhp->zfs_name, sizeof (fromds));
1244 			*(strchr(fromds, '@') + 1) = '\0';
1245 			(void) strlcat(fromds, sdd->prevsnap, sizeof (fromds));
1246 		}
1247 		if (zfs_send_space(zhp, zhp->zfs_name,
1248 		    sdd->prevsnap[0] ? fromds : NULL, flags, &size) == 0) {
1249 			send_print_verbose(fout, zhp->zfs_name,
1250 			    sdd->prevsnap[0] ? sdd->prevsnap : NULL,
1251 			    size, sdd->parsable);
1252 			sdd->size += size;
1253 		}
1254 	}
1255 
1256 	if (!sdd->dryrun) {
1257 		/*
1258 		 * If progress reporting is requested, spawn a new thread to
1259 		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1260 		 */
1261 		sigset_t oldmask;
1262 		{
1263 			pa.pa_zhp = zhp;
1264 			pa.pa_fd = sdd->outfd;
1265 			pa.pa_parsable = sdd->parsable;
1266 			pa.pa_estimate = B_FALSE;
1267 			pa.pa_verbosity = sdd->verbosity;
1268 			pa.pa_size = sdd->size;
1269 			pa.pa_astitle = sdd->progressastitle;
1270 			pa.pa_progress = sdd->progress;
1271 
1272 			if ((err = pthread_create(&tid, NULL,
1273 			    send_progress_thread, &pa)) != 0) {
1274 				zfs_close(zhp);
1275 				return (err);
1276 			}
1277 			SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1278 		}
1279 
1280 		err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1281 		    fromorigin, sdd->outfd, flags, sdd->debugnv);
1282 
1283 		if (send_progress_thread_exit(zhp->zfs_hdl, tid, &oldmask))
1284 			return (-1);
1285 	}
1286 
1287 	(void) strlcpy(sdd->prevsnap, thissnap, sizeof (sdd->prevsnap));
1288 	sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1289 	zfs_close(zhp);
1290 	return (err);
1291 }
1292 
1293 /*
1294  * Send all snapshots for a filesystem, updating the send dump data.
1295  */
1296 static int
1297 dump_filesystem(zfs_handle_t *zhp, send_dump_data_t *sdd)
1298 {
1299 	int rv = 0;
1300 	boolean_t missingfrom = B_FALSE;
1301 	zfs_cmd_t zc = {"\0"};
1302 	uint64_t min_txg = 0, max_txg = 0;
1303 
1304 	/*
1305 	 * Make sure the tosnap exists.
1306 	 */
1307 	(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1308 	    zhp->zfs_name, sdd->tosnap);
1309 	if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1310 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1311 		    "WARNING: could not send %s@%s: does not exist\n"),
1312 		    zhp->zfs_name, sdd->tosnap);
1313 		sdd->err = B_TRUE;
1314 		return (0);
1315 	}
1316 
1317 	/*
1318 	 * If this fs does not have fromsnap, and we're doing
1319 	 * recursive, we need to send a full stream from the
1320 	 * beginning (or an incremental from the origin if this
1321 	 * is a clone).  If we're doing non-recursive, then let
1322 	 * them get the error.
1323 	 */
1324 	if (sdd->replicate && sdd->fromsnap) {
1325 		/*
1326 		 * Make sure the fromsnap exists.
1327 		 */
1328 		(void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1329 		    zhp->zfs_name, sdd->fromsnap);
1330 		if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_OBJSET_STATS, &zc) != 0)
1331 			missingfrom = B_TRUE;
1332 	}
1333 
1334 	sdd->seenfrom = sdd->seento = B_FALSE;
1335 	sdd->prevsnap[0] = '\0';
1336 	sdd->prevsnap_obj = 0;
1337 	if (sdd->fromsnap == NULL || missingfrom)
1338 		sdd->seenfrom = B_TRUE;
1339 
1340 	/*
1341 	 * Iterate through all snapshots and process the ones we will be
1342 	 * sending. If we only have a "from" and "to" snapshot to deal
1343 	 * with, we can avoid iterating through all the other snapshots.
1344 	 */
1345 	if (sdd->doall || sdd->replicate || sdd->tosnap == NULL) {
1346 		if (!sdd->replicate) {
1347 			if (sdd->fromsnap != NULL) {
1348 				min_txg = get_snap_txg(zhp->zfs_hdl,
1349 				    zhp->zfs_name, sdd->fromsnap);
1350 			}
1351 			if (sdd->tosnap != NULL) {
1352 				max_txg = get_snap_txg(zhp->zfs_hdl,
1353 				    zhp->zfs_name, sdd->tosnap);
1354 			}
1355 		}
1356 		rv = zfs_iter_snapshots_sorted_v2(zhp, 0, dump_snapshot, sdd,
1357 		    min_txg, max_txg);
1358 	} else {
1359 		char snapname[MAXPATHLEN] = { 0 };
1360 		zfs_handle_t *snap;
1361 
1362 		/* Dump fromsnap. */
1363 		if (!sdd->seenfrom) {
1364 			(void) snprintf(snapname, sizeof (snapname),
1365 			    "%s@%s", zhp->zfs_name, sdd->fromsnap);
1366 			snap = zfs_open(zhp->zfs_hdl, snapname,
1367 			    ZFS_TYPE_SNAPSHOT);
1368 			if (snap != NULL)
1369 				rv = dump_snapshot(snap, sdd);
1370 			else
1371 				rv = errno;
1372 		}
1373 
1374 		/* Dump tosnap. */
1375 		if (rv == 0) {
1376 			(void) snprintf(snapname, sizeof (snapname),
1377 			    "%s@%s", zhp->zfs_name, sdd->tosnap);
1378 			snap = zfs_open(zhp->zfs_hdl, snapname,
1379 			    ZFS_TYPE_SNAPSHOT);
1380 			if (snap != NULL)
1381 				rv = dump_snapshot(snap, sdd);
1382 			else
1383 				rv = errno;
1384 		}
1385 	}
1386 
1387 	if (!sdd->seenfrom) {
1388 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1389 		    "WARNING: could not send %s@%s:\n"
1390 		    "incremental source (%s@%s) does not exist\n"),
1391 		    zhp->zfs_name, sdd->tosnap,
1392 		    zhp->zfs_name, sdd->fromsnap);
1393 		sdd->err = B_TRUE;
1394 	} else if (!sdd->seento) {
1395 		if (sdd->fromsnap) {
1396 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1397 			    "WARNING: could not send %s@%s:\n"
1398 			    "incremental source (%s@%s) "
1399 			    "is not earlier than it\n"),
1400 			    zhp->zfs_name, sdd->tosnap,
1401 			    zhp->zfs_name, sdd->fromsnap);
1402 		} else {
1403 			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1404 			    "WARNING: "
1405 			    "could not send %s@%s: does not exist\n"),
1406 			    zhp->zfs_name, sdd->tosnap);
1407 		}
1408 		sdd->err = B_TRUE;
1409 	}
1410 
1411 	return (rv);
1412 }
1413 
1414 /*
1415  * Send all snapshots for all filesystems in sdd.
1416  */
1417 static int
1418 dump_filesystems(zfs_handle_t *rzhp, send_dump_data_t *sdd)
1419 {
1420 	nvpair_t *fspair;
1421 	boolean_t needagain, progress;
1422 
1423 	if (!sdd->replicate)
1424 		return (dump_filesystem(rzhp, sdd));
1425 
1426 	/* Mark the clone origin snapshots. */
1427 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1428 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1429 		nvlist_t *nvfs;
1430 		uint64_t origin_guid = 0;
1431 
1432 		nvfs = fnvpair_value_nvlist(fspair);
1433 		(void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1434 		if (origin_guid != 0) {
1435 			const char *snapname;
1436 			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1437 			    origin_guid, &snapname);
1438 			if (origin_nv != NULL) {
1439 				nvlist_t *snapprops;
1440 				snapprops = fnvlist_lookup_nvlist(origin_nv,
1441 				    "snapprops");
1442 				snapprops = fnvlist_lookup_nvlist(snapprops,
1443 				    snapname);
1444 				fnvlist_add_boolean(snapprops,
1445 				    "is_clone_origin");
1446 			}
1447 		}
1448 	}
1449 again:
1450 	needagain = progress = B_FALSE;
1451 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1452 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1453 		nvlist_t *fslist, *parent_nv;
1454 		const char *fsname;
1455 		zfs_handle_t *zhp;
1456 		int err;
1457 		uint64_t origin_guid = 0;
1458 		uint64_t parent_guid = 0;
1459 
1460 		fslist = fnvpair_value_nvlist(fspair);
1461 		if (nvlist_lookup_boolean(fslist, "sent") == 0)
1462 			continue;
1463 
1464 		fsname = fnvlist_lookup_string(fslist, "name");
1465 		(void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1466 		(void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1467 		    &parent_guid);
1468 
1469 		if (parent_guid != 0) {
1470 			parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1471 			if (!nvlist_exists(parent_nv, "sent")) {
1472 				/* Parent has not been sent; skip this one. */
1473 				needagain = B_TRUE;
1474 				continue;
1475 			}
1476 		}
1477 
1478 		if (origin_guid != 0) {
1479 			nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1480 			    origin_guid, NULL);
1481 			if (origin_nv != NULL &&
1482 			    !nvlist_exists(origin_nv, "sent")) {
1483 				/*
1484 				 * Origin has not been sent yet;
1485 				 * skip this clone.
1486 				 */
1487 				needagain = B_TRUE;
1488 				continue;
1489 			}
1490 		}
1491 
1492 		zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1493 		if (zhp == NULL)
1494 			return (-1);
1495 		err = dump_filesystem(zhp, sdd);
1496 		fnvlist_add_boolean(fslist, "sent");
1497 		progress = B_TRUE;
1498 		zfs_close(zhp);
1499 		if (err)
1500 			return (err);
1501 	}
1502 	if (needagain) {
1503 		assert(progress);
1504 		goto again;
1505 	}
1506 
1507 	/* Clean out the sent flags in case we reuse this fss. */
1508 	for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1509 	    fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1510 		nvlist_t *fslist;
1511 
1512 		fslist = fnvpair_value_nvlist(fspair);
1513 		(void) nvlist_remove_all(fslist, "sent");
1514 	}
1515 
1516 	return (0);
1517 }
1518 
1519 nvlist_t *
1520 zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl, const char *token)
1521 {
1522 	unsigned int version;
1523 	int nread, i;
1524 	unsigned long long checksum, packed_len;
1525 
1526 	/*
1527 	 * Decode token header, which is:
1528 	 *   <token version>-<checksum of payload>-<uncompressed payload length>
1529 	 * Note that the only supported token version is 1.
1530 	 */
1531 	nread = sscanf(token, "%u-%llx-%llx-",
1532 	    &version, &checksum, &packed_len);
1533 	if (nread != 3) {
1534 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1535 		    "resume token is corrupt (invalid format)"));
1536 		return (NULL);
1537 	}
1538 
1539 	if (version != ZFS_SEND_RESUME_TOKEN_VERSION) {
1540 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1541 		    "resume token is corrupt (invalid version %u)"),
1542 		    version);
1543 		return (NULL);
1544 	}
1545 
1546 	/* Convert hexadecimal representation to binary. */
1547 	token = strrchr(token, '-') + 1;
1548 	int len = strlen(token) / 2;
1549 	unsigned char *compressed = zfs_alloc(hdl, len);
1550 	for (i = 0; i < len; i++) {
1551 		nread = sscanf(token + i * 2, "%2hhx", compressed + i);
1552 		if (nread != 1) {
1553 			free(compressed);
1554 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1555 			    "resume token is corrupt "
1556 			    "(payload is not hex-encoded)"));
1557 			return (NULL);
1558 		}
1559 	}
1560 
1561 	/* Verify checksum. */
1562 	zio_cksum_t cksum;
1563 	fletcher_4_native_varsize(compressed, len, &cksum);
1564 	if (cksum.zc_word[0] != checksum) {
1565 		free(compressed);
1566 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1567 		    "resume token is corrupt (incorrect checksum)"));
1568 		return (NULL);
1569 	}
1570 
1571 	/* Uncompress. */
1572 	void *packed = zfs_alloc(hdl, packed_len);
1573 	uLongf packed_len_long = packed_len;
1574 	if (uncompress(packed, &packed_len_long, compressed, len) != Z_OK ||
1575 	    packed_len_long != packed_len) {
1576 		free(packed);
1577 		free(compressed);
1578 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1579 		    "resume token is corrupt (decompression failed)"));
1580 		return (NULL);
1581 	}
1582 
1583 	/* Unpack nvlist. */
1584 	nvlist_t *nv;
1585 	int error = nvlist_unpack(packed, packed_len, &nv, KM_SLEEP);
1586 	free(packed);
1587 	free(compressed);
1588 	if (error != 0) {
1589 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1590 		    "resume token is corrupt (nvlist_unpack failed)"));
1591 		return (NULL);
1592 	}
1593 	return (nv);
1594 }
1595 
1596 static enum lzc_send_flags
1597 lzc_flags_from_sendflags(const sendflags_t *flags)
1598 {
1599 	enum lzc_send_flags lzc_flags = 0;
1600 
1601 	if (flags->largeblock)
1602 		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1603 	if (flags->embed_data)
1604 		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1605 	if (flags->compress)
1606 		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1607 	if (flags->raw)
1608 		lzc_flags |= LZC_SEND_FLAG_RAW;
1609 	if (flags->saved)
1610 		lzc_flags |= LZC_SEND_FLAG_SAVED;
1611 
1612 	return (lzc_flags);
1613 }
1614 
1615 static int
1616 estimate_size(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
1617     uint64_t resumeobj, uint64_t resumeoff, uint64_t bytes,
1618     const char *redactbook, char *errbuf, uint64_t *sizep)
1619 {
1620 	uint64_t size;
1621 	FILE *fout = flags->dryrun ? stdout : stderr;
1622 	progress_arg_t pa = { 0 };
1623 	int err = 0;
1624 	pthread_t ptid;
1625 	sigset_t oldmask;
1626 
1627 	{
1628 		pa.pa_zhp = zhp;
1629 		pa.pa_fd = fd;
1630 		pa.pa_parsable = flags->parsable;
1631 		pa.pa_estimate = B_TRUE;
1632 		pa.pa_verbosity = flags->verbosity;
1633 
1634 		err = pthread_create(&ptid, NULL,
1635 		    send_progress_thread, &pa);
1636 		if (err != 0) {
1637 			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(errno));
1638 			return (zfs_error(zhp->zfs_hdl,
1639 			    EZFS_THREADCREATEFAILED, errbuf));
1640 		}
1641 		SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1642 	}
1643 
1644 	err = lzc_send_space_resume_redacted(zhp->zfs_name, from,
1645 	    lzc_flags_from_sendflags(flags), resumeobj, resumeoff, bytes,
1646 	    redactbook, fd, &size);
1647 	*sizep = size;
1648 
1649 	if (send_progress_thread_exit(zhp->zfs_hdl, ptid, &oldmask))
1650 		return (-1);
1651 
1652 	if (!flags->progress && !flags->parsable)
1653 		return (err);
1654 
1655 	if (err != 0) {
1656 		zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
1657 		return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
1658 		    errbuf));
1659 	}
1660 	send_print_verbose(fout, zhp->zfs_name, from, size,
1661 	    flags->parsable);
1662 
1663 	if (flags->parsable) {
1664 		(void) fprintf(fout, "size\t%llu\n", (longlong_t)size);
1665 	} else {
1666 		char buf[16];
1667 		zfs_nicenum(size, buf, sizeof (buf));
1668 		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1669 		    "total estimated size is %s\n"), buf);
1670 	}
1671 	return (0);
1672 }
1673 
1674 static boolean_t
1675 redact_snaps_contains(const uint64_t *snaps, uint64_t num_snaps, uint64_t guid)
1676 {
1677 	for (int i = 0; i < num_snaps; i++) {
1678 		if (snaps[i] == guid)
1679 			return (B_TRUE);
1680 	}
1681 	return (B_FALSE);
1682 }
1683 
1684 static boolean_t
1685 redact_snaps_equal(const uint64_t *snaps1, uint64_t num_snaps1,
1686     const uint64_t *snaps2, uint64_t num_snaps2)
1687 {
1688 	if (num_snaps1 != num_snaps2)
1689 		return (B_FALSE);
1690 	for (int i = 0; i < num_snaps1; i++) {
1691 		if (!redact_snaps_contains(snaps2, num_snaps2, snaps1[i]))
1692 			return (B_FALSE);
1693 	}
1694 	return (B_TRUE);
1695 }
1696 
1697 static int
1698 get_bookmarks(const char *path, nvlist_t **bmarksp)
1699 {
1700 	nvlist_t *props = fnvlist_alloc();
1701 	int error;
1702 
1703 	fnvlist_add_boolean(props, "redact_complete");
1704 	fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1705 	error = lzc_get_bookmarks(path, props, bmarksp);
1706 	fnvlist_free(props);
1707 	return (error);
1708 }
1709 
1710 static nvpair_t *
1711 find_redact_pair(nvlist_t *bmarks, const uint64_t *redact_snap_guids,
1712     int num_redact_snaps)
1713 {
1714 	nvpair_t *pair;
1715 
1716 	for (pair = nvlist_next_nvpair(bmarks, NULL); pair;
1717 	    pair = nvlist_next_nvpair(bmarks, pair)) {
1718 
1719 		nvlist_t *bmark = fnvpair_value_nvlist(pair);
1720 		nvlist_t *vallist = fnvlist_lookup_nvlist(bmark,
1721 		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
1722 		uint_t len = 0;
1723 		uint64_t *bmarksnaps = fnvlist_lookup_uint64_array(vallist,
1724 		    ZPROP_VALUE, &len);
1725 		if (redact_snaps_equal(redact_snap_guids,
1726 		    num_redact_snaps, bmarksnaps, len)) {
1727 			break;
1728 		}
1729 	}
1730 	return (pair);
1731 }
1732 
1733 static boolean_t
1734 get_redact_complete(nvpair_t *pair)
1735 {
1736 	nvlist_t *bmark = fnvpair_value_nvlist(pair);
1737 	nvlist_t *vallist = fnvlist_lookup_nvlist(bmark, "redact_complete");
1738 	boolean_t complete = fnvlist_lookup_boolean_value(vallist,
1739 	    ZPROP_VALUE);
1740 
1741 	return (complete);
1742 }
1743 
1744 /*
1745  * Check that the list of redaction snapshots in the bookmark matches the send
1746  * we're resuming, and return whether or not it's complete.
1747  *
1748  * Note that the caller needs to free the contents of *bookname with free() if
1749  * this function returns successfully.
1750  */
1751 static int
1752 find_redact_book(libzfs_handle_t *hdl, const char *path,
1753     const uint64_t *redact_snap_guids, int num_redact_snaps,
1754     char **bookname)
1755 {
1756 	char errbuf[ERRBUFLEN];
1757 	nvlist_t *bmarks;
1758 
1759 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1760 	    "cannot resume send"));
1761 
1762 	int error = get_bookmarks(path, &bmarks);
1763 	if (error != 0) {
1764 		if (error == ESRCH) {
1765 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1766 			    "nonexistent redaction bookmark provided"));
1767 		} else if (error == ENOENT) {
1768 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1769 			    "dataset to be sent no longer exists"));
1770 		} else {
1771 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1772 			    "unknown error: %s"), zfs_strerror(error));
1773 		}
1774 		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1775 	}
1776 	nvpair_t *pair = find_redact_pair(bmarks, redact_snap_guids,
1777 	    num_redact_snaps);
1778 	if (pair == NULL)  {
1779 		fnvlist_free(bmarks);
1780 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1781 		    "no appropriate redaction bookmark exists"));
1782 		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1783 	}
1784 	boolean_t complete = get_redact_complete(pair);
1785 	if (!complete) {
1786 		fnvlist_free(bmarks);
1787 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1788 		    "incomplete redaction bookmark provided"));
1789 		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
1790 	}
1791 	*bookname = strndup(nvpair_name(pair), ZFS_MAX_DATASET_NAME_LEN);
1792 	ASSERT3P(*bookname, !=, NULL);
1793 	fnvlist_free(bmarks);
1794 	return (0);
1795 }
1796 
1797 static enum lzc_send_flags
1798 lzc_flags_from_resume_nvl(nvlist_t *resume_nvl)
1799 {
1800 	enum lzc_send_flags lzc_flags = 0;
1801 
1802 	if (nvlist_exists(resume_nvl, "largeblockok"))
1803 		lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
1804 	if (nvlist_exists(resume_nvl, "embedok"))
1805 		lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
1806 	if (nvlist_exists(resume_nvl, "compressok"))
1807 		lzc_flags |= LZC_SEND_FLAG_COMPRESS;
1808 	if (nvlist_exists(resume_nvl, "rawok"))
1809 		lzc_flags |= LZC_SEND_FLAG_RAW;
1810 	if (nvlist_exists(resume_nvl, "savedok"))
1811 		lzc_flags |= LZC_SEND_FLAG_SAVED;
1812 
1813 	return (lzc_flags);
1814 }
1815 
1816 static int
1817 zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
1818     int outfd, nvlist_t *resume_nvl)
1819 {
1820 	char errbuf[ERRBUFLEN];
1821 	const char *toname;
1822 	const char *fromname = NULL;
1823 	uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
1824 	zfs_handle_t *zhp;
1825 	int error = 0;
1826 	char name[ZFS_MAX_DATASET_NAME_LEN];
1827 	FILE *fout = (flags->verbosity > 0 && flags->dryrun) ? stdout : stderr;
1828 	uint64_t *redact_snap_guids = NULL;
1829 	int num_redact_snaps = 0;
1830 	char *redact_book = NULL;
1831 	uint64_t size = 0;
1832 
1833 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1834 	    "cannot resume send"));
1835 
1836 	if (flags->verbosity != 0) {
1837 		(void) fprintf(fout, dgettext(TEXT_DOMAIN,
1838 		    "resume token contents:\n"));
1839 		nvlist_print(fout, resume_nvl);
1840 	}
1841 
1842 	if (nvlist_lookup_string(resume_nvl, "toname", &toname) != 0 ||
1843 	    nvlist_lookup_uint64(resume_nvl, "object", &resumeobj) != 0 ||
1844 	    nvlist_lookup_uint64(resume_nvl, "offset", &resumeoff) != 0 ||
1845 	    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
1846 	    nvlist_lookup_uint64(resume_nvl, "toguid", &toguid) != 0) {
1847 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1848 		    "resume token is corrupt"));
1849 		return (zfs_error(hdl, EZFS_FAULT, errbuf));
1850 	}
1851 	fromguid = 0;
1852 	(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
1853 
1854 	if (flags->saved) {
1855 		(void) strlcpy(name, toname, sizeof (name));
1856 	} else {
1857 		error = guid_to_name(hdl, toname, toguid, B_FALSE, name);
1858 		if (error != 0) {
1859 			if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
1860 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1861 				    "'%s' is no longer the same snapshot "
1862 				    "used in the initial send"), toname);
1863 			} else {
1864 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1865 				    "'%s' used in the initial send no "
1866 				    "longer exists"), toname);
1867 			}
1868 			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1869 		}
1870 	}
1871 
1872 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1873 	if (zhp == NULL) {
1874 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1875 		    "unable to access '%s'"), name);
1876 		return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1877 	}
1878 
1879 	if (nvlist_lookup_uint64_array(resume_nvl, "book_redact_snaps",
1880 	    &redact_snap_guids, (uint_t *)&num_redact_snaps) != 0) {
1881 		num_redact_snaps = -1;
1882 	}
1883 
1884 	if (fromguid != 0) {
1885 		if (guid_to_name_redact_snaps(hdl, toname, fromguid, B_TRUE,
1886 		    redact_snap_guids, num_redact_snaps, name) != 0) {
1887 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1888 			    "incremental source %#llx no longer exists"),
1889 			    (longlong_t)fromguid);
1890 			return (zfs_error(hdl, EZFS_BADPATH, errbuf));
1891 		}
1892 		fromname = name;
1893 	}
1894 
1895 	redact_snap_guids = NULL;
1896 
1897 	if (nvlist_lookup_uint64_array(resume_nvl,
1898 	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &redact_snap_guids,
1899 	    (uint_t *)&num_redact_snaps) == 0) {
1900 		char path[ZFS_MAX_DATASET_NAME_LEN];
1901 
1902 		(void) strlcpy(path, toname, sizeof (path));
1903 		char *at = strchr(path, '@');
1904 		ASSERT3P(at, !=, NULL);
1905 
1906 		*at = '\0';
1907 
1908 		if ((error = find_redact_book(hdl, path, redact_snap_guids,
1909 		    num_redact_snaps, &redact_book)) != 0) {
1910 			return (error);
1911 		}
1912 	}
1913 
1914 	enum lzc_send_flags lzc_flags = lzc_flags_from_sendflags(flags) |
1915 	    lzc_flags_from_resume_nvl(resume_nvl);
1916 
1917 	if (flags->verbosity != 0 || flags->progressastitle) {
1918 		/*
1919 		 * Some of these may have come from the resume token, set them
1920 		 * here for size estimate purposes.
1921 		 */
1922 		sendflags_t tmpflags = *flags;
1923 		if (lzc_flags & LZC_SEND_FLAG_LARGE_BLOCK)
1924 			tmpflags.largeblock = B_TRUE;
1925 		if (lzc_flags & LZC_SEND_FLAG_COMPRESS)
1926 			tmpflags.compress = B_TRUE;
1927 		if (lzc_flags & LZC_SEND_FLAG_EMBED_DATA)
1928 			tmpflags.embed_data = B_TRUE;
1929 		if (lzc_flags & LZC_SEND_FLAG_RAW)
1930 			tmpflags.raw = B_TRUE;
1931 		if (lzc_flags & LZC_SEND_FLAG_SAVED)
1932 			tmpflags.saved = B_TRUE;
1933 		error = estimate_size(zhp, fromname, outfd, &tmpflags,
1934 		    resumeobj, resumeoff, bytes, redact_book, errbuf, &size);
1935 	}
1936 
1937 	if (!flags->dryrun) {
1938 		progress_arg_t pa = { 0 };
1939 		pthread_t tid;
1940 		sigset_t oldmask;
1941 		/*
1942 		 * If progress reporting is requested, spawn a new thread to
1943 		 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1944 		 */
1945 		{
1946 			pa.pa_zhp = zhp;
1947 			pa.pa_fd = outfd;
1948 			pa.pa_parsable = flags->parsable;
1949 			pa.pa_estimate = B_FALSE;
1950 			pa.pa_verbosity = flags->verbosity;
1951 			pa.pa_size = size;
1952 			pa.pa_astitle = flags->progressastitle;
1953 			pa.pa_progress = flags->progress;
1954 
1955 			error = pthread_create(&tid, NULL,
1956 			    send_progress_thread, &pa);
1957 			if (error != 0) {
1958 				if (redact_book != NULL)
1959 					free(redact_book);
1960 				zfs_close(zhp);
1961 				return (error);
1962 			}
1963 			SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
1964 		}
1965 
1966 		error = lzc_send_resume_redacted(zhp->zfs_name, fromname, outfd,
1967 		    lzc_flags, resumeobj, resumeoff, redact_book);
1968 		if (redact_book != NULL)
1969 			free(redact_book);
1970 
1971 		if (send_progress_thread_exit(hdl, tid, &oldmask)) {
1972 			zfs_close(zhp);
1973 			return (-1);
1974 		}
1975 
1976 		char errbuf[ERRBUFLEN];
1977 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1978 		    "warning: cannot send '%s'"), zhp->zfs_name);
1979 
1980 		zfs_close(zhp);
1981 
1982 		switch (error) {
1983 		case 0:
1984 			return (0);
1985 		case EACCES:
1986 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1987 			    "source key must be loaded"));
1988 			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
1989 		case ESRCH:
1990 			if (lzc_exists(zhp->zfs_name)) {
1991 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1992 				    "incremental source could not be found"));
1993 			}
1994 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
1995 
1996 		case EXDEV:
1997 		case ENOENT:
1998 		case EDQUOT:
1999 		case EFBIG:
2000 		case EIO:
2001 		case ENOLINK:
2002 		case ENOSPC:
2003 		case ENOSTR:
2004 		case ENXIO:
2005 		case EPIPE:
2006 		case ERANGE:
2007 		case EFAULT:
2008 		case EROFS:
2009 			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
2010 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2011 
2012 		default:
2013 			return (zfs_standard_error(hdl, errno, errbuf));
2014 		}
2015 	} else {
2016 		if (redact_book != NULL)
2017 			free(redact_book);
2018 	}
2019 
2020 	zfs_close(zhp);
2021 
2022 	return (error);
2023 }
2024 
2025 struct zfs_send_resume_impl {
2026 	libzfs_handle_t *hdl;
2027 	sendflags_t *flags;
2028 	nvlist_t *resume_nvl;
2029 };
2030 
2031 static int
2032 zfs_send_resume_impl_cb(int outfd, void *arg)
2033 {
2034 	struct zfs_send_resume_impl *zsri = arg;
2035 	return (zfs_send_resume_impl_cb_impl(zsri->hdl, zsri->flags, outfd,
2036 	    zsri->resume_nvl));
2037 }
2038 
2039 static int
2040 zfs_send_resume_impl(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
2041     nvlist_t *resume_nvl)
2042 {
2043 	struct zfs_send_resume_impl zsri = {
2044 		.hdl = hdl,
2045 		.flags = flags,
2046 		.resume_nvl = resume_nvl,
2047 	};
2048 	return (lzc_send_wrapper(zfs_send_resume_impl_cb, outfd, &zsri));
2049 }
2050 
2051 int
2052 zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
2053     const char *resume_token)
2054 {
2055 	int ret;
2056 	char errbuf[ERRBUFLEN];
2057 	nvlist_t *resume_nvl;
2058 
2059 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2060 	    "cannot resume send"));
2061 
2062 	resume_nvl = zfs_send_resume_token_to_nvlist(hdl, resume_token);
2063 	if (resume_nvl == NULL) {
2064 		/*
2065 		 * zfs_error_aux has already been set by
2066 		 * zfs_send_resume_token_to_nvlist()
2067 		 */
2068 		return (zfs_error(hdl, EZFS_FAULT, errbuf));
2069 	}
2070 
2071 	ret = zfs_send_resume_impl(hdl, flags, outfd, resume_nvl);
2072 	fnvlist_free(resume_nvl);
2073 
2074 	return (ret);
2075 }
2076 
2077 int
2078 zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd,
2079     const char *resume_token)
2080 {
2081 	int ret;
2082 	libzfs_handle_t *hdl = zhp->zfs_hdl;
2083 	nvlist_t *saved_nvl = NULL, *resume_nvl = NULL;
2084 	uint64_t saved_guid = 0, resume_guid = 0;
2085 	uint64_t obj = 0, off = 0, bytes = 0;
2086 	char token_buf[ZFS_MAXPROPLEN];
2087 	char errbuf[ERRBUFLEN];
2088 
2089 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2090 	    "saved send failed"));
2091 
2092 	ret = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
2093 	    token_buf, sizeof (token_buf), NULL, NULL, 0, B_TRUE);
2094 	if (ret != 0)
2095 		goto out;
2096 
2097 	saved_nvl = zfs_send_resume_token_to_nvlist(hdl, token_buf);
2098 	if (saved_nvl == NULL) {
2099 		/*
2100 		 * zfs_error_aux has already been set by
2101 		 * zfs_send_resume_token_to_nvlist()
2102 		 */
2103 		ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2104 		goto out;
2105 	}
2106 
2107 	/*
2108 	 * If a resume token is provided we use the object and offset
2109 	 * from that instead of the default, which starts from the
2110 	 * beginning.
2111 	 */
2112 	if (resume_token != NULL) {
2113 		resume_nvl = zfs_send_resume_token_to_nvlist(hdl,
2114 		    resume_token);
2115 		if (resume_nvl == NULL) {
2116 			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2117 			goto out;
2118 		}
2119 
2120 		if (nvlist_lookup_uint64(resume_nvl, "object", &obj) != 0 ||
2121 		    nvlist_lookup_uint64(resume_nvl, "offset", &off) != 0 ||
2122 		    nvlist_lookup_uint64(resume_nvl, "bytes", &bytes) != 0 ||
2123 		    nvlist_lookup_uint64(resume_nvl, "toguid",
2124 		    &resume_guid) != 0) {
2125 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2126 			    "provided resume token is corrupt"));
2127 			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2128 			goto out;
2129 		}
2130 
2131 		if (nvlist_lookup_uint64(saved_nvl, "toguid",
2132 		    &saved_guid)) {
2133 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2134 			    "dataset's resume token is corrupt"));
2135 			ret = zfs_error(hdl, EZFS_FAULT, errbuf);
2136 			goto out;
2137 		}
2138 
2139 		if (resume_guid != saved_guid) {
2140 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2141 			    "provided resume token does not match dataset"));
2142 			ret = zfs_error(hdl, EZFS_BADBACKUP, errbuf);
2143 			goto out;
2144 		}
2145 	}
2146 
2147 	(void) nvlist_remove_all(saved_nvl, "object");
2148 	fnvlist_add_uint64(saved_nvl, "object", obj);
2149 
2150 	(void) nvlist_remove_all(saved_nvl, "offset");
2151 	fnvlist_add_uint64(saved_nvl, "offset", off);
2152 
2153 	(void) nvlist_remove_all(saved_nvl, "bytes");
2154 	fnvlist_add_uint64(saved_nvl, "bytes", bytes);
2155 
2156 	(void) nvlist_remove_all(saved_nvl, "toname");
2157 	fnvlist_add_string(saved_nvl, "toname", zhp->zfs_name);
2158 
2159 	ret = zfs_send_resume_impl(hdl, flags, outfd, saved_nvl);
2160 
2161 out:
2162 	fnvlist_free(saved_nvl);
2163 	fnvlist_free(resume_nvl);
2164 	return (ret);
2165 }
2166 
2167 /*
2168  * This function informs the target system that the recursive send is complete.
2169  * The record is also expected in the case of a send -p.
2170  */
2171 static int
2172 send_conclusion_record(int fd, zio_cksum_t *zc)
2173 {
2174 	dmu_replay_record_t drr;
2175 	memset(&drr, 0, sizeof (dmu_replay_record_t));
2176 	drr.drr_type = DRR_END;
2177 	if (zc != NULL)
2178 		drr.drr_u.drr_end.drr_checksum = *zc;
2179 	if (write(fd, &drr, sizeof (drr)) == -1) {
2180 		return (errno);
2181 	}
2182 	return (0);
2183 }
2184 
2185 /*
2186  * This function is responsible for sending the records that contain the
2187  * necessary information for the target system's libzfs to be able to set the
2188  * properties of the filesystem being received, or to be able to prepare for
2189  * a recursive receive.
2190  *
2191  * The "zhp" argument is the handle of the snapshot we are sending
2192  * (the "tosnap").  The "from" argument is the short snapshot name (the part
2193  * after the @) of the incremental source.
2194  */
2195 static int
2196 send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
2197     boolean_t gather_props, boolean_t recursive, boolean_t verbose,
2198     boolean_t dryrun, boolean_t raw, boolean_t replicate, boolean_t skipmissing,
2199     boolean_t backup, boolean_t holds, boolean_t props, boolean_t doall,
2200     nvlist_t **fssp, avl_tree_t **fsavlp)
2201 {
2202 	int err = 0;
2203 	char *packbuf = NULL;
2204 	size_t buflen = 0;
2205 	zio_cksum_t zc = { {0} };
2206 	int featureflags = 0;
2207 	/* name of filesystem/volume that contains snapshot we are sending */
2208 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
2209 	/* short name of snap we are sending */
2210 	const char *tosnap = "";
2211 
2212 	char errbuf[ERRBUFLEN];
2213 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2214 	    "warning: cannot send '%s'"), zhp->zfs_name);
2215 	if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
2216 	    ZFS_PROP_VERSION) >= ZPL_VERSION_SA) {
2217 		featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
2218 	}
2219 
2220 	if (holds)
2221 		featureflags |= DMU_BACKUP_FEATURE_HOLDS;
2222 
2223 	(void) strlcpy(tofs, zhp->zfs_name, ZFS_MAX_DATASET_NAME_LEN);
2224 	char *at = strchr(tofs, '@');
2225 	if (at != NULL) {
2226 		*at = '\0';
2227 		tosnap = at + 1;
2228 	}
2229 
2230 	if (gather_props) {
2231 		nvlist_t *hdrnv = fnvlist_alloc();
2232 		nvlist_t *fss = NULL;
2233 
2234 		if (from != NULL)
2235 			fnvlist_add_string(hdrnv, "fromsnap", from);
2236 		fnvlist_add_string(hdrnv, "tosnap", tosnap);
2237 		if (!recursive)
2238 			fnvlist_add_boolean(hdrnv, "not_recursive");
2239 
2240 		if (raw) {
2241 			fnvlist_add_boolean(hdrnv, "raw");
2242 		}
2243 
2244 		if (gather_nvlist(zhp->zfs_hdl, tofs,
2245 		    from, tosnap, recursive, raw, doall, replicate, skipmissing,
2246 		    verbose, backup, holds, props, &fss, fsavlp) != 0) {
2247 			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2248 			    errbuf));
2249 		}
2250 		/*
2251 		 * Do not allow the size of the properties list to exceed
2252 		 * the limit
2253 		 */
2254 		if ((fnvlist_size(fss) + fnvlist_size(hdrnv)) >
2255 		    zhp->zfs_hdl->libzfs_max_nvlist) {
2256 			(void) snprintf(errbuf, sizeof (errbuf),
2257 			    dgettext(TEXT_DOMAIN, "warning: cannot send '%s': "
2258 			    "the size of the list of snapshots and properties "
2259 			    "is too large to be received successfully.\n"
2260 			    "Select a smaller number of snapshots to send.\n"),
2261 			    zhp->zfs_name);
2262 			return (zfs_error(zhp->zfs_hdl, EZFS_NOSPC,
2263 			    errbuf));
2264 		}
2265 		fnvlist_add_nvlist(hdrnv, "fss", fss);
2266 		VERIFY0(nvlist_pack(hdrnv, &packbuf, &buflen, NV_ENCODE_XDR,
2267 		    0));
2268 		if (fssp != NULL) {
2269 			*fssp = fss;
2270 		} else {
2271 			fnvlist_free(fss);
2272 		}
2273 		fnvlist_free(hdrnv);
2274 	}
2275 
2276 	if (!dryrun) {
2277 		dmu_replay_record_t drr;
2278 		memset(&drr, 0, sizeof (dmu_replay_record_t));
2279 		/* write first begin record */
2280 		drr.drr_type = DRR_BEGIN;
2281 		drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
2282 		DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
2283 		    drr_versioninfo, DMU_COMPOUNDSTREAM);
2284 		DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
2285 		    drr_versioninfo, featureflags);
2286 		if (snprintf(drr.drr_u.drr_begin.drr_toname,
2287 		    sizeof (drr.drr_u.drr_begin.drr_toname), "%s@%s", tofs,
2288 		    tosnap) >= sizeof (drr.drr_u.drr_begin.drr_toname)) {
2289 			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2290 			    errbuf));
2291 		}
2292 		drr.drr_payloadlen = buflen;
2293 
2294 		err = dump_record(&drr, packbuf, buflen, &zc, fd);
2295 		free(packbuf);
2296 		if (err != 0) {
2297 			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
2298 			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2299 			    errbuf));
2300 		}
2301 		err = send_conclusion_record(fd, &zc);
2302 		if (err != 0) {
2303 			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(err));
2304 			return (zfs_error(zhp->zfs_hdl, EZFS_BADBACKUP,
2305 			    errbuf));
2306 		}
2307 	}
2308 	return (0);
2309 }
2310 
2311 /*
2312  * Generate a send stream.  The "zhp" argument is the filesystem/volume
2313  * that contains the snapshot to send.  The "fromsnap" argument is the
2314  * short name (the part after the '@') of the snapshot that is the
2315  * incremental source to send from (if non-NULL).  The "tosnap" argument
2316  * is the short name of the snapshot to send.
2317  *
2318  * The content of the send stream is the snapshot identified by
2319  * 'tosnap'.  Incremental streams are requested in two ways:
2320  *     - from the snapshot identified by "fromsnap" (if non-null) or
2321  *     - from the origin of the dataset identified by zhp, which must
2322  *	 be a clone.  In this case, "fromsnap" is null and "fromorigin"
2323  *	 is TRUE.
2324  *
2325  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
2326  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
2327  * if "replicate" is set.  If "doall" is set, dump all the intermediate
2328  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
2329  * case too. If "props" is set, send properties.
2330  *
2331  * Pre-wrapped (cf. lzc_send_wrapper()).
2332  */
2333 static int
2334 zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2335     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2336     void *cb_arg, nvlist_t **debugnvp)
2337 {
2338 	char errbuf[ERRBUFLEN];
2339 	send_dump_data_t sdd = { 0 };
2340 	int err = 0;
2341 	nvlist_t *fss = NULL;
2342 	avl_tree_t *fsavl = NULL;
2343 	static uint64_t holdseq;
2344 	int spa_version;
2345 	FILE *fout;
2346 
2347 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2348 	    "cannot send '%s'"), zhp->zfs_name);
2349 
2350 	if (fromsnap && fromsnap[0] == '\0') {
2351 		zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
2352 		    "zero-length incremental source"));
2353 		return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
2354 	}
2355 
2356 	if (fromsnap) {
2357 		char full_fromsnap_name[ZFS_MAX_DATASET_NAME_LEN];
2358 		if (snprintf(full_fromsnap_name, sizeof (full_fromsnap_name),
2359 		    "%s@%s", zhp->zfs_name, fromsnap) >=
2360 		    sizeof (full_fromsnap_name)) {
2361 			err = EINVAL;
2362 			goto stderr_out;
2363 		}
2364 		zfs_handle_t *fromsnapn = zfs_open(zhp->zfs_hdl,
2365 		    full_fromsnap_name, ZFS_TYPE_SNAPSHOT);
2366 		if (fromsnapn == NULL) {
2367 			err = -1;
2368 			goto err_out;
2369 		}
2370 		zfs_close(fromsnapn);
2371 	}
2372 
2373 	if (flags->replicate || flags->doall || flags->props ||
2374 	    flags->holds || flags->backup) {
2375 		char full_tosnap_name[ZFS_MAX_DATASET_NAME_LEN];
2376 		if (snprintf(full_tosnap_name, sizeof (full_tosnap_name),
2377 		    "%s@%s", zhp->zfs_name, tosnap) >=
2378 		    sizeof (full_tosnap_name)) {
2379 			err = EINVAL;
2380 			goto stderr_out;
2381 		}
2382 		zfs_handle_t *tosnap = zfs_open(zhp->zfs_hdl,
2383 		    full_tosnap_name, ZFS_TYPE_SNAPSHOT);
2384 		if (tosnap == NULL) {
2385 			err = -1;
2386 			goto err_out;
2387 		}
2388 		err = send_prelim_records(tosnap, fromsnap, outfd,
2389 		    flags->replicate || flags->props || flags->holds,
2390 		    flags->replicate, flags->verbosity > 0, flags->dryrun,
2391 		    flags->raw, flags->replicate, flags->skipmissing,
2392 		    flags->backup, flags->holds, flags->props, flags->doall,
2393 		    &fss, &fsavl);
2394 		zfs_close(tosnap);
2395 		if (err != 0)
2396 			goto err_out;
2397 	}
2398 
2399 	/* dump each stream */
2400 	sdd.fromsnap = fromsnap;
2401 	sdd.tosnap = tosnap;
2402 	sdd.outfd = outfd;
2403 	sdd.replicate = flags->replicate;
2404 	sdd.doall = flags->doall;
2405 	sdd.fromorigin = flags->fromorigin;
2406 	sdd.fss = fss;
2407 	sdd.fsavl = fsavl;
2408 	sdd.verbosity = flags->verbosity;
2409 	sdd.parsable = flags->parsable;
2410 	sdd.progress = flags->progress;
2411 	sdd.progressastitle = flags->progressastitle;
2412 	sdd.dryrun = flags->dryrun;
2413 	sdd.large_block = flags->largeblock;
2414 	sdd.embed_data = flags->embed_data;
2415 	sdd.compress = flags->compress;
2416 	sdd.raw = flags->raw;
2417 	sdd.holds = flags->holds;
2418 	sdd.filter_cb = filter_func;
2419 	sdd.filter_cb_arg = cb_arg;
2420 	if (debugnvp)
2421 		sdd.debugnv = *debugnvp;
2422 	if (sdd.verbosity != 0 && sdd.dryrun)
2423 		sdd.std_out = B_TRUE;
2424 	fout = sdd.std_out ? stdout : stderr;
2425 
2426 	/*
2427 	 * Some flags require that we place user holds on the datasets that are
2428 	 * being sent so they don't get destroyed during the send. We can skip
2429 	 * this step if the pool is imported read-only since the datasets cannot
2430 	 * be destroyed.
2431 	 */
2432 	if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
2433 	    ZPOOL_PROP_READONLY, NULL) &&
2434 	    zfs_spa_version(zhp, &spa_version) == 0 &&
2435 	    spa_version >= SPA_VERSION_USERREFS &&
2436 	    (flags->doall || flags->replicate)) {
2437 		++holdseq;
2438 		(void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
2439 		    ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
2440 		sdd.cleanup_fd = open(ZFS_DEV, O_RDWR | O_CLOEXEC);
2441 		if (sdd.cleanup_fd < 0) {
2442 			err = errno;
2443 			goto stderr_out;
2444 		}
2445 		sdd.snapholds = fnvlist_alloc();
2446 	} else {
2447 		sdd.cleanup_fd = -1;
2448 		sdd.snapholds = NULL;
2449 	}
2450 
2451 	if (flags->verbosity != 0 || sdd.snapholds != NULL) {
2452 		/*
2453 		 * Do a verbose no-op dry run to get all the verbose output
2454 		 * or to gather snapshot hold's before generating any data,
2455 		 * then do a non-verbose real run to generate the streams.
2456 		 */
2457 		sdd.dryrun = B_TRUE;
2458 		err = dump_filesystems(zhp, &sdd);
2459 
2460 		if (err != 0)
2461 			goto stderr_out;
2462 
2463 		if (flags->verbosity != 0) {
2464 			if (flags->parsable) {
2465 				(void) fprintf(fout, "size\t%llu\n",
2466 				    (longlong_t)sdd.size);
2467 			} else {
2468 				char buf[16];
2469 				zfs_nicebytes(sdd.size, buf, sizeof (buf));
2470 				(void) fprintf(fout, dgettext(TEXT_DOMAIN,
2471 				    "total estimated size is %s\n"), buf);
2472 			}
2473 		}
2474 
2475 		/* Ensure no snaps found is treated as an error. */
2476 		if (!sdd.seento) {
2477 			err = ENOENT;
2478 			goto err_out;
2479 		}
2480 
2481 		/* Skip the second run if dryrun was requested. */
2482 		if (flags->dryrun)
2483 			goto err_out;
2484 
2485 		if (sdd.snapholds != NULL) {
2486 			err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
2487 			if (err != 0)
2488 				goto stderr_out;
2489 
2490 			fnvlist_free(sdd.snapholds);
2491 			sdd.snapholds = NULL;
2492 		}
2493 
2494 		sdd.dryrun = B_FALSE;
2495 		sdd.verbosity = 0;
2496 	}
2497 
2498 	err = dump_filesystems(zhp, &sdd);
2499 	fsavl_destroy(fsavl);
2500 	fnvlist_free(fss);
2501 
2502 	/* Ensure no snaps found is treated as an error. */
2503 	if (err == 0 && !sdd.seento)
2504 		err = ENOENT;
2505 
2506 	if (sdd.cleanup_fd != -1) {
2507 		VERIFY(0 == close(sdd.cleanup_fd));
2508 		sdd.cleanup_fd = -1;
2509 	}
2510 
2511 	if (!flags->dryrun && (flags->replicate || flags->doall ||
2512 	    flags->props || flags->backup || flags->holds)) {
2513 		/*
2514 		 * write final end record.  NB: want to do this even if
2515 		 * there was some error, because it might not be totally
2516 		 * failed.
2517 		 */
2518 		int err2 = send_conclusion_record(outfd, NULL);
2519 		if (err2 != 0)
2520 			return (zfs_standard_error(zhp->zfs_hdl, err2, errbuf));
2521 	}
2522 
2523 	return (err || sdd.err);
2524 
2525 stderr_out:
2526 	err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
2527 err_out:
2528 	fsavl_destroy(fsavl);
2529 	fnvlist_free(fss);
2530 	fnvlist_free(sdd.snapholds);
2531 
2532 	if (sdd.cleanup_fd != -1)
2533 		VERIFY(0 == close(sdd.cleanup_fd));
2534 	return (err);
2535 }
2536 
2537 struct zfs_send {
2538 	zfs_handle_t *zhp;
2539 	const char *fromsnap;
2540 	const char *tosnap;
2541 	sendflags_t *flags;
2542 	snapfilter_cb_t *filter_func;
2543 	void *cb_arg;
2544 	nvlist_t **debugnvp;
2545 };
2546 
2547 static int
2548 zfs_send_cb(int outfd, void *arg)
2549 {
2550 	struct zfs_send *zs = arg;
2551 	return (zfs_send_cb_impl(zs->zhp, zs->fromsnap, zs->tosnap, zs->flags,
2552 	    outfd, zs->filter_func, zs->cb_arg, zs->debugnvp));
2553 }
2554 
2555 int
2556 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
2557     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
2558     void *cb_arg, nvlist_t **debugnvp)
2559 {
2560 	struct zfs_send arg = {
2561 		.zhp = zhp,
2562 		.fromsnap = fromsnap,
2563 		.tosnap = tosnap,
2564 		.flags = flags,
2565 		.filter_func = filter_func,
2566 		.cb_arg = cb_arg,
2567 		.debugnvp = debugnvp,
2568 	};
2569 	return (lzc_send_wrapper(zfs_send_cb, outfd, &arg));
2570 }
2571 
2572 
2573 static zfs_handle_t *
2574 name_to_dir_handle(libzfs_handle_t *hdl, const char *snapname)
2575 {
2576 	char dirname[ZFS_MAX_DATASET_NAME_LEN];
2577 	(void) strlcpy(dirname, snapname, ZFS_MAX_DATASET_NAME_LEN);
2578 	char *c = strchr(dirname, '@');
2579 	if (c != NULL)
2580 		*c = '\0';
2581 	return (zfs_open(hdl, dirname, ZFS_TYPE_DATASET));
2582 }
2583 
2584 /*
2585  * Returns B_TRUE if earlier is an earlier snapshot in later's timeline; either
2586  * an earlier snapshot in the same filesystem, or a snapshot before later's
2587  * origin, or it's origin's origin, etc.
2588  */
2589 static boolean_t
2590 snapshot_is_before(zfs_handle_t *earlier, zfs_handle_t *later)
2591 {
2592 	boolean_t ret;
2593 	uint64_t later_txg =
2594 	    (later->zfs_type == ZFS_TYPE_FILESYSTEM ||
2595 	    later->zfs_type == ZFS_TYPE_VOLUME ?
2596 	    UINT64_MAX : zfs_prop_get_int(later, ZFS_PROP_CREATETXG));
2597 	uint64_t earlier_txg = zfs_prop_get_int(earlier, ZFS_PROP_CREATETXG);
2598 
2599 	if (earlier_txg >= later_txg)
2600 		return (B_FALSE);
2601 
2602 	zfs_handle_t *earlier_dir = name_to_dir_handle(earlier->zfs_hdl,
2603 	    earlier->zfs_name);
2604 	zfs_handle_t *later_dir = name_to_dir_handle(later->zfs_hdl,
2605 	    later->zfs_name);
2606 
2607 	if (strcmp(earlier_dir->zfs_name, later_dir->zfs_name) == 0) {
2608 		zfs_close(earlier_dir);
2609 		zfs_close(later_dir);
2610 		return (B_TRUE);
2611 	}
2612 
2613 	char clonename[ZFS_MAX_DATASET_NAME_LEN];
2614 	if (zfs_prop_get(later_dir, ZFS_PROP_ORIGIN, clonename,
2615 	    ZFS_MAX_DATASET_NAME_LEN, NULL, NULL, 0, B_TRUE) != 0) {
2616 		zfs_close(earlier_dir);
2617 		zfs_close(later_dir);
2618 		return (B_FALSE);
2619 	}
2620 
2621 	zfs_handle_t *origin = zfs_open(earlier->zfs_hdl, clonename,
2622 	    ZFS_TYPE_DATASET);
2623 	uint64_t origin_txg = zfs_prop_get_int(origin, ZFS_PROP_CREATETXG);
2624 
2625 	/*
2626 	 * If "earlier" is exactly the origin, then
2627 	 * snapshot_is_before(earlier, origin) will return false (because
2628 	 * they're the same).
2629 	 */
2630 	if (origin_txg == earlier_txg &&
2631 	    strcmp(origin->zfs_name, earlier->zfs_name) == 0) {
2632 		zfs_close(earlier_dir);
2633 		zfs_close(later_dir);
2634 		zfs_close(origin);
2635 		return (B_TRUE);
2636 	}
2637 	zfs_close(earlier_dir);
2638 	zfs_close(later_dir);
2639 
2640 	ret = snapshot_is_before(earlier, origin);
2641 	zfs_close(origin);
2642 	return (ret);
2643 }
2644 
2645 /*
2646  * The "zhp" argument is the handle of the dataset to send (typically a
2647  * snapshot).  The "from" argument is the full name of the snapshot or
2648  * bookmark that is the incremental source.
2649  *
2650  * Pre-wrapped (cf. lzc_send_wrapper()).
2651  */
2652 static int
2653 zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
2654     sendflags_t *flags, const char *redactbook)
2655 {
2656 	int err;
2657 	libzfs_handle_t *hdl = zhp->zfs_hdl;
2658 	char *name = zhp->zfs_name;
2659 	pthread_t ptid;
2660 	progress_arg_t pa = { 0 };
2661 	uint64_t size = 0;
2662 
2663 	char errbuf[ERRBUFLEN];
2664 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2665 	    "warning: cannot send '%s'"), name);
2666 
2667 	if (from != NULL && strchr(from, '@')) {
2668 		zfs_handle_t *from_zhp = zfs_open(hdl, from,
2669 		    ZFS_TYPE_DATASET);
2670 		if (from_zhp == NULL)
2671 			return (-1);
2672 		if (!snapshot_is_before(from_zhp, zhp)) {
2673 			zfs_close(from_zhp);
2674 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2675 			    "not an earlier snapshot from the same fs"));
2676 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2677 		}
2678 		zfs_close(from_zhp);
2679 	}
2680 
2681 	if (redactbook != NULL) {
2682 		char bookname[ZFS_MAX_DATASET_NAME_LEN];
2683 		nvlist_t *redact_snaps;
2684 		zfs_handle_t *book_zhp;
2685 		char *at, *pound;
2686 		int dsnamelen;
2687 
2688 		pound = strchr(redactbook, '#');
2689 		if (pound != NULL)
2690 			redactbook = pound + 1;
2691 		at = strchr(name, '@');
2692 		if (at == NULL) {
2693 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2694 			    "cannot do a redacted send to a filesystem"));
2695 			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2696 		}
2697 		dsnamelen = at - name;
2698 		if (snprintf(bookname, sizeof (bookname), "%.*s#%s",
2699 		    dsnamelen, name, redactbook)
2700 		    >= sizeof (bookname)) {
2701 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2702 			    "invalid bookmark name"));
2703 			return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2704 		}
2705 		book_zhp = zfs_open(hdl, bookname, ZFS_TYPE_BOOKMARK);
2706 		if (book_zhp == NULL)
2707 			return (-1);
2708 		if (nvlist_lookup_nvlist(book_zhp->zfs_props,
2709 		    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
2710 		    &redact_snaps) != 0 || redact_snaps == NULL) {
2711 			zfs_close(book_zhp);
2712 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2713 			    "not a redaction bookmark"));
2714 			return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
2715 		}
2716 		zfs_close(book_zhp);
2717 	}
2718 
2719 	/*
2720 	 * Send fs properties
2721 	 */
2722 	if (flags->props || flags->holds || flags->backup) {
2723 		/*
2724 		 * Note: the header generated by send_prelim_records()
2725 		 * assumes that the incremental source is in the same
2726 		 * filesystem/volume as the target (which is a requirement
2727 		 * when doing "zfs send -R").  But that isn't always the
2728 		 * case here (e.g. send from snap in origin, or send from
2729 		 * bookmark).  We pass from=NULL, which will omit this
2730 		 * information from the prelim records; it isn't used
2731 		 * when receiving this type of stream.
2732 		 */
2733 		err = send_prelim_records(zhp, NULL, fd, B_TRUE, B_FALSE,
2734 		    flags->verbosity > 0, flags->dryrun, flags->raw,
2735 		    flags->replicate, B_FALSE, flags->backup, flags->holds,
2736 		    flags->props, flags->doall, NULL, NULL);
2737 		if (err != 0)
2738 			return (err);
2739 	}
2740 
2741 	/*
2742 	 * Perform size estimate if verbose was specified.
2743 	 */
2744 	if (flags->verbosity != 0 || flags->progressastitle) {
2745 		err = estimate_size(zhp, from, fd, flags, 0, 0, 0, redactbook,
2746 		    errbuf, &size);
2747 		if (err != 0)
2748 			return (err);
2749 	}
2750 
2751 	if (flags->dryrun)
2752 		return (0);
2753 
2754 	/*
2755 	 * If progress reporting is requested, spawn a new thread to poll
2756 	 * ZFS_IOC_SEND_PROGRESS at a regular interval.
2757 	 */
2758 	sigset_t oldmask;
2759 	{
2760 		pa.pa_zhp = zhp;
2761 		pa.pa_fd = fd;
2762 		pa.pa_parsable = flags->parsable;
2763 		pa.pa_estimate = B_FALSE;
2764 		pa.pa_verbosity = flags->verbosity;
2765 		pa.pa_size = size;
2766 		pa.pa_astitle = flags->progressastitle;
2767 		pa.pa_progress = flags->progress;
2768 
2769 		err = pthread_create(&ptid, NULL,
2770 		    send_progress_thread, &pa);
2771 		if (err != 0) {
2772 			zfs_error_aux(zhp->zfs_hdl, "%s", zfs_strerror(errno));
2773 			return (zfs_error(zhp->zfs_hdl,
2774 			    EZFS_THREADCREATEFAILED, errbuf));
2775 		}
2776 		SEND_PROGRESS_THREAD_PARENT_BLOCK(&oldmask);
2777 	}
2778 
2779 	err = lzc_send_redacted(name, from, fd,
2780 	    lzc_flags_from_sendflags(flags), redactbook);
2781 
2782 	if (send_progress_thread_exit(hdl, ptid, &oldmask))
2783 			return (-1);
2784 
2785 	if (err == 0 && (flags->props || flags->holds || flags->backup)) {
2786 		/* Write the final end record. */
2787 		err = send_conclusion_record(fd, NULL);
2788 		if (err != 0)
2789 			return (zfs_standard_error(hdl, err, errbuf));
2790 	}
2791 	if (err != 0) {
2792 		switch (errno) {
2793 		case EXDEV:
2794 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2795 			    "not an earlier snapshot from the same fs"));
2796 			return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
2797 
2798 		case ENOENT:
2799 		case ESRCH:
2800 			if (lzc_exists(name)) {
2801 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2802 				    "incremental source (%s) does not exist"),
2803 				    from);
2804 			}
2805 			return (zfs_error(hdl, EZFS_NOENT, errbuf));
2806 
2807 		case EACCES:
2808 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2809 			    "dataset key must be loaded"));
2810 			return (zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf));
2811 
2812 		case EBUSY:
2813 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2814 			    "target is busy; if a filesystem, "
2815 			    "it must not be mounted"));
2816 			return (zfs_error(hdl, EZFS_BUSY, errbuf));
2817 
2818 		case EDQUOT:
2819 		case EFAULT:
2820 		case EFBIG:
2821 		case EINVAL:
2822 		case EIO:
2823 		case ENOLINK:
2824 		case ENOSPC:
2825 		case ENOSTR:
2826 		case ENXIO:
2827 		case EPIPE:
2828 		case ERANGE:
2829 		case EROFS:
2830 			zfs_error_aux(hdl, "%s", zfs_strerror(errno));
2831 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2832 		case ZFS_ERR_STREAM_LARGE_MICROZAP:
2833 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2834 			    "source snapshot contains large microzaps, "
2835 			    "need -L (--large-block) or -w (--raw) to "
2836 			    "generate stream"));
2837 			return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
2838 		default:
2839 			return (zfs_standard_error(hdl, errno, errbuf));
2840 		}
2841 	}
2842 	return (err != 0);
2843 }
2844 
2845 struct zfs_send_one {
2846 	zfs_handle_t *zhp;
2847 	const char *from;
2848 	sendflags_t *flags;
2849 	const char *redactbook;
2850 };
2851 
2852 static int
2853 zfs_send_one_cb(int fd, void *arg)
2854 {
2855 	struct zfs_send_one *zso = arg;
2856 	return (zfs_send_one_cb_impl(zso->zhp, zso->from, fd, zso->flags,
2857 	    zso->redactbook));
2858 }
2859 
2860 int
2861 zfs_send_one(zfs_handle_t *zhp, const char *from, int fd, sendflags_t *flags,
2862     const char *redactbook)
2863 {
2864 	struct zfs_send_one zso = {
2865 		.zhp = zhp,
2866 		.from = from,
2867 		.flags = flags,
2868 		.redactbook = redactbook,
2869 	};
2870 	return (lzc_send_wrapper(zfs_send_one_cb, fd, &zso));
2871 }
2872 
2873 /*
2874  * Routines specific to "zfs recv"
2875  */
2876 
2877 static int
2878 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
2879     boolean_t byteswap, zio_cksum_t *zc)
2880 {
2881 	char *cp = buf;
2882 	int rv;
2883 	int len = ilen;
2884 
2885 	do {
2886 		rv = read(fd, cp, len);
2887 		cp += rv;
2888 		len -= rv;
2889 	} while (rv > 0);
2890 
2891 	if (rv < 0 || len != 0) {
2892 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2893 		    "failed to read from stream"));
2894 		return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
2895 		    "cannot receive")));
2896 	}
2897 
2898 	if (zc) {
2899 		if (byteswap)
2900 			fletcher_4_incremental_byteswap(buf, ilen, zc);
2901 		else
2902 			fletcher_4_incremental_native(buf, ilen, zc);
2903 	}
2904 	return (0);
2905 }
2906 
2907 static int
2908 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
2909     boolean_t byteswap, zio_cksum_t *zc)
2910 {
2911 	char *buf;
2912 	int err;
2913 
2914 	buf = zfs_alloc(hdl, len);
2915 
2916 	if (len > hdl->libzfs_max_nvlist) {
2917 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "nvlist too large"));
2918 		free(buf);
2919 		return (ENOMEM);
2920 	}
2921 
2922 	err = recv_read(hdl, fd, buf, len, byteswap, zc);
2923 	if (err != 0) {
2924 		free(buf);
2925 		return (err);
2926 	}
2927 
2928 	err = nvlist_unpack(buf, len, nvp, 0);
2929 	free(buf);
2930 	if (err != 0) {
2931 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2932 		    "stream (malformed nvlist)"));
2933 		return (EINVAL);
2934 	}
2935 	return (0);
2936 }
2937 
2938 /*
2939  * Returns the grand origin (origin of origin of origin...) of a given handle.
2940  * If this dataset is not a clone, it simply returns a copy of the original
2941  * handle.
2942  */
2943 static zfs_handle_t *
2944 recv_open_grand_origin(zfs_handle_t *zhp)
2945 {
2946 	char origin[ZFS_MAX_DATASET_NAME_LEN];
2947 	zprop_source_t src;
2948 	zfs_handle_t *ozhp = zfs_handle_dup(zhp);
2949 
2950 	while (ozhp != NULL) {
2951 		if (zfs_prop_get(ozhp, ZFS_PROP_ORIGIN, origin,
2952 		    sizeof (origin), &src, NULL, 0, B_FALSE) != 0)
2953 			break;
2954 
2955 		(void) zfs_close(ozhp);
2956 		ozhp = zfs_open(zhp->zfs_hdl, origin, ZFS_TYPE_FILESYSTEM);
2957 	}
2958 
2959 	return (ozhp);
2960 }
2961 
2962 static int
2963 recv_rename_impl(zfs_handle_t *zhp, const char *name, const char *newname)
2964 {
2965 	int err;
2966 	zfs_handle_t *ozhp = NULL;
2967 
2968 	/*
2969 	 * Attempt to rename the dataset. If it fails with EACCES we have
2970 	 * attempted to rename the dataset outside of its encryption root.
2971 	 * Force the dataset to become an encryption root and try again.
2972 	 */
2973 	err = lzc_rename(name, newname);
2974 	if (err == EACCES) {
2975 		ozhp = recv_open_grand_origin(zhp);
2976 		if (ozhp == NULL) {
2977 			err = ENOENT;
2978 			goto out;
2979 		}
2980 
2981 		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
2982 		    NULL, NULL, 0);
2983 		if (err != 0)
2984 			goto out;
2985 
2986 		err = lzc_rename(name, newname);
2987 	}
2988 
2989 out:
2990 	if (ozhp != NULL)
2991 		zfs_close(ozhp);
2992 	return (err);
2993 }
2994 
2995 static int
2996 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
2997     int baselen, char *newname, recvflags_t *flags)
2998 {
2999 	static int seq;
3000 	int err;
3001 	prop_changelist_t *clp = NULL;
3002 	zfs_handle_t *zhp = NULL;
3003 
3004 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
3005 	if (zhp == NULL) {
3006 		err = -1;
3007 		goto out;
3008 	}
3009 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
3010 	    flags->force ? MS_FORCE : 0);
3011 	if (clp == NULL) {
3012 		err = -1;
3013 		goto out;
3014 	}
3015 	err = changelist_prefix(clp);
3016 	if (err)
3017 		goto out;
3018 
3019 	if (tryname) {
3020 		(void) strlcpy(newname, tryname, ZFS_MAX_DATASET_NAME_LEN);
3021 		if (flags->verbose) {
3022 			(void) printf("attempting rename %s to %s\n",
3023 			    name, newname);
3024 		}
3025 		err = recv_rename_impl(zhp, name, newname);
3026 		if (err == 0)
3027 			changelist_rename(clp, name, tryname);
3028 	} else {
3029 		err = ENOENT;
3030 	}
3031 
3032 	if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
3033 		seq++;
3034 
3035 		(void) snprintf(newname, ZFS_MAX_DATASET_NAME_LEN,
3036 		    "%.*srecv-%u-%u", baselen, name, getpid(), seq);
3037 
3038 		if (flags->verbose) {
3039 			(void) printf("failed - trying rename %s to %s\n",
3040 			    name, newname);
3041 		}
3042 		err = recv_rename_impl(zhp, name, newname);
3043 		if (err == 0)
3044 			changelist_rename(clp, name, newname);
3045 		if (err && flags->verbose) {
3046 			(void) printf("failed (%u) - "
3047 			    "will try again on next pass\n", errno);
3048 		}
3049 		err = EAGAIN;
3050 	} else if (flags->verbose) {
3051 		if (err == 0)
3052 			(void) printf("success\n");
3053 		else
3054 			(void) printf("failed (%u)\n", errno);
3055 	}
3056 
3057 	(void) changelist_postfix(clp);
3058 
3059 out:
3060 	if (clp != NULL)
3061 		changelist_free(clp);
3062 	if (zhp != NULL)
3063 		zfs_close(zhp);
3064 
3065 	return (err);
3066 }
3067 
3068 static int
3069 recv_promote(libzfs_handle_t *hdl, const char *fsname,
3070     const char *origin_fsname, recvflags_t *flags)
3071 {
3072 	int err;
3073 	zfs_cmd_t zc = {"\0"};
3074 	zfs_handle_t *zhp = NULL, *ozhp = NULL;
3075 
3076 	if (flags->verbose)
3077 		(void) printf("promoting %s\n", fsname);
3078 
3079 	(void) strlcpy(zc.zc_value, origin_fsname, sizeof (zc.zc_value));
3080 	(void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
3081 
3082 	/*
3083 	 * Attempt to promote the dataset. If it fails with EACCES the
3084 	 * promotion would cause this dataset to leave its encryption root.
3085 	 * Force the origin to become an encryption root and try again.
3086 	 */
3087 	err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3088 	if (err == EACCES) {
3089 		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3090 		if (zhp == NULL) {
3091 			err = -1;
3092 			goto out;
3093 		}
3094 
3095 		ozhp = recv_open_grand_origin(zhp);
3096 		if (ozhp == NULL) {
3097 			err = -1;
3098 			goto out;
3099 		}
3100 
3101 		err = lzc_change_key(ozhp->zfs_name, DCP_CMD_FORCE_NEW_KEY,
3102 		    NULL, NULL, 0);
3103 		if (err != 0)
3104 			goto out;
3105 
3106 		err = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
3107 	}
3108 
3109 out:
3110 	if (zhp != NULL)
3111 		zfs_close(zhp);
3112 	if (ozhp != NULL)
3113 		zfs_close(ozhp);
3114 
3115 	return (err);
3116 }
3117 
3118 static int
3119 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
3120     char *newname, recvflags_t *flags)
3121 {
3122 	int err = 0;
3123 	prop_changelist_t *clp;
3124 	zfs_handle_t *zhp;
3125 	boolean_t defer = B_FALSE;
3126 	int spa_version;
3127 
3128 	zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
3129 	if (zhp == NULL)
3130 		return (-1);
3131 	zfs_type_t type = zfs_get_type(zhp);
3132 	if (type == ZFS_TYPE_SNAPSHOT &&
3133 	    zfs_spa_version(zhp, &spa_version) == 0 &&
3134 	    spa_version >= SPA_VERSION_USERREFS)
3135 		defer = B_TRUE;
3136 	clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
3137 	    flags->force ? MS_FORCE : 0);
3138 	zfs_close(zhp);
3139 	if (clp == NULL)
3140 		return (-1);
3141 
3142 	err = changelist_prefix(clp);
3143 	if (err)
3144 		return (err);
3145 
3146 	if (flags->verbose)
3147 		(void) printf("attempting destroy %s\n", name);
3148 	if (type == ZFS_TYPE_SNAPSHOT) {
3149 		nvlist_t *nv = fnvlist_alloc();
3150 		fnvlist_add_boolean(nv, name);
3151 		err = lzc_destroy_snaps(nv, defer, NULL);
3152 		fnvlist_free(nv);
3153 	} else {
3154 		err = lzc_destroy(name);
3155 	}
3156 	if (err == 0) {
3157 		if (flags->verbose)
3158 			(void) printf("success\n");
3159 		changelist_remove(clp, name);
3160 	}
3161 
3162 	(void) changelist_postfix(clp);
3163 	changelist_free(clp);
3164 
3165 	/*
3166 	 * Deferred destroy might destroy the snapshot or only mark it to be
3167 	 * destroyed later, and it returns success in either case.
3168 	 */
3169 	if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
3170 	    ZFS_TYPE_SNAPSHOT))) {
3171 		err = recv_rename(hdl, name, NULL, baselen, newname, flags);
3172 	}
3173 
3174 	return (err);
3175 }
3176 
3177 typedef struct guid_to_name_data {
3178 	uint64_t guid;
3179 	boolean_t bookmark_ok;
3180 	char *name;
3181 	char *skip;
3182 	uint64_t *redact_snap_guids;
3183 	uint64_t num_redact_snaps;
3184 } guid_to_name_data_t;
3185 
3186 static boolean_t
3187 redact_snaps_match(zfs_handle_t *zhp, guid_to_name_data_t *gtnd)
3188 {
3189 	uint64_t *bmark_snaps;
3190 	uint_t bmark_num_snaps;
3191 	nvlist_t *nvl;
3192 	if (zhp->zfs_type != ZFS_TYPE_BOOKMARK)
3193 		return (B_FALSE);
3194 
3195 	nvl = fnvlist_lookup_nvlist(zhp->zfs_props,
3196 	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS));
3197 	bmark_snaps = fnvlist_lookup_uint64_array(nvl, ZPROP_VALUE,
3198 	    &bmark_num_snaps);
3199 	if (bmark_num_snaps != gtnd->num_redact_snaps)
3200 		return (B_FALSE);
3201 	int i = 0;
3202 	for (; i < bmark_num_snaps; i++) {
3203 		int j = 0;
3204 		for (; j < bmark_num_snaps; j++) {
3205 			if (bmark_snaps[i] == gtnd->redact_snap_guids[j])
3206 				break;
3207 		}
3208 		if (j == bmark_num_snaps)
3209 			break;
3210 	}
3211 	return (i == bmark_num_snaps);
3212 }
3213 
3214 static int
3215 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
3216 {
3217 	guid_to_name_data_t *gtnd = arg;
3218 	const char *slash;
3219 	int err;
3220 
3221 	if (gtnd->skip != NULL &&
3222 	    (slash = strrchr(zhp->zfs_name, '/')) != NULL &&
3223 	    strcmp(slash + 1, gtnd->skip) == 0) {
3224 		zfs_close(zhp);
3225 		return (0);
3226 	}
3227 
3228 	if (zfs_prop_get_int(zhp, ZFS_PROP_GUID) == gtnd->guid &&
3229 	    (gtnd->num_redact_snaps == -1 || redact_snaps_match(zhp, gtnd))) {
3230 		(void) strcpy(gtnd->name, zhp->zfs_name);
3231 		zfs_close(zhp);
3232 		return (EEXIST);
3233 	}
3234 
3235 	err = zfs_iter_children_v2(zhp, 0, guid_to_name_cb, gtnd);
3236 	if (err != EEXIST && gtnd->bookmark_ok)
3237 		err = zfs_iter_bookmarks_v2(zhp, 0, guid_to_name_cb, gtnd);
3238 	zfs_close(zhp);
3239 	return (err);
3240 }
3241 
3242 /*
3243  * Attempt to find the local dataset associated with this guid.  In the case of
3244  * multiple matches, we attempt to find the "best" match by searching
3245  * progressively larger portions of the hierarchy.  This allows one to send a
3246  * tree of datasets individually and guarantee that we will find the source
3247  * guid within that hierarchy, even if there are multiple matches elsewhere.
3248  *
3249  * If num_redact_snaps is not -1, we attempt to find a redaction bookmark with
3250  * the specified number of redaction snapshots.  If num_redact_snaps isn't 0 or
3251  * -1, then redact_snap_guids will be an array of the guids of the snapshots the
3252  * redaction bookmark was created with.  If num_redact_snaps is -1, then we will
3253  * attempt to find a snapshot or bookmark (if bookmark_ok is passed) with the
3254  * given guid.  Note that a redaction bookmark can be returned if
3255  * num_redact_snaps == -1.
3256  */
3257 static int
3258 guid_to_name_redact_snaps(libzfs_handle_t *hdl, const char *parent,
3259     uint64_t guid, boolean_t bookmark_ok, uint64_t *redact_snap_guids,
3260     uint64_t num_redact_snaps, char *name)
3261 {
3262 	char pname[ZFS_MAX_DATASET_NAME_LEN];
3263 	guid_to_name_data_t gtnd;
3264 
3265 	gtnd.guid = guid;
3266 	gtnd.bookmark_ok = bookmark_ok;
3267 	gtnd.name = name;
3268 	gtnd.skip = NULL;
3269 	gtnd.redact_snap_guids = redact_snap_guids;
3270 	gtnd.num_redact_snaps = num_redact_snaps;
3271 
3272 	/*
3273 	 * Search progressively larger portions of the hierarchy, starting
3274 	 * with the filesystem specified by 'parent'.  This will
3275 	 * select the "most local" version of the origin snapshot in the case
3276 	 * that there are multiple matching snapshots in the system.
3277 	 */
3278 	(void) strlcpy(pname, parent, sizeof (pname));
3279 	char *cp = strrchr(pname, '@');
3280 	if (cp == NULL)
3281 		cp = strchr(pname, '\0');
3282 	for (; cp != NULL; cp = strrchr(pname, '/')) {
3283 		/* Chop off the last component and open the parent */
3284 		*cp = '\0';
3285 		zfs_handle_t *zhp = make_dataset_handle(hdl, pname);
3286 
3287 		if (zhp == NULL)
3288 			continue;
3289 		int err = guid_to_name_cb(zfs_handle_dup(zhp), &gtnd);
3290 		if (err != EEXIST)
3291 			err = zfs_iter_children_v2(zhp, 0, guid_to_name_cb,
3292 			    &gtnd);
3293 		if (err != EEXIST && bookmark_ok)
3294 			err = zfs_iter_bookmarks_v2(zhp, 0, guid_to_name_cb,
3295 			    &gtnd);
3296 		zfs_close(zhp);
3297 		if (err == EEXIST)
3298 			return (0);
3299 
3300 		/*
3301 		 * Remember the last portion of the dataset so we skip it next
3302 		 * time through (as we've already searched that portion of the
3303 		 * hierarchy).
3304 		 */
3305 		gtnd.skip = strrchr(pname, '/') + 1;
3306 	}
3307 
3308 	return (ENOENT);
3309 }
3310 
3311 static int
3312 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
3313     boolean_t bookmark_ok, char *name)
3314 {
3315 	return (guid_to_name_redact_snaps(hdl, parent, guid, bookmark_ok, NULL,
3316 	    -1, name));
3317 }
3318 
3319 /*
3320  * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
3321  * guid1 is after guid2.
3322  */
3323 static int
3324 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
3325     uint64_t guid1, uint64_t guid2)
3326 {
3327 	nvlist_t *nvfs;
3328 	const char *fsname = NULL, *snapname = NULL;
3329 	char buf[ZFS_MAX_DATASET_NAME_LEN];
3330 	int rv;
3331 	zfs_handle_t *guid1hdl, *guid2hdl;
3332 	uint64_t create1, create2;
3333 
3334 	if (guid2 == 0)
3335 		return (0);
3336 	if (guid1 == 0)
3337 		return (1);
3338 
3339 	nvfs = fsavl_find(avl, guid1, &snapname);
3340 	fsname = fnvlist_lookup_string(nvfs, "name");
3341 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3342 	guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3343 	if (guid1hdl == NULL)
3344 		return (-1);
3345 
3346 	nvfs = fsavl_find(avl, guid2, &snapname);
3347 	fsname = fnvlist_lookup_string(nvfs, "name");
3348 	(void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
3349 	guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
3350 	if (guid2hdl == NULL) {
3351 		zfs_close(guid1hdl);
3352 		return (-1);
3353 	}
3354 
3355 	create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
3356 	create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
3357 
3358 	if (create1 < create2)
3359 		rv = -1;
3360 	else if (create1 > create2)
3361 		rv = +1;
3362 	else
3363 		rv = 0;
3364 
3365 	zfs_close(guid1hdl);
3366 	zfs_close(guid2hdl);
3367 
3368 	return (rv);
3369 }
3370 
3371 /*
3372  * This function reestablishes the hierarchy of encryption roots after a
3373  * recursive incremental receive has completed. This must be done after the
3374  * second call to recv_incremental_replication() has renamed and promoted all
3375  * sent datasets to their final locations in the dataset hierarchy.
3376  */
3377 static int
3378 recv_fix_encryption_hierarchy(libzfs_handle_t *hdl, const char *top_zfs,
3379     nvlist_t *stream_nv)
3380 {
3381 	int err;
3382 	nvpair_t *fselem = NULL;
3383 	nvlist_t *stream_fss;
3384 
3385 	stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
3386 
3387 	while ((fselem = nvlist_next_nvpair(stream_fss, fselem)) != NULL) {
3388 		zfs_handle_t *zhp = NULL;
3389 		uint64_t crypt;
3390 		nvlist_t *snaps, *props, *stream_nvfs = NULL;
3391 		nvpair_t *snapel = NULL;
3392 		boolean_t is_encroot, is_clone, stream_encroot;
3393 		char *cp;
3394 		const char *stream_keylocation = NULL;
3395 		char keylocation[MAXNAMELEN];
3396 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
3397 
3398 		keylocation[0] = '\0';
3399 		stream_nvfs = fnvpair_value_nvlist(fselem);
3400 		snaps = fnvlist_lookup_nvlist(stream_nvfs, "snaps");
3401 		props = fnvlist_lookup_nvlist(stream_nvfs, "props");
3402 		stream_encroot = nvlist_exists(stream_nvfs, "is_encroot");
3403 
3404 		/* find a snapshot from the stream that exists locally */
3405 		err = ENOENT;
3406 		while ((snapel = nvlist_next_nvpair(snaps, snapel)) != NULL) {
3407 			uint64_t guid;
3408 
3409 			guid = fnvpair_value_uint64(snapel);
3410 			err = guid_to_name(hdl, top_zfs, guid, B_FALSE,
3411 			    fsname);
3412 			if (err == 0)
3413 				break;
3414 		}
3415 
3416 		if (err != 0)
3417 			continue;
3418 
3419 		cp = strchr(fsname, '@');
3420 		if (cp != NULL)
3421 			*cp = '\0';
3422 
3423 		zhp = zfs_open(hdl, fsname, ZFS_TYPE_DATASET);
3424 		if (zhp == NULL) {
3425 			err = ENOENT;
3426 			goto error;
3427 		}
3428 
3429 		crypt = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION);
3430 		is_clone = zhp->zfs_dmustats.dds_origin[0] != '\0';
3431 		(void) zfs_crypto_get_encryption_root(zhp, &is_encroot, NULL);
3432 
3433 		/* we don't need to do anything for unencrypted datasets */
3434 		if (crypt == ZIO_CRYPT_OFF) {
3435 			zfs_close(zhp);
3436 			continue;
3437 		}
3438 
3439 		/*
3440 		 * If the dataset is flagged as an encryption root, was not
3441 		 * received as a clone and is not currently an encryption root,
3442 		 * force it to become one. Fixup the keylocation if necessary.
3443 		 */
3444 		if (stream_encroot) {
3445 			if (!is_clone && !is_encroot) {
3446 				err = lzc_change_key(fsname,
3447 				    DCP_CMD_FORCE_NEW_KEY, NULL, NULL, 0);
3448 				if (err != 0) {
3449 					zfs_close(zhp);
3450 					goto error;
3451 				}
3452 			}
3453 
3454 			stream_keylocation = fnvlist_lookup_string(props,
3455 			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
3456 
3457 			/*
3458 			 * Refresh the properties in case the call to
3459 			 * lzc_change_key() changed the value.
3460 			 */
3461 			zfs_refresh_properties(zhp);
3462 			err = zfs_prop_get(zhp, ZFS_PROP_KEYLOCATION,
3463 			    keylocation, sizeof (keylocation), NULL, NULL,
3464 			    0, B_TRUE);
3465 			if (err != 0) {
3466 				zfs_close(zhp);
3467 				goto error;
3468 			}
3469 
3470 			if (strcmp(keylocation, stream_keylocation) != 0) {
3471 				err = zfs_prop_set(zhp,
3472 				    zfs_prop_to_name(ZFS_PROP_KEYLOCATION),
3473 				    stream_keylocation);
3474 				if (err != 0) {
3475 					zfs_close(zhp);
3476 					goto error;
3477 				}
3478 			}
3479 		}
3480 
3481 		/*
3482 		 * If the dataset is not flagged as an encryption root and is
3483 		 * currently an encryption root, force it to inherit from its
3484 		 * parent. The root of a raw send should never be
3485 		 * force-inherited.
3486 		 */
3487 		if (!stream_encroot && is_encroot &&
3488 		    strcmp(top_zfs, fsname) != 0) {
3489 			err = lzc_change_key(fsname, DCP_CMD_FORCE_INHERIT,
3490 			    NULL, NULL, 0);
3491 			if (err != 0) {
3492 				zfs_close(zhp);
3493 				goto error;
3494 			}
3495 		}
3496 
3497 		zfs_close(zhp);
3498 	}
3499 
3500 	return (0);
3501 
3502 error:
3503 	return (err);
3504 }
3505 
3506 static int
3507 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
3508     recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3509     nvlist_t *renamed)
3510 {
3511 	nvlist_t *local_nv, *deleted = NULL;
3512 	avl_tree_t *local_avl;
3513 	nvpair_t *fselem, *nextfselem;
3514 	const char *fromsnap;
3515 	char newname[ZFS_MAX_DATASET_NAME_LEN];
3516 	char guidname[32];
3517 	int error;
3518 	boolean_t needagain, progress, recursive;
3519 	const char *s1, *s2;
3520 
3521 	fromsnap = fnvlist_lookup_string(stream_nv, "fromsnap");
3522 
3523 	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3524 	    ENOENT);
3525 
3526 	if (flags->dryrun)
3527 		return (0);
3528 
3529 again:
3530 	needagain = progress = B_FALSE;
3531 
3532 	deleted = fnvlist_alloc();
3533 
3534 	if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
3535 	    recursive, B_TRUE, B_FALSE, recursive, B_FALSE, B_FALSE, B_FALSE,
3536 	    B_FALSE, B_TRUE, &local_nv, &local_avl)) != 0)
3537 		return (error);
3538 
3539 	/*
3540 	 * Process deletes and renames
3541 	 */
3542 	for (fselem = nvlist_next_nvpair(local_nv, NULL);
3543 	    fselem; fselem = nextfselem) {
3544 		nvlist_t *nvfs, *snaps;
3545 		nvlist_t *stream_nvfs = NULL;
3546 		nvpair_t *snapelem, *nextsnapelem;
3547 		uint64_t fromguid = 0;
3548 		uint64_t originguid = 0;
3549 		uint64_t stream_originguid = 0;
3550 		uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
3551 		const char *fsname, *stream_fsname;
3552 
3553 		nextfselem = nvlist_next_nvpair(local_nv, fselem);
3554 
3555 		nvfs = fnvpair_value_nvlist(fselem);
3556 		snaps = fnvlist_lookup_nvlist(nvfs, "snaps");
3557 		fsname = fnvlist_lookup_string(nvfs, "name");
3558 		parent_fromsnap_guid = fnvlist_lookup_uint64(nvfs,
3559 		    "parentfromsnap");
3560 		(void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
3561 
3562 		/*
3563 		 * First find the stream's fs, so we can check for
3564 		 * a different origin (due to "zfs promote")
3565 		 */
3566 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
3567 		    snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
3568 			uint64_t thisguid;
3569 
3570 			thisguid = fnvpair_value_uint64(snapelem);
3571 			stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
3572 
3573 			if (stream_nvfs != NULL)
3574 				break;
3575 		}
3576 
3577 		/* check for promote */
3578 		(void) nvlist_lookup_uint64(stream_nvfs, "origin",
3579 		    &stream_originguid);
3580 		if (stream_nvfs && originguid != stream_originguid) {
3581 			switch (created_before(hdl, local_avl,
3582 			    stream_originguid, originguid)) {
3583 			case 1: {
3584 				/* promote it! */
3585 				nvlist_t *origin_nvfs;
3586 				const char *origin_fsname;
3587 
3588 				origin_nvfs = fsavl_find(local_avl, originguid,
3589 				    NULL);
3590 				origin_fsname = fnvlist_lookup_string(
3591 				    origin_nvfs, "name");
3592 				error = recv_promote(hdl, fsname, origin_fsname,
3593 				    flags);
3594 				if (error == 0)
3595 					progress = B_TRUE;
3596 				break;
3597 			}
3598 			default:
3599 				break;
3600 			case -1:
3601 				fsavl_destroy(local_avl);
3602 				fnvlist_free(local_nv);
3603 				return (-1);
3604 			}
3605 			/*
3606 			 * We had/have the wrong origin, therefore our
3607 			 * list of snapshots is wrong.  Need to handle
3608 			 * them on the next pass.
3609 			 */
3610 			needagain = B_TRUE;
3611 			continue;
3612 		}
3613 
3614 		for (snapelem = nvlist_next_nvpair(snaps, NULL);
3615 		    snapelem; snapelem = nextsnapelem) {
3616 			uint64_t thisguid;
3617 			const char *stream_snapname;
3618 			nvlist_t *found, *props;
3619 
3620 			nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
3621 
3622 			thisguid = fnvpair_value_uint64(snapelem);
3623 			found = fsavl_find(stream_avl, thisguid,
3624 			    &stream_snapname);
3625 
3626 			/* check for delete */
3627 			if (found == NULL) {
3628 				char name[ZFS_MAX_DATASET_NAME_LEN];
3629 
3630 				if (!flags->force)
3631 					continue;
3632 
3633 				(void) snprintf(name, sizeof (name), "%s@%s",
3634 				    fsname, nvpair_name(snapelem));
3635 
3636 				error = recv_destroy(hdl, name,
3637 				    strlen(fsname)+1, newname, flags);
3638 				if (error)
3639 					needagain = B_TRUE;
3640 				else
3641 					progress = B_TRUE;
3642 				sprintf(guidname, "%llu",
3643 				    (u_longlong_t)thisguid);
3644 				nvlist_add_boolean(deleted, guidname);
3645 				continue;
3646 			}
3647 
3648 			stream_nvfs = found;
3649 
3650 			if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
3651 			    &props) && 0 == nvlist_lookup_nvlist(props,
3652 			    stream_snapname, &props)) {
3653 				zfs_cmd_t zc = {"\0"};
3654 
3655 				zc.zc_cookie = B_TRUE; /* received */
3656 				(void) snprintf(zc.zc_name, sizeof (zc.zc_name),
3657 				    "%s@%s", fsname, nvpair_name(snapelem));
3658 				zcmd_write_src_nvlist(hdl, &zc, props);
3659 				(void) zfs_ioctl(hdl,
3660 				    ZFS_IOC_SET_PROP, &zc);
3661 				zcmd_free_nvlists(&zc);
3662 			}
3663 
3664 			/* check for different snapname */
3665 			if (strcmp(nvpair_name(snapelem),
3666 			    stream_snapname) != 0) {
3667 				char name[ZFS_MAX_DATASET_NAME_LEN];
3668 				char tryname[ZFS_MAX_DATASET_NAME_LEN];
3669 
3670 				(void) snprintf(name, sizeof (name), "%s@%s",
3671 				    fsname, nvpair_name(snapelem));
3672 				(void) snprintf(tryname, sizeof (name), "%s@%s",
3673 				    fsname, stream_snapname);
3674 
3675 				error = recv_rename(hdl, name, tryname,
3676 				    strlen(fsname)+1, newname, flags);
3677 				if (error)
3678 					needagain = B_TRUE;
3679 				else
3680 					progress = B_TRUE;
3681 			}
3682 
3683 			if (strcmp(stream_snapname, fromsnap) == 0)
3684 				fromguid = thisguid;
3685 		}
3686 
3687 		/* check for delete */
3688 		if (stream_nvfs == NULL) {
3689 			if (!flags->force)
3690 				continue;
3691 
3692 			error = recv_destroy(hdl, fsname, strlen(tofs)+1,
3693 			    newname, flags);
3694 			if (error)
3695 				needagain = B_TRUE;
3696 			else
3697 				progress = B_TRUE;
3698 			sprintf(guidname, "%llu",
3699 			    (u_longlong_t)parent_fromsnap_guid);
3700 			nvlist_add_boolean(deleted, guidname);
3701 			continue;
3702 		}
3703 
3704 		if (fromguid == 0) {
3705 			if (flags->verbose) {
3706 				(void) printf("local fs %s does not have "
3707 				    "fromsnap (%s in stream); must have "
3708 				    "been deleted locally; ignoring\n",
3709 				    fsname, fromsnap);
3710 			}
3711 			continue;
3712 		}
3713 
3714 		stream_fsname = fnvlist_lookup_string(stream_nvfs, "name");
3715 		stream_parent_fromsnap_guid = fnvlist_lookup_uint64(
3716 		    stream_nvfs, "parentfromsnap");
3717 
3718 		s1 = strrchr(fsname, '/');
3719 		s2 = strrchr(stream_fsname, '/');
3720 
3721 		/*
3722 		 * Check if we're going to rename based on parent guid change
3723 		 * and the current parent guid was also deleted. If it was then
3724 		 * rename will fail and is likely unneeded, so avoid this and
3725 		 * force an early retry to determine the new
3726 		 * parent_fromsnap_guid.
3727 		 */
3728 		if (stream_parent_fromsnap_guid != 0 &&
3729 		    parent_fromsnap_guid != 0 &&
3730 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) {
3731 			sprintf(guidname, "%llu",
3732 			    (u_longlong_t)parent_fromsnap_guid);
3733 			if (nvlist_exists(deleted, guidname)) {
3734 				progress = B_TRUE;
3735 				needagain = B_TRUE;
3736 				goto doagain;
3737 			}
3738 		}
3739 
3740 		/*
3741 		 * Check for rename. If the exact receive path is specified, it
3742 		 * does not count as a rename, but we still need to check the
3743 		 * datasets beneath it.
3744 		 */
3745 		if ((stream_parent_fromsnap_guid != 0 &&
3746 		    parent_fromsnap_guid != 0 &&
3747 		    stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
3748 		    ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
3749 		    (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
3750 			nvlist_t *parent;
3751 			char tryname[ZFS_MAX_DATASET_NAME_LEN];
3752 
3753 			parent = fsavl_find(local_avl,
3754 			    stream_parent_fromsnap_guid, NULL);
3755 			/*
3756 			 * NB: parent might not be found if we used the
3757 			 * tosnap for stream_parent_fromsnap_guid,
3758 			 * because the parent is a newly-created fs;
3759 			 * we'll be able to rename it after we recv the
3760 			 * new fs.
3761 			 */
3762 			if (parent != NULL) {
3763 				const char *pname;
3764 
3765 				pname = fnvlist_lookup_string(parent, "name");
3766 				(void) snprintf(tryname, sizeof (tryname),
3767 				    "%s%s", pname, strrchr(stream_fsname, '/'));
3768 			} else {
3769 				tryname[0] = '\0';
3770 				if (flags->verbose) {
3771 					(void) printf("local fs %s new parent "
3772 					    "not found\n", fsname);
3773 				}
3774 			}
3775 
3776 			newname[0] = '\0';
3777 
3778 			error = recv_rename(hdl, fsname, tryname,
3779 			    strlen(tofs)+1, newname, flags);
3780 
3781 			if (renamed != NULL && newname[0] != '\0') {
3782 				fnvlist_add_boolean(renamed, newname);
3783 			}
3784 
3785 			if (error)
3786 				needagain = B_TRUE;
3787 			else
3788 				progress = B_TRUE;
3789 		}
3790 	}
3791 
3792 doagain:
3793 	fsavl_destroy(local_avl);
3794 	fnvlist_free(local_nv);
3795 	fnvlist_free(deleted);
3796 
3797 	if (needagain && progress) {
3798 		/* do another pass to fix up temporary names */
3799 		if (flags->verbose)
3800 			(void) printf("another pass:\n");
3801 		goto again;
3802 	}
3803 
3804 	return (needagain || error != 0);
3805 }
3806 
3807 static int
3808 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
3809     recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
3810     char **top_zfs, nvlist_t *cmdprops)
3811 {
3812 	nvlist_t *stream_nv = NULL;
3813 	avl_tree_t *stream_avl = NULL;
3814 	const char *fromsnap = NULL;
3815 	const char *sendsnap = NULL;
3816 	char *cp;
3817 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
3818 	char sendfs[ZFS_MAX_DATASET_NAME_LEN];
3819 	char errbuf[ERRBUFLEN];
3820 	dmu_replay_record_t drre;
3821 	int error;
3822 	boolean_t anyerr = B_FALSE;
3823 	boolean_t softerr = B_FALSE;
3824 	boolean_t recursive, raw;
3825 
3826 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3827 	    "cannot receive"));
3828 
3829 	assert(drr->drr_type == DRR_BEGIN);
3830 	assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
3831 	assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
3832 	    DMU_COMPOUNDSTREAM);
3833 
3834 	/*
3835 	 * Read in the nvlist from the stream.
3836 	 */
3837 	if (drr->drr_payloadlen != 0) {
3838 		error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
3839 		    &stream_nv, flags->byteswap, zc);
3840 		if (error) {
3841 			error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3842 			goto out;
3843 		}
3844 	}
3845 
3846 	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
3847 	    ENOENT);
3848 	raw = (nvlist_lookup_boolean(stream_nv, "raw") == 0);
3849 
3850 	if (recursive && strchr(destname, '@')) {
3851 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3852 		    "cannot specify snapshot name for multi-snapshot stream"));
3853 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3854 		goto out;
3855 	}
3856 
3857 	/*
3858 	 * Read in the end record and verify checksum.
3859 	 */
3860 	if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
3861 	    flags->byteswap, NULL)))
3862 		goto out;
3863 	if (flags->byteswap) {
3864 		drre.drr_type = BSWAP_32(drre.drr_type);
3865 		drre.drr_u.drr_end.drr_checksum.zc_word[0] =
3866 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
3867 		drre.drr_u.drr_end.drr_checksum.zc_word[1] =
3868 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
3869 		drre.drr_u.drr_end.drr_checksum.zc_word[2] =
3870 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
3871 		drre.drr_u.drr_end.drr_checksum.zc_word[3] =
3872 		    BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
3873 	}
3874 	if (drre.drr_type != DRR_END) {
3875 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3876 		goto out;
3877 	}
3878 	if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
3879 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3880 		    "incorrect header checksum"));
3881 		error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
3882 		goto out;
3883 	}
3884 
3885 	(void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
3886 
3887 	if (drr->drr_payloadlen != 0) {
3888 		nvlist_t *stream_fss;
3889 
3890 		stream_fss = fnvlist_lookup_nvlist(stream_nv, "fss");
3891 		if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
3892 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3893 			    "couldn't allocate avl tree"));
3894 			error = zfs_error(hdl, EZFS_NOMEM, errbuf);
3895 			goto out;
3896 		}
3897 
3898 		if (fromsnap != NULL && recursive) {
3899 			nvlist_t *renamed = NULL;
3900 			nvpair_t *pair = NULL;
3901 
3902 			(void) strlcpy(tofs, destname, sizeof (tofs));
3903 			if (flags->isprefix) {
3904 				struct drr_begin *drrb = &drr->drr_u.drr_begin;
3905 				int i;
3906 
3907 				if (flags->istail) {
3908 					cp = strrchr(drrb->drr_toname, '/');
3909 					if (cp == NULL) {
3910 						(void) strlcat(tofs, "/",
3911 						    sizeof (tofs));
3912 						i = 0;
3913 					} else {
3914 						i = (cp - drrb->drr_toname);
3915 					}
3916 				} else {
3917 					i = strcspn(drrb->drr_toname, "/@");
3918 				}
3919 				/* zfs_receive_one() will create_parents() */
3920 				(void) strlcat(tofs, &drrb->drr_toname[i],
3921 				    sizeof (tofs));
3922 				*strchr(tofs, '@') = '\0';
3923 			}
3924 
3925 			if (!flags->dryrun && !flags->nomount) {
3926 				renamed = fnvlist_alloc();
3927 			}
3928 
3929 			softerr = recv_incremental_replication(hdl, tofs, flags,
3930 			    stream_nv, stream_avl, renamed);
3931 
3932 			/* Unmount renamed filesystems before receiving. */
3933 			while ((pair = nvlist_next_nvpair(renamed,
3934 			    pair)) != NULL) {
3935 				zfs_handle_t *zhp;
3936 				prop_changelist_t *clp = NULL;
3937 
3938 				zhp = zfs_open(hdl, nvpair_name(pair),
3939 				    ZFS_TYPE_FILESYSTEM);
3940 				if (zhp != NULL) {
3941 					clp = changelist_gather(zhp,
3942 					    ZFS_PROP_MOUNTPOINT, 0,
3943 					    flags->forceunmount ? MS_FORCE : 0);
3944 					zfs_close(zhp);
3945 					if (clp != NULL) {
3946 						softerr |=
3947 						    changelist_prefix(clp);
3948 						changelist_free(clp);
3949 					}
3950 				}
3951 			}
3952 
3953 			fnvlist_free(renamed);
3954 		}
3955 	}
3956 
3957 	/*
3958 	 * Get the fs specified by the first path in the stream (the top level
3959 	 * specified by 'zfs send') and pass it to each invocation of
3960 	 * zfs_receive_one().
3961 	 */
3962 	(void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
3963 	    sizeof (sendfs));
3964 	if ((cp = strchr(sendfs, '@')) != NULL) {
3965 		*cp = '\0';
3966 		/*
3967 		 * Find the "sendsnap", the final snapshot in a replication
3968 		 * stream.  zfs_receive_one() handles certain errors
3969 		 * differently, depending on if the contained stream is the
3970 		 * last one or not.
3971 		 */
3972 		sendsnap = (cp + 1);
3973 	}
3974 
3975 	/* Finally, receive each contained stream */
3976 	do {
3977 		/*
3978 		 * we should figure out if it has a recoverable
3979 		 * error, in which case do a recv_skip() and drive on.
3980 		 * Note, if we fail due to already having this guid,
3981 		 * zfs_receive_one() will take care of it (ie,
3982 		 * recv_skip() and return 0).
3983 		 */
3984 		error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
3985 		    sendfs, stream_nv, stream_avl, top_zfs, sendsnap, cmdprops);
3986 		if (error == ENODATA) {
3987 			error = 0;
3988 			break;
3989 		}
3990 		anyerr |= error;
3991 	} while (error == 0);
3992 
3993 	if (drr->drr_payloadlen != 0 && recursive && fromsnap != NULL) {
3994 		/*
3995 		 * Now that we have the fs's they sent us, try the
3996 		 * renames again.
3997 		 */
3998 		softerr = recv_incremental_replication(hdl, tofs, flags,
3999 		    stream_nv, stream_avl, NULL);
4000 	}
4001 
4002 	if (raw && softerr == 0 && *top_zfs != NULL) {
4003 		softerr = recv_fix_encryption_hierarchy(hdl, *top_zfs,
4004 		    stream_nv);
4005 	}
4006 
4007 out:
4008 	fsavl_destroy(stream_avl);
4009 	fnvlist_free(stream_nv);
4010 	if (softerr)
4011 		error = -2;
4012 	if (anyerr)
4013 		error = -1;
4014 	return (error);
4015 }
4016 
4017 static void
4018 trunc_prop_errs(int truncated)
4019 {
4020 	ASSERT(truncated != 0);
4021 
4022 	if (truncated == 1)
4023 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4024 		    "1 more property could not be set\n"));
4025 	else
4026 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4027 		    "%d more properties could not be set\n"), truncated);
4028 }
4029 
4030 static int
4031 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
4032 {
4033 	dmu_replay_record_t *drr;
4034 	void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
4035 	uint64_t payload_size;
4036 	char errbuf[ERRBUFLEN];
4037 
4038 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4039 	    "cannot receive"));
4040 
4041 	/* XXX would be great to use lseek if possible... */
4042 	drr = buf;
4043 
4044 	while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
4045 	    byteswap, NULL) == 0) {
4046 		if (byteswap)
4047 			drr->drr_type = BSWAP_32(drr->drr_type);
4048 
4049 		switch (drr->drr_type) {
4050 		case DRR_BEGIN:
4051 			if (drr->drr_payloadlen != 0) {
4052 				(void) recv_read(hdl, fd, buf,
4053 				    drr->drr_payloadlen, B_FALSE, NULL);
4054 			}
4055 			break;
4056 
4057 		case DRR_END:
4058 			free(buf);
4059 			return (0);
4060 
4061 		case DRR_OBJECT:
4062 			if (byteswap) {
4063 				drr->drr_u.drr_object.drr_bonuslen =
4064 				    BSWAP_32(drr->drr_u.drr_object.
4065 				    drr_bonuslen);
4066 				drr->drr_u.drr_object.drr_raw_bonuslen =
4067 				    BSWAP_32(drr->drr_u.drr_object.
4068 				    drr_raw_bonuslen);
4069 			}
4070 
4071 			payload_size =
4072 			    DRR_OBJECT_PAYLOAD_SIZE(&drr->drr_u.drr_object);
4073 			(void) recv_read(hdl, fd, buf, payload_size,
4074 			    B_FALSE, NULL);
4075 			break;
4076 
4077 		case DRR_WRITE:
4078 			if (byteswap) {
4079 				drr->drr_u.drr_write.drr_logical_size =
4080 				    BSWAP_64(
4081 				    drr->drr_u.drr_write.drr_logical_size);
4082 				drr->drr_u.drr_write.drr_compressed_size =
4083 				    BSWAP_64(
4084 				    drr->drr_u.drr_write.drr_compressed_size);
4085 			}
4086 			payload_size =
4087 			    DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
4088 			assert(payload_size <= SPA_MAXBLOCKSIZE);
4089 			(void) recv_read(hdl, fd, buf,
4090 			    payload_size, B_FALSE, NULL);
4091 			break;
4092 		case DRR_SPILL:
4093 			if (byteswap) {
4094 				drr->drr_u.drr_spill.drr_length =
4095 				    BSWAP_64(drr->drr_u.drr_spill.drr_length);
4096 				drr->drr_u.drr_spill.drr_compressed_size =
4097 				    BSWAP_64(drr->drr_u.drr_spill.
4098 				    drr_compressed_size);
4099 			}
4100 
4101 			payload_size =
4102 			    DRR_SPILL_PAYLOAD_SIZE(&drr->drr_u.drr_spill);
4103 			(void) recv_read(hdl, fd, buf, payload_size,
4104 			    B_FALSE, NULL);
4105 			break;
4106 		case DRR_WRITE_EMBEDDED:
4107 			if (byteswap) {
4108 				drr->drr_u.drr_write_embedded.drr_psize =
4109 				    BSWAP_32(drr->drr_u.drr_write_embedded.
4110 				    drr_psize);
4111 			}
4112 			(void) recv_read(hdl, fd, buf,
4113 			    P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
4114 			    8), B_FALSE, NULL);
4115 			break;
4116 		case DRR_OBJECT_RANGE:
4117 		case DRR_WRITE_BYREF:
4118 		case DRR_FREEOBJECTS:
4119 		case DRR_FREE:
4120 			break;
4121 
4122 		default:
4123 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4124 			    "invalid record type"));
4125 			free(buf);
4126 			return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
4127 		}
4128 	}
4129 
4130 	free(buf);
4131 	return (-1);
4132 }
4133 
4134 static void
4135 recv_ecksum_set_aux(libzfs_handle_t *hdl, const char *target_snap,
4136     boolean_t resumable, boolean_t checksum)
4137 {
4138 	char target_fs[ZFS_MAX_DATASET_NAME_LEN];
4139 
4140 	zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, (checksum ?
4141 	    "checksum mismatch" : "incomplete stream")));
4142 
4143 	if (!resumable)
4144 		return;
4145 	(void) strlcpy(target_fs, target_snap, sizeof (target_fs));
4146 	*strchr(target_fs, '@') = '\0';
4147 	zfs_handle_t *zhp = zfs_open(hdl, target_fs,
4148 	    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
4149 	if (zhp == NULL)
4150 		return;
4151 
4152 	char token_buf[ZFS_MAXPROPLEN];
4153 	int error = zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN,
4154 	    token_buf, sizeof (token_buf),
4155 	    NULL, NULL, 0, B_TRUE);
4156 	if (error == 0) {
4157 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4158 		    "checksum mismatch or incomplete stream.\n"
4159 		    "Partially received snapshot is saved.\n"
4160 		    "A resuming stream can be generated on the sending "
4161 		    "system by running:\n"
4162 		    "    zfs send -t %s"),
4163 		    token_buf);
4164 	}
4165 	zfs_close(zhp);
4166 }
4167 
4168 /*
4169  * Prepare a new nvlist of properties that are to override (-o) or be excluded
4170  * (-x) from the received dataset
4171  * recvprops: received properties from the send stream
4172  * cmdprops: raw input properties from command line
4173  * origprops: properties, both locally-set and received, currently set on the
4174  *            target dataset if it exists, NULL otherwise.
4175  * oxprops: valid output override (-o) and excluded (-x) properties
4176  */
4177 static int
4178 zfs_setup_cmdline_props(libzfs_handle_t *hdl, zfs_type_t type,
4179     char *fsname, boolean_t zoned, boolean_t recursive, boolean_t newfs,
4180     boolean_t raw, boolean_t toplevel, nvlist_t *recvprops, nvlist_t *cmdprops,
4181     nvlist_t *origprops, nvlist_t **oxprops, uint8_t **wkeydata_out,
4182     uint_t *wkeylen_out, const char *errbuf)
4183 {
4184 	nvpair_t *nvp;
4185 	nvlist_t *oprops, *voprops;
4186 	zfs_handle_t *zhp = NULL;
4187 	zpool_handle_t *zpool_hdl = NULL;
4188 	char *cp;
4189 	int ret = 0;
4190 	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
4191 
4192 	if (nvlist_empty(cmdprops))
4193 		return (0); /* No properties to override or exclude */
4194 
4195 	*oxprops = fnvlist_alloc();
4196 	oprops = fnvlist_alloc();
4197 
4198 	strlcpy(namebuf, fsname, ZFS_MAX_DATASET_NAME_LEN);
4199 
4200 	/*
4201 	 * Get our dataset handle. The target dataset may not exist yet.
4202 	 */
4203 	if (zfs_dataset_exists(hdl, namebuf, ZFS_TYPE_DATASET)) {
4204 		zhp = zfs_open(hdl, namebuf, ZFS_TYPE_DATASET);
4205 		if (zhp == NULL) {
4206 			ret = -1;
4207 			goto error;
4208 		}
4209 	}
4210 
4211 	/* open the zpool handle */
4212 	cp = strchr(namebuf, '/');
4213 	if (cp != NULL)
4214 		*cp = '\0';
4215 	zpool_hdl = zpool_open(hdl, namebuf);
4216 	if (zpool_hdl == NULL) {
4217 		ret = -1;
4218 		goto error;
4219 	}
4220 
4221 	/* restore namebuf to match fsname for later use */
4222 	if (cp != NULL)
4223 		*cp = '/';
4224 
4225 	/*
4226 	 * first iteration: process excluded (-x) properties now and gather
4227 	 * added (-o) properties to be later processed by zfs_valid_proplist()
4228 	 */
4229 	nvp = NULL;
4230 	while ((nvp = nvlist_next_nvpair(cmdprops, nvp)) != NULL) {
4231 		const char *name = nvpair_name(nvp);
4232 		zfs_prop_t prop = zfs_name_to_prop(name);
4233 
4234 		/*
4235 		 * It turns out, if we don't normalize "aliased" names
4236 		 * e.g. compress= against the "real" names (e.g. compression)
4237 		 * here, then setting/excluding them does not work as
4238 		 * intended.
4239 		 *
4240 		 * But since user-defined properties wouldn't have a valid
4241 		 * mapping here, we do this conditional dance.
4242 		 */
4243 		const char *newname = name;
4244 		if (prop >= ZFS_PROP_TYPE)
4245 			newname = zfs_prop_to_name(prop);
4246 
4247 		/* "origin" is processed separately, don't handle it here */
4248 		if (prop == ZFS_PROP_ORIGIN)
4249 			continue;
4250 
4251 		/* raw streams can't override encryption properties */
4252 		if ((zfs_prop_encryption_key_param(prop) ||
4253 		    prop == ZFS_PROP_ENCRYPTION) && raw) {
4254 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4255 			    "encryption property '%s' cannot "
4256 			    "be set or excluded for raw streams."), name);
4257 			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4258 			goto error;
4259 		}
4260 
4261 		/*
4262 		 * For plain replicated send, we can ignore encryption
4263 		 * properties other than first stream
4264 		 */
4265 		if ((zfs_prop_encryption_key_param(prop) || prop ==
4266 		    ZFS_PROP_ENCRYPTION) && !newfs && recursive && !raw) {
4267 			continue;
4268 		}
4269 
4270 		/* incremental streams can only exclude encryption properties */
4271 		if ((zfs_prop_encryption_key_param(prop) ||
4272 		    prop == ZFS_PROP_ENCRYPTION) && !newfs &&
4273 		    nvpair_type(nvp) != DATA_TYPE_BOOLEAN) {
4274 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4275 			    "encryption property '%s' cannot "
4276 			    "be set for incremental streams."), name);
4277 			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4278 			goto error;
4279 		}
4280 
4281 		switch (nvpair_type(nvp)) {
4282 		case DATA_TYPE_BOOLEAN: /* -x property */
4283 			/*
4284 			 * DATA_TYPE_BOOLEAN is the way we're asked to "exclude"
4285 			 * a property: this is done by forcing an explicit
4286 			 * inherit on the destination so the effective value is
4287 			 * not the one we received from the send stream.
4288 			 */
4289 			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
4290 			    !zfs_prop_user(name)) {
4291 				(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
4292 				    "Warning: %s: property '%s' does not "
4293 				    "apply to datasets of this type\n"),
4294 				    fsname, name);
4295 				continue;
4296 			}
4297 			/*
4298 			 * We do this only if the property is not already
4299 			 * locally-set, in which case its value will take
4300 			 * priority over the received anyway.
4301 			 */
4302 			if (nvlist_exists(origprops, newname)) {
4303 				nvlist_t *attrs;
4304 				const char *source = NULL;
4305 
4306 				attrs = fnvlist_lookup_nvlist(origprops,
4307 				    newname);
4308 				if (nvlist_lookup_string(attrs,
4309 				    ZPROP_SOURCE, &source) == 0 &&
4310 				    strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0)
4311 					continue;
4312 			}
4313 			/*
4314 			 * We can't force an explicit inherit on non-inheritable
4315 			 * properties: if we're asked to exclude this kind of
4316 			 * values we remove them from "recvprops" input nvlist.
4317 			 */
4318 			if (!zfs_prop_user(name) && /* can be inherited too */
4319 			    !zfs_prop_inheritable(prop) &&
4320 			    nvlist_exists(recvprops, newname))
4321 				fnvlist_remove(recvprops, newname);
4322 			else
4323 				fnvlist_add_boolean(*oxprops, newname);
4324 			break;
4325 		case DATA_TYPE_STRING: /* -o property=value */
4326 			/*
4327 			 * we're trying to override a property that does not
4328 			 * make sense for this type of dataset, but we don't
4329 			 * want to fail if the receive is recursive: this comes
4330 			 * in handy when the send stream contains, for
4331 			 * instance, a child ZVOL and we're trying to receive
4332 			 * it with "-o atime=on"
4333 			 */
4334 			if (!zfs_prop_valid_for_type(prop, type, B_FALSE) &&
4335 			    !zfs_prop_user(name)) {
4336 				if (recursive)
4337 					continue;
4338 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4339 				    "property '%s' does not apply to datasets "
4340 				    "of this type"), name);
4341 				ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4342 				goto error;
4343 			}
4344 			fnvlist_add_string(oprops, newname,
4345 			    fnvpair_value_string(nvp));
4346 			break;
4347 		default:
4348 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4349 			    "property '%s' must be a string or boolean"), name);
4350 			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4351 			goto error;
4352 		}
4353 	}
4354 
4355 	if (toplevel) {
4356 		/* convert override strings properties to native */
4357 		if ((voprops = zfs_valid_proplist(hdl, ZFS_TYPE_DATASET,
4358 		    oprops, zoned, zhp, zpool_hdl, B_FALSE, errbuf)) == NULL) {
4359 			ret = zfs_error(hdl, EZFS_BADPROP, errbuf);
4360 			goto error;
4361 		}
4362 
4363 		/*
4364 		 * zfs_crypto_create() requires the parent name. Get it
4365 		 * by truncating the fsname copy stored in namebuf.
4366 		 */
4367 		cp = strrchr(namebuf, '/');
4368 		if (cp != NULL)
4369 			*cp = '\0';
4370 
4371 		if (!raw && !(!newfs && recursive) &&
4372 		    zfs_crypto_create(hdl, namebuf, voprops, NULL,
4373 		    B_FALSE, wkeydata_out, wkeylen_out) != 0) {
4374 			fnvlist_free(voprops);
4375 			ret = zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
4376 			goto error;
4377 		}
4378 
4379 		/* second pass: process "-o" properties */
4380 		fnvlist_merge(*oxprops, voprops);
4381 		fnvlist_free(voprops);
4382 	} else {
4383 		/* override props on child dataset are inherited */
4384 		nvp = NULL;
4385 		while ((nvp = nvlist_next_nvpair(oprops, nvp)) != NULL) {
4386 			const char *name = nvpair_name(nvp);
4387 			fnvlist_add_boolean(*oxprops, name);
4388 		}
4389 	}
4390 
4391 error:
4392 	if (zhp != NULL)
4393 		zfs_close(zhp);
4394 	if (zpool_hdl != NULL)
4395 		zpool_close(zpool_hdl);
4396 	fnvlist_free(oprops);
4397 	return (ret);
4398 }
4399 
4400 /*
4401  * Restores a backup of tosnap from the file descriptor specified by infd.
4402  */
4403 static int
4404 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
4405     const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
4406     dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
4407     avl_tree_t *stream_avl, char **top_zfs,
4408     const char *finalsnap, nvlist_t *cmdprops)
4409 {
4410 	struct timespec begin_time;
4411 	int ioctl_err, ioctl_errno, err;
4412 	char *cp;
4413 	struct drr_begin *drrb = &drr->drr_u.drr_begin;
4414 	char errbuf[ERRBUFLEN];
4415 	const char *chopprefix;
4416 	boolean_t newfs = B_FALSE;
4417 	boolean_t stream_wantsnewfs, stream_resumingnewfs;
4418 	boolean_t newprops = B_FALSE;
4419 	uint64_t read_bytes = 0;
4420 	uint64_t errflags = 0;
4421 	uint64_t parent_snapguid = 0;
4422 	prop_changelist_t *clp = NULL;
4423 	nvlist_t *snapprops_nvlist = NULL;
4424 	nvlist_t *snapholds_nvlist = NULL;
4425 	zprop_errflags_t prop_errflags;
4426 	nvlist_t *prop_errors = NULL;
4427 	boolean_t recursive;
4428 	const char *snapname = NULL;
4429 	char destsnap[MAXPATHLEN * 2];
4430 	char origin[MAXNAMELEN] = {0};
4431 	char name[MAXPATHLEN];
4432 	char tmp_keylocation[MAXNAMELEN] = {0};
4433 	nvlist_t *rcvprops = NULL; /* props received from the send stream */
4434 	nvlist_t *oxprops = NULL; /* override (-o) and exclude (-x) props */
4435 	nvlist_t *origprops = NULL; /* original props (if destination exists) */
4436 	zfs_type_t type = ZFS_TYPE_INVALID;
4437 	boolean_t toplevel = B_FALSE;
4438 	boolean_t zoned = B_FALSE;
4439 	boolean_t hastoken = B_FALSE;
4440 	boolean_t redacted;
4441 	uint8_t *wkeydata = NULL;
4442 	uint_t wkeylen = 0;
4443 
4444 #ifndef CLOCK_MONOTONIC_RAW
4445 #define	CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
4446 #endif
4447 	clock_gettime(CLOCK_MONOTONIC_RAW, &begin_time);
4448 
4449 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4450 	    "cannot receive"));
4451 
4452 	recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
4453 	    ENOENT);
4454 
4455 	/* Did the user request holds be skipped via zfs recv -k? */
4456 	boolean_t holds = flags->holds && !flags->skipholds;
4457 
4458 	if (stream_avl != NULL) {
4459 		const char *keylocation = NULL;
4460 		nvlist_t *lookup = NULL;
4461 		nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
4462 		    &snapname);
4463 
4464 		(void) nvlist_lookup_uint64(fs, "parentfromsnap",
4465 		    &parent_snapguid);
4466 		err = nvlist_lookup_nvlist(fs, "props", &rcvprops);
4467 		if (err) {
4468 			rcvprops = fnvlist_alloc();
4469 			newprops = B_TRUE;
4470 		}
4471 
4472 		/*
4473 		 * The keylocation property may only be set on encryption roots,
4474 		 * but this dataset might not become an encryption root until
4475 		 * recv_fix_encryption_hierarchy() is called. That function
4476 		 * will fixup the keylocation anyway, so we temporarily unset
4477 		 * the keylocation for now to avoid any errors from the receive
4478 		 * ioctl.
4479 		 */
4480 		err = nvlist_lookup_string(rcvprops,
4481 		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), &keylocation);
4482 		if (err == 0) {
4483 			strlcpy(tmp_keylocation, keylocation, MAXNAMELEN);
4484 			(void) nvlist_remove_all(rcvprops,
4485 			    zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
4486 		}
4487 
4488 		if (flags->canmountoff) {
4489 			fnvlist_add_uint64(rcvprops,
4490 			    zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0);
4491 		} else if (newprops) {	/* nothing in rcvprops, eliminate it */
4492 			fnvlist_free(rcvprops);
4493 			rcvprops = NULL;
4494 			newprops = B_FALSE;
4495 		}
4496 		if (0 == nvlist_lookup_nvlist(fs, "snapprops", &lookup)) {
4497 			snapprops_nvlist = fnvlist_lookup_nvlist(lookup,
4498 			    snapname);
4499 		}
4500 		if (holds) {
4501 			if (0 == nvlist_lookup_nvlist(fs, "snapholds",
4502 			    &lookup)) {
4503 				snapholds_nvlist = fnvlist_lookup_nvlist(
4504 				    lookup, snapname);
4505 			}
4506 		}
4507 	}
4508 
4509 	cp = NULL;
4510 
4511 	/*
4512 	 * Determine how much of the snapshot name stored in the stream
4513 	 * we are going to tack on to the name they specified on the
4514 	 * command line, and how much we are going to chop off.
4515 	 *
4516 	 * If they specified a snapshot, chop the entire name stored in
4517 	 * the stream.
4518 	 */
4519 	if (flags->istail) {
4520 		/*
4521 		 * A filesystem was specified with -e. We want to tack on only
4522 		 * the tail of the sent snapshot path.
4523 		 */
4524 		if (strchr(tosnap, '@')) {
4525 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4526 			    "argument - snapshot not allowed with -e"));
4527 			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4528 			goto out;
4529 		}
4530 
4531 		chopprefix = strrchr(sendfs, '/');
4532 
4533 		if (chopprefix == NULL) {
4534 			/*
4535 			 * The tail is the poolname, so we need to
4536 			 * prepend a path separator.
4537 			 */
4538 			int len = strlen(drrb->drr_toname);
4539 			cp = umem_alloc(len + 2, UMEM_NOFAIL);
4540 			cp[0] = '/';
4541 			(void) strcpy(&cp[1], drrb->drr_toname);
4542 			chopprefix = cp;
4543 		} else {
4544 			chopprefix = drrb->drr_toname + (chopprefix - sendfs);
4545 		}
4546 	} else if (flags->isprefix) {
4547 		/*
4548 		 * A filesystem was specified with -d. We want to tack on
4549 		 * everything but the first element of the sent snapshot path
4550 		 * (all but the pool name).
4551 		 */
4552 		if (strchr(tosnap, '@')) {
4553 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
4554 			    "argument - snapshot not allowed with -d"));
4555 			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4556 			goto out;
4557 		}
4558 
4559 		chopprefix = strchr(drrb->drr_toname, '/');
4560 		if (chopprefix == NULL)
4561 			chopprefix = strchr(drrb->drr_toname, '@');
4562 	} else if (strchr(tosnap, '@') == NULL) {
4563 		/*
4564 		 * If a filesystem was specified without -d or -e, we want to
4565 		 * tack on everything after the fs specified by 'zfs send'.
4566 		 */
4567 		chopprefix = drrb->drr_toname + strlen(sendfs);
4568 	} else {
4569 		/* A snapshot was specified as an exact path (no -d or -e). */
4570 		if (recursive) {
4571 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4572 			    "cannot specify snapshot name for multi-snapshot "
4573 			    "stream"));
4574 			err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4575 			goto out;
4576 		}
4577 		chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
4578 	}
4579 
4580 	ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
4581 	ASSERT(chopprefix > drrb->drr_toname || strchr(sendfs, '/') == NULL);
4582 	ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname) ||
4583 	    strchr(sendfs, '/') == NULL);
4584 	ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
4585 	    chopprefix[0] == '\0');
4586 
4587 	/*
4588 	 * Determine name of destination snapshot.
4589 	 */
4590 	(void) strlcpy(destsnap, tosnap, sizeof (destsnap));
4591 	(void) strlcat(destsnap, chopprefix, sizeof (destsnap));
4592 	if (cp != NULL)
4593 		umem_free(cp, strlen(cp) + 1);
4594 	if (!zfs_name_valid(destsnap, ZFS_TYPE_SNAPSHOT)) {
4595 		err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4596 		goto out;
4597 	}
4598 
4599 	/*
4600 	 * Determine the name of the origin snapshot.
4601 	 */
4602 	if (originsnap) {
4603 		(void) strlcpy(origin, originsnap, sizeof (origin));
4604 		if (flags->verbose)
4605 			(void) printf("using provided clone origin %s\n",
4606 			    origin);
4607 	} else if (drrb->drr_flags & DRR_FLAG_CLONE) {
4608 		if (guid_to_name(hdl, destsnap,
4609 		    drrb->drr_fromguid, B_FALSE, origin) != 0) {
4610 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4611 			    "local origin for clone %s does not exist"),
4612 			    destsnap);
4613 			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4614 			goto out;
4615 		}
4616 		if (flags->verbose)
4617 			(void) printf("found clone origin %s\n", origin);
4618 	}
4619 
4620 	if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4621 	    DMU_BACKUP_FEATURE_DEDUP)) {
4622 		(void) fprintf(stderr,
4623 		    gettext("ERROR: \"zfs receive\" no longer supports "
4624 		    "deduplicated send streams.  Use\n"
4625 		    "the \"zstream redup\" command to convert this stream "
4626 		    "to a regular,\n"
4627 		    "non-deduplicated stream.\n"));
4628 		err = zfs_error(hdl, EZFS_NOTSUP, errbuf);
4629 		goto out;
4630 	}
4631 
4632 	boolean_t resuming = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4633 	    DMU_BACKUP_FEATURE_RESUMING;
4634 	boolean_t raw = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4635 	    DMU_BACKUP_FEATURE_RAW;
4636 	boolean_t embedded = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4637 	    DMU_BACKUP_FEATURE_EMBED_DATA;
4638 	stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
4639 	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && !resuming;
4640 	stream_resumingnewfs = (drrb->drr_fromguid == 0 ||
4641 	    (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap) && resuming;
4642 
4643 	if (stream_wantsnewfs) {
4644 		/*
4645 		 * if the parent fs does not exist, look for it based on
4646 		 * the parent snap GUID
4647 		 */
4648 		(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
4649 		    "cannot receive new filesystem stream"));
4650 
4651 		(void) strlcpy(name, destsnap, sizeof (name));
4652 		cp = strrchr(name, '/');
4653 		if (cp)
4654 			*cp = '\0';
4655 		if (cp &&
4656 		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4657 			char suffix[ZFS_MAX_DATASET_NAME_LEN];
4658 			(void) strlcpy(suffix, strrchr(destsnap, '/'),
4659 			    sizeof (suffix));
4660 			if (guid_to_name(hdl, name, parent_snapguid,
4661 			    B_FALSE, destsnap) == 0) {
4662 				*strchr(destsnap, '@') = '\0';
4663 				(void) strlcat(destsnap, suffix,
4664 				    sizeof (destsnap));
4665 			}
4666 		}
4667 	} else {
4668 		/*
4669 		 * If the fs does not exist, look for it based on the
4670 		 * fromsnap GUID.
4671 		 */
4672 		if (resuming) {
4673 			(void) snprintf(errbuf, sizeof (errbuf),
4674 			    dgettext(TEXT_DOMAIN,
4675 			    "cannot receive resume stream"));
4676 		} else {
4677 			(void) snprintf(errbuf, sizeof (errbuf),
4678 			    dgettext(TEXT_DOMAIN,
4679 			    "cannot receive incremental stream"));
4680 		}
4681 
4682 		(void) strlcpy(name, destsnap, sizeof (name));
4683 		*strchr(name, '@') = '\0';
4684 
4685 		/*
4686 		 * If the exact receive path was specified and this is the
4687 		 * topmost path in the stream, then if the fs does not exist we
4688 		 * should look no further.
4689 		 */
4690 		if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
4691 		    strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
4692 		    !zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4693 			char snap[ZFS_MAX_DATASET_NAME_LEN];
4694 			(void) strlcpy(snap, strchr(destsnap, '@'),
4695 			    sizeof (snap));
4696 			if (guid_to_name(hdl, name, drrb->drr_fromguid,
4697 			    B_FALSE, destsnap) == 0) {
4698 				*strchr(destsnap, '@') = '\0';
4699 				(void) strlcat(destsnap, snap,
4700 				    sizeof (destsnap));
4701 			}
4702 		}
4703 	}
4704 
4705 	(void) strlcpy(name, destsnap, sizeof (name));
4706 	*strchr(name, '@') = '\0';
4707 
4708 	redacted = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
4709 	    DMU_BACKUP_FEATURE_REDACTED;
4710 
4711 	if (flags->heal) {
4712 		if (flags->isprefix || flags->istail || flags->force ||
4713 		    flags->canmountoff || flags->resumable || flags->nomount ||
4714 		    flags->skipholds) {
4715 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4716 			    "corrective recv can not be used when combined with"
4717 			    " this flag"));
4718 			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4719 			goto out;
4720 		}
4721 		uint64_t guid =
4722 		    get_snap_guid(hdl, name, strchr(destsnap, '@') + 1);
4723 		if (guid == 0) {
4724 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4725 			    "corrective recv must specify an existing snapshot"
4726 			    " to heal"));
4727 			err = zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
4728 			goto out;
4729 		} else if (guid != drrb->drr_toguid) {
4730 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4731 			    "local snapshot doesn't match the snapshot"
4732 			    " in the provided stream"));
4733 			err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4734 			goto out;
4735 		}
4736 	} else if (zfs_dataset_exists(hdl, name, ZFS_TYPE_DATASET)) {
4737 		zfs_cmd_t zc = {"\0"};
4738 		zfs_handle_t *zhp = NULL;
4739 		boolean_t encrypted;
4740 
4741 		(void) strcpy(zc.zc_name, name);
4742 
4743 		/*
4744 		 * Destination fs exists.  It must be one of these cases:
4745 		 *  - an incremental send stream
4746 		 *  - the stream specifies a new fs (full stream or clone)
4747 		 *    and they want us to blow away the existing fs (and
4748 		 *    have therefore specified -F and removed any snapshots)
4749 		 *  - we are resuming a failed receive.
4750 		 */
4751 		if (stream_wantsnewfs) {
4752 			boolean_t is_volume = drrb->drr_type == DMU_OST_ZVOL;
4753 			if (!flags->force) {
4754 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4755 				    "destination '%s' exists\n"
4756 				    "must specify -F to overwrite it"), name);
4757 				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4758 				goto out;
4759 			}
4760 			if (zfs_ioctl(hdl, ZFS_IOC_SNAPSHOT_LIST_NEXT,
4761 			    &zc) == 0) {
4762 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4763 				    "destination has snapshots (eg. %s)\n"
4764 				    "must destroy them to overwrite it"),
4765 				    zc.zc_name);
4766 				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4767 				goto out;
4768 			}
4769 			if (is_volume && strrchr(name, '/') == NULL) {
4770 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4771 				    "destination %s is the root dataset\n"
4772 				    "cannot overwrite with a ZVOL"),
4773 				    name);
4774 				err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4775 				goto out;
4776 			}
4777 			if (is_volume &&
4778 			    zfs_ioctl(hdl, ZFS_IOC_DATASET_LIST_NEXT,
4779 			    &zc) == 0) {
4780 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4781 				    "destination has children (eg. %s)\n"
4782 				    "cannot overwrite with a ZVOL"),
4783 				    zc.zc_name);
4784 				err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4785 				goto out;
4786 			}
4787 		}
4788 
4789 		if ((zhp = zfs_open(hdl, name,
4790 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
4791 			err = -1;
4792 			goto out;
4793 		}
4794 
4795 		/*
4796 		 * When receiving full/newfs on existing dataset, then it
4797 		 * should be done with "-F" flag. Its enforced for initial
4798 		 * receive in previous checks in this function.
4799 		 * Similarly, on resuming full/newfs recv on existing dataset,
4800 		 * it should be done with "-F" flag.
4801 		 *
4802 		 * When dataset doesn't exist, then full/newfs recv is done on
4803 		 * newly created dataset and it's marked INCONSISTENT. But
4804 		 * When receiving on existing dataset, recv is first done on
4805 		 * %recv and its marked INCONSISTENT. Existing dataset is not
4806 		 * marked INCONSISTENT.
4807 		 * Resume of full/newfs receive with dataset not INCONSISTENT
4808 		 * indicates that its resuming newfs on existing dataset. So,
4809 		 * enforce "-F" flag in this case.
4810 		 */
4811 		if (stream_resumingnewfs &&
4812 		    !zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT) &&
4813 		    !flags->force) {
4814 			zfs_close(zhp);
4815 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4816 			    "Resuming recv on existing destination '%s'\n"
4817 			    "must specify -F to overwrite it"), name);
4818 			err = zfs_error(hdl, EZFS_RESUME_EXISTS, errbuf);
4819 			goto out;
4820 		}
4821 
4822 		if (stream_wantsnewfs &&
4823 		    zhp->zfs_dmustats.dds_origin[0]) {
4824 			zfs_close(zhp);
4825 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4826 			    "destination '%s' is a clone\n"
4827 			    "must destroy it to overwrite it"), name);
4828 			err = zfs_error(hdl, EZFS_EXISTS, errbuf);
4829 			goto out;
4830 		}
4831 
4832 		/*
4833 		 * Raw sends can not be performed as an incremental on top
4834 		 * of existing unencrypted datasets. zfs recv -F can't be
4835 		 * used to blow away an existing encrypted filesystem. This
4836 		 * is because it would require the dsl dir to point to the
4837 		 * new key (or lack of a key) and the old key at the same
4838 		 * time. The -F flag may still be used for deleting
4839 		 * intermediate snapshots that would otherwise prevent the
4840 		 * receive from working.
4841 		 */
4842 		encrypted = zfs_prop_get_int(zhp, ZFS_PROP_ENCRYPTION) !=
4843 		    ZIO_CRYPT_OFF;
4844 		if (!stream_wantsnewfs && !encrypted && raw) {
4845 			zfs_close(zhp);
4846 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4847 			    "cannot perform raw receive on top of "
4848 			    "existing unencrypted dataset"));
4849 			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4850 			goto out;
4851 		}
4852 
4853 		if (stream_wantsnewfs && flags->force &&
4854 		    ((raw && !encrypted) || encrypted)) {
4855 			zfs_close(zhp);
4856 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4857 			    "zfs receive -F cannot be used to destroy an "
4858 			    "encrypted filesystem or overwrite an "
4859 			    "unencrypted one with an encrypted one"));
4860 			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4861 			goto out;
4862 		}
4863 
4864 		if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
4865 		    (stream_wantsnewfs || stream_resumingnewfs)) {
4866 			/* We can't do online recv in this case */
4867 			clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
4868 			    flags->forceunmount ? MS_FORCE : 0);
4869 			if (clp == NULL) {
4870 				zfs_close(zhp);
4871 				err = -1;
4872 				goto out;
4873 			}
4874 			if (changelist_prefix(clp) != 0) {
4875 				changelist_free(clp);
4876 				zfs_close(zhp);
4877 				err = -1;
4878 				goto out;
4879 			}
4880 		}
4881 
4882 		/*
4883 		 * If we are resuming a newfs, set newfs here so that we will
4884 		 * mount it if the recv succeeds this time.  We can tell
4885 		 * that it was a newfs on the first recv because the fs
4886 		 * itself will be inconsistent (if the fs existed when we
4887 		 * did the first recv, we would have received it into
4888 		 * .../%recv).
4889 		 */
4890 		if (resuming && zfs_prop_get_int(zhp, ZFS_PROP_INCONSISTENT))
4891 			newfs = B_TRUE;
4892 
4893 		/* we want to know if we're zoned when validating -o|-x props */
4894 		zoned = zfs_prop_get_int(zhp, ZFS_PROP_ZONED);
4895 
4896 		/* may need this info later, get it now we have zhp around */
4897 		if (zfs_prop_get(zhp, ZFS_PROP_RECEIVE_RESUME_TOKEN, NULL, 0,
4898 		    NULL, NULL, 0, B_TRUE) == 0)
4899 			hastoken = B_TRUE;
4900 
4901 		/* gather existing properties on destination */
4902 		origprops = fnvlist_alloc();
4903 		fnvlist_merge(origprops, zhp->zfs_props);
4904 		fnvlist_merge(origprops, zhp->zfs_user_props);
4905 
4906 		zfs_close(zhp);
4907 	} else {
4908 		zfs_handle_t *zhp;
4909 
4910 		/*
4911 		 * Destination filesystem does not exist.  Therefore we better
4912 		 * be creating a new filesystem (either from a full backup, or
4913 		 * a clone).  It would therefore be invalid if the user
4914 		 * specified only the pool name (i.e. if the destination name
4915 		 * contained no slash character).
4916 		 */
4917 		cp = strrchr(name, '/');
4918 
4919 		if (!stream_wantsnewfs || cp == NULL) {
4920 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4921 			    "destination '%s' does not exist"), name);
4922 			err = zfs_error(hdl, EZFS_NOENT, errbuf);
4923 			goto out;
4924 		}
4925 
4926 		/*
4927 		 * Trim off the final dataset component so we perform the
4928 		 * recvbackup ioctl to the filesystems's parent.
		 * recvbackup ioctl to the filesystem's parent.
4930 		*cp = '\0';
4931 
4932 		if (flags->isprefix && !flags->istail && !flags->dryrun &&
4933 		    create_parents(hdl, destsnap, strlen(tosnap)) != 0) {
4934 			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4935 			goto out;
4936 		}
4937 
4938 		/* validate parent */
4939 		zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
4940 		if (zhp == NULL) {
4941 			err = zfs_error(hdl, EZFS_BADRESTORE, errbuf);
4942 			goto out;
4943 		}
4944 		if (zfs_get_type(zhp) != ZFS_TYPE_FILESYSTEM) {
4945 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4946 			    "parent '%s' is not a filesystem"), name);
4947 			err = zfs_error(hdl, EZFS_WRONG_PARENT, errbuf);
4948 			zfs_close(zhp);
4949 			goto out;
4950 		}
4951 
4952 		zfs_close(zhp);
4953 
4954 		newfs = B_TRUE;
4955 		*cp = '/';
4956 	}
4957 
4958 	if (flags->verbose) {
4959 		(void) printf("%s %s%s stream of %s into %s\n",
4960 		    flags->dryrun ? "would receive" : "receiving",
4961 		    flags->heal ? "corrective " : "",
4962 		    drrb->drr_fromguid ? "incremental" : "full",
4963 		    drrb->drr_toname, destsnap);
4964 		(void) fflush(stdout);
4965 	}
4966 
4967 	/*
4968 	 * If this is the top-level dataset, record it so we can use it
4969 	 * for recursive operations later.
4970 	 */
4971 	if (top_zfs != NULL &&
4972 	    (*top_zfs == NULL || strcmp(*top_zfs, name) == 0)) {
4973 		toplevel = B_TRUE;
4974 		if (*top_zfs == NULL)
4975 			*top_zfs = zfs_strdup(hdl, name);
4976 	}
4977 
4978 	if (drrb->drr_type == DMU_OST_ZVOL) {
4979 		type = ZFS_TYPE_VOLUME;
4980 	} else if (drrb->drr_type == DMU_OST_ZFS) {
4981 		type = ZFS_TYPE_FILESYSTEM;
4982 	} else {
4983 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
4984 		    "invalid record type: 0x%d"), drrb->drr_type);
4985 		err = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
4986 		goto out;
4987 	}
4988 	if ((err = zfs_setup_cmdline_props(hdl, type, name, zoned, recursive,
4989 	    stream_wantsnewfs, raw, toplevel, rcvprops, cmdprops, origprops,
4990 	    &oxprops, &wkeydata, &wkeylen, errbuf)) != 0)
4991 		goto out;
4992 
4993 	/*
4994 	 * When sending with properties (zfs send -p), the encryption property
4995 	 * is not included because it is a SETONCE property and therefore
4996 	 * treated as read only. However, we are always able to determine its
	 * value because raw sends will include it in the DRR_BEGIN payload
4998 	 * and non-raw sends with properties are not allowed for encrypted
4999 	 * datasets. Therefore, if this is a non-raw properties stream, we can
5000 	 * infer that the value should be ZIO_CRYPT_OFF and manually add that
5001 	 * to the received properties.
5002 	 */
5003 	if (stream_wantsnewfs && !raw && rcvprops != NULL &&
5004 	    !nvlist_exists(cmdprops, zfs_prop_to_name(ZFS_PROP_ENCRYPTION))) {
5005 		if (oxprops == NULL)
5006 			oxprops = fnvlist_alloc();
5007 		fnvlist_add_uint64(oxprops,
5008 		    zfs_prop_to_name(ZFS_PROP_ENCRYPTION), ZIO_CRYPT_OFF);
5009 	}
5010 
5011 	if (flags->dryrun) {
5012 		void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
5013 
5014 		/*
5015 		 * We have read the DRR_BEGIN record, but we have
5016 		 * not yet read the payload. For non-dryrun sends
5017 		 * this will be done by the kernel, so we must
5018 		 * emulate that here, before attempting to read
5019 		 * more records.
5020 		 */
5021 		err = recv_read(hdl, infd, buf, drr->drr_payloadlen,
5022 		    flags->byteswap, NULL);
5023 		free(buf);
5024 		if (err != 0)
5025 			goto out;
5026 
5027 		err = recv_skip(hdl, infd, flags->byteswap);
5028 		goto out;
5029 	}
5030 
5031 	if (flags->heal) {
5032 		err = ioctl_err = lzc_receive_with_heal(destsnap, rcvprops,
5033 		    oxprops, wkeydata, wkeylen, origin, flags->force,
5034 		    flags->heal, flags->resumable, raw, infd, drr_noswap, -1,
5035 		    &read_bytes, &errflags, NULL, &prop_errors);
5036 	} else {
5037 		err = ioctl_err = lzc_receive_with_cmdprops(destsnap, rcvprops,
5038 		    oxprops, wkeydata, wkeylen, origin, flags->force,
5039 		    flags->resumable, raw, infd, drr_noswap, -1, &read_bytes,
5040 		    &errflags, NULL, &prop_errors);
5041 	}
5042 	ioctl_errno = ioctl_err;
5043 	prop_errflags = errflags;
5044 
5045 	if (err == 0) {
5046 		nvpair_t *prop_err = NULL;
5047 
5048 		while ((prop_err = nvlist_next_nvpair(prop_errors,
5049 		    prop_err)) != NULL) {
5050 			char tbuf[1024];
5051 			zfs_prop_t prop;
5052 			int intval;
5053 
5054 			prop = zfs_name_to_prop(nvpair_name(prop_err));
5055 			(void) nvpair_value_int32(prop_err, &intval);
5056 			if (strcmp(nvpair_name(prop_err),
5057 			    ZPROP_N_MORE_ERRORS) == 0) {
5058 				trunc_prop_errs(intval);
5059 				break;
5060 			} else if (snapname == NULL || finalsnap == NULL ||
5061 			    strcmp(finalsnap, snapname) == 0 ||
5062 			    strcmp(nvpair_name(prop_err),
5063 			    zfs_prop_to_name(ZFS_PROP_REFQUOTA)) != 0) {
5064 				/*
5065 				 * Skip the special case of, for example,
5066 				 * "refquota", errors on intermediate
5067 				 * snapshots leading up to a final one.
5068 				 * That's why we have all of the checks above.
5069 				 *
5070 				 * See zfs_ioctl.c's extract_delay_props() for
5071 				 * a list of props which can fail on
5072 				 * intermediate snapshots, but shouldn't
5073 				 * affect the overall receive.
5074 				 */
5075 				(void) snprintf(tbuf, sizeof (tbuf),
5076 				    dgettext(TEXT_DOMAIN,
5077 				    "cannot receive %s property on %s"),
5078 				    nvpair_name(prop_err), name);
5079 				zfs_setprop_error(hdl, prop, intval, tbuf);
5080 			}
5081 		}
5082 	}
5083 
5084 	if (err == 0 && snapprops_nvlist) {
5085 		zfs_cmd_t zc = {"\0"};
5086 
5087 		(void) strlcpy(zc.zc_name, destsnap, sizeof (zc.zc_name));
5088 		zc.zc_cookie = B_TRUE; /* received */
5089 		zcmd_write_src_nvlist(hdl, &zc, snapprops_nvlist);
5090 		(void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc);
5091 		zcmd_free_nvlists(&zc);
5092 	}
5093 	if (err == 0 && snapholds_nvlist) {
5094 		nvpair_t *pair;
5095 		nvlist_t *holds, *errors = NULL;
5096 		int cleanup_fd = -1;
5097 
5098 		VERIFY(0 == nvlist_alloc(&holds, 0, KM_SLEEP));
5099 		for (pair = nvlist_next_nvpair(snapholds_nvlist, NULL);
5100 		    pair != NULL;
5101 		    pair = nvlist_next_nvpair(snapholds_nvlist, pair)) {
5102 			fnvlist_add_string(holds, destsnap, nvpair_name(pair));
5103 		}
5104 		(void) lzc_hold(holds, cleanup_fd, &errors);
5105 		fnvlist_free(snapholds_nvlist);
5106 		fnvlist_free(holds);
5107 	}
5108 
5109 	if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
5110 		/*
5111 		 * It may be that this snapshot already exists,
5112 		 * in which case we want to consume & ignore it
5113 		 * rather than failing.
5114 		 */
5115 		avl_tree_t *local_avl;
5116 		nvlist_t *local_nv, *fs;
5117 		cp = strchr(destsnap, '@');
5118 
5119 		/*
5120 		 * XXX Do this faster by just iterating over snaps in
5121 		 * this fs.  Also if zc_value does not exist, we will
5122 		 * get a strange "does not exist" error message.
5123 		 */
5124 		*cp = '\0';
5125 		if (gather_nvlist(hdl, destsnap, NULL, NULL, B_FALSE, B_TRUE,
5126 		    B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE, B_FALSE,
5127 		    B_TRUE, &local_nv, &local_avl) == 0) {
5128 			*cp = '@';
5129 			fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
5130 			fsavl_destroy(local_avl);
5131 			fnvlist_free(local_nv);
5132 
5133 			if (fs != NULL) {
5134 				if (flags->verbose) {
5135 					(void) printf("snap %s already exists; "
5136 					    "ignoring\n", destsnap);
5137 				}
5138 				err = ioctl_err = recv_skip(hdl, infd,
5139 				    flags->byteswap);
5140 			}
5141 		}
5142 		*cp = '@';
5143 	}
5144 
5145 	if (ioctl_err != 0) {
5146 		switch (ioctl_errno) {
5147 		case ENODEV:
5148 			cp = strchr(destsnap, '@');
5149 			*cp = '\0';
5150 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5151 			    "most recent snapshot of %s does not\n"
5152 			    "match incremental source"), destsnap);
5153 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
5154 			*cp = '@';
5155 			break;
5156 		case ETXTBSY:
5157 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5158 			    "destination %s has been modified\n"
5159 			    "since most recent snapshot"), name);
5160 			(void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
5161 			break;
5162 		case EACCES:
5163 			if (flags->heal) {
5164 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5165 				    "key must be loaded to do a non-raw "
5166 				    "corrective recv on an encrypted "
5167 				    "dataset."));
5168 			} else if (raw && stream_wantsnewfs) {
5169 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5170 				    "failed to create encryption key"));
5171 			} else if (raw && !stream_wantsnewfs) {
5172 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5173 				    "encryption key does not match "
5174 				    "existing key"));
5175 			} else {
5176 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5177 				    "inherited key must be loaded"));
5178 			}
5179 			(void) zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
5180 			break;
5181 		case EEXIST:
5182 			cp = strchr(destsnap, '@');
5183 			if (newfs) {
5184 				/* it's the containing fs that exists */
5185 				*cp = '\0';
5186 			}
5187 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5188 			    "destination already exists"));
5189 			(void) zfs_error_fmt(hdl, EZFS_EXISTS,
5190 			    dgettext(TEXT_DOMAIN, "cannot restore to %s"),
5191 			    destsnap);
5192 			*cp = '@';
5193 			break;
5194 		case EINVAL:
5195 			if (embedded && !raw) {
5196 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5197 				    "incompatible embedded data stream "
5198 				    "feature with encrypted receive."));
5199 			} else if (flags->resumable) {
5200 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5201 				    "kernel modules must be upgraded to "
5202 				    "receive this stream."));
5203 			}
5204 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5205 			break;
5206 		case ECKSUM:
5207 		case ZFS_ERR_STREAM_TRUNCATED:
5208 			if (flags->heal)
5209 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5210 				    "corrective receive was not able to "
5211 				    "reconstruct the data needed for "
5212 				    "healing."));
5213 			else
5214 				recv_ecksum_set_aux(hdl, destsnap,
5215 				    flags->resumable, ioctl_err == ECKSUM);
5216 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5217 			break;
5218 		case ZFS_ERR_STREAM_LARGE_BLOCK_MISMATCH:
5219 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5220 			    "incremental send stream requires -L "
5221 			    "(--large-block), to match previous receive."));
5222 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5223 			break;
5224 		case ENOTSUP:
5225 			if (flags->heal)
5226 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5227 				    "stream is not compatible with the "
5228 				    "data in the pool."));
5229 			else
5230 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5231 				    "pool must be upgraded to receive this "
5232 				    "stream."));
5233 			(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
5234 			break;
5235 		case ZFS_ERR_CRYPTO_NOTSUP:
5236 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5237 			    "stream uses crypto parameters not compatible with "
5238 			    "this pool"));
5239 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5240 			break;
5241 		case EDQUOT:
5242 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5243 			    "destination %s space quota exceeded."), name);
5244 			(void) zfs_error(hdl, EZFS_NOSPC, errbuf);
5245 			break;
5246 		case ZFS_ERR_FROM_IVSET_GUID_MISSING:
5247 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5248 			    "IV set guid missing. See errata %u at "
5249 			    "https://openzfs.github.io/openzfs-docs/msg/"
5250 			    "ZFS-8000-ER."),
5251 			    ZPOOL_ERRATA_ZOL_8308_ENCRYPTION);
5252 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5253 			break;
5254 		case ZFS_ERR_FROM_IVSET_GUID_MISMATCH:
5255 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5256 			    "IV set guid mismatch. See the 'zfs receive' "
5257 			    "man page section\n discussing the limitations "
5258 			    "of raw encrypted send streams."));
5259 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5260 			break;
5261 		case ZFS_ERR_SPILL_BLOCK_FLAG_MISSING:
5262 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5263 			    "Spill block flag missing for raw send.\n"
5264 			    "The zfs software on the sending system must "
5265 			    "be updated."));
5266 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5267 			break;
5268 		case ZFS_ERR_RESUME_EXISTS:
5269 			cp = strchr(destsnap, '@');
5270 			if (newfs) {
5271 				/* it's the containing fs that exists */
5272 				*cp = '\0';
5273 			}
5274 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5275 			    "Resuming recv on existing dataset without force"));
5276 			(void) zfs_error_fmt(hdl, EZFS_RESUME_EXISTS,
5277 			    dgettext(TEXT_DOMAIN, "cannot resume recv %s"),
5278 			    destsnap);
5279 			*cp = '@';
5280 			break;
5281 		case E2BIG:
5282 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5283 			    "zfs receive required kernel memory allocation "
5284 			    "larger than the system can support. Please file "
5285 			    "an issue at the OpenZFS issue tracker:\n"
5286 			    "https://github.com/openzfs/zfs/issues/new"));
5287 			(void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
5288 			break;
5289 		case EBUSY:
5290 			if (hastoken) {
5291 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5292 				    "destination %s contains "
5293 				    "partially-complete state from "
5294 				    "\"zfs receive -s\"."), name);
5295 				(void) zfs_error(hdl, EZFS_BUSY, errbuf);
5296 				break;
5297 			}
5298 			zfs_fallthrough;
5299 		default:
5300 			(void) zfs_standard_error(hdl, ioctl_errno, errbuf);
5301 		}
5302 	}
5303 
5304 	/*
5305 	 * Mount the target filesystem (if created).  Also mount any
5306 	 * children of the target filesystem if we did a replication
5307 	 * receive (indicated by stream_avl being non-NULL).
5308 	 */
5309 	if (clp) {
5310 		if (!flags->nomount)
5311 			err |= changelist_postfix(clp);
5312 		changelist_free(clp);
5313 	}
5314 
5315 	if ((newfs || stream_avl) && type == ZFS_TYPE_FILESYSTEM && !redacted)
5316 		flags->domount = B_TRUE;
5317 
5318 	if (prop_errflags & ZPROP_ERR_NOCLEAR) {
5319 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
5320 		    "failed to clear unreceived properties on %s"), name);
5321 		(void) fprintf(stderr, "\n");
5322 	}
5323 	if (prop_errflags & ZPROP_ERR_NORESTORE) {
5324 		(void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
5325 		    "failed to restore original properties on %s"), name);
5326 		(void) fprintf(stderr, "\n");
5327 	}
5328 
5329 	if (err || ioctl_err) {
5330 		err = -1;
5331 		goto out;
5332 	}
5333 
5334 	if (flags->verbose) {
5335 		char buf1[64];
5336 		char buf2[64];
5337 		uint64_t bytes = read_bytes;
5338 		struct timespec delta;
5339 		clock_gettime(CLOCK_MONOTONIC_RAW, &delta);
5340 		if (begin_time.tv_nsec > delta.tv_nsec) {
5341 			delta.tv_nsec =
5342 			    1000000000 + delta.tv_nsec - begin_time.tv_nsec;
5343 			delta.tv_sec -= 1;
5344 		} else
5345 			delta.tv_nsec -= begin_time.tv_nsec;
5346 		delta.tv_sec -= begin_time.tv_sec;
5347 		if (delta.tv_sec == 0 && delta.tv_nsec == 0)
5348 			delta.tv_nsec = 1;
5349 		double delta_f = delta.tv_sec + (delta.tv_nsec / 1e9);
5350 		zfs_nicebytes(bytes, buf1, sizeof (buf1));
5351 		zfs_nicebytes(bytes / delta_f, buf2, sizeof (buf2));
5352 
5353 		(void) printf("received %s stream in %.2f seconds (%s/sec)\n",
5354 		    buf1, delta_f, buf2);
5355 	}
5356 
5357 	err = 0;
5358 out:
5359 	if (prop_errors != NULL)
5360 		fnvlist_free(prop_errors);
5361 
5362 	if (tmp_keylocation[0] != '\0') {
5363 		fnvlist_add_string(rcvprops,
5364 		    zfs_prop_to_name(ZFS_PROP_KEYLOCATION), tmp_keylocation);
5365 	}
5366 
5367 	if (newprops)
5368 		fnvlist_free(rcvprops);
5369 
5370 	fnvlist_free(oxprops);
5371 	fnvlist_free(origprops);
5372 
5373 	return (err);
5374 }
5375 
5376 /*
5377  * Check properties we were asked to override (both -o|-x)
5378  */
5379 static boolean_t
5380 zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
5381     const char *errbuf)
5382 {
5383 	nvpair_t *nvp = NULL;
5384 	zfs_prop_t prop;
5385 	const char *name;
5386 
5387 	while ((nvp = nvlist_next_nvpair(props, nvp)) != NULL) {
5388 		name = nvpair_name(nvp);
5389 		prop = zfs_name_to_prop(name);
5390 
5391 		if (prop == ZPROP_USERPROP) {
5392 			if (!zfs_prop_user(name)) {
5393 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5394 				    "%s: invalid property '%s'"), errbuf, name);
5395 				return (B_FALSE);
5396 			}
5397 			continue;
5398 		}
5399 		/*
5400 		 * "origin" is readonly but is used to receive datasets as
5401 		 * clones so we don't raise an error here
5402 		 */
5403 		if (prop == ZFS_PROP_ORIGIN)
5404 			continue;
5405 
5406 		/* encryption params have their own verification later */
5407 		if (prop == ZFS_PROP_ENCRYPTION ||
5408 		    zfs_prop_encryption_key_param(prop))
5409 			continue;
5410 
5411 		/*
5412 		 * cannot override readonly, set-once and other specific
5413 		 * settable properties
5414 		 */
5415 		if (zfs_prop_readonly(prop) || prop == ZFS_PROP_VERSION ||
5416 		    prop == ZFS_PROP_VOLSIZE) {
5417 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5418 			    "%s: invalid property '%s'"), errbuf, name);
5419 			return (B_FALSE);
5420 		}
5421 	}
5422 
5423 	return (B_TRUE);
5424 }
5425 
5426 static int
5427 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
5428     const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
5429     nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs,
5430     const char *finalsnap, nvlist_t *cmdprops)
5431 {
5432 	int err;
5433 	dmu_replay_record_t drr, drr_noswap;
5434 	struct drr_begin *drrb = &drr.drr_u.drr_begin;
5435 	char errbuf[ERRBUFLEN];
5436 	zio_cksum_t zcksum = { { 0 } };
5437 	uint64_t featureflags;
5438 	int hdrtype;
5439 
5440 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
5441 	    "cannot receive"));
5442 
5443 	/* check cmdline props, raise an error if they cannot be received */
5444 	if (!zfs_receive_checkprops(hdl, cmdprops, errbuf))
5445 		return (zfs_error(hdl, EZFS_BADPROP, errbuf));
5446 
5447 	if (flags->isprefix &&
5448 	    !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
5449 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
5450 		    "(%s) does not exist"), tosnap);
5451 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
5452 	}
5453 	if (originsnap &&
5454 	    !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
5455 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
5456 		    "(%s) does not exist"), originsnap);
5457 		return (zfs_error(hdl, EZFS_NOENT, errbuf));
5458 	}
5459 
5460 	/* read in the BEGIN record */
5461 	if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
5462 	    &zcksum)))
5463 		return (err);
5464 
5465 	if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
5466 		/* It's the double end record at the end of a package */
5467 		return (ENODATA);
5468 	}
5469 
5470 	/* the kernel needs the non-byteswapped begin record */
5471 	drr_noswap = drr;
5472 
5473 	flags->byteswap = B_FALSE;
5474 	if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
5475 		/*
5476 		 * We computed the checksum in the wrong byteorder in
5477 		 * recv_read() above; do it again correctly.
5478 		 */
5479 		memset(&zcksum, 0, sizeof (zio_cksum_t));
5480 		fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
5481 		flags->byteswap = B_TRUE;
5482 
5483 		drr.drr_type = BSWAP_32(drr.drr_type);
5484 		drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
5485 		drrb->drr_magic = BSWAP_64(drrb->drr_magic);
5486 		drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
5487 		drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
5488 		drrb->drr_type = BSWAP_32(drrb->drr_type);
5489 		drrb->drr_flags = BSWAP_32(drrb->drr_flags);
5490 		drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
5491 		drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
5492 	}
5493 
5494 	if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
5495 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5496 		    "stream (bad magic number)"));
5497 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5498 	}
5499 
5500 	featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
5501 	hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
5502 
5503 	if (!DMU_STREAM_SUPPORTED(featureflags) ||
5504 	    (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
5505 		/*
5506 		 * Let's be explicit about this one, since rather than
5507 		 * being a new feature we can't know, it's an old
5508 		 * feature we dropped.
5509 		 */
5510 		if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
5511 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5512 			    "stream has deprecated feature: dedup, try "
5513 			    "'zstream redup [send in a file] | zfs recv "
5514 			    "[...]'"));
5515 		} else {
5516 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
5517 			    "stream has unsupported feature, feature flags = "
5518 			    "%llx (unknown flags = %llx)"),
5519 			    (u_longlong_t)featureflags,
5520 			    (u_longlong_t)((featureflags) &
5521 			    ~DMU_BACKUP_FEATURE_MASK));
5522 		}
5523 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5524 	}
5525 
5526 	/* Holds feature is set once in the compound stream header. */
5527 	if (featureflags & DMU_BACKUP_FEATURE_HOLDS)
5528 		flags->holds = B_TRUE;
5529 
5530 	if (strchr(drrb->drr_toname, '@') == NULL) {
5531 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
5532 		    "stream (bad snapshot name)"));
5533 		return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
5534 	}
5535 
5536 	if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
5537 		char nonpackage_sendfs[ZFS_MAX_DATASET_NAME_LEN];
5538 		if (sendfs == NULL) {
5539 			/*
5540 			 * We were not called from zfs_receive_package(). Get
5541 			 * the fs specified by 'zfs send'.
5542 			 */
5543 			char *cp;
5544 			(void) strlcpy(nonpackage_sendfs,
5545 			    drr.drr_u.drr_begin.drr_toname,
5546 			    sizeof (nonpackage_sendfs));
5547 			if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
5548 				*cp = '\0';
5549 			sendfs = nonpackage_sendfs;
5550 			VERIFY(finalsnap == NULL);
5551 		}
5552 		return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
5553 		    &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
5554 		    finalsnap, cmdprops));
5555 	} else {
5556 		assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
5557 		    DMU_COMPOUNDSTREAM);
5558 		return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
5559 		    &zcksum, top_zfs, cmdprops));
5560 	}
5561 }
5562 
5563 /*
5564  * Restores a backup of tosnap from the file descriptor specified by infd.
5565  * Return 0 on total success, -2 if some things couldn't be
5566  * destroyed/renamed/promoted, -1 if some things couldn't be received.
5567  * (-1 will override -2, if -1 and the resumable flag was specified the
5568  * transfer can be resumed if the sending side supports it).
5569  */
5570 int
5571 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
5572     recvflags_t *flags, int infd, avl_tree_t *stream_avl)
5573 {
5574 	char *top_zfs = NULL;
5575 	int err;
5576 	struct stat sb;
5577 	const char *originsnap = NULL;
5578 
5579 	/*
5580 	 * The only way fstat can fail is if we do not have a valid file
5581 	 * descriptor.
5582 	 */
5583 	if (fstat(infd, &sb) == -1) {
5584 		perror("fstat");
5585 		return (-2);
5586 	}
5587 
5588 	if (props) {
5589 		err = nvlist_lookup_string(props, "origin", &originsnap);
5590 		if (err && err != ENOENT)
5591 			return (err);
5592 	}
5593 
5594 	err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
5595 	    stream_avl, &top_zfs, NULL, props);
5596 
5597 	if (err == 0 && !flags->nomount && flags->domount && top_zfs) {
5598 		zfs_handle_t *zhp = NULL;
5599 		prop_changelist_t *clp = NULL;
5600 
5601 		zhp = zfs_open(hdl, top_zfs,
5602 		    ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
5603 		if (zhp == NULL) {
5604 			err = -1;
5605 			goto out;
5606 		} else {
5607 			if (zhp->zfs_type == ZFS_TYPE_VOLUME) {
5608 				zfs_close(zhp);
5609 				goto out;
5610 			}
5611 
5612 			clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
5613 			    CL_GATHER_MOUNT_ALWAYS,
5614 			    flags->forceunmount ? MS_FORCE : 0);
5615 			zfs_close(zhp);
5616 			if (clp == NULL) {
5617 				err = -1;
5618 				goto out;
5619 			}
5620 
5621 			/* mount and share received datasets */
5622 			err = changelist_postfix(clp);
5623 			changelist_free(clp);
5624 			if (err != 0)
5625 				err = -1;
5626 		}
5627 	}
5628 
5629 out:
5630 	if (top_zfs)
5631 		free(top_zfs);
5632 
5633 	return (err);
5634 }
5635