xref: /titanic_41/usr/src/lib/libzfs_core/common/libzfs_core.c (revision 675fc291908baceb17f92b0b6d961439aaddafc9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
25  */
26 
27 /*
28  * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
29  * It has the following characteristics:
30  *
31  *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
32  *  threads.  This is accomplished primarily by avoiding global data
33  *  (e.g. caching).  Since it's thread-safe, there is no reason for a
34  *  process to have multiple libzfs "instances".  Therefore, we store
35  *  our few pieces of data (e.g. the file descriptor) in global
36  *  variables.  The fd is reference-counted so that the libzfs_core
37  *  library can be "initialized" multiple times (e.g. by different
38  *  consumers within the same process).
39  *
40  *  - Committed Interface.  The libzfs_core interface will be committed,
41  *  therefore consumers can compile against it and be confident that
42  *  their code will continue to work on future releases of this code.
43  *  Currently, the interface is Evolving (not Committed), but we intend
44  *  to commit to it once it is more complete and we determine that it
45  *  meets the needs of all consumers.
46  *
47  *  - Programmatic Error Handling.  libzfs_core communicates errors with
48  *  defined error numbers, and doesn't print anything to stdout/stderr.
49  *
50  *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
51  *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
52  *  between libzfs_core functions and ioctls to /dev/zfs.
53  *
54  *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
55  *  with kernel ioctls, and kernel ioctls are generally atomic, each
56  *  libzfs_core function is atomic.  For example, creating multiple
57  *  snapshots with a single call to lzc_snapshot() is atomic -- it
58  *  can't fail with only some of the requested snapshots created, even
59  *  in the event of power loss or system crash.
60  *
61  *  - Continued libzfs Support.  Some higher-level operations (e.g.
62  *  support for "zfs send -R") are too complicated to fit the scope of
63  *  libzfs_core.  This functionality will continue to live in libzfs.
64  *  Where appropriate, libzfs will use the underlying atomic operations
65  *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
66  *  zfs receive" by using individual "send one snapshot", rename,
67  *  destroy, and "receive one snapshot" operations in libzfs_core.
68  *  /sbin/zfs and /sbin/zpool will link with both libzfs and
69  *  libzfs_core.  Other consumers should aim to use only libzfs_core,
70  *  since that will be the supported, stable interface going forwards.
71  */
72 
73 #include <libzfs_core.h>
74 #include <ctype.h>
75 #include <unistd.h>
76 #include <stdlib.h>
77 #include <string.h>
78 #include <errno.h>
79 #include <fcntl.h>
80 #include <pthread.h>
81 #include <sys/nvpair.h>
82 #include <sys/param.h>
83 #include <sys/types.h>
84 #include <sys/stat.h>
85 #include <sys/zfs_ioctl.h>
86 
/*
 * Process-wide library state.  g_fd is the descriptor for /dev/zfs on which
 * the ioctls in this file are issued, g_refcount counts the outstanding
 * libzfs_core_init() calls, and g_lock serializes updates to both.
 */
static int g_fd;
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;

/*
 * Initialize the library.  The first call (reference count zero) opens
 * /dev/zfs read/write; every successful call increments the reference count.
 *
 * Returns 0 on success.  If /dev/zfs cannot be opened, the errno from open()
 * is returned and the reference count is left unchanged.
 */
int
libzfs_core_init(void)
{
	int error = 0;

	(void) pthread_mutex_lock(&g_lock);
	if (g_refcount == 0) {
		g_fd = open("/dev/zfs", O_RDWR);
		if (g_fd < 0) {
			/*
			 * Capture errno right away: the unlock below is
			 * another library call and is free to modify it.
			 */
			error = errno;
		}
	}
	if (error == 0)
		g_refcount++;
	(void) pthread_mutex_unlock(&g_lock);

	return (error);
}
106 
107 void
libzfs_core_fini(void)108 libzfs_core_fini(void)
109 {
110 	(void) pthread_mutex_lock(&g_lock);
111 	ASSERT3S(g_refcount, >, 0);
112 	g_refcount--;
113 	if (g_refcount == 0)
114 		(void) close(g_fd);
115 	(void) pthread_mutex_unlock(&g_lock);
116 }
117 
118 static int
lzc_ioctl(zfs_ioc_t ioc,const char * name,nvlist_t * source,nvlist_t ** resultp)119 lzc_ioctl(zfs_ioc_t ioc, const char *name,
120     nvlist_t *source, nvlist_t **resultp)
121 {
122 	zfs_cmd_t zc = { 0 };
123 	int error = 0;
124 	char *packed;
125 	size_t size;
126 
127 	ASSERT3S(g_refcount, >, 0);
128 
129 	(void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
130 
131 	packed = fnvlist_pack(source, &size);
132 	zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
133 	zc.zc_nvlist_src_size = size;
134 
135 	if (resultp != NULL) {
136 		*resultp = NULL;
137 		zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
138 		zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
139 		    malloc(zc.zc_nvlist_dst_size);
140 		if (zc.zc_nvlist_dst == NULL) {
141 			error = ENOMEM;
142 			goto out;
143 		}
144 	}
145 
146 	while (ioctl(g_fd, ioc, &zc) != 0) {
147 		if (errno == ENOMEM && resultp != NULL) {
148 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
149 			zc.zc_nvlist_dst_size *= 2;
150 			zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
151 			    malloc(zc.zc_nvlist_dst_size);
152 			if (zc.zc_nvlist_dst == NULL) {
153 				error = ENOMEM;
154 				goto out;
155 			}
156 		} else {
157 			error = errno;
158 			break;
159 		}
160 	}
161 	if (zc.zc_nvlist_dst_filled) {
162 		*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
163 		    zc.zc_nvlist_dst_size);
164 	}
165 
166 out:
167 	fnvlist_pack_free(packed, size);
168 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
169 	return (error);
170 }
171 
172 int
lzc_create(const char * fsname,dmu_objset_type_t type,nvlist_t * props)173 lzc_create(const char *fsname, dmu_objset_type_t type, nvlist_t *props)
174 {
175 	int error;
176 	nvlist_t *args = fnvlist_alloc();
177 	fnvlist_add_int32(args, "type", type);
178 	if (props != NULL)
179 		fnvlist_add_nvlist(args, "props", props);
180 	error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
181 	nvlist_free(args);
182 	return (error);
183 }
184 
185 int
lzc_clone(const char * fsname,const char * origin,nvlist_t * props)186 lzc_clone(const char *fsname, const char *origin,
187     nvlist_t *props)
188 {
189 	int error;
190 	nvlist_t *args = fnvlist_alloc();
191 	fnvlist_add_string(args, "origin", origin);
192 	if (props != NULL)
193 		fnvlist_add_nvlist(args, "props", props);
194 	error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
195 	nvlist_free(args);
196 	return (error);
197 }
198 
199 /*
200  * Creates snapshots.
201  *
202  * The keys in the snaps nvlist are the snapshots to be created.
203  * They must all be in the same pool.
204  *
205  * The props nvlist is properties to set.  Currently only user properties
206  * are supported.  { user:prop_name -> string value }
207  *
208  * The returned results nvlist will have an entry for each snapshot that failed.
209  * The value will be the (int32) error code.
210  *
211  * The return value will be 0 if all snapshots were created, otherwise it will
212  * be the errno of a (unspecified) snapshot that failed.
213  */
214 int
lzc_snapshot(nvlist_t * snaps,nvlist_t * props,nvlist_t ** errlist)215 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
216 {
217 	nvpair_t *elem;
218 	nvlist_t *args;
219 	int error;
220 	char pool[ZFS_MAX_DATASET_NAME_LEN];
221 
222 	*errlist = NULL;
223 
224 	/* determine the pool name */
225 	elem = nvlist_next_nvpair(snaps, NULL);
226 	if (elem == NULL)
227 		return (0);
228 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
229 	pool[strcspn(pool, "/@")] = '\0';
230 
231 	args = fnvlist_alloc();
232 	fnvlist_add_nvlist(args, "snaps", snaps);
233 	if (props != NULL)
234 		fnvlist_add_nvlist(args, "props", props);
235 
236 	error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
237 	nvlist_free(args);
238 
239 	return (error);
240 }
241 
242 /*
243  * Destroys snapshots.
244  *
245  * The keys in the snaps nvlist are the snapshots to be destroyed.
246  * They must all be in the same pool.
247  *
248  * Snapshots that do not exist will be silently ignored.
249  *
250  * If 'defer' is not set, and a snapshot has user holds or clones, the
251  * destroy operation will fail and none of the snapshots will be
252  * destroyed.
253  *
254  * If 'defer' is set, and a snapshot has user holds or clones, it will be
255  * marked for deferred destruction, and will be destroyed when the last hold
256  * or clone is removed/destroyed.
257  *
258  * The return value will be 0 if all snapshots were destroyed (or marked for
259  * later destruction if 'defer' is set) or didn't exist to begin with.
260  *
261  * Otherwise the return value will be the errno of a (unspecified) snapshot
262  * that failed, no snapshots will be destroyed, and the errlist will have an
263  * entry for each snapshot that failed.  The value in the errlist will be
264  * the (int32) error code.
265  */
266 int
lzc_destroy_snaps(nvlist_t * snaps,boolean_t defer,nvlist_t ** errlist)267 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
268 {
269 	nvpair_t *elem;
270 	nvlist_t *args;
271 	int error;
272 	char pool[ZFS_MAX_DATASET_NAME_LEN];
273 
274 	/* determine the pool name */
275 	elem = nvlist_next_nvpair(snaps, NULL);
276 	if (elem == NULL)
277 		return (0);
278 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
279 	pool[strcspn(pool, "/@")] = '\0';
280 
281 	args = fnvlist_alloc();
282 	fnvlist_add_nvlist(args, "snaps", snaps);
283 	if (defer)
284 		fnvlist_add_boolean(args, "defer");
285 
286 	error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
287 	nvlist_free(args);
288 
289 	return (error);
290 }
291 
292 int
lzc_snaprange_space(const char * firstsnap,const char * lastsnap,uint64_t * usedp)293 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
294     uint64_t *usedp)
295 {
296 	nvlist_t *args;
297 	nvlist_t *result;
298 	int err;
299 	char fs[ZFS_MAX_DATASET_NAME_LEN];
300 	char *atp;
301 
302 	/* determine the fs name */
303 	(void) strlcpy(fs, firstsnap, sizeof (fs));
304 	atp = strchr(fs, '@');
305 	if (atp == NULL)
306 		return (EINVAL);
307 	*atp = '\0';
308 
309 	args = fnvlist_alloc();
310 	fnvlist_add_string(args, "firstsnap", firstsnap);
311 
312 	err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
313 	nvlist_free(args);
314 	if (err == 0)
315 		*usedp = fnvlist_lookup_uint64(result, "used");
316 	fnvlist_free(result);
317 
318 	return (err);
319 }
320 
321 boolean_t
lzc_exists(const char * dataset)322 lzc_exists(const char *dataset)
323 {
324 	/*
325 	 * The objset_stats ioctl is still legacy, so we need to construct our
326 	 * own zfs_cmd_t rather than using zfsc_ioctl().
327 	 */
328 	zfs_cmd_t zc = { 0 };
329 
330 	(void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
331 	return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
332 }
333 
334 /*
335  * Create "user holds" on snapshots.  If there is a hold on a snapshot,
336  * the snapshot can not be destroyed.  (However, it can be marked for deletion
337  * by lzc_destroy_snaps(defer=B_TRUE).)
338  *
339  * The keys in the nvlist are snapshot names.
340  * The snapshots must all be in the same pool.
341  * The value is the name of the hold (string type).
342  *
343  * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
344  * In this case, when the cleanup_fd is closed (including on process
345  * termination), the holds will be released.  If the system is shut down
346  * uncleanly, the holds will be released when the pool is next opened
347  * or imported.
348  *
349  * Holds for snapshots which don't exist will be skipped and have an entry
350  * added to errlist, but will not cause an overall failure.
351  *
352  * The return value will be 0 if all holds, for snapshots that existed,
353  * were succesfully created.
354  *
355  * Otherwise the return value will be the errno of a (unspecified) hold that
356  * failed and no holds will be created.
357  *
358  * In all cases the errlist will have an entry for each hold that failed
359  * (name = snapshot), with its value being the error code (int32).
360  */
361 int
lzc_hold(nvlist_t * holds,int cleanup_fd,nvlist_t ** errlist)362 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
363 {
364 	char pool[ZFS_MAX_DATASET_NAME_LEN];
365 	nvlist_t *args;
366 	nvpair_t *elem;
367 	int error;
368 
369 	/* determine the pool name */
370 	elem = nvlist_next_nvpair(holds, NULL);
371 	if (elem == NULL)
372 		return (0);
373 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
374 	pool[strcspn(pool, "/@")] = '\0';
375 
376 	args = fnvlist_alloc();
377 	fnvlist_add_nvlist(args, "holds", holds);
378 	if (cleanup_fd != -1)
379 		fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
380 
381 	error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
382 	nvlist_free(args);
383 	return (error);
384 }
385 
386 /*
387  * Release "user holds" on snapshots.  If the snapshot has been marked for
388  * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
389  * any clones, and all the user holds are removed, then the snapshot will be
390  * destroyed.
391  *
392  * The keys in the nvlist are snapshot names.
393  * The snapshots must all be in the same pool.
394  * The value is a nvlist whose keys are the holds to remove.
395  *
396  * Holds which failed to release because they didn't exist will have an entry
397  * added to errlist, but will not cause an overall failure.
398  *
399  * The return value will be 0 if the nvl holds was empty or all holds that
400  * existed, were successfully removed.
401  *
402  * Otherwise the return value will be the errno of a (unspecified) hold that
403  * failed to release and no holds will be released.
404  *
405  * In all cases the errlist will have an entry for each hold that failed to
406  * to release.
407  */
408 int
lzc_release(nvlist_t * holds,nvlist_t ** errlist)409 lzc_release(nvlist_t *holds, nvlist_t **errlist)
410 {
411 	char pool[ZFS_MAX_DATASET_NAME_LEN];
412 	nvpair_t *elem;
413 
414 	/* determine the pool name */
415 	elem = nvlist_next_nvpair(holds, NULL);
416 	if (elem == NULL)
417 		return (0);
418 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
419 	pool[strcspn(pool, "/@")] = '\0';
420 
421 	return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
422 }
423 
424 /*
425  * Retrieve list of user holds on the specified snapshot.
426  *
427  * On success, *holdsp will be set to a nvlist which the caller must free.
428  * The keys are the names of the holds, and the value is the creation time
429  * of the hold (uint64) in seconds since the epoch.
430  */
431 int
lzc_get_holds(const char * snapname,nvlist_t ** holdsp)432 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
433 {
434 	int error;
435 	nvlist_t *innvl = fnvlist_alloc();
436 	error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
437 	fnvlist_free(innvl);
438 	return (error);
439 }
440 
441 /*
442  * Generate a zfs send stream for the specified snapshot and write it to
443  * the specified file descriptor.
444  *
445  * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
446  *
447  * If "from" is NULL, a full (non-incremental) stream will be sent.
448  * If "from" is non-NULL, it must be the full name of a snapshot or
449  * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
450  * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
451  * bookmark must represent an earlier point in the history of "snapname").
452  * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
453  * or it can be the origin of "snapname"'s filesystem, or an earlier
454  * snapshot in the origin, etc.
455  *
456  * "fd" is the file descriptor to write the send stream to.
457  *
458  * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
459  * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
460  * records with drr_blksz > 128K.
461  *
462  * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
463  * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
464  * which the receiving system must support (as indicated by support
465  * for the "embedded_data" feature).
466  */
467 int
lzc_send(const char * snapname,const char * from,int fd,enum lzc_send_flags flags)468 lzc_send(const char *snapname, const char *from, int fd,
469     enum lzc_send_flags flags)
470 {
471 	return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
472 }
473 
474 int
lzc_send_resume(const char * snapname,const char * from,int fd,enum lzc_send_flags flags,uint64_t resumeobj,uint64_t resumeoff)475 lzc_send_resume(const char *snapname, const char *from, int fd,
476     enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
477 {
478 	nvlist_t *args;
479 	int err;
480 
481 	args = fnvlist_alloc();
482 	fnvlist_add_int32(args, "fd", fd);
483 	if (from != NULL)
484 		fnvlist_add_string(args, "fromsnap", from);
485 	if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
486 		fnvlist_add_boolean(args, "largeblockok");
487 	if (flags & LZC_SEND_FLAG_EMBED_DATA)
488 		fnvlist_add_boolean(args, "embedok");
489 	if (resumeobj != 0 || resumeoff != 0) {
490 		fnvlist_add_uint64(args, "resume_object", resumeobj);
491 		fnvlist_add_uint64(args, "resume_offset", resumeoff);
492 	}
493 	err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
494 	nvlist_free(args);
495 	return (err);
496 }
497 
498 /*
499  * "from" can be NULL, a snapshot, or a bookmark.
500  *
501  * If from is NULL, a full (non-incremental) stream will be estimated.  This
502  * is calculated very efficiently.
503  *
504  * If from is a snapshot, lzc_send_space uses the deadlists attached to
505  * each snapshot to efficiently estimate the stream size.
506  *
507  * If from is a bookmark, the indirect blocks in the destination snapshot
508  * are traversed, looking for blocks with a birth time since the creation TXG of
509  * the snapshot this bookmark was created from.  This will result in
510  * significantly more I/O and be less efficient than a send space estimation on
511  * an equivalent snapshot.
512  */
513 int
lzc_send_space(const char * snapname,const char * from,uint64_t * spacep)514 lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
515 {
516 	nvlist_t *args;
517 	nvlist_t *result;
518 	int err;
519 
520 	args = fnvlist_alloc();
521 	if (from != NULL)
522 		fnvlist_add_string(args, "from", from);
523 	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
524 	nvlist_free(args);
525 	if (err == 0)
526 		*spacep = fnvlist_lookup_uint64(result, "space");
527 	nvlist_free(result);
528 	return (err);
529 }
530 
/*
 * Read exactly ilen bytes from fd into buf, looping over short reads.
 *
 * Returns 0 once the buffer has been filled, or EIO if ilen is negative,
 * if read() fails (other than with EINTR, which is retried), or if end of
 * file is reached before ilen bytes have arrived.
 */
static int
recv_read(int fd, void *buf, int ilen)
{
	char *cp = buf;
	int len = ilen;

	if (ilen < 0)
		return (EIO);

	/* Stop as soon as the buffer is full; no trailing zero-length read. */
	while (len > 0) {
		ssize_t rv = read(fd, cp, (size_t)len);

		if (rv < 0) {
			/* An interrupted read transferred nothing; retry. */
			if (errno == EINTR)
				continue;
			return (EIO);
		}
		if (rv == 0)
			return (EIO);	/* premature end of stream */

		/* Only advance by a positive byte count. */
		cp += rv;
		len -= (int)rv;
	}

	return (0);
}
549 
550 static int
lzc_receive_impl(const char * snapname,nvlist_t * props,const char * origin,boolean_t force,boolean_t resumable,int fd)551 lzc_receive_impl(const char *snapname, nvlist_t *props, const char *origin,
552     boolean_t force, boolean_t resumable, int fd)
553 {
554 	/*
555 	 * The receive ioctl is still legacy, so we need to construct our own
556 	 * zfs_cmd_t rather than using zfsc_ioctl().
557 	 */
558 	zfs_cmd_t zc = { 0 };
559 	char *atp;
560 	char *packed = NULL;
561 	size_t size;
562 	int error;
563 
564 	ASSERT3S(g_refcount, >, 0);
565 
566 	/* zc_name is name of containing filesystem */
567 	(void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
568 	atp = strchr(zc.zc_name, '@');
569 	if (atp == NULL)
570 		return (EINVAL);
571 	*atp = '\0';
572 
573 	/* if the fs does not exist, try its parent. */
574 	if (!lzc_exists(zc.zc_name)) {
575 		char *slashp = strrchr(zc.zc_name, '/');
576 		if (slashp == NULL)
577 			return (ENOENT);
578 		*slashp = '\0';
579 
580 	}
581 
582 	/* zc_value is full name of the snapshot to create */
583 	(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
584 
585 	if (props != NULL) {
586 		/* zc_nvlist_src is props to set */
587 		packed = fnvlist_pack(props, &size);
588 		zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
589 		zc.zc_nvlist_src_size = size;
590 	}
591 
592 	/* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
593 	if (origin != NULL)
594 		(void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
595 
596 	/* zc_begin_record is non-byteswapped BEGIN record */
597 	error = recv_read(fd, &zc.zc_begin_record, sizeof (zc.zc_begin_record));
598 	if (error != 0)
599 		goto out;
600 
601 	/* zc_cookie is fd to read from */
602 	zc.zc_cookie = fd;
603 
604 	/* zc guid is force flag */
605 	zc.zc_guid = force;
606 
607 	zc.zc_resumable = resumable;
608 
609 	/* zc_cleanup_fd is unused */
610 	zc.zc_cleanup_fd = -1;
611 
612 	error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
613 	if (error != 0)
614 		error = errno;
615 
616 out:
617 	if (packed != NULL)
618 		fnvlist_pack_free(packed, size);
619 	free((void*)(uintptr_t)zc.zc_nvlist_dst);
620 	return (error);
621 }
622 
623 /*
624  * The simplest receive case: receive from the specified fd, creating the
625  * specified snapshot.  Apply the specified properties as "received" properties
626  * (which can be overridden by locally-set properties).  If the stream is a
627  * clone, its origin snapshot must be specified by 'origin'.  The 'force'
628  * flag will cause the target filesystem to be rolled back or destroyed if
629  * necessary to receive.
630  *
631  * Return 0 on success or an errno on failure.
632  *
633  * Note: this interface does not work on dedup'd streams
634  * (those with DMU_BACKUP_FEATURE_DEDUP).
635  */
636 int
lzc_receive(const char * snapname,nvlist_t * props,const char * origin,boolean_t force,int fd)637 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
638     boolean_t force, int fd)
639 {
640 	return (lzc_receive_impl(snapname, props, origin, force, B_FALSE, fd));
641 }
642 
643 /*
644  * Like lzc_receive, but if the receive fails due to premature stream
645  * termination, the intermediate state will be preserved on disk.  In this
646  * case, ECKSUM will be returned.  The receive may subsequently be resumed
647  * with a resuming send stream generated by lzc_send_resume().
648  */
649 int
lzc_receive_resumable(const char * snapname,nvlist_t * props,const char * origin,boolean_t force,int fd)650 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
651     boolean_t force, int fd)
652 {
653 	return (lzc_receive_impl(snapname, props, origin, force, B_TRUE, fd));
654 }
655 
656 /*
657  * Roll back this filesystem or volume to its most recent snapshot.
658  * If snapnamebuf is not NULL, it will be filled in with the name
659  * of the most recent snapshot.
660  *
661  * Return 0 on success or an errno on failure.
662  */
663 int
lzc_rollback(const char * fsname,char * snapnamebuf,int snapnamelen)664 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
665 {
666 	nvlist_t *args;
667 	nvlist_t *result;
668 	int err;
669 
670 	args = fnvlist_alloc();
671 	err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
672 	nvlist_free(args);
673 	if (err == 0 && snapnamebuf != NULL) {
674 		const char *snapname = fnvlist_lookup_string(result, "target");
675 		(void) strlcpy(snapnamebuf, snapname, snapnamelen);
676 	}
677 	return (err);
678 }
679 
680 /*
681  * Creates bookmarks.
682  *
683  * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
684  * the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
685  * snapshots must be in the same pool.
686  *
687  * The returned results nvlist will have an entry for each bookmark that failed.
688  * The value will be the (int32) error code.
689  *
690  * The return value will be 0 if all bookmarks were created, otherwise it will
691  * be the errno of a (undetermined) bookmarks that failed.
692  */
693 int
lzc_bookmark(nvlist_t * bookmarks,nvlist_t ** errlist)694 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
695 {
696 	nvpair_t *elem;
697 	int error;
698 	char pool[ZFS_MAX_DATASET_NAME_LEN];
699 
700 	/* determine the pool name */
701 	elem = nvlist_next_nvpair(bookmarks, NULL);
702 	if (elem == NULL)
703 		return (0);
704 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
705 	pool[strcspn(pool, "/#")] = '\0';
706 
707 	error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
708 
709 	return (error);
710 }
711 
712 /*
713  * Retrieve bookmarks.
714  *
715  * Retrieve the list of bookmarks for the given file system. The props
716  * parameter is an nvlist of property names (with no values) that will be
717  * returned for each bookmark.
718  *
719  * The following are valid properties on bookmarks, all of which are numbers
720  * (represented as uint64 in the nvlist)
721  *
722  * "guid" - globally unique identifier of the snapshot it refers to
723  * "createtxg" - txg when the snapshot it refers to was created
724  * "creation" - timestamp when the snapshot it refers to was created
725  *
726  * The format of the returned nvlist as follows:
727  * <short name of bookmark> -> {
728  *     <name of property> -> {
729  *         "value" -> uint64
730  *     }
731  *  }
732  */
733 int
lzc_get_bookmarks(const char * fsname,nvlist_t * props,nvlist_t ** bmarks)734 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
735 {
736 	return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
737 }
738 
739 /*
740  * Destroys bookmarks.
741  *
742  * The keys in the bmarks nvlist are the bookmarks to be destroyed.
743  * They must all be in the same pool.  Bookmarks are specified as
744  * <fs>#<bmark>.
745  *
746  * Bookmarks that do not exist will be silently ignored.
747  *
748  * The return value will be 0 if all bookmarks that existed were destroyed.
749  *
750  * Otherwise the return value will be the errno of a (undetermined) bookmark
751  * that failed, no bookmarks will be destroyed, and the errlist will have an
752  * entry for each bookmarks that failed.  The value in the errlist will be
753  * the (int32) error code.
754  */
755 int
lzc_destroy_bookmarks(nvlist_t * bmarks,nvlist_t ** errlist)756 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
757 {
758 	nvpair_t *elem;
759 	int error;
760 	char pool[ZFS_MAX_DATASET_NAME_LEN];
761 
762 	/* determine the pool name */
763 	elem = nvlist_next_nvpair(bmarks, NULL);
764 	if (elem == NULL)
765 		return (0);
766 	(void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
767 	pool[strcspn(pool, "/#")] = '\0';
768 
769 	error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
770 
771 	return (error);
772 }
773