1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy * CDDL HEADER START
3eda14cbcSMatt Macy *
4eda14cbcSMatt Macy * This file and its contents are supplied under the terms of the
5eda14cbcSMatt Macy * Common Development and Distribution License ("CDDL"), version 1.0.
6eda14cbcSMatt Macy * You may only use this file in accordance with the terms of version
7eda14cbcSMatt Macy * 1.0 of the CDDL.
8eda14cbcSMatt Macy *
9eda14cbcSMatt Macy * A full copy of the text of the CDDL should have accompanied this
10eda14cbcSMatt Macy * source. A copy of the CDDL is also available via the Internet at
11eda14cbcSMatt Macy * http://www.illumos.org/license/CDDL.
12eda14cbcSMatt Macy *
13eda14cbcSMatt Macy * CDDL HEADER END
14eda14cbcSMatt Macy */
15eda14cbcSMatt Macy
16eda14cbcSMatt Macy /*
17eda14cbcSMatt Macy * Copyright (c) 2013, 2018 by Delphix. All rights reserved.
18eda14cbcSMatt Macy * Copyright 2017 Nexenta Systems, Inc.
19eda14cbcSMatt Macy * Copyright 2019, 2020 by Christian Schwarz. All rights reserved.
20eda14cbcSMatt Macy */
21eda14cbcSMatt Macy
22eda14cbcSMatt Macy #include <sys/zfs_context.h>
23eda14cbcSMatt Macy #include <sys/dsl_dataset.h>
24eda14cbcSMatt Macy #include <sys/dsl_dir.h>
25eda14cbcSMatt Macy #include <sys/dsl_prop.h>
26eda14cbcSMatt Macy #include <sys/dsl_synctask.h>
27eda14cbcSMatt Macy #include <sys/dsl_destroy.h>
28eda14cbcSMatt Macy #include <sys/dmu_impl.h>
29eda14cbcSMatt Macy #include <sys/dmu_tx.h>
30eda14cbcSMatt Macy #include <sys/arc.h>
31eda14cbcSMatt Macy #include <sys/zap.h>
32eda14cbcSMatt Macy #include <sys/zfeature.h>
33eda14cbcSMatt Macy #include <sys/spa.h>
34eda14cbcSMatt Macy #include <sys/dsl_bookmark.h>
35eda14cbcSMatt Macy #include <zfs_namecheck.h>
36eda14cbcSMatt Macy #include <sys/dmu_send.h>
372ad756a6SMartin Matuska #include <sys/dbuf.h>
38eda14cbcSMatt Macy
39eda14cbcSMatt Macy static int
dsl_bookmark_hold_ds(dsl_pool_t * dp,const char * fullname,dsl_dataset_t ** dsp,const void * tag,char ** shortnamep)40eda14cbcSMatt Macy dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
41a0b956f5SMartin Matuska dsl_dataset_t **dsp, const void *tag, char **shortnamep)
42eda14cbcSMatt Macy {
43eda14cbcSMatt Macy char buf[ZFS_MAX_DATASET_NAME_LEN];
44eda14cbcSMatt Macy char *hashp;
45eda14cbcSMatt Macy
46eda14cbcSMatt Macy if (strlen(fullname) >= ZFS_MAX_DATASET_NAME_LEN)
47eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG));
48eda14cbcSMatt Macy hashp = strchr(fullname, '#');
49eda14cbcSMatt Macy if (hashp == NULL)
50eda14cbcSMatt Macy return (SET_ERROR(EINVAL));
51eda14cbcSMatt Macy
52eda14cbcSMatt Macy *shortnamep = hashp + 1;
53eda14cbcSMatt Macy if (zfs_component_namecheck(*shortnamep, NULL, NULL))
54eda14cbcSMatt Macy return (SET_ERROR(EINVAL));
55eda14cbcSMatt Macy (void) strlcpy(buf, fullname, hashp - fullname + 1);
56eda14cbcSMatt Macy return (dsl_dataset_hold(dp, buf, tag, dsp));
57eda14cbcSMatt Macy }
58eda14cbcSMatt Macy
59eda14cbcSMatt Macy /*
60eda14cbcSMatt Macy * When reading BOOKMARK_V1 bookmarks, the BOOKMARK_V2 fields are guaranteed
61eda14cbcSMatt Macy * to be zeroed.
62eda14cbcSMatt Macy *
63eda14cbcSMatt Macy * Returns ESRCH if bookmark is not found.
64eda14cbcSMatt Macy * Note, we need to use the ZAP rather than the AVL to look up bookmarks
65eda14cbcSMatt Macy * by name, because only the ZAP honors the casesensitivity setting.
66eda14cbcSMatt Macy */
67eda14cbcSMatt Macy int
dsl_bookmark_lookup_impl(dsl_dataset_t * ds,const char * shortname,zfs_bookmark_phys_t * bmark_phys)68eda14cbcSMatt Macy dsl_bookmark_lookup_impl(dsl_dataset_t *ds, const char *shortname,
69eda14cbcSMatt Macy zfs_bookmark_phys_t *bmark_phys)
70eda14cbcSMatt Macy {
71eda14cbcSMatt Macy objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
72eda14cbcSMatt Macy uint64_t bmark_zapobj = ds->ds_bookmarks_obj;
73eda14cbcSMatt Macy matchtype_t mt = 0;
74eda14cbcSMatt Macy int err;
75eda14cbcSMatt Macy
76eda14cbcSMatt Macy if (bmark_zapobj == 0)
77eda14cbcSMatt Macy return (SET_ERROR(ESRCH));
78eda14cbcSMatt Macy
79eda14cbcSMatt Macy if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
80eda14cbcSMatt Macy mt = MT_NORMALIZE;
81eda14cbcSMatt Macy
82eda14cbcSMatt Macy /*
83eda14cbcSMatt Macy * Zero out the bookmark in case the one stored on disk
84eda14cbcSMatt Macy * is in an older, shorter format.
85eda14cbcSMatt Macy */
86da5137abSMartin Matuska memset(bmark_phys, 0, sizeof (*bmark_phys));
87eda14cbcSMatt Macy
88eda14cbcSMatt Macy err = zap_lookup_norm(mos, bmark_zapobj, shortname, sizeof (uint64_t),
89eda14cbcSMatt Macy sizeof (*bmark_phys) / sizeof (uint64_t), bmark_phys, mt, NULL, 0,
90eda14cbcSMatt Macy NULL);
91eda14cbcSMatt Macy
92eda14cbcSMatt Macy return (err == ENOENT ? SET_ERROR(ESRCH) : err);
93eda14cbcSMatt Macy }
94eda14cbcSMatt Macy
95eda14cbcSMatt Macy /*
96eda14cbcSMatt Macy * If later_ds is non-NULL, this will return EXDEV if the specified bookmark
97eda14cbcSMatt Macy * does not represents an earlier point in later_ds's timeline. However,
98eda14cbcSMatt Macy * bmp will still be filled in if we return EXDEV.
99eda14cbcSMatt Macy *
100eda14cbcSMatt Macy * Returns ENOENT if the dataset containing the bookmark does not exist.
101eda14cbcSMatt Macy * Returns ESRCH if the dataset exists but the bookmark was not found in it.
102eda14cbcSMatt Macy */
103eda14cbcSMatt Macy int
dsl_bookmark_lookup(dsl_pool_t * dp,const char * fullname,dsl_dataset_t * later_ds,zfs_bookmark_phys_t * bmp)104eda14cbcSMatt Macy dsl_bookmark_lookup(dsl_pool_t *dp, const char *fullname,
105eda14cbcSMatt Macy dsl_dataset_t *later_ds, zfs_bookmark_phys_t *bmp)
106eda14cbcSMatt Macy {
107eda14cbcSMatt Macy char *shortname;
108eda14cbcSMatt Macy dsl_dataset_t *ds;
109eda14cbcSMatt Macy int error;
110eda14cbcSMatt Macy
111eda14cbcSMatt Macy error = dsl_bookmark_hold_ds(dp, fullname, &ds, FTAG, &shortname);
112eda14cbcSMatt Macy if (error != 0)
113eda14cbcSMatt Macy return (error);
114eda14cbcSMatt Macy
115eda14cbcSMatt Macy error = dsl_bookmark_lookup_impl(ds, shortname, bmp);
116eda14cbcSMatt Macy if (error == 0 && later_ds != NULL) {
117eda14cbcSMatt Macy if (!dsl_dataset_is_before(later_ds, ds, bmp->zbm_creation_txg))
118eda14cbcSMatt Macy error = SET_ERROR(EXDEV);
119eda14cbcSMatt Macy }
120eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG);
121eda14cbcSMatt Macy return (error);
122eda14cbcSMatt Macy }
123eda14cbcSMatt Macy
/*
 * Validates that
 * - bmark is a full dataset path of a bookmark (bookmark_namecheck)
 * - source is a full path of a snapshot or bookmark
 *   ({bookmark,snapshot}_namecheck)
 *
 * Returns 0 if valid, -1 otherwise.
 */
static int
dsl_bookmark_create_nvl_validate_pair(const char *bmark, const char *source)
{
	if (bookmark_namecheck(bmark, NULL, NULL) != 0)
		return (-1);

	/* Both name checks are evaluated before deciding. */
	const int src_is_bmark = (bookmark_namecheck(source, NULL, NULL) == 0);
	const int src_is_snap = (snapshot_namecheck(source, NULL, NULL) == 0);

	return ((src_is_bmark || src_is_snap) ? 0 : -1);
}
146eda14cbcSMatt Macy
147eda14cbcSMatt Macy /*
148eda14cbcSMatt Macy * Check that the given nvlist corresponds to the following schema:
149eda14cbcSMatt Macy * { newbookmark -> source, ... }
150eda14cbcSMatt Macy * where
151eda14cbcSMatt Macy * - each pair passes dsl_bookmark_create_nvl_validate_pair
152eda14cbcSMatt Macy * - all newbookmarks are in the same pool
153eda14cbcSMatt Macy * - all newbookmarks have unique names
154eda14cbcSMatt Macy *
155eda14cbcSMatt Macy * Note that this function is only validates above schema. Callers must ensure
156eda14cbcSMatt Macy * that the bookmarks can be created, e.g. that sources exist.
157eda14cbcSMatt Macy *
158eda14cbcSMatt Macy * Returns 0 if the nvlist adheres to above schema.
159eda14cbcSMatt Macy * Returns -1 if it doesn't.
160eda14cbcSMatt Macy */
161eda14cbcSMatt Macy int
dsl_bookmark_create_nvl_validate(nvlist_t * bmarks)162eda14cbcSMatt Macy dsl_bookmark_create_nvl_validate(nvlist_t *bmarks)
163eda14cbcSMatt Macy {
1642a58b312SMartin Matuska const char *first = NULL;
165da5137abSMartin Matuska size_t first_len = 0;
166eda14cbcSMatt Macy
167eda14cbcSMatt Macy for (nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL);
168eda14cbcSMatt Macy pair != NULL; pair = nvlist_next_nvpair(bmarks, pair)) {
169eda14cbcSMatt Macy
1702a58b312SMartin Matuska const char *bmark = nvpair_name(pair);
1712a58b312SMartin Matuska const char *source;
172eda14cbcSMatt Macy
173eda14cbcSMatt Macy /* list structure: values must be snapshots XOR bookmarks */
174eda14cbcSMatt Macy if (nvpair_value_string(pair, &source) != 0)
175eda14cbcSMatt Macy return (-1);
176eda14cbcSMatt Macy if (dsl_bookmark_create_nvl_validate_pair(bmark, source) != 0)
177eda14cbcSMatt Macy return (-1);
178eda14cbcSMatt Macy
179eda14cbcSMatt Macy /* same pool check */
180eda14cbcSMatt Macy if (first == NULL) {
1812a58b312SMartin Matuska const char *cp = strpbrk(bmark, "/#");
182eda14cbcSMatt Macy if (cp == NULL)
183eda14cbcSMatt Macy return (-1);
184eda14cbcSMatt Macy first = bmark;
185eda14cbcSMatt Macy first_len = cp - bmark;
186eda14cbcSMatt Macy }
187eda14cbcSMatt Macy if (strncmp(first, bmark, first_len) != 0)
188eda14cbcSMatt Macy return (-1);
189eda14cbcSMatt Macy switch (*(bmark + first_len)) {
190eda14cbcSMatt Macy case '/': /* fallthrough */
191eda14cbcSMatt Macy case '#':
192eda14cbcSMatt Macy break;
193eda14cbcSMatt Macy default:
194eda14cbcSMatt Macy return (-1);
195eda14cbcSMatt Macy }
196eda14cbcSMatt Macy
197eda14cbcSMatt Macy /* unique newbookmark names; todo: O(n^2) */
198eda14cbcSMatt Macy for (nvpair_t *pair2 = nvlist_next_nvpair(bmarks, pair);
199eda14cbcSMatt Macy pair2 != NULL; pair2 = nvlist_next_nvpair(bmarks, pair2)) {
200eda14cbcSMatt Macy if (strcmp(nvpair_name(pair), nvpair_name(pair2)) == 0)
201eda14cbcSMatt Macy return (-1);
202eda14cbcSMatt Macy }
203eda14cbcSMatt Macy
204eda14cbcSMatt Macy }
205eda14cbcSMatt Macy return (0);
206eda14cbcSMatt Macy }
207eda14cbcSMatt Macy
208eda14cbcSMatt Macy /*
209eda14cbcSMatt Macy * expects that newbm and source have been validated using
210eda14cbcSMatt Macy * dsl_bookmark_create_nvl_validate_pair
211eda14cbcSMatt Macy */
212eda14cbcSMatt Macy static int
dsl_bookmark_create_check_impl(dsl_pool_t * dp,const char * newbm,const char * source)213eda14cbcSMatt Macy dsl_bookmark_create_check_impl(dsl_pool_t *dp,
214eda14cbcSMatt Macy const char *newbm, const char *source)
215eda14cbcSMatt Macy {
216eda14cbcSMatt Macy ASSERT0(dsl_bookmark_create_nvl_validate_pair(newbm, source));
217eda14cbcSMatt Macy /* defer source namecheck until we know it's a snapshot or bookmark */
218eda14cbcSMatt Macy
219eda14cbcSMatt Macy int error;
220eda14cbcSMatt Macy dsl_dataset_t *newbm_ds;
221eda14cbcSMatt Macy char *newbm_short;
222eda14cbcSMatt Macy zfs_bookmark_phys_t bmark_phys;
223eda14cbcSMatt Macy
224eda14cbcSMatt Macy error = dsl_bookmark_hold_ds(dp, newbm, &newbm_ds, FTAG, &newbm_short);
225eda14cbcSMatt Macy if (error != 0)
226eda14cbcSMatt Macy return (error);
227eda14cbcSMatt Macy
228eda14cbcSMatt Macy /* Verify that the new bookmark does not already exist */
229eda14cbcSMatt Macy error = dsl_bookmark_lookup_impl(newbm_ds, newbm_short, &bmark_phys);
230eda14cbcSMatt Macy switch (error) {
231eda14cbcSMatt Macy case ESRCH:
232eda14cbcSMatt Macy /* happy path: new bmark doesn't exist, proceed after switch */
233eda14cbcSMatt Macy break;
234eda14cbcSMatt Macy case 0:
235eda14cbcSMatt Macy error = SET_ERROR(EEXIST);
236eda14cbcSMatt Macy goto eholdnewbmds;
237eda14cbcSMatt Macy default:
23816038816SMartin Matuska /* dsl_bookmark_lookup_impl already did SET_ERROR */
239eda14cbcSMatt Macy goto eholdnewbmds;
240eda14cbcSMatt Macy }
241eda14cbcSMatt Macy
242eda14cbcSMatt Macy /* error is retval of the following if-cascade */
243eda14cbcSMatt Macy if (strchr(source, '@') != NULL) {
244eda14cbcSMatt Macy dsl_dataset_t *source_snap_ds;
245eda14cbcSMatt Macy ASSERT3S(snapshot_namecheck(source, NULL, NULL), ==, 0);
246eda14cbcSMatt Macy error = dsl_dataset_hold(dp, source, FTAG, &source_snap_ds);
247eda14cbcSMatt Macy if (error == 0) {
248eda14cbcSMatt Macy VERIFY(source_snap_ds->ds_is_snapshot);
249eda14cbcSMatt Macy /*
250eda14cbcSMatt Macy * Verify that source snapshot is an earlier point in
251eda14cbcSMatt Macy * newbm_ds's timeline (source may be newbm_ds's origin)
252eda14cbcSMatt Macy */
253eda14cbcSMatt Macy if (!dsl_dataset_is_before(newbm_ds, source_snap_ds, 0))
254eda14cbcSMatt Macy error = SET_ERROR(
255eda14cbcSMatt Macy ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR);
256eda14cbcSMatt Macy dsl_dataset_rele(source_snap_ds, FTAG);
257eda14cbcSMatt Macy }
258eda14cbcSMatt Macy } else if (strchr(source, '#') != NULL) {
259eda14cbcSMatt Macy zfs_bookmark_phys_t source_phys;
260eda14cbcSMatt Macy ASSERT3S(bookmark_namecheck(source, NULL, NULL), ==, 0);
261eda14cbcSMatt Macy /*
262eda14cbcSMatt Macy * Source must exists and be an earlier point in newbm_ds's
263eda14cbcSMatt Macy * timeline (newbm_ds's origin may be a snap of source's ds)
264eda14cbcSMatt Macy */
265eda14cbcSMatt Macy error = dsl_bookmark_lookup(dp, source, newbm_ds, &source_phys);
266eda14cbcSMatt Macy switch (error) {
267eda14cbcSMatt Macy case 0:
268eda14cbcSMatt Macy break; /* happy path */
269eda14cbcSMatt Macy case EXDEV:
270eda14cbcSMatt Macy error = SET_ERROR(ZFS_ERR_BOOKMARK_SOURCE_NOT_ANCESTOR);
271eda14cbcSMatt Macy break;
272eda14cbcSMatt Macy default:
27316038816SMartin Matuska /* dsl_bookmark_lookup already did SET_ERROR */
274eda14cbcSMatt Macy break;
275eda14cbcSMatt Macy }
276eda14cbcSMatt Macy } else {
277eda14cbcSMatt Macy /*
278eda14cbcSMatt Macy * dsl_bookmark_create_nvl_validate validates that source is
279eda14cbcSMatt Macy * either snapshot or bookmark
280eda14cbcSMatt Macy */
281eda14cbcSMatt Macy panic("unreachable code: %s", source);
282eda14cbcSMatt Macy }
283eda14cbcSMatt Macy
284eda14cbcSMatt Macy eholdnewbmds:
285eda14cbcSMatt Macy dsl_dataset_rele(newbm_ds, FTAG);
286eda14cbcSMatt Macy return (error);
287eda14cbcSMatt Macy }
288eda14cbcSMatt Macy
289eda14cbcSMatt Macy int
dsl_bookmark_create_check(void * arg,dmu_tx_t * tx)290eda14cbcSMatt Macy dsl_bookmark_create_check(void *arg, dmu_tx_t *tx)
291eda14cbcSMatt Macy {
292eda14cbcSMatt Macy dsl_bookmark_create_arg_t *dbca = arg;
293eda14cbcSMatt Macy int rv = 0;
294eda14cbcSMatt Macy int schema_err = 0;
295eda14cbcSMatt Macy ASSERT3P(dbca, !=, NULL);
296eda14cbcSMatt Macy ASSERT3P(dbca->dbca_bmarks, !=, NULL);
297eda14cbcSMatt Macy /* dbca->dbca_errors is allowed to be NULL */
298eda14cbcSMatt Macy
299eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
300eda14cbcSMatt Macy
301eda14cbcSMatt Macy if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS))
302eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP));
303eda14cbcSMatt Macy
304eda14cbcSMatt Macy if (dsl_bookmark_create_nvl_validate(dbca->dbca_bmarks) != 0)
305eda14cbcSMatt Macy rv = schema_err = SET_ERROR(EINVAL);
306eda14cbcSMatt Macy
307eda14cbcSMatt Macy for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
308eda14cbcSMatt Macy pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
3092a58b312SMartin Matuska const char *new = nvpair_name(pair);
310eda14cbcSMatt Macy
311eda14cbcSMatt Macy int error = schema_err;
312eda14cbcSMatt Macy if (error == 0) {
3132a58b312SMartin Matuska const char *source = fnvpair_value_string(pair);
314eda14cbcSMatt Macy error = dsl_bookmark_create_check_impl(dp, new, source);
315eda14cbcSMatt Macy if (error != 0)
316eda14cbcSMatt Macy error = SET_ERROR(error);
317eda14cbcSMatt Macy }
318eda14cbcSMatt Macy
319eda14cbcSMatt Macy if (error != 0) {
320eda14cbcSMatt Macy rv = error;
321eda14cbcSMatt Macy if (dbca->dbca_errors != NULL)
322eda14cbcSMatt Macy fnvlist_add_int32(dbca->dbca_errors,
323eda14cbcSMatt Macy new, error);
324eda14cbcSMatt Macy }
325eda14cbcSMatt Macy }
326eda14cbcSMatt Macy
327eda14cbcSMatt Macy return (rv);
328eda14cbcSMatt Macy }
329eda14cbcSMatt Macy
330eda14cbcSMatt Macy static dsl_bookmark_node_t *
dsl_bookmark_node_alloc(char * shortname)331eda14cbcSMatt Macy dsl_bookmark_node_alloc(char *shortname)
332eda14cbcSMatt Macy {
333eda14cbcSMatt Macy dsl_bookmark_node_t *dbn = kmem_alloc(sizeof (*dbn), KM_SLEEP);
334eda14cbcSMatt Macy dbn->dbn_name = spa_strdup(shortname);
335eda14cbcSMatt Macy dbn->dbn_dirty = B_FALSE;
336eda14cbcSMatt Macy mutex_init(&dbn->dbn_lock, NULL, MUTEX_DEFAULT, NULL);
337eda14cbcSMatt Macy return (dbn);
338eda14cbcSMatt Macy }
339eda14cbcSMatt Macy
340eda14cbcSMatt Macy /*
341eda14cbcSMatt Macy * Set the fields in the zfs_bookmark_phys_t based on the specified snapshot.
342eda14cbcSMatt Macy */
343eda14cbcSMatt Macy static void
dsl_bookmark_set_phys(zfs_bookmark_phys_t * zbm,dsl_dataset_t * snap)344eda14cbcSMatt Macy dsl_bookmark_set_phys(zfs_bookmark_phys_t *zbm, dsl_dataset_t *snap)
345eda14cbcSMatt Macy {
346eda14cbcSMatt Macy spa_t *spa = dsl_dataset_get_spa(snap);
347eda14cbcSMatt Macy objset_t *mos = spa_get_dsl(spa)->dp_meta_objset;
348eda14cbcSMatt Macy dsl_dataset_phys_t *dsp = dsl_dataset_phys(snap);
349a0b956f5SMartin Matuska
350a0b956f5SMartin Matuska memset(zbm, 0, sizeof (zfs_bookmark_phys_t));
351eda14cbcSMatt Macy zbm->zbm_guid = dsp->ds_guid;
352eda14cbcSMatt Macy zbm->zbm_creation_txg = dsp->ds_creation_txg;
353eda14cbcSMatt Macy zbm->zbm_creation_time = dsp->ds_creation_time;
354eda14cbcSMatt Macy zbm->zbm_redaction_obj = 0;
355eda14cbcSMatt Macy
356eda14cbcSMatt Macy /*
357eda14cbcSMatt Macy * If the dataset is encrypted create a larger bookmark to
358eda14cbcSMatt Macy * accommodate the IVset guid. The IVset guid was added
359eda14cbcSMatt Macy * after the encryption feature to prevent a problem with
360eda14cbcSMatt Macy * raw sends. If we encounter an encrypted dataset without
361eda14cbcSMatt Macy * an IVset guid we fall back to a normal bookmark.
362eda14cbcSMatt Macy */
363eda14cbcSMatt Macy if (snap->ds_dir->dd_crypto_obj != 0 &&
364eda14cbcSMatt Macy spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_V2)) {
365eda14cbcSMatt Macy (void) zap_lookup(mos, snap->ds_object,
366eda14cbcSMatt Macy DS_FIELD_IVSET_GUID, sizeof (uint64_t), 1,
367eda14cbcSMatt Macy &zbm->zbm_ivset_guid);
368eda14cbcSMatt Macy }
369eda14cbcSMatt Macy
370eda14cbcSMatt Macy if (spa_feature_is_enabled(spa, SPA_FEATURE_BOOKMARK_WRITTEN)) {
371eda14cbcSMatt Macy zbm->zbm_flags = ZBM_FLAG_SNAPSHOT_EXISTS | ZBM_FLAG_HAS_FBN;
372eda14cbcSMatt Macy zbm->zbm_referenced_bytes_refd = dsp->ds_referenced_bytes;
373eda14cbcSMatt Macy zbm->zbm_compressed_bytes_refd = dsp->ds_compressed_bytes;
374eda14cbcSMatt Macy zbm->zbm_uncompressed_bytes_refd = dsp->ds_uncompressed_bytes;
375eda14cbcSMatt Macy
376eda14cbcSMatt Macy dsl_dataset_t *nextds;
377eda14cbcSMatt Macy VERIFY0(dsl_dataset_hold_obj(snap->ds_dir->dd_pool,
378eda14cbcSMatt Macy dsp->ds_next_snap_obj, FTAG, &nextds));
379eda14cbcSMatt Macy dsl_deadlist_space(&nextds->ds_deadlist,
380eda14cbcSMatt Macy &zbm->zbm_referenced_freed_before_next_snap,
381eda14cbcSMatt Macy &zbm->zbm_compressed_freed_before_next_snap,
382eda14cbcSMatt Macy &zbm->zbm_uncompressed_freed_before_next_snap);
383eda14cbcSMatt Macy dsl_dataset_rele(nextds, FTAG);
384eda14cbcSMatt Macy }
385eda14cbcSMatt Macy }
386eda14cbcSMatt Macy
387eda14cbcSMatt Macy /*
388eda14cbcSMatt Macy * Add dsl_bookmark_node_t `dbn` to the given dataset and increment appropriate
389eda14cbcSMatt Macy * SPA feature counters.
390eda14cbcSMatt Macy */
391eda14cbcSMatt Macy void
dsl_bookmark_node_add(dsl_dataset_t * hds,dsl_bookmark_node_t * dbn,dmu_tx_t * tx)392eda14cbcSMatt Macy dsl_bookmark_node_add(dsl_dataset_t *hds, dsl_bookmark_node_t *dbn,
393eda14cbcSMatt Macy dmu_tx_t *tx)
394eda14cbcSMatt Macy {
395eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
396eda14cbcSMatt Macy objset_t *mos = dp->dp_meta_objset;
397eda14cbcSMatt Macy
398eda14cbcSMatt Macy if (hds->ds_bookmarks_obj == 0) {
399eda14cbcSMatt Macy hds->ds_bookmarks_obj = zap_create_norm(mos,
400eda14cbcSMatt Macy U8_TEXTPREP_TOUPPER, DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0,
401eda14cbcSMatt Macy tx);
402eda14cbcSMatt Macy spa_feature_incr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
403eda14cbcSMatt Macy
404eda14cbcSMatt Macy dsl_dataset_zapify(hds, tx);
405eda14cbcSMatt Macy VERIFY0(zap_add(mos, hds->ds_object,
406eda14cbcSMatt Macy DS_FIELD_BOOKMARK_NAMES,
407eda14cbcSMatt Macy sizeof (hds->ds_bookmarks_obj), 1,
408eda14cbcSMatt Macy &hds->ds_bookmarks_obj, tx));
409eda14cbcSMatt Macy }
410eda14cbcSMatt Macy
411eda14cbcSMatt Macy avl_add(&hds->ds_bookmarks, dbn);
412eda14cbcSMatt Macy
413eda14cbcSMatt Macy /*
414eda14cbcSMatt Macy * To maintain backwards compatibility with software that doesn't
415eda14cbcSMatt Macy * understand SPA_FEATURE_BOOKMARK_V2, we need to use the smallest
416eda14cbcSMatt Macy * possible bookmark size.
417eda14cbcSMatt Macy */
418eda14cbcSMatt Macy uint64_t bookmark_phys_size = BOOKMARK_PHYS_SIZE_V1;
419eda14cbcSMatt Macy if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2) &&
420eda14cbcSMatt Macy (dbn->dbn_phys.zbm_ivset_guid != 0 || dbn->dbn_phys.zbm_flags &
421eda14cbcSMatt Macy ZBM_FLAG_HAS_FBN || dbn->dbn_phys.zbm_redaction_obj != 0)) {
422eda14cbcSMatt Macy bookmark_phys_size = BOOKMARK_PHYS_SIZE_V2;
423eda14cbcSMatt Macy spa_feature_incr(dp->dp_spa, SPA_FEATURE_BOOKMARK_V2, tx);
424eda14cbcSMatt Macy }
425eda14cbcSMatt Macy
426da5137abSMartin Matuska zfs_bookmark_phys_t zero_phys = { 0 };
427da5137abSMartin Matuska ASSERT0(memcmp(((char *)&dbn->dbn_phys) + bookmark_phys_size,
428eda14cbcSMatt Macy &zero_phys, sizeof (zfs_bookmark_phys_t) - bookmark_phys_size));
429eda14cbcSMatt Macy
430eda14cbcSMatt Macy VERIFY0(zap_add(mos, hds->ds_bookmarks_obj, dbn->dbn_name,
431eda14cbcSMatt Macy sizeof (uint64_t), bookmark_phys_size / sizeof (uint64_t),
432eda14cbcSMatt Macy &dbn->dbn_phys, tx));
433eda14cbcSMatt Macy }
434eda14cbcSMatt Macy
435eda14cbcSMatt Macy /*
436eda14cbcSMatt Macy * If redaction_list is non-null, we create a redacted bookmark and redaction
437eda14cbcSMatt Macy * list, and store the object number of the redaction list in redact_obj.
438eda14cbcSMatt Macy */
439eda14cbcSMatt Macy static void
dsl_bookmark_create_sync_impl_snap(const char * bookmark,const char * snapshot,dmu_tx_t * tx,uint64_t num_redact_snaps,uint64_t * redact_snaps,const void * tag,redaction_list_t ** redaction_list)440eda14cbcSMatt Macy dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
441a0b956f5SMartin Matuska dmu_tx_t *tx, uint64_t num_redact_snaps, uint64_t *redact_snaps,
442a0b956f5SMartin Matuska const void *tag, redaction_list_t **redaction_list)
443eda14cbcSMatt Macy {
444eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
445eda14cbcSMatt Macy objset_t *mos = dp->dp_meta_objset;
446eda14cbcSMatt Macy dsl_dataset_t *snapds, *bmark_fs;
447eda14cbcSMatt Macy char *shortname;
448eda14cbcSMatt Macy boolean_t bookmark_redacted;
449eda14cbcSMatt Macy uint64_t *dsredactsnaps;
450eda14cbcSMatt Macy uint64_t dsnumsnaps;
451eda14cbcSMatt Macy
452eda14cbcSMatt Macy VERIFY0(dsl_dataset_hold(dp, snapshot, FTAG, &snapds));
453eda14cbcSMatt Macy VERIFY0(dsl_bookmark_hold_ds(dp, bookmark, &bmark_fs, FTAG,
454eda14cbcSMatt Macy &shortname));
455eda14cbcSMatt Macy
456eda14cbcSMatt Macy dsl_bookmark_node_t *dbn = dsl_bookmark_node_alloc(shortname);
457eda14cbcSMatt Macy dsl_bookmark_set_phys(&dbn->dbn_phys, snapds);
458eda14cbcSMatt Macy
459eda14cbcSMatt Macy bookmark_redacted = dsl_dataset_get_uint64_array_feature(snapds,
460eda14cbcSMatt Macy SPA_FEATURE_REDACTED_DATASETS, &dsnumsnaps, &dsredactsnaps);
461eda14cbcSMatt Macy if (redaction_list != NULL || bookmark_redacted) {
462eda14cbcSMatt Macy redaction_list_t *local_rl;
4632ad756a6SMartin Matuska boolean_t spill = B_FALSE;
464eda14cbcSMatt Macy if (bookmark_redacted) {
465eda14cbcSMatt Macy redact_snaps = dsredactsnaps;
466eda14cbcSMatt Macy num_redact_snaps = dsnumsnaps;
467eda14cbcSMatt Macy }
4682ad756a6SMartin Matuska int bonuslen = sizeof (redaction_list_phys_t) +
4692ad756a6SMartin Matuska num_redact_snaps * sizeof (uint64_t);
4702ad756a6SMartin Matuska if (bonuslen > dmu_bonus_max())
4712ad756a6SMartin Matuska spill = B_TRUE;
472eda14cbcSMatt Macy dbn->dbn_phys.zbm_redaction_obj = dmu_object_alloc(mos,
473eda14cbcSMatt Macy DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
4742ad756a6SMartin Matuska DMU_OTN_UINT64_METADATA, spill ? 0 : bonuslen, tx);
475eda14cbcSMatt Macy spa_feature_incr(dp->dp_spa,
476eda14cbcSMatt Macy SPA_FEATURE_REDACTION_BOOKMARKS, tx);
4772ad756a6SMartin Matuska if (spill) {
4782ad756a6SMartin Matuska spa_feature_incr(dp->dp_spa,
4792ad756a6SMartin Matuska SPA_FEATURE_REDACTION_LIST_SPILL, tx);
4802ad756a6SMartin Matuska }
481eda14cbcSMatt Macy
482eda14cbcSMatt Macy VERIFY0(dsl_redaction_list_hold_obj(dp,
483eda14cbcSMatt Macy dbn->dbn_phys.zbm_redaction_obj, tag, &local_rl));
484eda14cbcSMatt Macy dsl_redaction_list_long_hold(dp, local_rl, tag);
485eda14cbcSMatt Macy
4862ad756a6SMartin Matuska if (!spill) {
4872ad756a6SMartin Matuska ASSERT3U(local_rl->rl_bonus->db_size, >=, bonuslen);
4882ad756a6SMartin Matuska dmu_buf_will_dirty(local_rl->rl_bonus, tx);
4892ad756a6SMartin Matuska } else {
4902ad756a6SMartin Matuska dmu_buf_t *db;
4912ad756a6SMartin Matuska VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
4922ad756a6SMartin Matuska DB_RF_MUST_SUCCEED, FTAG, &db));
493188408daSMartin Matuska dmu_buf_will_fill(db, tx, B_FALSE);
4942ad756a6SMartin Matuska VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
4952ad756a6SMartin Matuska SPA_MINBLOCKSIZE), tx));
4962ad756a6SMartin Matuska local_rl->rl_phys = db->db_data;
4972ad756a6SMartin Matuska local_rl->rl_dbuf = db;
4982ad756a6SMartin Matuska }
499da5137abSMartin Matuska memcpy(local_rl->rl_phys->rlp_snaps, redact_snaps,
500eda14cbcSMatt Macy sizeof (uint64_t) * num_redact_snaps);
501eda14cbcSMatt Macy local_rl->rl_phys->rlp_num_snaps = num_redact_snaps;
502eda14cbcSMatt Macy if (bookmark_redacted) {
503eda14cbcSMatt Macy ASSERT3P(redaction_list, ==, NULL);
504eda14cbcSMatt Macy local_rl->rl_phys->rlp_last_blkid = UINT64_MAX;
505eda14cbcSMatt Macy local_rl->rl_phys->rlp_last_object = UINT64_MAX;
506eda14cbcSMatt Macy dsl_redaction_list_long_rele(local_rl, tag);
507eda14cbcSMatt Macy dsl_redaction_list_rele(local_rl, tag);
508eda14cbcSMatt Macy } else {
509eda14cbcSMatt Macy *redaction_list = local_rl;
510eda14cbcSMatt Macy }
511eda14cbcSMatt Macy }
512eda14cbcSMatt Macy
513eda14cbcSMatt Macy if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
514eda14cbcSMatt Macy spa_feature_incr(dp->dp_spa,
515eda14cbcSMatt Macy SPA_FEATURE_BOOKMARK_WRITTEN, tx);
516eda14cbcSMatt Macy }
517eda14cbcSMatt Macy
518eda14cbcSMatt Macy dsl_bookmark_node_add(bmark_fs, dbn, tx);
519eda14cbcSMatt Macy
520eda14cbcSMatt Macy spa_history_log_internal_ds(bmark_fs, "bookmark", tx,
521eda14cbcSMatt Macy "name=%s creation_txg=%llu target_snap=%llu redact_obj=%llu",
522eda14cbcSMatt Macy shortname, (longlong_t)dbn->dbn_phys.zbm_creation_txg,
523eda14cbcSMatt Macy (longlong_t)snapds->ds_object,
524eda14cbcSMatt Macy (longlong_t)dbn->dbn_phys.zbm_redaction_obj);
525eda14cbcSMatt Macy
526eda14cbcSMatt Macy dsl_dataset_rele(bmark_fs, FTAG);
527eda14cbcSMatt Macy dsl_dataset_rele(snapds, FTAG);
528eda14cbcSMatt Macy }
529eda14cbcSMatt Macy
530eda14cbcSMatt Macy
531eda14cbcSMatt Macy static void
dsl_bookmark_create_sync_impl_book(const char * new_name,const char * source_name,dmu_tx_t * tx)532eda14cbcSMatt Macy dsl_bookmark_create_sync_impl_book(
533eda14cbcSMatt Macy const char *new_name, const char *source_name, dmu_tx_t *tx)
534eda14cbcSMatt Macy {
535eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
536eda14cbcSMatt Macy dsl_dataset_t *bmark_fs_source, *bmark_fs_new;
537eda14cbcSMatt Macy char *source_shortname, *new_shortname;
538eda14cbcSMatt Macy zfs_bookmark_phys_t source_phys;
539eda14cbcSMatt Macy
540eda14cbcSMatt Macy VERIFY0(dsl_bookmark_hold_ds(dp, source_name, &bmark_fs_source, FTAG,
541eda14cbcSMatt Macy &source_shortname));
542eda14cbcSMatt Macy VERIFY0(dsl_bookmark_hold_ds(dp, new_name, &bmark_fs_new, FTAG,
543eda14cbcSMatt Macy &new_shortname));
544eda14cbcSMatt Macy
545eda14cbcSMatt Macy /*
546eda14cbcSMatt Macy * create a copy of the source bookmark by copying most of its members
547eda14cbcSMatt Macy *
548eda14cbcSMatt Macy * Caveat: bookmarking a redaction bookmark yields a normal bookmark
549eda14cbcSMatt Macy * -----------------------------------------------------------------
550eda14cbcSMatt Macy * Reasoning:
551eda14cbcSMatt Macy * - The zbm_redaction_obj would be referred to by both source and new
552eda14cbcSMatt Macy * bookmark, but would be destroyed once either source or new is
55316038816SMartin Matuska * destroyed, resulting in use-after-free of the referred object.
554eda14cbcSMatt Macy * - User expectation when issuing the `zfs bookmark` command is that
555eda14cbcSMatt Macy * a normal bookmark of the source is created
556eda14cbcSMatt Macy *
557eda14cbcSMatt Macy * Design Alternatives For Full Redaction Bookmark Copying:
558eda14cbcSMatt Macy * - reference-count the redaction object => would require on-disk
559eda14cbcSMatt Macy * format change for existing redaction objects
560eda14cbcSMatt Macy * - Copy the redaction object => cannot be done in syncing context
561eda14cbcSMatt Macy * because the redaction object might be too large
562eda14cbcSMatt Macy */
563eda14cbcSMatt Macy
564eda14cbcSMatt Macy VERIFY0(dsl_bookmark_lookup_impl(bmark_fs_source, source_shortname,
565eda14cbcSMatt Macy &source_phys));
566eda14cbcSMatt Macy dsl_bookmark_node_t *new_dbn = dsl_bookmark_node_alloc(new_shortname);
567eda14cbcSMatt Macy
568eda14cbcSMatt Macy memcpy(&new_dbn->dbn_phys, &source_phys, sizeof (source_phys));
569eda14cbcSMatt Macy new_dbn->dbn_phys.zbm_redaction_obj = 0;
570eda14cbcSMatt Macy
571eda14cbcSMatt Macy /* update feature counters */
572eda14cbcSMatt Macy if (new_dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
573eda14cbcSMatt Macy spa_feature_incr(dp->dp_spa,
574eda14cbcSMatt Macy SPA_FEATURE_BOOKMARK_WRITTEN, tx);
575eda14cbcSMatt Macy }
576eda14cbcSMatt Macy /* no need for redaction bookmark counter; nulled zbm_redaction_obj */
577eda14cbcSMatt Macy /* dsl_bookmark_node_add bumps bookmarks and v2-bookmarks counter */
578eda14cbcSMatt Macy
579eda14cbcSMatt Macy /*
580eda14cbcSMatt Macy * write new bookmark
581eda14cbcSMatt Macy *
582eda14cbcSMatt Macy * Note that dsl_bookmark_lookup_impl guarantees that, if source is a
583eda14cbcSMatt Macy * v1 bookmark, the v2-only fields are zeroed.
584eda14cbcSMatt Macy * And dsl_bookmark_node_add writes back a v1-sized bookmark if
585eda14cbcSMatt Macy * v2 bookmarks are disabled and/or v2-only fields are zeroed.
586eda14cbcSMatt Macy * => bookmark copying works on pre-bookmark-v2 pools
587eda14cbcSMatt Macy */
588eda14cbcSMatt Macy dsl_bookmark_node_add(bmark_fs_new, new_dbn, tx);
589eda14cbcSMatt Macy
590eda14cbcSMatt Macy spa_history_log_internal_ds(bmark_fs_source, "bookmark", tx,
591eda14cbcSMatt Macy "name=%s creation_txg=%llu source_guid=%llu",
592eda14cbcSMatt Macy new_shortname, (longlong_t)new_dbn->dbn_phys.zbm_creation_txg,
593eda14cbcSMatt Macy (longlong_t)source_phys.zbm_guid);
594eda14cbcSMatt Macy
595eda14cbcSMatt Macy dsl_dataset_rele(bmark_fs_source, FTAG);
596eda14cbcSMatt Macy dsl_dataset_rele(bmark_fs_new, FTAG);
597eda14cbcSMatt Macy }
598eda14cbcSMatt Macy
599eda14cbcSMatt Macy void
dsl_bookmark_create_sync(void * arg,dmu_tx_t * tx)600eda14cbcSMatt Macy dsl_bookmark_create_sync(void *arg, dmu_tx_t *tx)
601eda14cbcSMatt Macy {
602eda14cbcSMatt Macy dsl_bookmark_create_arg_t *dbca = arg;
603eda14cbcSMatt Macy
604eda14cbcSMatt Macy ASSERT(spa_feature_is_enabled(dmu_tx_pool(tx)->dp_spa,
605eda14cbcSMatt Macy SPA_FEATURE_BOOKMARKS));
606eda14cbcSMatt Macy
607eda14cbcSMatt Macy for (nvpair_t *pair = nvlist_next_nvpair(dbca->dbca_bmarks, NULL);
608eda14cbcSMatt Macy pair != NULL; pair = nvlist_next_nvpair(dbca->dbca_bmarks, pair)) {
609eda14cbcSMatt Macy
6102a58b312SMartin Matuska const char *new = nvpair_name(pair);
6112a58b312SMartin Matuska const char *source = fnvpair_value_string(pair);
612eda14cbcSMatt Macy
613eda14cbcSMatt Macy if (strchr(source, '@') != NULL) {
614eda14cbcSMatt Macy dsl_bookmark_create_sync_impl_snap(new, source, tx,
615eda14cbcSMatt Macy 0, NULL, NULL, NULL);
616eda14cbcSMatt Macy } else if (strchr(source, '#') != NULL) {
617eda14cbcSMatt Macy dsl_bookmark_create_sync_impl_book(new, source, tx);
618eda14cbcSMatt Macy } else {
619eda14cbcSMatt Macy panic("unreachable code");
620eda14cbcSMatt Macy }
621eda14cbcSMatt Macy
622eda14cbcSMatt Macy }
623eda14cbcSMatt Macy }
624eda14cbcSMatt Macy
625eda14cbcSMatt Macy /*
626eda14cbcSMatt Macy * The bookmarks must all be in the same pool.
627eda14cbcSMatt Macy */
628eda14cbcSMatt Macy int
dsl_bookmark_create(nvlist_t * bmarks,nvlist_t * errors)629eda14cbcSMatt Macy dsl_bookmark_create(nvlist_t *bmarks, nvlist_t *errors)
630eda14cbcSMatt Macy {
631eda14cbcSMatt Macy nvpair_t *pair;
632eda14cbcSMatt Macy dsl_bookmark_create_arg_t dbca;
633eda14cbcSMatt Macy
634eda14cbcSMatt Macy pair = nvlist_next_nvpair(bmarks, NULL);
635eda14cbcSMatt Macy if (pair == NULL)
636eda14cbcSMatt Macy return (0);
637eda14cbcSMatt Macy
638eda14cbcSMatt Macy dbca.dbca_bmarks = bmarks;
639eda14cbcSMatt Macy dbca.dbca_errors = errors;
640eda14cbcSMatt Macy
641eda14cbcSMatt Macy return (dsl_sync_task(nvpair_name(pair), dsl_bookmark_create_check,
642eda14cbcSMatt Macy dsl_bookmark_create_sync, &dbca,
643eda14cbcSMatt Macy fnvlist_num_pairs(bmarks), ZFS_SPACE_CHECK_NORMAL));
644eda14cbcSMatt Macy }
645eda14cbcSMatt Macy
646eda14cbcSMatt Macy static int
dsl_bookmark_create_redacted_check(void * arg,dmu_tx_t * tx)647eda14cbcSMatt Macy dsl_bookmark_create_redacted_check(void *arg, dmu_tx_t *tx)
648eda14cbcSMatt Macy {
649eda14cbcSMatt Macy dsl_bookmark_create_redacted_arg_t *dbcra = arg;
650eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
651eda14cbcSMatt Macy int rv = 0;
652eda14cbcSMatt Macy
653eda14cbcSMatt Macy if (!spa_feature_is_enabled(dp->dp_spa,
654eda14cbcSMatt Macy SPA_FEATURE_REDACTION_BOOKMARKS))
655eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP));
656eda14cbcSMatt Macy /*
6572ad756a6SMartin Matuska * If the list of redact snaps will not fit in the bonus buffer (or
6582ad756a6SMartin Matuska * spill block, with the REDACTION_LIST_SPILL feature) with the
6592ad756a6SMartin Matuska * furthest reached object and offset, fail.
660eda14cbcSMatt Macy */
6612ad756a6SMartin Matuska uint64_t snaplimit = ((spa_feature_is_enabled(dp->dp_spa,
6622ad756a6SMartin Matuska SPA_FEATURE_REDACTION_LIST_SPILL) ? spa_maxblocksize(dp->dp_spa) :
6632ad756a6SMartin Matuska dmu_bonus_max()) -
6642ad756a6SMartin Matuska sizeof (redaction_list_phys_t)) / sizeof (uint64_t);
6652ad756a6SMartin Matuska if (dbcra->dbcra_numsnaps > snaplimit)
666eda14cbcSMatt Macy return (SET_ERROR(E2BIG));
667eda14cbcSMatt Macy
668eda14cbcSMatt Macy if (dsl_bookmark_create_nvl_validate_pair(
669eda14cbcSMatt Macy dbcra->dbcra_bmark, dbcra->dbcra_snap) != 0)
670eda14cbcSMatt Macy return (SET_ERROR(EINVAL));
671eda14cbcSMatt Macy
672eda14cbcSMatt Macy rv = dsl_bookmark_create_check_impl(dp,
673eda14cbcSMatt Macy dbcra->dbcra_bmark, dbcra->dbcra_snap);
674eda14cbcSMatt Macy return (rv);
675eda14cbcSMatt Macy }
676eda14cbcSMatt Macy
677eda14cbcSMatt Macy static void
dsl_bookmark_create_redacted_sync(void * arg,dmu_tx_t * tx)678eda14cbcSMatt Macy dsl_bookmark_create_redacted_sync(void *arg, dmu_tx_t *tx)
679eda14cbcSMatt Macy {
680eda14cbcSMatt Macy dsl_bookmark_create_redacted_arg_t *dbcra = arg;
681eda14cbcSMatt Macy dsl_bookmark_create_sync_impl_snap(dbcra->dbcra_bmark,
682eda14cbcSMatt Macy dbcra->dbcra_snap, tx, dbcra->dbcra_numsnaps, dbcra->dbcra_snaps,
683eda14cbcSMatt Macy dbcra->dbcra_tag, dbcra->dbcra_rl);
684eda14cbcSMatt Macy }
685eda14cbcSMatt Macy
686eda14cbcSMatt Macy int
dsl_bookmark_create_redacted(const char * bookmark,const char * snapshot,uint64_t numsnaps,uint64_t * snapguids,const void * tag,redaction_list_t ** rl)687eda14cbcSMatt Macy dsl_bookmark_create_redacted(const char *bookmark, const char *snapshot,
688a0b956f5SMartin Matuska uint64_t numsnaps, uint64_t *snapguids, const void *tag,
689a0b956f5SMartin Matuska redaction_list_t **rl)
690eda14cbcSMatt Macy {
691eda14cbcSMatt Macy dsl_bookmark_create_redacted_arg_t dbcra;
692eda14cbcSMatt Macy
693eda14cbcSMatt Macy dbcra.dbcra_bmark = bookmark;
694eda14cbcSMatt Macy dbcra.dbcra_snap = snapshot;
695eda14cbcSMatt Macy dbcra.dbcra_rl = rl;
696eda14cbcSMatt Macy dbcra.dbcra_numsnaps = numsnaps;
697eda14cbcSMatt Macy dbcra.dbcra_snaps = snapguids;
698eda14cbcSMatt Macy dbcra.dbcra_tag = tag;
699eda14cbcSMatt Macy
700eda14cbcSMatt Macy return (dsl_sync_task(bookmark, dsl_bookmark_create_redacted_check,
701eda14cbcSMatt Macy dsl_bookmark_create_redacted_sync, &dbcra, 5,
702eda14cbcSMatt Macy ZFS_SPACE_CHECK_NORMAL));
703eda14cbcSMatt Macy }
704eda14cbcSMatt Macy
705eda14cbcSMatt Macy /*
706eda14cbcSMatt Macy * Retrieve the list of properties given in the 'props' nvlist for a bookmark.
707eda14cbcSMatt Macy * If 'props' is NULL, retrieves all properties.
708eda14cbcSMatt Macy */
709eda14cbcSMatt Macy static void
dsl_bookmark_fetch_props(dsl_pool_t * dp,zfs_bookmark_phys_t * bmark_phys,nvlist_t * props,nvlist_t * out_props)710eda14cbcSMatt Macy dsl_bookmark_fetch_props(dsl_pool_t *dp, zfs_bookmark_phys_t *bmark_phys,
711eda14cbcSMatt Macy nvlist_t *props, nvlist_t *out_props)
712eda14cbcSMatt Macy {
713eda14cbcSMatt Macy ASSERT3P(dp, !=, NULL);
714eda14cbcSMatt Macy ASSERT3P(bmark_phys, !=, NULL);
715eda14cbcSMatt Macy ASSERT3P(out_props, !=, NULL);
716eda14cbcSMatt Macy ASSERT(RRW_LOCK_HELD(&dp->dp_config_rwlock));
717eda14cbcSMatt Macy
718eda14cbcSMatt Macy if (props == NULL || nvlist_exists(props,
719eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_GUID))) {
720eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(out_props,
721eda14cbcSMatt Macy ZFS_PROP_GUID, bmark_phys->zbm_guid);
722eda14cbcSMatt Macy }
723eda14cbcSMatt Macy if (props == NULL || nvlist_exists(props,
724eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_CREATETXG))) {
725eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(out_props,
726eda14cbcSMatt Macy ZFS_PROP_CREATETXG, bmark_phys->zbm_creation_txg);
727eda14cbcSMatt Macy }
728eda14cbcSMatt Macy if (props == NULL || nvlist_exists(props,
729eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_CREATION))) {
730eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(out_props,
731eda14cbcSMatt Macy ZFS_PROP_CREATION, bmark_phys->zbm_creation_time);
732eda14cbcSMatt Macy }
733eda14cbcSMatt Macy if (props == NULL || nvlist_exists(props,
734eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_IVSET_GUID))) {
735eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(out_props,
736eda14cbcSMatt Macy ZFS_PROP_IVSET_GUID, bmark_phys->zbm_ivset_guid);
737eda14cbcSMatt Macy }
738eda14cbcSMatt Macy if (bmark_phys->zbm_flags & ZBM_FLAG_HAS_FBN) {
739eda14cbcSMatt Macy if (props == NULL || nvlist_exists(props,
740eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_REFERENCED))) {
741eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(out_props,
742eda14cbcSMatt Macy ZFS_PROP_REFERENCED,
743eda14cbcSMatt Macy bmark_phys->zbm_referenced_bytes_refd);
744eda14cbcSMatt Macy }
745eda14cbcSMatt Macy if (props == NULL || nvlist_exists(props,
746eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_LOGICALREFERENCED))) {
747eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(out_props,
748eda14cbcSMatt Macy ZFS_PROP_LOGICALREFERENCED,
749eda14cbcSMatt Macy bmark_phys->zbm_uncompressed_bytes_refd);
750eda14cbcSMatt Macy }
751eda14cbcSMatt Macy if (props == NULL || nvlist_exists(props,
752eda14cbcSMatt Macy zfs_prop_to_name(ZFS_PROP_REFRATIO))) {
753eda14cbcSMatt Macy uint64_t ratio =
754eda14cbcSMatt Macy bmark_phys->zbm_compressed_bytes_refd == 0 ? 100 :
755eda14cbcSMatt Macy bmark_phys->zbm_uncompressed_bytes_refd * 100 /
756eda14cbcSMatt Macy bmark_phys->zbm_compressed_bytes_refd;
757eda14cbcSMatt Macy dsl_prop_nvlist_add_uint64(out_props,
758eda14cbcSMatt Macy ZFS_PROP_REFRATIO, ratio);
759eda14cbcSMatt Macy }
760eda14cbcSMatt Macy }
761eda14cbcSMatt Macy
762eda14cbcSMatt Macy if ((props == NULL || nvlist_exists(props, "redact_snaps") ||
763eda14cbcSMatt Macy nvlist_exists(props, "redact_complete")) &&
764eda14cbcSMatt Macy bmark_phys->zbm_redaction_obj != 0) {
765eda14cbcSMatt Macy redaction_list_t *rl;
766eda14cbcSMatt Macy int err = dsl_redaction_list_hold_obj(dp,
767eda14cbcSMatt Macy bmark_phys->zbm_redaction_obj, FTAG, &rl);
768eda14cbcSMatt Macy if (err == 0) {
769eda14cbcSMatt Macy if (nvlist_exists(props, "redact_snaps")) {
770eda14cbcSMatt Macy nvlist_t *nvl;
771eda14cbcSMatt Macy nvl = fnvlist_alloc();
772eda14cbcSMatt Macy fnvlist_add_uint64_array(nvl, ZPROP_VALUE,
773eda14cbcSMatt Macy rl->rl_phys->rlp_snaps,
774eda14cbcSMatt Macy rl->rl_phys->rlp_num_snaps);
775eda14cbcSMatt Macy fnvlist_add_nvlist(out_props, "redact_snaps",
776eda14cbcSMatt Macy nvl);
777eda14cbcSMatt Macy nvlist_free(nvl);
778eda14cbcSMatt Macy }
779eda14cbcSMatt Macy if (nvlist_exists(props, "redact_complete")) {
780eda14cbcSMatt Macy nvlist_t *nvl;
781eda14cbcSMatt Macy nvl = fnvlist_alloc();
782eda14cbcSMatt Macy fnvlist_add_boolean_value(nvl, ZPROP_VALUE,
783eda14cbcSMatt Macy rl->rl_phys->rlp_last_blkid == UINT64_MAX &&
784eda14cbcSMatt Macy rl->rl_phys->rlp_last_object == UINT64_MAX);
785eda14cbcSMatt Macy fnvlist_add_nvlist(out_props, "redact_complete",
786eda14cbcSMatt Macy nvl);
787eda14cbcSMatt Macy nvlist_free(nvl);
788eda14cbcSMatt Macy }
789eda14cbcSMatt Macy dsl_redaction_list_rele(rl, FTAG);
790eda14cbcSMatt Macy }
791eda14cbcSMatt Macy }
792eda14cbcSMatt Macy }
793eda14cbcSMatt Macy
794eda14cbcSMatt Macy int
dsl_get_bookmarks_impl(dsl_dataset_t * ds,nvlist_t * props,nvlist_t * outnvl)795eda14cbcSMatt Macy dsl_get_bookmarks_impl(dsl_dataset_t *ds, nvlist_t *props, nvlist_t *outnvl)
796eda14cbcSMatt Macy {
797eda14cbcSMatt Macy dsl_pool_t *dp = ds->ds_dir->dd_pool;
798eda14cbcSMatt Macy
799eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dp));
800eda14cbcSMatt Macy
801eda14cbcSMatt Macy if (dsl_dataset_is_snapshot(ds))
802eda14cbcSMatt Macy return (SET_ERROR(EINVAL));
803eda14cbcSMatt Macy
804eda14cbcSMatt Macy for (dsl_bookmark_node_t *dbn = avl_first(&ds->ds_bookmarks);
805eda14cbcSMatt Macy dbn != NULL; dbn = AVL_NEXT(&ds->ds_bookmarks, dbn)) {
806eda14cbcSMatt Macy nvlist_t *out_props = fnvlist_alloc();
807eda14cbcSMatt Macy
808eda14cbcSMatt Macy dsl_bookmark_fetch_props(dp, &dbn->dbn_phys, props, out_props);
809eda14cbcSMatt Macy
810eda14cbcSMatt Macy fnvlist_add_nvlist(outnvl, dbn->dbn_name, out_props);
811eda14cbcSMatt Macy fnvlist_free(out_props);
812eda14cbcSMatt Macy }
813eda14cbcSMatt Macy return (0);
814eda14cbcSMatt Macy }
815eda14cbcSMatt Macy
816eda14cbcSMatt Macy /*
817eda14cbcSMatt Macy * Comparison func for ds_bookmarks AVL tree. We sort the bookmarks by
818eda14cbcSMatt Macy * their TXG, then by their FBN-ness. The "FBN-ness" component ensures
819eda14cbcSMatt Macy * that all bookmarks at the same TXG that HAS_FBN are adjacent, which
820eda14cbcSMatt Macy * dsl_bookmark_destroy_sync_impl() depends on. Note that there may be
821eda14cbcSMatt Macy * multiple bookmarks at the same TXG (with the same FBN-ness). In this
822eda14cbcSMatt Macy * case we differentiate them by an arbitrary metric (in this case,
823eda14cbcSMatt Macy * their names).
824eda14cbcSMatt Macy */
825eda14cbcSMatt Macy static int
dsl_bookmark_compare(const void * l,const void * r)826eda14cbcSMatt Macy dsl_bookmark_compare(const void *l, const void *r)
827eda14cbcSMatt Macy {
828eda14cbcSMatt Macy const dsl_bookmark_node_t *ldbn = l;
829eda14cbcSMatt Macy const dsl_bookmark_node_t *rdbn = r;
830eda14cbcSMatt Macy
831eda14cbcSMatt Macy int64_t cmp = TREE_CMP(ldbn->dbn_phys.zbm_creation_txg,
832eda14cbcSMatt Macy rdbn->dbn_phys.zbm_creation_txg);
833eda14cbcSMatt Macy if (likely(cmp))
834eda14cbcSMatt Macy return (cmp);
835eda14cbcSMatt Macy cmp = TREE_CMP((ldbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN),
836eda14cbcSMatt Macy (rdbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN));
837eda14cbcSMatt Macy if (likely(cmp))
838eda14cbcSMatt Macy return (cmp);
839eda14cbcSMatt Macy cmp = strcmp(ldbn->dbn_name, rdbn->dbn_name);
840eda14cbcSMatt Macy return (TREE_ISIGN(cmp));
841eda14cbcSMatt Macy }
842eda14cbcSMatt Macy
843eda14cbcSMatt Macy /*
844eda14cbcSMatt Macy * Cache this (head) dataset's bookmarks in the ds_bookmarks AVL tree.
845eda14cbcSMatt Macy */
846eda14cbcSMatt Macy int
dsl_bookmark_init_ds(dsl_dataset_t * ds)847eda14cbcSMatt Macy dsl_bookmark_init_ds(dsl_dataset_t *ds)
848eda14cbcSMatt Macy {
849eda14cbcSMatt Macy dsl_pool_t *dp = ds->ds_dir->dd_pool;
850eda14cbcSMatt Macy objset_t *mos = dp->dp_meta_objset;
851eda14cbcSMatt Macy
852eda14cbcSMatt Macy ASSERT(!ds->ds_is_snapshot);
853eda14cbcSMatt Macy
854eda14cbcSMatt Macy avl_create(&ds->ds_bookmarks, dsl_bookmark_compare,
855eda14cbcSMatt Macy sizeof (dsl_bookmark_node_t),
856eda14cbcSMatt Macy offsetof(dsl_bookmark_node_t, dbn_node));
857eda14cbcSMatt Macy
858eda14cbcSMatt Macy if (!dsl_dataset_is_zapified(ds))
859eda14cbcSMatt Macy return (0);
860eda14cbcSMatt Macy
861eda14cbcSMatt Macy int zaperr = zap_lookup(mos, ds->ds_object, DS_FIELD_BOOKMARK_NAMES,
862eda14cbcSMatt Macy sizeof (ds->ds_bookmarks_obj), 1, &ds->ds_bookmarks_obj);
863eda14cbcSMatt Macy if (zaperr == ENOENT)
864eda14cbcSMatt Macy return (0);
865eda14cbcSMatt Macy if (zaperr != 0)
866eda14cbcSMatt Macy return (zaperr);
867eda14cbcSMatt Macy
868eda14cbcSMatt Macy if (ds->ds_bookmarks_obj == 0)
869eda14cbcSMatt Macy return (0);
870eda14cbcSMatt Macy
871eda14cbcSMatt Macy int err = 0;
872eda14cbcSMatt Macy zap_cursor_t zc;
873*7a7741afSMartin Matuska zap_attribute_t *attr;
874eda14cbcSMatt Macy
875*7a7741afSMartin Matuska attr = zap_attribute_alloc();
876eda14cbcSMatt Macy for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj);
877*7a7741afSMartin Matuska (err = zap_cursor_retrieve(&zc, attr)) == 0;
878eda14cbcSMatt Macy zap_cursor_advance(&zc)) {
879eda14cbcSMatt Macy dsl_bookmark_node_t *dbn =
880*7a7741afSMartin Matuska dsl_bookmark_node_alloc(attr->za_name);
881eda14cbcSMatt Macy
882eda14cbcSMatt Macy err = dsl_bookmark_lookup_impl(ds,
883eda14cbcSMatt Macy dbn->dbn_name, &dbn->dbn_phys);
884eda14cbcSMatt Macy ASSERT3U(err, !=, ENOENT);
885eda14cbcSMatt Macy if (err != 0) {
886eda14cbcSMatt Macy kmem_free(dbn, sizeof (*dbn));
887eda14cbcSMatt Macy break;
888eda14cbcSMatt Macy }
889eda14cbcSMatt Macy avl_add(&ds->ds_bookmarks, dbn);
890eda14cbcSMatt Macy }
891eda14cbcSMatt Macy zap_cursor_fini(&zc);
892*7a7741afSMartin Matuska zap_attribute_free(attr);
893eda14cbcSMatt Macy if (err == ENOENT)
894eda14cbcSMatt Macy err = 0;
895eda14cbcSMatt Macy return (err);
896eda14cbcSMatt Macy }
897eda14cbcSMatt Macy
898eda14cbcSMatt Macy void
dsl_bookmark_fini_ds(dsl_dataset_t * ds)899eda14cbcSMatt Macy dsl_bookmark_fini_ds(dsl_dataset_t *ds)
900eda14cbcSMatt Macy {
901eda14cbcSMatt Macy void *cookie = NULL;
902eda14cbcSMatt Macy dsl_bookmark_node_t *dbn;
903eda14cbcSMatt Macy
904eda14cbcSMatt Macy if (ds->ds_is_snapshot)
905eda14cbcSMatt Macy return;
906eda14cbcSMatt Macy
907eda14cbcSMatt Macy while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) != NULL) {
908eda14cbcSMatt Macy spa_strfree(dbn->dbn_name);
909eda14cbcSMatt Macy mutex_destroy(&dbn->dbn_lock);
910eda14cbcSMatt Macy kmem_free(dbn, sizeof (*dbn));
911eda14cbcSMatt Macy }
912eda14cbcSMatt Macy avl_destroy(&ds->ds_bookmarks);
913eda14cbcSMatt Macy }
914eda14cbcSMatt Macy
915eda14cbcSMatt Macy /*
916eda14cbcSMatt Macy * Retrieve the bookmarks that exist in the specified dataset, and the
917eda14cbcSMatt Macy * requested properties of each bookmark.
918eda14cbcSMatt Macy *
919eda14cbcSMatt Macy * The "props" nvlist specifies which properties are requested.
920eda14cbcSMatt Macy * See lzc_get_bookmarks() for the list of valid properties.
921eda14cbcSMatt Macy */
922eda14cbcSMatt Macy int
dsl_get_bookmarks(const char * dsname,nvlist_t * props,nvlist_t * outnvl)923eda14cbcSMatt Macy dsl_get_bookmarks(const char *dsname, nvlist_t *props, nvlist_t *outnvl)
924eda14cbcSMatt Macy {
925eda14cbcSMatt Macy dsl_pool_t *dp;
926eda14cbcSMatt Macy dsl_dataset_t *ds;
927eda14cbcSMatt Macy int err;
928eda14cbcSMatt Macy
929eda14cbcSMatt Macy err = dsl_pool_hold(dsname, FTAG, &dp);
930eda14cbcSMatt Macy if (err != 0)
931eda14cbcSMatt Macy return (err);
932eda14cbcSMatt Macy err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
933eda14cbcSMatt Macy if (err != 0) {
934eda14cbcSMatt Macy dsl_pool_rele(dp, FTAG);
935eda14cbcSMatt Macy return (err);
936eda14cbcSMatt Macy }
937eda14cbcSMatt Macy
938eda14cbcSMatt Macy err = dsl_get_bookmarks_impl(ds, props, outnvl);
939eda14cbcSMatt Macy
940eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG);
941eda14cbcSMatt Macy dsl_pool_rele(dp, FTAG);
942eda14cbcSMatt Macy return (err);
943eda14cbcSMatt Macy }
944eda14cbcSMatt Macy
945eda14cbcSMatt Macy /*
946eda14cbcSMatt Macy * Retrieve all properties for a single bookmark in the given dataset.
947eda14cbcSMatt Macy */
948eda14cbcSMatt Macy int
dsl_get_bookmark_props(const char * dsname,const char * bmname,nvlist_t * props)949eda14cbcSMatt Macy dsl_get_bookmark_props(const char *dsname, const char *bmname, nvlist_t *props)
950eda14cbcSMatt Macy {
951eda14cbcSMatt Macy dsl_pool_t *dp;
952eda14cbcSMatt Macy dsl_dataset_t *ds;
953eda14cbcSMatt Macy zfs_bookmark_phys_t bmark_phys = { 0 };
954eda14cbcSMatt Macy int err;
955eda14cbcSMatt Macy
956eda14cbcSMatt Macy err = dsl_pool_hold(dsname, FTAG, &dp);
957eda14cbcSMatt Macy if (err != 0)
958eda14cbcSMatt Macy return (err);
959eda14cbcSMatt Macy err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
960eda14cbcSMatt Macy if (err != 0) {
961eda14cbcSMatt Macy dsl_pool_rele(dp, FTAG);
962eda14cbcSMatt Macy return (err);
963eda14cbcSMatt Macy }
964eda14cbcSMatt Macy
965eda14cbcSMatt Macy err = dsl_bookmark_lookup_impl(ds, bmname, &bmark_phys);
966eda14cbcSMatt Macy if (err != 0)
967eda14cbcSMatt Macy goto out;
968eda14cbcSMatt Macy
969eda14cbcSMatt Macy dsl_bookmark_fetch_props(dp, &bmark_phys, NULL, props);
970eda14cbcSMatt Macy out:
971eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG);
972eda14cbcSMatt Macy dsl_pool_rele(dp, FTAG);
973eda14cbcSMatt Macy return (err);
974eda14cbcSMatt Macy }
975eda14cbcSMatt Macy
976eda14cbcSMatt Macy typedef struct dsl_bookmark_destroy_arg {
977eda14cbcSMatt Macy nvlist_t *dbda_bmarks;
978eda14cbcSMatt Macy nvlist_t *dbda_success;
979eda14cbcSMatt Macy nvlist_t *dbda_errors;
980eda14cbcSMatt Macy } dsl_bookmark_destroy_arg_t;
981eda14cbcSMatt Macy
982eda14cbcSMatt Macy static void
dsl_bookmark_destroy_sync_impl(dsl_dataset_t * ds,const char * name,dmu_tx_t * tx)983eda14cbcSMatt Macy dsl_bookmark_destroy_sync_impl(dsl_dataset_t *ds, const char *name,
984eda14cbcSMatt Macy dmu_tx_t *tx)
985eda14cbcSMatt Macy {
986eda14cbcSMatt Macy objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
987eda14cbcSMatt Macy uint64_t bmark_zapobj = ds->ds_bookmarks_obj;
988eda14cbcSMatt Macy matchtype_t mt = 0;
989eda14cbcSMatt Macy uint64_t int_size, num_ints;
990eda14cbcSMatt Macy /*
991eda14cbcSMatt Macy * 'search' must be zeroed so that dbn_flags (which is used in
992eda14cbcSMatt Macy * dsl_bookmark_compare()) will be zeroed even if the on-disk
993eda14cbcSMatt Macy * (in ZAP) bookmark is shorter than offsetof(dbn_flags).
994eda14cbcSMatt Macy */
995eda14cbcSMatt Macy dsl_bookmark_node_t search = { 0 };
996eda14cbcSMatt Macy char realname[ZFS_MAX_DATASET_NAME_LEN];
997eda14cbcSMatt Macy
998eda14cbcSMatt Macy /*
999eda14cbcSMatt Macy * Find the real name of this bookmark, which may be different
1000eda14cbcSMatt Macy * from the given name if the dataset is case-insensitive. Then
1001eda14cbcSMatt Macy * use the real name to find the node in the ds_bookmarks AVL tree.
1002eda14cbcSMatt Macy */
1003eda14cbcSMatt Macy
1004eda14cbcSMatt Macy if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
1005eda14cbcSMatt Macy mt = MT_NORMALIZE;
1006eda14cbcSMatt Macy
1007eda14cbcSMatt Macy VERIFY0(zap_length(mos, bmark_zapobj, name, &int_size, &num_ints));
1008eda14cbcSMatt Macy
1009eda14cbcSMatt Macy ASSERT3U(int_size, ==, sizeof (uint64_t));
1010eda14cbcSMatt Macy
1011eda14cbcSMatt Macy if (num_ints * int_size > BOOKMARK_PHYS_SIZE_V1) {
1012eda14cbcSMatt Macy spa_feature_decr(dmu_objset_spa(mos),
1013eda14cbcSMatt Macy SPA_FEATURE_BOOKMARK_V2, tx);
1014eda14cbcSMatt Macy }
1015eda14cbcSMatt Macy VERIFY0(zap_lookup_norm(mos, bmark_zapobj, name, sizeof (uint64_t),
1016eda14cbcSMatt Macy num_ints, &search.dbn_phys, mt, realname, sizeof (realname), NULL));
1017eda14cbcSMatt Macy
1018eda14cbcSMatt Macy search.dbn_name = realname;
1019eda14cbcSMatt Macy dsl_bookmark_node_t *dbn = avl_find(&ds->ds_bookmarks, &search, NULL);
1020eda14cbcSMatt Macy ASSERT(dbn != NULL);
1021eda14cbcSMatt Macy
1022eda14cbcSMatt Macy if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
1023eda14cbcSMatt Macy /*
1024eda14cbcSMatt Macy * If this bookmark HAS_FBN, and it is before the most
1025eda14cbcSMatt Macy * recent snapshot, then its TXG is a key in the head's
1026eda14cbcSMatt Macy * deadlist (and all clones' heads' deadlists). If this is
1027eda14cbcSMatt Macy * the last thing keeping the key (i.e. there are no more
1028eda14cbcSMatt Macy * bookmarks with HAS_FBN at this TXG, and there is no
1029eda14cbcSMatt Macy * snapshot at this TXG), then remove the key.
1030eda14cbcSMatt Macy *
1031eda14cbcSMatt Macy * Note that this algorithm depends on ds_bookmarks being
1032eda14cbcSMatt Macy * sorted such that all bookmarks at the same TXG with
1033eda14cbcSMatt Macy * HAS_FBN are adjacent (with no non-HAS_FBN bookmarks
1034eda14cbcSMatt Macy * at the same TXG in between them). If this were not
1035eda14cbcSMatt Macy * the case, we would need to examine *all* bookmarks
1036eda14cbcSMatt Macy * at this TXG, rather than just the adjacent ones.
1037eda14cbcSMatt Macy */
1038eda14cbcSMatt Macy
1039eda14cbcSMatt Macy dsl_bookmark_node_t *dbn_prev =
1040eda14cbcSMatt Macy AVL_PREV(&ds->ds_bookmarks, dbn);
1041eda14cbcSMatt Macy dsl_bookmark_node_t *dbn_next =
1042eda14cbcSMatt Macy AVL_NEXT(&ds->ds_bookmarks, dbn);
1043eda14cbcSMatt Macy
1044eda14cbcSMatt Macy boolean_t more_bookmarks_at_this_txg =
1045eda14cbcSMatt Macy (dbn_prev != NULL && dbn_prev->dbn_phys.zbm_creation_txg ==
1046eda14cbcSMatt Macy dbn->dbn_phys.zbm_creation_txg &&
1047eda14cbcSMatt Macy (dbn_prev->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) ||
1048eda14cbcSMatt Macy (dbn_next != NULL && dbn_next->dbn_phys.zbm_creation_txg ==
1049eda14cbcSMatt Macy dbn->dbn_phys.zbm_creation_txg &&
1050eda14cbcSMatt Macy (dbn_next->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN));
1051eda14cbcSMatt Macy
1052eda14cbcSMatt Macy if (!(dbn->dbn_phys.zbm_flags & ZBM_FLAG_SNAPSHOT_EXISTS) &&
1053eda14cbcSMatt Macy !more_bookmarks_at_this_txg &&
1054eda14cbcSMatt Macy dbn->dbn_phys.zbm_creation_txg <
1055eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_prev_snap_txg) {
1056eda14cbcSMatt Macy dsl_dir_remove_clones_key(ds->ds_dir,
1057eda14cbcSMatt Macy dbn->dbn_phys.zbm_creation_txg, tx);
1058eda14cbcSMatt Macy dsl_deadlist_remove_key(&ds->ds_deadlist,
1059eda14cbcSMatt Macy dbn->dbn_phys.zbm_creation_txg, tx);
1060eda14cbcSMatt Macy }
1061eda14cbcSMatt Macy
1062eda14cbcSMatt Macy spa_feature_decr(dmu_objset_spa(mos),
1063eda14cbcSMatt Macy SPA_FEATURE_BOOKMARK_WRITTEN, tx);
1064eda14cbcSMatt Macy }
1065eda14cbcSMatt Macy
1066eda14cbcSMatt Macy if (dbn->dbn_phys.zbm_redaction_obj != 0) {
10672ad756a6SMartin Matuska dnode_t *rl;
10682ad756a6SMartin Matuska VERIFY0(dnode_hold(mos,
10692ad756a6SMartin Matuska dbn->dbn_phys.zbm_redaction_obj, FTAG, &rl));
10702ad756a6SMartin Matuska if (rl->dn_have_spill) {
10712ad756a6SMartin Matuska spa_feature_decr(dmu_objset_spa(mos),
10722ad756a6SMartin Matuska SPA_FEATURE_REDACTION_LIST_SPILL, tx);
10732ad756a6SMartin Matuska }
10742ad756a6SMartin Matuska dnode_rele(rl, FTAG);
1075eda14cbcSMatt Macy VERIFY0(dmu_object_free(mos,
1076eda14cbcSMatt Macy dbn->dbn_phys.zbm_redaction_obj, tx));
1077eda14cbcSMatt Macy spa_feature_decr(dmu_objset_spa(mos),
1078eda14cbcSMatt Macy SPA_FEATURE_REDACTION_BOOKMARKS, tx);
1079eda14cbcSMatt Macy }
1080eda14cbcSMatt Macy
1081eda14cbcSMatt Macy avl_remove(&ds->ds_bookmarks, dbn);
1082eda14cbcSMatt Macy spa_strfree(dbn->dbn_name);
1083eda14cbcSMatt Macy mutex_destroy(&dbn->dbn_lock);
1084eda14cbcSMatt Macy kmem_free(dbn, sizeof (*dbn));
1085eda14cbcSMatt Macy
1086eda14cbcSMatt Macy VERIFY0(zap_remove_norm(mos, bmark_zapobj, name, mt, tx));
1087eda14cbcSMatt Macy }
1088eda14cbcSMatt Macy
1089eda14cbcSMatt Macy static int
dsl_bookmark_destroy_check(void * arg,dmu_tx_t * tx)1090eda14cbcSMatt Macy dsl_bookmark_destroy_check(void *arg, dmu_tx_t *tx)
1091eda14cbcSMatt Macy {
1092eda14cbcSMatt Macy dsl_bookmark_destroy_arg_t *dbda = arg;
1093eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
1094eda14cbcSMatt Macy int rv = 0;
1095eda14cbcSMatt Macy
1096eda14cbcSMatt Macy ASSERT(nvlist_empty(dbda->dbda_success));
1097eda14cbcSMatt Macy ASSERT(nvlist_empty(dbda->dbda_errors));
1098eda14cbcSMatt Macy
1099eda14cbcSMatt Macy if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS))
1100eda14cbcSMatt Macy return (0);
1101eda14cbcSMatt Macy
1102eda14cbcSMatt Macy for (nvpair_t *pair = nvlist_next_nvpair(dbda->dbda_bmarks, NULL);
1103eda14cbcSMatt Macy pair != NULL; pair = nvlist_next_nvpair(dbda->dbda_bmarks, pair)) {
1104eda14cbcSMatt Macy const char *fullname = nvpair_name(pair);
1105eda14cbcSMatt Macy dsl_dataset_t *ds;
1106eda14cbcSMatt Macy zfs_bookmark_phys_t bm;
1107eda14cbcSMatt Macy int error;
1108eda14cbcSMatt Macy char *shortname;
1109eda14cbcSMatt Macy
1110eda14cbcSMatt Macy error = dsl_bookmark_hold_ds(dp, fullname, &ds,
1111eda14cbcSMatt Macy FTAG, &shortname);
1112eda14cbcSMatt Macy if (error == ENOENT) {
1113eda14cbcSMatt Macy /* ignore it; the bookmark is "already destroyed" */
1114eda14cbcSMatt Macy continue;
1115eda14cbcSMatt Macy }
1116eda14cbcSMatt Macy if (error == 0) {
1117eda14cbcSMatt Macy error = dsl_bookmark_lookup_impl(ds, shortname, &bm);
1118eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG);
1119eda14cbcSMatt Macy if (error == ESRCH) {
1120eda14cbcSMatt Macy /*
1121eda14cbcSMatt Macy * ignore it; the bookmark is
1122eda14cbcSMatt Macy * "already destroyed"
1123eda14cbcSMatt Macy */
1124eda14cbcSMatt Macy continue;
1125eda14cbcSMatt Macy }
1126eda14cbcSMatt Macy if (error == 0 && bm.zbm_redaction_obj != 0) {
1127eda14cbcSMatt Macy redaction_list_t *rl = NULL;
1128eda14cbcSMatt Macy error = dsl_redaction_list_hold_obj(tx->tx_pool,
1129eda14cbcSMatt Macy bm.zbm_redaction_obj, FTAG, &rl);
1130eda14cbcSMatt Macy if (error == ENOENT) {
1131eda14cbcSMatt Macy error = 0;
1132eda14cbcSMatt Macy } else if (error == 0 &&
1133eda14cbcSMatt Macy dsl_redaction_list_long_held(rl)) {
1134eda14cbcSMatt Macy error = SET_ERROR(EBUSY);
1135eda14cbcSMatt Macy }
1136eda14cbcSMatt Macy if (rl != NULL) {
1137eda14cbcSMatt Macy dsl_redaction_list_rele(rl, FTAG);
1138eda14cbcSMatt Macy }
1139eda14cbcSMatt Macy }
1140eda14cbcSMatt Macy }
1141eda14cbcSMatt Macy if (error == 0) {
1142eda14cbcSMatt Macy if (dmu_tx_is_syncing(tx)) {
1143eda14cbcSMatt Macy fnvlist_add_boolean(dbda->dbda_success,
1144eda14cbcSMatt Macy fullname);
1145eda14cbcSMatt Macy }
1146eda14cbcSMatt Macy } else {
1147eda14cbcSMatt Macy fnvlist_add_int32(dbda->dbda_errors, fullname, error);
1148eda14cbcSMatt Macy rv = error;
1149eda14cbcSMatt Macy }
1150eda14cbcSMatt Macy }
1151eda14cbcSMatt Macy return (rv);
1152eda14cbcSMatt Macy }
1153eda14cbcSMatt Macy
1154eda14cbcSMatt Macy static void
dsl_bookmark_destroy_sync(void * arg,dmu_tx_t * tx)1155eda14cbcSMatt Macy dsl_bookmark_destroy_sync(void *arg, dmu_tx_t *tx)
1156eda14cbcSMatt Macy {
1157eda14cbcSMatt Macy dsl_bookmark_destroy_arg_t *dbda = arg;
1158eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
1159eda14cbcSMatt Macy objset_t *mos = dp->dp_meta_objset;
1160eda14cbcSMatt Macy
1161eda14cbcSMatt Macy for (nvpair_t *pair = nvlist_next_nvpair(dbda->dbda_success, NULL);
1162eda14cbcSMatt Macy pair != NULL; pair = nvlist_next_nvpair(dbda->dbda_success, pair)) {
1163eda14cbcSMatt Macy dsl_dataset_t *ds;
1164eda14cbcSMatt Macy char *shortname;
1165eda14cbcSMatt Macy uint64_t zap_cnt;
1166eda14cbcSMatt Macy
1167eda14cbcSMatt Macy VERIFY0(dsl_bookmark_hold_ds(dp, nvpair_name(pair),
1168eda14cbcSMatt Macy &ds, FTAG, &shortname));
1169eda14cbcSMatt Macy dsl_bookmark_destroy_sync_impl(ds, shortname, tx);
1170eda14cbcSMatt Macy
1171eda14cbcSMatt Macy /*
1172eda14cbcSMatt Macy * If all of this dataset's bookmarks have been destroyed,
1173eda14cbcSMatt Macy * free the zap object and decrement the feature's use count.
1174eda14cbcSMatt Macy */
1175eda14cbcSMatt Macy VERIFY0(zap_count(mos, ds->ds_bookmarks_obj, &zap_cnt));
1176eda14cbcSMatt Macy if (zap_cnt == 0) {
1177eda14cbcSMatt Macy dmu_buf_will_dirty(ds->ds_dbuf, tx);
1178eda14cbcSMatt Macy VERIFY0(zap_destroy(mos, ds->ds_bookmarks_obj, tx));
1179eda14cbcSMatt Macy ds->ds_bookmarks_obj = 0;
1180eda14cbcSMatt Macy spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
1181eda14cbcSMatt Macy VERIFY0(zap_remove(mos, ds->ds_object,
1182eda14cbcSMatt Macy DS_FIELD_BOOKMARK_NAMES, tx));
1183eda14cbcSMatt Macy }
1184eda14cbcSMatt Macy
1185eda14cbcSMatt Macy spa_history_log_internal_ds(ds, "remove bookmark", tx,
1186eda14cbcSMatt Macy "name=%s", shortname);
1187eda14cbcSMatt Macy
1188eda14cbcSMatt Macy dsl_dataset_rele(ds, FTAG);
1189eda14cbcSMatt Macy }
1190eda14cbcSMatt Macy }
1191eda14cbcSMatt Macy
1192eda14cbcSMatt Macy /*
1193eda14cbcSMatt Macy * The bookmarks must all be in the same pool.
1194eda14cbcSMatt Macy */
1195eda14cbcSMatt Macy int
dsl_bookmark_destroy(nvlist_t * bmarks,nvlist_t * errors)1196eda14cbcSMatt Macy dsl_bookmark_destroy(nvlist_t *bmarks, nvlist_t *errors)
1197eda14cbcSMatt Macy {
1198eda14cbcSMatt Macy int rv;
1199eda14cbcSMatt Macy dsl_bookmark_destroy_arg_t dbda;
1200eda14cbcSMatt Macy nvpair_t *pair = nvlist_next_nvpair(bmarks, NULL);
1201eda14cbcSMatt Macy if (pair == NULL)
1202eda14cbcSMatt Macy return (0);
1203eda14cbcSMatt Macy
1204eda14cbcSMatt Macy dbda.dbda_bmarks = bmarks;
1205eda14cbcSMatt Macy dbda.dbda_errors = errors;
1206eda14cbcSMatt Macy dbda.dbda_success = fnvlist_alloc();
1207eda14cbcSMatt Macy
1208eda14cbcSMatt Macy rv = dsl_sync_task(nvpair_name(pair), dsl_bookmark_destroy_check,
1209eda14cbcSMatt Macy dsl_bookmark_destroy_sync, &dbda, fnvlist_num_pairs(bmarks),
1210eda14cbcSMatt Macy ZFS_SPACE_CHECK_RESERVED);
1211eda14cbcSMatt Macy fnvlist_free(dbda.dbda_success);
1212eda14cbcSMatt Macy return (rv);
1213eda14cbcSMatt Macy }
1214eda14cbcSMatt Macy
1215eda14cbcSMatt Macy /* Return B_TRUE if there are any long holds on this dataset. */
1216eda14cbcSMatt Macy boolean_t
dsl_redaction_list_long_held(redaction_list_t * rl)1217eda14cbcSMatt Macy dsl_redaction_list_long_held(redaction_list_t *rl)
1218eda14cbcSMatt Macy {
1219eda14cbcSMatt Macy return (!zfs_refcount_is_zero(&rl->rl_longholds));
1220eda14cbcSMatt Macy }
1221eda14cbcSMatt Macy
1222eda14cbcSMatt Macy void
dsl_redaction_list_long_hold(dsl_pool_t * dp,redaction_list_t * rl,const void * tag)1223a0b956f5SMartin Matuska dsl_redaction_list_long_hold(dsl_pool_t *dp, redaction_list_t *rl,
1224a0b956f5SMartin Matuska const void *tag)
1225eda14cbcSMatt Macy {
1226eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dp));
1227eda14cbcSMatt Macy (void) zfs_refcount_add(&rl->rl_longholds, tag);
1228eda14cbcSMatt Macy }
1229eda14cbcSMatt Macy
1230eda14cbcSMatt Macy void
dsl_redaction_list_long_rele(redaction_list_t * rl,const void * tag)1231a0b956f5SMartin Matuska dsl_redaction_list_long_rele(redaction_list_t *rl, const void *tag)
1232eda14cbcSMatt Macy {
1233eda14cbcSMatt Macy (void) zfs_refcount_remove(&rl->rl_longholds, tag);
1234eda14cbcSMatt Macy }
1235eda14cbcSMatt Macy
1236eda14cbcSMatt Macy static void
redaction_list_evict_sync(void * rlu)1237eda14cbcSMatt Macy redaction_list_evict_sync(void *rlu)
1238eda14cbcSMatt Macy {
1239eda14cbcSMatt Macy redaction_list_t *rl = rlu;
1240eda14cbcSMatt Macy zfs_refcount_destroy(&rl->rl_longholds);
1241eda14cbcSMatt Macy
1242eda14cbcSMatt Macy kmem_free(rl, sizeof (redaction_list_t));
1243eda14cbcSMatt Macy }
1244eda14cbcSMatt Macy
1245eda14cbcSMatt Macy void
dsl_redaction_list_rele(redaction_list_t * rl,const void * tag)1246a0b956f5SMartin Matuska dsl_redaction_list_rele(redaction_list_t *rl, const void *tag)
1247eda14cbcSMatt Macy {
12482ad756a6SMartin Matuska if (rl->rl_bonus != rl->rl_dbuf)
1249eda14cbcSMatt Macy dmu_buf_rele(rl->rl_dbuf, tag);
12502ad756a6SMartin Matuska dmu_buf_rele(rl->rl_bonus, tag);
1251eda14cbcSMatt Macy }
1252eda14cbcSMatt Macy
1253eda14cbcSMatt Macy int
dsl_redaction_list_hold_obj(dsl_pool_t * dp,uint64_t rlobj,const void * tag,redaction_list_t ** rlp)1254a0b956f5SMartin Matuska dsl_redaction_list_hold_obj(dsl_pool_t *dp, uint64_t rlobj, const void *tag,
1255eda14cbcSMatt Macy redaction_list_t **rlp)
1256eda14cbcSMatt Macy {
1257eda14cbcSMatt Macy objset_t *mos = dp->dp_meta_objset;
12582ad756a6SMartin Matuska dmu_buf_t *dbuf, *spill_dbuf;
1259eda14cbcSMatt Macy redaction_list_t *rl;
1260eda14cbcSMatt Macy int err;
1261eda14cbcSMatt Macy
1262eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(dp));
1263eda14cbcSMatt Macy
1264eda14cbcSMatt Macy err = dmu_bonus_hold(mos, rlobj, tag, &dbuf);
1265eda14cbcSMatt Macy if (err != 0)
1266eda14cbcSMatt Macy return (err);
1267eda14cbcSMatt Macy
1268eda14cbcSMatt Macy rl = dmu_buf_get_user(dbuf);
1269eda14cbcSMatt Macy if (rl == NULL) {
1270eda14cbcSMatt Macy redaction_list_t *winner = NULL;
1271eda14cbcSMatt Macy
1272eda14cbcSMatt Macy rl = kmem_zalloc(sizeof (redaction_list_t), KM_SLEEP);
12732ad756a6SMartin Matuska rl->rl_bonus = dbuf;
12742ad756a6SMartin Matuska if (dmu_spill_hold_existing(dbuf, tag, &spill_dbuf) == 0) {
12752ad756a6SMartin Matuska rl->rl_dbuf = spill_dbuf;
12762ad756a6SMartin Matuska } else {
1277eda14cbcSMatt Macy rl->rl_dbuf = dbuf;
12782ad756a6SMartin Matuska }
1279eda14cbcSMatt Macy rl->rl_object = rlobj;
12802ad756a6SMartin Matuska rl->rl_phys = rl->rl_dbuf->db_data;
1281eda14cbcSMatt Macy rl->rl_mos = dp->dp_meta_objset;
1282eda14cbcSMatt Macy zfs_refcount_create(&rl->rl_longholds);
1283eda14cbcSMatt Macy dmu_buf_init_user(&rl->rl_dbu, redaction_list_evict_sync, NULL,
12842ad756a6SMartin Matuska &rl->rl_bonus);
1285eda14cbcSMatt Macy if ((winner = dmu_buf_set_user_ie(dbuf, &rl->rl_dbu)) != NULL) {
1286eda14cbcSMatt Macy kmem_free(rl, sizeof (*rl));
1287eda14cbcSMatt Macy rl = winner;
1288eda14cbcSMatt Macy }
1289eda14cbcSMatt Macy }
1290eda14cbcSMatt Macy *rlp = rl;
1291eda14cbcSMatt Macy return (0);
1292eda14cbcSMatt Macy }
1293eda14cbcSMatt Macy
1294eda14cbcSMatt Macy /*
1295eda14cbcSMatt Macy * Snapshot ds is being destroyed.
1296eda14cbcSMatt Macy *
1297eda14cbcSMatt Macy * Adjust the "freed_before_next" of any bookmarks between this snap
1298eda14cbcSMatt Macy * and the previous snapshot, because their "next snapshot" is changing.
1299eda14cbcSMatt Macy *
1300eda14cbcSMatt Macy * If there are any bookmarks with HAS_FBN at this snapshot, remove
1301eda14cbcSMatt Macy * their HAS_SNAP flag (note: there can be at most one snapshot of
1302eda14cbcSMatt Macy * each filesystem at a given txg), and return B_TRUE. In this case
1303eda14cbcSMatt Macy * the caller can not remove the key in the deadlist at this TXG, because
1304eda14cbcSMatt Macy * the HAS_FBN bookmarks require the key be there.
1305eda14cbcSMatt Macy *
1306eda14cbcSMatt Macy * Returns B_FALSE if there are no bookmarks with HAS_FBN at this
1307eda14cbcSMatt Macy * snapshot's TXG. In this case the caller can remove the key in the
1308eda14cbcSMatt Macy * deadlist at this TXG.
1309eda14cbcSMatt Macy */
1310eda14cbcSMatt Macy boolean_t
dsl_bookmark_ds_destroyed(dsl_dataset_t * ds,dmu_tx_t * tx)1311eda14cbcSMatt Macy dsl_bookmark_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
1312eda14cbcSMatt Macy {
1313eda14cbcSMatt Macy dsl_pool_t *dp = ds->ds_dir->dd_pool;
1314eda14cbcSMatt Macy
1315eda14cbcSMatt Macy dsl_dataset_t *head, *next;
1316eda14cbcSMatt Macy VERIFY0(dsl_dataset_hold_obj(dp,
1317eda14cbcSMatt Macy dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &head));
1318eda14cbcSMatt Macy VERIFY0(dsl_dataset_hold_obj(dp,
1319eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &next));
1320eda14cbcSMatt Macy
1321eda14cbcSMatt Macy /*
1322eda14cbcSMatt Macy * Find the first bookmark that HAS_FBN at or after the
1323eda14cbcSMatt Macy * previous snapshot.
1324eda14cbcSMatt Macy */
1325eda14cbcSMatt Macy dsl_bookmark_node_t search = { 0 };
1326eda14cbcSMatt Macy avl_index_t idx;
1327eda14cbcSMatt Macy search.dbn_phys.zbm_creation_txg =
1328eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_prev_snap_txg;
1329eda14cbcSMatt Macy search.dbn_phys.zbm_flags = ZBM_FLAG_HAS_FBN;
1330eda14cbcSMatt Macy /*
1331eda14cbcSMatt Macy * The empty-string name can't be in the AVL, and it compares
1332eda14cbcSMatt Macy * before any entries with this TXG.
1333eda14cbcSMatt Macy */
1334a0b956f5SMartin Matuska search.dbn_name = (char *)"";
1335eda14cbcSMatt Macy VERIFY3P(avl_find(&head->ds_bookmarks, &search, &idx), ==, NULL);
1336eda14cbcSMatt Macy dsl_bookmark_node_t *dbn =
1337eda14cbcSMatt Macy avl_nearest(&head->ds_bookmarks, idx, AVL_AFTER);
1338eda14cbcSMatt Macy
1339eda14cbcSMatt Macy /*
1340eda14cbcSMatt Macy * Iterate over all bookmarks that are at or after the previous
1341eda14cbcSMatt Macy * snapshot, and before this (being deleted) snapshot. Adjust
1342eda14cbcSMatt Macy * their FBN based on their new next snapshot.
1343eda14cbcSMatt Macy */
1344eda14cbcSMatt Macy for (; dbn != NULL && dbn->dbn_phys.zbm_creation_txg <
1345eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_creation_txg;
1346eda14cbcSMatt Macy dbn = AVL_NEXT(&head->ds_bookmarks, dbn)) {
1347eda14cbcSMatt Macy if (!(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN))
1348eda14cbcSMatt Macy continue;
1349eda14cbcSMatt Macy /*
1350eda14cbcSMatt Macy * Increase our FBN by the amount of space that was live
1351eda14cbcSMatt Macy * (referenced) at the time of this bookmark (i.e.
1352eda14cbcSMatt Macy * birth <= zbm_creation_txg), and killed between this
1353eda14cbcSMatt Macy * (being deleted) snapshot and the next snapshot (i.e.
1354eda14cbcSMatt Macy * on the next snapshot's deadlist). (Space killed before
1355eda14cbcSMatt Macy * this are already on our FBN.)
1356eda14cbcSMatt Macy */
1357eda14cbcSMatt Macy uint64_t referenced, compressed, uncompressed;
1358eda14cbcSMatt Macy dsl_deadlist_space_range(&next->ds_deadlist,
1359eda14cbcSMatt Macy 0, dbn->dbn_phys.zbm_creation_txg,
1360eda14cbcSMatt Macy &referenced, &compressed, &uncompressed);
1361eda14cbcSMatt Macy dbn->dbn_phys.zbm_referenced_freed_before_next_snap +=
1362eda14cbcSMatt Macy referenced;
1363eda14cbcSMatt Macy dbn->dbn_phys.zbm_compressed_freed_before_next_snap +=
1364eda14cbcSMatt Macy compressed;
1365eda14cbcSMatt Macy dbn->dbn_phys.zbm_uncompressed_freed_before_next_snap +=
1366eda14cbcSMatt Macy uncompressed;
1367eda14cbcSMatt Macy VERIFY0(zap_update(dp->dp_meta_objset, head->ds_bookmarks_obj,
1368eda14cbcSMatt Macy dbn->dbn_name, sizeof (uint64_t),
1369eda14cbcSMatt Macy sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
1370eda14cbcSMatt Macy &dbn->dbn_phys, tx));
1371eda14cbcSMatt Macy }
1372eda14cbcSMatt Macy dsl_dataset_rele(next, FTAG);
1373eda14cbcSMatt Macy
1374eda14cbcSMatt Macy /*
1375eda14cbcSMatt Macy * There may be several bookmarks at this txg (the TXG of the
1376eda14cbcSMatt Macy * snapshot being deleted). We need to clear the SNAPSHOT_EXISTS
1377eda14cbcSMatt Macy * flag on all of them, and return TRUE if there is at least 1
1378eda14cbcSMatt Macy * bookmark here with HAS_FBN (thus preventing the deadlist
1379eda14cbcSMatt Macy * key from being removed).
1380eda14cbcSMatt Macy */
1381eda14cbcSMatt Macy boolean_t rv = B_FALSE;
1382eda14cbcSMatt Macy for (; dbn != NULL && dbn->dbn_phys.zbm_creation_txg ==
1383eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_creation_txg;
1384eda14cbcSMatt Macy dbn = AVL_NEXT(&head->ds_bookmarks, dbn)) {
1385eda14cbcSMatt Macy if (!(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) {
1386eda14cbcSMatt Macy ASSERT(!(dbn->dbn_phys.zbm_flags &
1387eda14cbcSMatt Macy ZBM_FLAG_SNAPSHOT_EXISTS));
1388eda14cbcSMatt Macy continue;
1389eda14cbcSMatt Macy }
1390eda14cbcSMatt Macy ASSERT(dbn->dbn_phys.zbm_flags & ZBM_FLAG_SNAPSHOT_EXISTS);
1391eda14cbcSMatt Macy dbn->dbn_phys.zbm_flags &= ~ZBM_FLAG_SNAPSHOT_EXISTS;
1392eda14cbcSMatt Macy VERIFY0(zap_update(dp->dp_meta_objset, head->ds_bookmarks_obj,
1393eda14cbcSMatt Macy dbn->dbn_name, sizeof (uint64_t),
1394eda14cbcSMatt Macy sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
1395eda14cbcSMatt Macy &dbn->dbn_phys, tx));
1396eda14cbcSMatt Macy rv = B_TRUE;
1397eda14cbcSMatt Macy }
1398eda14cbcSMatt Macy dsl_dataset_rele(head, FTAG);
1399eda14cbcSMatt Macy return (rv);
1400eda14cbcSMatt Macy }
1401eda14cbcSMatt Macy
1402eda14cbcSMatt Macy /*
1403eda14cbcSMatt Macy * A snapshot is being created of this (head) dataset.
1404eda14cbcSMatt Macy *
1405eda14cbcSMatt Macy * We don't keep keys in the deadlist for the most recent snapshot, or any
1406eda14cbcSMatt Macy * bookmarks at or after it, because there can't be any blocks on the
1407eda14cbcSMatt Macy * deadlist in this range. Now that the most recent snapshot is after
1408eda14cbcSMatt Macy * all bookmarks, we need to add these keys. Note that the caller always
1409eda14cbcSMatt Macy * adds a key at the previous snapshot, so we only add keys for bookmarks
1410eda14cbcSMatt Macy * after that.
1411eda14cbcSMatt Macy */
1412eda14cbcSMatt Macy void
dsl_bookmark_snapshotted(dsl_dataset_t * ds,dmu_tx_t * tx)1413eda14cbcSMatt Macy dsl_bookmark_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx)
1414eda14cbcSMatt Macy {
1415eda14cbcSMatt Macy uint64_t last_key_added = UINT64_MAX;
1416eda14cbcSMatt Macy for (dsl_bookmark_node_t *dbn = avl_last(&ds->ds_bookmarks);
1417eda14cbcSMatt Macy dbn != NULL && dbn->dbn_phys.zbm_creation_txg >
1418eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_prev_snap_txg;
1419eda14cbcSMatt Macy dbn = AVL_PREV(&ds->ds_bookmarks, dbn)) {
1420eda14cbcSMatt Macy uint64_t creation_txg = dbn->dbn_phys.zbm_creation_txg;
1421eda14cbcSMatt Macy ASSERT3U(creation_txg, <=, last_key_added);
1422eda14cbcSMatt Macy /*
1423eda14cbcSMatt Macy * Note, there may be multiple bookmarks at this TXG,
1424eda14cbcSMatt Macy * and we only want to add the key for this TXG once.
1425eda14cbcSMatt Macy * The ds_bookmarks AVL is sorted by TXG, so we will visit
1426eda14cbcSMatt Macy * these bookmarks in sequence.
1427eda14cbcSMatt Macy */
1428eda14cbcSMatt Macy if ((dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) &&
1429eda14cbcSMatt Macy creation_txg != last_key_added) {
1430eda14cbcSMatt Macy dsl_deadlist_add_key(&ds->ds_deadlist,
1431eda14cbcSMatt Macy creation_txg, tx);
1432eda14cbcSMatt Macy last_key_added = creation_txg;
1433eda14cbcSMatt Macy }
1434eda14cbcSMatt Macy }
1435eda14cbcSMatt Macy }
1436eda14cbcSMatt Macy
1437eda14cbcSMatt Macy /*
1438eda14cbcSMatt Macy * The next snapshot of the origin dataset has changed, due to
1439eda14cbcSMatt Macy * promote or clone swap. If there are any bookmarks at this dataset,
1440eda14cbcSMatt Macy * we need to update their zbm_*_freed_before_next_snap to reflect this.
1441eda14cbcSMatt Macy * The head dataset has the relevant bookmarks in ds_bookmarks.
1442eda14cbcSMatt Macy */
1443eda14cbcSMatt Macy void
dsl_bookmark_next_changed(dsl_dataset_t * head,dsl_dataset_t * origin,dmu_tx_t * tx)1444eda14cbcSMatt Macy dsl_bookmark_next_changed(dsl_dataset_t *head, dsl_dataset_t *origin,
1445eda14cbcSMatt Macy dmu_tx_t *tx)
1446eda14cbcSMatt Macy {
1447eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
1448eda14cbcSMatt Macy
1449eda14cbcSMatt Macy /*
1450eda14cbcSMatt Macy * Find the first bookmark that HAS_FBN at the origin snapshot.
1451eda14cbcSMatt Macy */
1452eda14cbcSMatt Macy dsl_bookmark_node_t search = { 0 };
1453eda14cbcSMatt Macy avl_index_t idx;
1454eda14cbcSMatt Macy search.dbn_phys.zbm_creation_txg =
1455eda14cbcSMatt Macy dsl_dataset_phys(origin)->ds_creation_txg;
1456eda14cbcSMatt Macy search.dbn_phys.zbm_flags = ZBM_FLAG_HAS_FBN;
1457eda14cbcSMatt Macy /*
1458eda14cbcSMatt Macy * The empty-string name can't be in the AVL, and it compares
1459eda14cbcSMatt Macy * before any entries with this TXG.
1460eda14cbcSMatt Macy */
1461a0b956f5SMartin Matuska search.dbn_name = (char *)"";
1462eda14cbcSMatt Macy VERIFY3P(avl_find(&head->ds_bookmarks, &search, &idx), ==, NULL);
1463eda14cbcSMatt Macy dsl_bookmark_node_t *dbn =
1464eda14cbcSMatt Macy avl_nearest(&head->ds_bookmarks, idx, AVL_AFTER);
1465eda14cbcSMatt Macy
1466eda14cbcSMatt Macy /*
1467eda14cbcSMatt Macy * Iterate over all bookmarks that are at the origin txg.
1468eda14cbcSMatt Macy * Adjust their FBN based on their new next snapshot.
1469eda14cbcSMatt Macy */
1470eda14cbcSMatt Macy for (; dbn != NULL && dbn->dbn_phys.zbm_creation_txg ==
1471eda14cbcSMatt Macy dsl_dataset_phys(origin)->ds_creation_txg &&
1472eda14cbcSMatt Macy (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN);
1473eda14cbcSMatt Macy dbn = AVL_NEXT(&head->ds_bookmarks, dbn)) {
1474eda14cbcSMatt Macy
1475eda14cbcSMatt Macy /*
1476eda14cbcSMatt Macy * Bookmark is at the origin, therefore its
1477eda14cbcSMatt Macy * "next dataset" is changing, so we need
1478eda14cbcSMatt Macy * to reset its FBN by recomputing it in
1479eda14cbcSMatt Macy * dsl_bookmark_set_phys().
1480eda14cbcSMatt Macy */
1481eda14cbcSMatt Macy ASSERT3U(dbn->dbn_phys.zbm_guid, ==,
1482eda14cbcSMatt Macy dsl_dataset_phys(origin)->ds_guid);
1483eda14cbcSMatt Macy ASSERT3U(dbn->dbn_phys.zbm_referenced_bytes_refd, ==,
1484eda14cbcSMatt Macy dsl_dataset_phys(origin)->ds_referenced_bytes);
1485eda14cbcSMatt Macy ASSERT(dbn->dbn_phys.zbm_flags &
1486eda14cbcSMatt Macy ZBM_FLAG_SNAPSHOT_EXISTS);
1487eda14cbcSMatt Macy /*
1488eda14cbcSMatt Macy * Save and restore the zbm_redaction_obj, which
1489eda14cbcSMatt Macy * is zeroed by dsl_bookmark_set_phys().
1490eda14cbcSMatt Macy */
1491eda14cbcSMatt Macy uint64_t redaction_obj =
1492eda14cbcSMatt Macy dbn->dbn_phys.zbm_redaction_obj;
1493eda14cbcSMatt Macy dsl_bookmark_set_phys(&dbn->dbn_phys, origin);
1494eda14cbcSMatt Macy dbn->dbn_phys.zbm_redaction_obj = redaction_obj;
1495eda14cbcSMatt Macy
1496eda14cbcSMatt Macy VERIFY0(zap_update(dp->dp_meta_objset, head->ds_bookmarks_obj,
1497eda14cbcSMatt Macy dbn->dbn_name, sizeof (uint64_t),
1498eda14cbcSMatt Macy sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
1499eda14cbcSMatt Macy &dbn->dbn_phys, tx));
1500eda14cbcSMatt Macy }
1501eda14cbcSMatt Macy }
1502eda14cbcSMatt Macy
1503eda14cbcSMatt Macy /*
1504eda14cbcSMatt Macy * This block is no longer referenced by this (head) dataset.
1505eda14cbcSMatt Macy *
1506eda14cbcSMatt Macy * Adjust the FBN of any bookmarks that reference this block, whose "next"
1507eda14cbcSMatt Macy * is the head dataset.
1508eda14cbcSMatt Macy */
1509eda14cbcSMatt Macy void
dsl_bookmark_block_killed(dsl_dataset_t * ds,const blkptr_t * bp,dmu_tx_t * tx)1510eda14cbcSMatt Macy dsl_bookmark_block_killed(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
1511eda14cbcSMatt Macy {
1512e92ffd9bSMartin Matuska (void) tx;
1513e92ffd9bSMartin Matuska
1514eda14cbcSMatt Macy /*
1515eda14cbcSMatt Macy * Iterate over bookmarks whose "next" is the head dataset.
1516eda14cbcSMatt Macy */
1517eda14cbcSMatt Macy for (dsl_bookmark_node_t *dbn = avl_last(&ds->ds_bookmarks);
1518eda14cbcSMatt Macy dbn != NULL && dbn->dbn_phys.zbm_creation_txg >=
1519eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_prev_snap_txg;
1520eda14cbcSMatt Macy dbn = AVL_PREV(&ds->ds_bookmarks, dbn)) {
1521eda14cbcSMatt Macy /*
1522eda14cbcSMatt Macy * If the block was live (referenced) at the time of this
1523eda14cbcSMatt Macy * bookmark, add its space to the bookmark's FBN.
1524eda14cbcSMatt Macy */
1525783d3ff6SMartin Matuska if (BP_GET_LOGICAL_BIRTH(bp) <=
1526783d3ff6SMartin Matuska dbn->dbn_phys.zbm_creation_txg &&
1527eda14cbcSMatt Macy (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)) {
1528eda14cbcSMatt Macy mutex_enter(&dbn->dbn_lock);
1529eda14cbcSMatt Macy dbn->dbn_phys.zbm_referenced_freed_before_next_snap +=
1530eda14cbcSMatt Macy bp_get_dsize_sync(dsl_dataset_get_spa(ds), bp);
1531eda14cbcSMatt Macy dbn->dbn_phys.zbm_compressed_freed_before_next_snap +=
1532eda14cbcSMatt Macy BP_GET_PSIZE(bp);
1533eda14cbcSMatt Macy dbn->dbn_phys.zbm_uncompressed_freed_before_next_snap +=
1534eda14cbcSMatt Macy BP_GET_UCSIZE(bp);
1535eda14cbcSMatt Macy /*
1536eda14cbcSMatt Macy * Changing the ZAP object here would be too
1537eda14cbcSMatt Macy * expensive. Also, we may be called from the zio
1538eda14cbcSMatt Macy * interrupt thread, which can't block on i/o.
1539eda14cbcSMatt Macy * Therefore, we mark this bookmark as dirty and
1540eda14cbcSMatt Macy * modify the ZAP once per txg, in
1541eda14cbcSMatt Macy * dsl_bookmark_sync_done().
1542eda14cbcSMatt Macy */
1543eda14cbcSMatt Macy dbn->dbn_dirty = B_TRUE;
1544eda14cbcSMatt Macy mutex_exit(&dbn->dbn_lock);
1545eda14cbcSMatt Macy }
1546eda14cbcSMatt Macy }
1547eda14cbcSMatt Macy }
1548eda14cbcSMatt Macy
1549eda14cbcSMatt Macy void
dsl_bookmark_sync_done(dsl_dataset_t * ds,dmu_tx_t * tx)1550eda14cbcSMatt Macy dsl_bookmark_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
1551eda14cbcSMatt Macy {
1552eda14cbcSMatt Macy dsl_pool_t *dp = dmu_tx_pool(tx);
1553eda14cbcSMatt Macy
1554eda14cbcSMatt Macy if (dsl_dataset_is_snapshot(ds))
1555eda14cbcSMatt Macy return;
1556eda14cbcSMatt Macy
1557eda14cbcSMatt Macy /*
1558eda14cbcSMatt Macy * We only dirty bookmarks that are at or after the most recent
1559eda14cbcSMatt Macy * snapshot. We can't create snapshots between
1560eda14cbcSMatt Macy * dsl_bookmark_block_killed() and dsl_bookmark_sync_done(), so we
1561eda14cbcSMatt Macy * don't need to look at any bookmarks before ds_prev_snap_txg.
1562eda14cbcSMatt Macy */
1563eda14cbcSMatt Macy for (dsl_bookmark_node_t *dbn = avl_last(&ds->ds_bookmarks);
1564eda14cbcSMatt Macy dbn != NULL && dbn->dbn_phys.zbm_creation_txg >=
1565eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_prev_snap_txg;
1566eda14cbcSMatt Macy dbn = AVL_PREV(&ds->ds_bookmarks, dbn)) {
1567eda14cbcSMatt Macy if (dbn->dbn_dirty) {
1568eda14cbcSMatt Macy /*
1569eda14cbcSMatt Macy * We only dirty nodes with HAS_FBN, therefore
1570eda14cbcSMatt Macy * we can always use the current bookmark struct size.
1571eda14cbcSMatt Macy */
1572eda14cbcSMatt Macy ASSERT(dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN);
1573eda14cbcSMatt Macy VERIFY0(zap_update(dp->dp_meta_objset,
1574eda14cbcSMatt Macy ds->ds_bookmarks_obj,
1575eda14cbcSMatt Macy dbn->dbn_name, sizeof (uint64_t),
1576eda14cbcSMatt Macy sizeof (zfs_bookmark_phys_t) / sizeof (uint64_t),
1577eda14cbcSMatt Macy &dbn->dbn_phys, tx));
1578eda14cbcSMatt Macy dbn->dbn_dirty = B_FALSE;
1579eda14cbcSMatt Macy }
1580eda14cbcSMatt Macy }
1581eda14cbcSMatt Macy #ifdef ZFS_DEBUG
1582eda14cbcSMatt Macy for (dsl_bookmark_node_t *dbn = avl_first(&ds->ds_bookmarks);
1583eda14cbcSMatt Macy dbn != NULL; dbn = AVL_NEXT(&ds->ds_bookmarks, dbn)) {
1584eda14cbcSMatt Macy ASSERT(!dbn->dbn_dirty);
1585eda14cbcSMatt Macy }
1586eda14cbcSMatt Macy #endif
1587eda14cbcSMatt Macy }
1588eda14cbcSMatt Macy
1589eda14cbcSMatt Macy /*
1590eda14cbcSMatt Macy * Return the TXG of the most recent bookmark (or 0 if there are no bookmarks).
1591eda14cbcSMatt Macy */
1592eda14cbcSMatt Macy uint64_t
dsl_bookmark_latest_txg(dsl_dataset_t * ds)1593eda14cbcSMatt Macy dsl_bookmark_latest_txg(dsl_dataset_t *ds)
1594eda14cbcSMatt Macy {
1595eda14cbcSMatt Macy ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
1596eda14cbcSMatt Macy dsl_bookmark_node_t *dbn = avl_last(&ds->ds_bookmarks);
1597eda14cbcSMatt Macy if (dbn == NULL)
1598eda14cbcSMatt Macy return (0);
1599eda14cbcSMatt Macy return (dbn->dbn_phys.zbm_creation_txg);
1600eda14cbcSMatt Macy }
1601eda14cbcSMatt Macy
1602eda14cbcSMatt Macy /*
1603eda14cbcSMatt Macy * Compare the redact_block_phys_t to the bookmark. If the last block in the
1604eda14cbcSMatt Macy * redact_block_phys_t is before the bookmark, return -1. If the first block in
1605eda14cbcSMatt Macy * the redact_block_phys_t is after the bookmark, return 1. Otherwise, the
1606eda14cbcSMatt Macy * bookmark is inside the range of the redact_block_phys_t, and we return 0.
1607eda14cbcSMatt Macy */
1608eda14cbcSMatt Macy static int
redact_block_zb_compare(redact_block_phys_t * first,zbookmark_phys_t * second)1609eda14cbcSMatt Macy redact_block_zb_compare(redact_block_phys_t *first,
1610eda14cbcSMatt Macy zbookmark_phys_t *second)
1611eda14cbcSMatt Macy {
1612eda14cbcSMatt Macy /*
1613eda14cbcSMatt Macy * If the block_phys is for a previous object, or the last block in the
1614eda14cbcSMatt Macy * block_phys is strictly before the block in the bookmark, the
1615eda14cbcSMatt Macy * block_phys is earlier.
1616eda14cbcSMatt Macy */
1617eda14cbcSMatt Macy if (first->rbp_object < second->zb_object ||
1618eda14cbcSMatt Macy (first->rbp_object == second->zb_object &&
1619eda14cbcSMatt Macy first->rbp_blkid + (redact_block_get_count(first) - 1) <
1620eda14cbcSMatt Macy second->zb_blkid)) {
1621eda14cbcSMatt Macy return (-1);
1622eda14cbcSMatt Macy }
1623eda14cbcSMatt Macy
1624eda14cbcSMatt Macy /*
1625eda14cbcSMatt Macy * If the bookmark is for a previous object, or the block in the
1626eda14cbcSMatt Macy * bookmark is strictly before the first block in the block_phys, the
1627eda14cbcSMatt Macy * bookmark is earlier.
1628eda14cbcSMatt Macy */
1629eda14cbcSMatt Macy if (first->rbp_object > second->zb_object ||
1630eda14cbcSMatt Macy (first->rbp_object == second->zb_object &&
1631eda14cbcSMatt Macy first->rbp_blkid > second->zb_blkid)) {
1632eda14cbcSMatt Macy return (1);
1633eda14cbcSMatt Macy }
1634eda14cbcSMatt Macy
1635eda14cbcSMatt Macy return (0);
1636eda14cbcSMatt Macy }
1637eda14cbcSMatt Macy
1638eda14cbcSMatt Macy /*
1639eda14cbcSMatt Macy * Traverse the redaction list in the provided object, and call the callback for
1640eda14cbcSMatt Macy * each entry we find. Don't call the callback for any records before resume.
1641eda14cbcSMatt Macy */
1642eda14cbcSMatt Macy int
dsl_redaction_list_traverse(redaction_list_t * rl,zbookmark_phys_t * resume,rl_traverse_callback_t cb,void * arg)1643eda14cbcSMatt Macy dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
1644eda14cbcSMatt Macy rl_traverse_callback_t cb, void *arg)
1645eda14cbcSMatt Macy {
1646eda14cbcSMatt Macy objset_t *mos = rl->rl_mos;
1647eda14cbcSMatt Macy int err = 0;
1648eda14cbcSMatt Macy
1649eda14cbcSMatt Macy if (rl->rl_phys->rlp_last_object != UINT64_MAX ||
1650eda14cbcSMatt Macy rl->rl_phys->rlp_last_blkid != UINT64_MAX) {
1651eda14cbcSMatt Macy /*
1652eda14cbcSMatt Macy * When we finish a send, we update the last object and offset
1653eda14cbcSMatt Macy * to UINT64_MAX. If a send fails partway through, the last
1654eda14cbcSMatt Macy * object and offset will have some other value, indicating how
1655eda14cbcSMatt Macy * far the send got. The redaction list must be complete before
1656eda14cbcSMatt Macy * it can be traversed, so return EINVAL if the last object and
1657eda14cbcSMatt Macy * blkid are not set to UINT64_MAX.
1658eda14cbcSMatt Macy */
1659eda14cbcSMatt Macy return (SET_ERROR(EINVAL));
1660eda14cbcSMatt Macy }
1661eda14cbcSMatt Macy
1662eda14cbcSMatt Macy /*
16637877fdebSMatt Macy * This allows us to skip the binary search and resume checking logic
16647877fdebSMatt Macy * below, if we're not resuming a redacted send.
1665eda14cbcSMatt Macy */
16667877fdebSMatt Macy if (ZB_IS_ZERO(resume))
16677877fdebSMatt Macy resume = NULL;
16687877fdebSMatt Macy
16697877fdebSMatt Macy /*
16707877fdebSMatt Macy * Binary search for the point to resume from.
16717877fdebSMatt Macy */
16727877fdebSMatt Macy uint64_t maxidx = rl->rl_phys->rlp_num_entries - 1;
16737877fdebSMatt Macy uint64_t minidx = 0;
16747877fdebSMatt Macy while (resume != NULL && maxidx > minidx) {
16757877fdebSMatt Macy redact_block_phys_t rbp = { 0 };
16767877fdebSMatt Macy ASSERT3U(maxidx, >, minidx);
16777877fdebSMatt Macy uint64_t mididx = minidx + ((maxidx - minidx) / 2);
16787877fdebSMatt Macy err = dmu_read(mos, rl->rl_object, mididx * sizeof (rbp),
16797877fdebSMatt Macy sizeof (rbp), &rbp, DMU_READ_NO_PREFETCH);
1680eda14cbcSMatt Macy if (err != 0)
1681eda14cbcSMatt Macy break;
1682eda14cbcSMatt Macy
16837877fdebSMatt Macy int cmp = redact_block_zb_compare(&rbp, resume);
1684eda14cbcSMatt Macy
16857877fdebSMatt Macy if (cmp == 0) {
16867877fdebSMatt Macy minidx = mididx;
1687eda14cbcSMatt Macy break;
16887877fdebSMatt Macy } else if (cmp > 0) {
16897877fdebSMatt Macy maxidx =
16907877fdebSMatt Macy (mididx == minidx ? minidx : mididx - 1);
16917877fdebSMatt Macy } else {
16927877fdebSMatt Macy minidx = mididx + 1;
16937877fdebSMatt Macy }
1694eda14cbcSMatt Macy }
1695eda14cbcSMatt Macy
16967877fdebSMatt Macy unsigned int bufsize = SPA_OLD_MAXBLOCKSIZE;
16977877fdebSMatt Macy redact_block_phys_t *buf = zio_data_buf_alloc(bufsize);
16987877fdebSMatt Macy
16997877fdebSMatt Macy unsigned int entries_per_buf = bufsize / sizeof (redact_block_phys_t);
17007877fdebSMatt Macy uint64_t start_block = minidx / entries_per_buf;
17017877fdebSMatt Macy err = dmu_read(mos, rl->rl_object, start_block * bufsize, bufsize, buf,
17027877fdebSMatt Macy DMU_READ_PREFETCH);
17037877fdebSMatt Macy
17047877fdebSMatt Macy for (uint64_t curidx = minidx;
1705eda14cbcSMatt Macy err == 0 && curidx < rl->rl_phys->rlp_num_entries;
1706eda14cbcSMatt Macy curidx++) {
1707eda14cbcSMatt Macy /*
1708eda14cbcSMatt Macy * We read in the redaction list one block at a time. Once we
1709eda14cbcSMatt Macy * finish with all the entries in a given block, we read in a
1710eda14cbcSMatt Macy * new one. The predictive prefetcher will take care of any
1711eda14cbcSMatt Macy * prefetching, and this code shouldn't be the bottleneck, so we
1712eda14cbcSMatt Macy * don't need to do manual prefetching.
1713eda14cbcSMatt Macy */
17147877fdebSMatt Macy if (curidx % entries_per_buf == 0) {
1715eda14cbcSMatt Macy err = dmu_read(mos, rl->rl_object, curidx *
1716eda14cbcSMatt Macy sizeof (*buf), bufsize, buf,
1717eda14cbcSMatt Macy DMU_READ_PREFETCH);
1718eda14cbcSMatt Macy if (err != 0)
1719eda14cbcSMatt Macy break;
1720eda14cbcSMatt Macy }
17217877fdebSMatt Macy redact_block_phys_t *rb = &buf[curidx % entries_per_buf];
1722eda14cbcSMatt Macy /*
1723eda14cbcSMatt Macy * If resume is non-null, we should either not send the data, or
1724eda14cbcSMatt Macy * null out resume so we don't have to keep doing these
1725eda14cbcSMatt Macy * comparisons.
1726eda14cbcSMatt Macy */
1727eda14cbcSMatt Macy if (resume != NULL) {
17287877fdebSMatt Macy /*
17297877fdebSMatt Macy * It is possible that after the binary search we got
17307877fdebSMatt Macy * a record before the resume point. There's two cases
17317877fdebSMatt Macy * where this can occur. If the record is the last
17327877fdebSMatt Macy * redaction record, and the resume point is after the
17337877fdebSMatt Macy * end of the redacted data, curidx will be the last
17347877fdebSMatt Macy * redaction record. In that case, the loop will end
17357877fdebSMatt Macy * after this iteration. The second case is if the
17367877fdebSMatt Macy * resume point is between two redaction records, the
17377877fdebSMatt Macy * binary search can return either the record before
17387877fdebSMatt Macy * or after the resume point. In that case, the next
17397877fdebSMatt Macy * iteration will be greater than the resume point.
17407877fdebSMatt Macy */
1741eda14cbcSMatt Macy if (redact_block_zb_compare(rb, resume) < 0) {
17427877fdebSMatt Macy ASSERT3U(curidx, ==, minidx);
1743eda14cbcSMatt Macy continue;
1744eda14cbcSMatt Macy } else {
1745eda14cbcSMatt Macy /*
1746eda14cbcSMatt Macy * If the place to resume is in the middle of
1747eda14cbcSMatt Macy * the range described by this
1748eda14cbcSMatt Macy * redact_block_phys, then modify the
1749eda14cbcSMatt Macy * redact_block_phys in memory so we generate
1750eda14cbcSMatt Macy * the right records.
1751eda14cbcSMatt Macy */
1752eda14cbcSMatt Macy if (resume->zb_object == rb->rbp_object &&
1753eda14cbcSMatt Macy resume->zb_blkid > rb->rbp_blkid) {
1754eda14cbcSMatt Macy uint64_t diff = resume->zb_blkid -
1755eda14cbcSMatt Macy rb->rbp_blkid;
1756eda14cbcSMatt Macy rb->rbp_blkid = resume->zb_blkid;
1757eda14cbcSMatt Macy redact_block_set_count(rb,
1758eda14cbcSMatt Macy redact_block_get_count(rb) - diff);
1759eda14cbcSMatt Macy }
1760eda14cbcSMatt Macy resume = NULL;
1761eda14cbcSMatt Macy }
1762eda14cbcSMatt Macy }
1763eda14cbcSMatt Macy
17647877fdebSMatt Macy if (cb(rb, arg) != 0) {
17657877fdebSMatt Macy err = EINTR;
1766eda14cbcSMatt Macy break;
1767eda14cbcSMatt Macy }
17687877fdebSMatt Macy }
1769eda14cbcSMatt Macy
1770eda14cbcSMatt Macy zio_data_buf_free(buf, bufsize);
1771eda14cbcSMatt Macy return (err);
1772eda14cbcSMatt Macy }
1773