xref: /freebsd/sys/contrib/openzfs/module/zfs/vdev_indirect_births.c (revision da5137abdf463bb5fee85061958a14dd12bc043e)
1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * CDDL HEADER START
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * This file and its contents are supplied under the terms of the
5eda14cbcSMatt Macy  * Common Development and Distribution License ("CDDL"), version 1.0.
6eda14cbcSMatt Macy  * You may only use this file in accordance with the terms of version
7eda14cbcSMatt Macy  * 1.0 of the CDDL.
8eda14cbcSMatt Macy  *
9eda14cbcSMatt Macy  * A full copy of the text of the CDDL should have accompanied this
10eda14cbcSMatt Macy  * source.  A copy of the CDDL is also available via the Internet at
11eda14cbcSMatt Macy  * http://www.illumos.org/license/CDDL.
12eda14cbcSMatt Macy  *
13eda14cbcSMatt Macy  * CDDL HEADER END
14eda14cbcSMatt Macy  */
15eda14cbcSMatt Macy 
16eda14cbcSMatt Macy /*
17eda14cbcSMatt Macy  * Copyright (c) 2015 by Delphix. All rights reserved.
18eda14cbcSMatt Macy  */
19eda14cbcSMatt Macy 
20eda14cbcSMatt Macy #include <sys/dmu_tx.h>
21eda14cbcSMatt Macy #include <sys/spa.h>
22eda14cbcSMatt Macy #include <sys/dmu.h>
23eda14cbcSMatt Macy #include <sys/dsl_pool.h>
24eda14cbcSMatt Macy #include <sys/vdev_indirect_births.h>
25eda14cbcSMatt Macy 
#ifdef ZFS_DEBUG
/*
 * Debug-build consistency check for a births handle.  Asserts that the
 * handle itself, its object number, objset, phys pointer and dbuf are all
 * set, and that the in-core entry array is present exactly when the
 * recorded entry count is non-zero.
 *
 * Always returns B_TRUE so that the call can itself be wrapped in ASSERT().
 */
static boolean_t
vdev_indirect_births_verify(vdev_indirect_births_t *vib)
{
	/* The handle must exist before any of its fields are inspected. */
	ASSERT(vib != NULL);

	/* Identity and backing-store fields must all be populated. */
	ASSERT(vib->vib_object != 0);
	ASSERT(vib->vib_objset != NULL);
	ASSERT(vib->vib_phys != NULL);
	ASSERT(vib->vib_dbuf != NULL);

	/* An entry array exists if and only if the count is non-zero. */
	EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);

	return (B_TRUE);
}
#else
/*
 * Without ZFS_DEBUG there is nothing to check: mention the argument inside
 * sizeof (so it is not evaluated) and yield B_TRUE.
 */
#define	vdev_indirect_births_verify(vib) ((void) sizeof (vib), B_TRUE)
#endif
44eda14cbcSMatt Macy 
45eda14cbcSMatt Macy uint64_t
vdev_indirect_births_count(vdev_indirect_births_t * vib)46eda14cbcSMatt Macy vdev_indirect_births_count(vdev_indirect_births_t *vib)
47eda14cbcSMatt Macy {
48eda14cbcSMatt Macy 	ASSERT(vdev_indirect_births_verify(vib));
49eda14cbcSMatt Macy 
50eda14cbcSMatt Macy 	return (vib->vib_phys->vib_count);
51eda14cbcSMatt Macy }
52eda14cbcSMatt Macy 
53eda14cbcSMatt Macy uint64_t
vdev_indirect_births_object(vdev_indirect_births_t * vib)54eda14cbcSMatt Macy vdev_indirect_births_object(vdev_indirect_births_t *vib)
55eda14cbcSMatt Macy {
56eda14cbcSMatt Macy 	ASSERT(vdev_indirect_births_verify(vib));
57eda14cbcSMatt Macy 
58eda14cbcSMatt Macy 	return (vib->vib_object);
59eda14cbcSMatt Macy }
60eda14cbcSMatt Macy 
61eda14cbcSMatt Macy static uint64_t
vdev_indirect_births_size_impl(vdev_indirect_births_t * vib)62eda14cbcSMatt Macy vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
63eda14cbcSMatt Macy {
64eda14cbcSMatt Macy 	return (vib->vib_phys->vib_count * sizeof (*vib->vib_entries));
65eda14cbcSMatt Macy }
66eda14cbcSMatt Macy 
67eda14cbcSMatt Macy void
vdev_indirect_births_close(vdev_indirect_births_t * vib)68eda14cbcSMatt Macy vdev_indirect_births_close(vdev_indirect_births_t *vib)
69eda14cbcSMatt Macy {
70eda14cbcSMatt Macy 	ASSERT(vdev_indirect_births_verify(vib));
71eda14cbcSMatt Macy 
72eda14cbcSMatt Macy 	if (vib->vib_phys->vib_count > 0) {
73eda14cbcSMatt Macy 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
74eda14cbcSMatt Macy 
75eda14cbcSMatt Macy 		vmem_free(vib->vib_entries, births_size);
76eda14cbcSMatt Macy 		vib->vib_entries = NULL;
77eda14cbcSMatt Macy 	}
78eda14cbcSMatt Macy 
79eda14cbcSMatt Macy 	dmu_buf_rele(vib->vib_dbuf, vib);
80eda14cbcSMatt Macy 
81eda14cbcSMatt Macy 	vib->vib_objset = NULL;
82eda14cbcSMatt Macy 	vib->vib_object = 0;
83eda14cbcSMatt Macy 	vib->vib_dbuf = NULL;
84eda14cbcSMatt Macy 	vib->vib_phys = NULL;
85eda14cbcSMatt Macy 
86eda14cbcSMatt Macy 	kmem_free(vib, sizeof (*vib));
87eda14cbcSMatt Macy }
88eda14cbcSMatt Macy 
89eda14cbcSMatt Macy uint64_t
vdev_indirect_births_alloc(objset_t * os,dmu_tx_t * tx)90eda14cbcSMatt Macy vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
91eda14cbcSMatt Macy {
92eda14cbcSMatt Macy 	ASSERT(dmu_tx_is_syncing(tx));
93eda14cbcSMatt Macy 
94eda14cbcSMatt Macy 	return (dmu_object_alloc(os,
95eda14cbcSMatt Macy 	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
96eda14cbcSMatt Macy 	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
97eda14cbcSMatt Macy 	    tx));
98eda14cbcSMatt Macy }
99eda14cbcSMatt Macy 
100eda14cbcSMatt Macy vdev_indirect_births_t *
vdev_indirect_births_open(objset_t * os,uint64_t births_object)101eda14cbcSMatt Macy vdev_indirect_births_open(objset_t *os, uint64_t births_object)
102eda14cbcSMatt Macy {
103eda14cbcSMatt Macy 	vdev_indirect_births_t *vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
104eda14cbcSMatt Macy 
105eda14cbcSMatt Macy 	vib->vib_objset = os;
106eda14cbcSMatt Macy 	vib->vib_object = births_object;
107eda14cbcSMatt Macy 
108eda14cbcSMatt Macy 	VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
109eda14cbcSMatt Macy 	vib->vib_phys = vib->vib_dbuf->db_data;
110eda14cbcSMatt Macy 
111eda14cbcSMatt Macy 	if (vib->vib_phys->vib_count > 0) {
112eda14cbcSMatt Macy 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
113eda14cbcSMatt Macy 		vib->vib_entries = vmem_alloc(births_size, KM_SLEEP);
114eda14cbcSMatt Macy 		VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
115eda14cbcSMatt Macy 		    births_size, vib->vib_entries, DMU_READ_PREFETCH));
116eda14cbcSMatt Macy 	}
117eda14cbcSMatt Macy 
118eda14cbcSMatt Macy 	ASSERT(vdev_indirect_births_verify(vib));
119eda14cbcSMatt Macy 
120eda14cbcSMatt Macy 	return (vib);
121eda14cbcSMatt Macy }
122eda14cbcSMatt Macy 
123eda14cbcSMatt Macy void
vdev_indirect_births_free(objset_t * os,uint64_t object,dmu_tx_t * tx)124eda14cbcSMatt Macy vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
125eda14cbcSMatt Macy {
126eda14cbcSMatt Macy 	VERIFY0(dmu_object_free(os, object, tx));
127eda14cbcSMatt Macy }
128eda14cbcSMatt Macy 
129eda14cbcSMatt Macy void
vdev_indirect_births_add_entry(vdev_indirect_births_t * vib,uint64_t max_offset,uint64_t txg,dmu_tx_t * tx)130eda14cbcSMatt Macy vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
131eda14cbcSMatt Macy     uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
132eda14cbcSMatt Macy {
133eda14cbcSMatt Macy 	vdev_indirect_birth_entry_phys_t vibe;
134eda14cbcSMatt Macy 	uint64_t old_size;
135eda14cbcSMatt Macy 	uint64_t new_size;
136eda14cbcSMatt Macy 	vdev_indirect_birth_entry_phys_t *new_entries;
137eda14cbcSMatt Macy 
138eda14cbcSMatt Macy 	ASSERT(dmu_tx_is_syncing(tx));
139eda14cbcSMatt Macy 	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
140eda14cbcSMatt Macy 	ASSERT(vdev_indirect_births_verify(vib));
141eda14cbcSMatt Macy 
142eda14cbcSMatt Macy 	dmu_buf_will_dirty(vib->vib_dbuf, tx);
143eda14cbcSMatt Macy 
144eda14cbcSMatt Macy 	vibe.vibe_offset = max_offset;
145eda14cbcSMatt Macy 	vibe.vibe_phys_birth_txg = txg;
146eda14cbcSMatt Macy 
147eda14cbcSMatt Macy 	old_size = vdev_indirect_births_size_impl(vib);
148eda14cbcSMatt Macy 	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
149eda14cbcSMatt Macy 	    &vibe, tx);
150eda14cbcSMatt Macy 	vib->vib_phys->vib_count++;
151eda14cbcSMatt Macy 	new_size = vdev_indirect_births_size_impl(vib);
152eda14cbcSMatt Macy 
153eda14cbcSMatt Macy 	new_entries = vmem_alloc(new_size, KM_SLEEP);
154eda14cbcSMatt Macy 	if (old_size > 0) {
155*da5137abSMartin Matuska 		memcpy(new_entries, vib->vib_entries, old_size);
156eda14cbcSMatt Macy 		vmem_free(vib->vib_entries, old_size);
157eda14cbcSMatt Macy 	}
158eda14cbcSMatt Macy 	new_entries[vib->vib_phys->vib_count - 1] = vibe;
159eda14cbcSMatt Macy 	vib->vib_entries = new_entries;
160eda14cbcSMatt Macy }
161eda14cbcSMatt Macy 
162eda14cbcSMatt Macy uint64_t
vdev_indirect_births_last_entry_txg(vdev_indirect_births_t * vib)163eda14cbcSMatt Macy vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
164eda14cbcSMatt Macy {
165eda14cbcSMatt Macy 	ASSERT(vdev_indirect_births_verify(vib));
166eda14cbcSMatt Macy 	ASSERT(vib->vib_phys->vib_count > 0);
167eda14cbcSMatt Macy 
168eda14cbcSMatt Macy 	vdev_indirect_birth_entry_phys_t *last =
169eda14cbcSMatt Macy 	    &vib->vib_entries[vib->vib_phys->vib_count - 1];
170eda14cbcSMatt Macy 	return (last->vibe_phys_birth_txg);
171eda14cbcSMatt Macy }
172eda14cbcSMatt Macy 
173eda14cbcSMatt Macy /*
174eda14cbcSMatt Macy  * Return the txg in which the given range was copied (i.e. its physical
175eda14cbcSMatt Macy  * birth txg).  The specified offset+asize must be contiguously mapped
176eda14cbcSMatt Macy  * (i.e. not a split block).
177eda14cbcSMatt Macy  *
178eda14cbcSMatt Macy  * The entries are sorted by increasing phys_birth, and also by increasing
179eda14cbcSMatt Macy  * offset.  We find the specified offset by binary search.  Note that we
180eda14cbcSMatt Macy  * can not use bsearch() because looking at each entry independently is
181eda14cbcSMatt Macy  * insufficient to find the correct entry.  Each entry implicitly relies
182eda14cbcSMatt Macy  * on the previous entry: an entry indicates that the offsets from the
183eda14cbcSMatt Macy  * end of the previous entry to the end of this entry were written in the
184eda14cbcSMatt Macy  * specified txg.
185eda14cbcSMatt Macy  */
186eda14cbcSMatt Macy uint64_t
vdev_indirect_births_physbirth(vdev_indirect_births_t * vib,uint64_t offset,uint64_t asize)187eda14cbcSMatt Macy vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
188eda14cbcSMatt Macy     uint64_t asize)
189eda14cbcSMatt Macy {
190eda14cbcSMatt Macy 	vdev_indirect_birth_entry_phys_t *base;
191eda14cbcSMatt Macy 	vdev_indirect_birth_entry_phys_t *last;
192eda14cbcSMatt Macy 
193eda14cbcSMatt Macy 	ASSERT(vdev_indirect_births_verify(vib));
194eda14cbcSMatt Macy 	ASSERT(vib->vib_phys->vib_count > 0);
195eda14cbcSMatt Macy 
196eda14cbcSMatt Macy 	base = vib->vib_entries;
197eda14cbcSMatt Macy 	last = base + vib->vib_phys->vib_count - 1;
198eda14cbcSMatt Macy 
199eda14cbcSMatt Macy 	ASSERT3U(offset, <, last->vibe_offset);
200eda14cbcSMatt Macy 
201eda14cbcSMatt Macy 	while (last >= base) {
202eda14cbcSMatt Macy 		vdev_indirect_birth_entry_phys_t *p =
203eda14cbcSMatt Macy 		    base + ((last - base) / 2);
204eda14cbcSMatt Macy 		if (offset >= p->vibe_offset) {
205eda14cbcSMatt Macy 			base = p + 1;
206eda14cbcSMatt Macy 		} else if (p == vib->vib_entries ||
207eda14cbcSMatt Macy 		    offset >= (p - 1)->vibe_offset) {
208eda14cbcSMatt Macy 			ASSERT3U(offset + asize, <=, p->vibe_offset);
209eda14cbcSMatt Macy 			return (p->vibe_phys_birth_txg);
210eda14cbcSMatt Macy 		} else {
211eda14cbcSMatt Macy 			last = p - 1;
212eda14cbcSMatt Macy 		}
213eda14cbcSMatt Macy 	}
214eda14cbcSMatt Macy 	ASSERT(!"offset not found");
215eda14cbcSMatt Macy 	return (-1);
216eda14cbcSMatt Macy }
217eda14cbcSMatt Macy 
#if defined(_KERNEL)
/* Public entry points of this file, exported only when _KERNEL is defined. */
EXPORT_SYMBOL(vdev_indirect_births_add_entry);
EXPORT_SYMBOL(vdev_indirect_births_alloc);
EXPORT_SYMBOL(vdev_indirect_births_close);
EXPORT_SYMBOL(vdev_indirect_births_count);
EXPORT_SYMBOL(vdev_indirect_births_free);
EXPORT_SYMBOL(vdev_indirect_births_last_entry_txg);
EXPORT_SYMBOL(vdev_indirect_births_object);
EXPORT_SYMBOL(vdev_indirect_births_open);
EXPORT_SYMBOL(vdev_indirect_births_physbirth);
#endif
229