xref: /illumos-gate/usr/src/uts/common/fs/zfs/vdev_indirect_births.c (revision d48be21240dfd051b689384ce2b23479d757f2d8)
1 /*
2  * CDDL HEADER START
3  *
4  * This file and its contents are supplied under the terms of the
5  * Common Development and Distribution License ("CDDL"), version 1.0.
6  * You may only use this file in accordance with the terms of version
7  * 1.0 of the CDDL.
8  *
9  * A full copy of the text of the CDDL should have accompanied this
10  * source.  A copy of the CDDL is also available via the Internet at
11  * http://www.illumos.org/license/CDDL.
12  *
13  * CDDL HEADER END
14  */
15 
16 /*
17  * Copyright (c) 2015 by Delphix. All rights reserved.
18  */
19 
20 #include <sys/dmu_tx.h>
21 #include <sys/spa.h>
22 #include <sys/dmu.h>
23 #include <sys/dsl_pool.h>
24 #include <sys/vdev_indirect_births.h>
25 
26 static boolean_t
27 vdev_indirect_births_verify(vdev_indirect_births_t *vib)
28 {
29 	ASSERT(vib != NULL);
30 
31 	ASSERT(vib->vib_object != 0);
32 	ASSERT(vib->vib_objset != NULL);
33 	ASSERT(vib->vib_phys != NULL);
34 	ASSERT(vib->vib_dbuf != NULL);
35 
36 	EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);
37 
38 	return (B_TRUE);
39 }
40 
41 uint64_t
42 vdev_indirect_births_count(vdev_indirect_births_t *vib)
43 {
44 	ASSERT(vdev_indirect_births_verify(vib));
45 
46 	return (vib->vib_phys->vib_count);
47 }
48 
49 uint64_t
50 vdev_indirect_births_object(vdev_indirect_births_t *vib)
51 {
52 	ASSERT(vdev_indirect_births_verify(vib));
53 
54 	return (vib->vib_object);
55 }
56 
57 static uint64_t
58 vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
59 {
60 	return (vib->vib_phys->vib_count * sizeof (*vib->vib_entries));
61 }
62 
63 void
64 vdev_indirect_births_close(vdev_indirect_births_t *vib)
65 {
66 	ASSERT(vdev_indirect_births_verify(vib));
67 
68 	if (vib->vib_phys->vib_count > 0) {
69 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
70 
71 		kmem_free(vib->vib_entries, births_size);
72 		vib->vib_entries = NULL;
73 	}
74 
75 	dmu_buf_rele(vib->vib_dbuf, vib);
76 
77 	vib->vib_objset = NULL;
78 	vib->vib_object = 0;
79 	vib->vib_dbuf = NULL;
80 	vib->vib_phys = NULL;
81 
82 	kmem_free(vib, sizeof (*vib));
83 }
84 
85 uint64_t
86 vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
87 {
88 	ASSERT(dmu_tx_is_syncing(tx));
89 
90 	return (dmu_object_alloc(os,
91 	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
92 	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
93 	    tx));
94 }
95 
96 vdev_indirect_births_t *
97 vdev_indirect_births_open(objset_t *os, uint64_t births_object)
98 {
99 	vdev_indirect_births_t *vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
100 
101 	vib->vib_objset = os;
102 	vib->vib_object = births_object;
103 
104 	VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
105 	vib->vib_phys = vib->vib_dbuf->db_data;
106 
107 	if (vib->vib_phys->vib_count > 0) {
108 		uint64_t births_size = vdev_indirect_births_size_impl(vib);
109 		vib->vib_entries = kmem_alloc(births_size, KM_SLEEP);
110 		VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
111 		    births_size, vib->vib_entries, DMU_READ_PREFETCH));
112 	}
113 
114 	ASSERT(vdev_indirect_births_verify(vib));
115 
116 	return (vib);
117 }
118 
119 void
120 vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
121 {
122 	VERIFY0(dmu_object_free(os, object, tx));
123 }
124 
125 void
126 vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
127     uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
128 {
129 	vdev_indirect_birth_entry_phys_t vibe;
130 	uint64_t old_size;
131 	uint64_t new_size;
132 	vdev_indirect_birth_entry_phys_t *new_entries;
133 
134 	ASSERT(dmu_tx_is_syncing(tx));
135 	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
136 	ASSERT(vdev_indirect_births_verify(vib));
137 
138 	dmu_buf_will_dirty(vib->vib_dbuf, tx);
139 
140 	vibe.vibe_offset = max_offset;
141 	vibe.vibe_phys_birth_txg = txg;
142 
143 	old_size = vdev_indirect_births_size_impl(vib);
144 	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
145 	    &vibe, tx);
146 	vib->vib_phys->vib_count++;
147 	new_size = vdev_indirect_births_size_impl(vib);
148 
149 	new_entries = kmem_alloc(new_size, KM_SLEEP);
150 	if (old_size > 0) {
151 		bcopy(vib->vib_entries, new_entries, old_size);
152 		kmem_free(vib->vib_entries, old_size);
153 	}
154 	new_entries[vib->vib_phys->vib_count - 1] = vibe;
155 	vib->vib_entries = new_entries;
156 }
157 
158 uint64_t
159 vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
160 {
161 	ASSERT(vdev_indirect_births_verify(vib));
162 	ASSERT(vib->vib_phys->vib_count > 0);
163 
164 	vdev_indirect_birth_entry_phys_t *last =
165 	    &vib->vib_entries[vib->vib_phys->vib_count - 1];
166 	return (last->vibe_phys_birth_txg);
167 }
168 
169 /*
170  * Return the txg in which the given range was copied (i.e. its physical
171  * birth txg).  The specified offset+asize must be contiguously mapped
172  * (i.e. not a split block).
173  *
174  * The entries are sorted by increasing phys_birth, and also by increasing
175  * offset.  We find the specified offset by binary search.  Note that we
176  * can not use bsearch() because looking at each entry independently is
177  * insufficient to find the correct entry.  Each entry implicitly relies
178  * on the previous entry: an entry indicates that the offsets from the
179  * end of the previous entry to the end of this entry were written in the
180  * specified txg.
181  */
182 uint64_t
183 vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
184     uint64_t asize)
185 {
186 	vdev_indirect_birth_entry_phys_t *base;
187 	vdev_indirect_birth_entry_phys_t *last;
188 
189 	ASSERT(vdev_indirect_births_verify(vib));
190 	ASSERT(vib->vib_phys->vib_count > 0);
191 
192 	base = vib->vib_entries;
193 	last = base + vib->vib_phys->vib_count - 1;
194 
195 	ASSERT3U(offset, <, last->vibe_offset);
196 
197 	while (last >= base) {
198 		vdev_indirect_birth_entry_phys_t *p =
199 		    base + ((last - base) / 2);
200 		if (offset >= p->vibe_offset) {
201 			base = p + 1;
202 		} else if (p == vib->vib_entries ||
203 		    offset >= (p - 1)->vibe_offset) {
204 			ASSERT3U(offset + asize, <=, p->vibe_offset);
205 			return (p->vibe_phys_birth_txg);
206 		} else {
207 			last = p - 1;
208 		}
209 	}
210 	ASSERT(!"offset not found");
211 	return (-1);
212 }
213