1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/bplist.h> 27 #include <sys/zfs_context.h> 28 29 static int 30 bplist_hold(bplist_t *bpl) 31 { 32 ASSERT(MUTEX_HELD(&bpl->bpl_lock)); 33 if (bpl->bpl_dbuf == NULL) { 34 int err = dmu_bonus_hold(bpl->bpl_mos, 35 bpl->bpl_object, bpl, &bpl->bpl_dbuf); 36 if (err) 37 return (err); 38 bpl->bpl_phys = bpl->bpl_dbuf->db_data; 39 } 40 return (0); 41 } 42 43 uint64_t 44 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx) 45 { 46 int size; 47 48 size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ? 49 BPLIST_SIZE_V0 : sizeof (bplist_phys_t); 50 51 return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize, 52 DMU_OT_BPLIST_HDR, size, tx)); 53 } 54 55 void 56 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx) 57 { 58 VERIFY(dmu_object_free(mos, object, tx) == 0); 59 } 60 61 int 62 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object) 63 { 64 dmu_object_info_t doi; 65 int err; 66 67 err = dmu_object_info(mos, object, &doi); 68 if (err) 69 return (err); 70 71 mutex_enter(&bpl->bpl_lock); 72 73 ASSERT(bpl->bpl_dbuf == NULL); 74 ASSERT(bpl->bpl_phys == NULL); 75 ASSERT(bpl->bpl_cached_dbuf == NULL); 76 ASSERT(bpl->bpl_queue == NULL); 77 ASSERT(object != 0); 78 ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST); 79 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR); 80 81 bpl->bpl_mos = mos; 82 bpl->bpl_object = object; 83 bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1); 84 bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT; 85 bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t)); 86 87 mutex_exit(&bpl->bpl_lock); 88 return (0); 89 } 90 91 void 92 bplist_close(bplist_t *bpl) 93 { 94 mutex_enter(&bpl->bpl_lock); 95 96 ASSERT(bpl->bpl_queue == NULL); 97 98 if (bpl->bpl_cached_dbuf) { 99 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); 100 bpl->bpl_cached_dbuf = NULL; 101 } 102 if (bpl->bpl_dbuf) { 103 dmu_buf_rele(bpl->bpl_dbuf, bpl); 104 bpl->bpl_dbuf = NULL; 105 bpl->bpl_phys = NULL; 106 } 107 108 mutex_exit(&bpl->bpl_lock); 109 } 110 111 boolean_t 112 bplist_empty(bplist_t *bpl) 113 { 114 boolean_t rv; 115 116 if (bpl->bpl_object == 0) 117 return (B_TRUE); 118 119 mutex_enter(&bpl->bpl_lock); 120 VERIFY(0 == bplist_hold(bpl)); /* XXX */ 121 rv = (bpl->bpl_phys->bpl_entries == 0); 122 mutex_exit(&bpl->bpl_lock); 123 124 return (rv); 125 } 126 127 static int 128 bplist_cache(bplist_t *bpl, uint64_t blkid) 129 { 130 int err = 0; 131 132 if (bpl->bpl_cached_dbuf == NULL || 133 bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) { 134 if (bpl->bpl_cached_dbuf != NULL) 135 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); 136 err = dmu_buf_hold(bpl->bpl_mos, 137 bpl->bpl_object, blkid << bpl->bpl_blockshift, 138 bpl, &bpl->bpl_cached_dbuf); 139 ASSERT(err || bpl->bpl_cached_dbuf->db_size == 140 1ULL << bpl->bpl_blockshift); 141 } 142 return (err); 143 } 144 145 int 146 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp) 147 { 148 uint64_t blk, off; 149 blkptr_t *bparray; 150 int err; 151 152 mutex_enter(&bpl->bpl_lock); 153 154 err = bplist_hold(bpl); 155 if (err) { 156 mutex_exit(&bpl->bpl_lock); 157 return (err); 158 } 159 160 if (*itorp >= bpl->bpl_phys->bpl_entries) { 161 mutex_exit(&bpl->bpl_lock); 162 return (ENOENT); 163 } 164 165 blk = *itorp >> bpl->bpl_bpshift; 166 off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift); 167 168 err = bplist_cache(bpl, blk); 169 if (err) { 170 mutex_exit(&bpl->bpl_lock); 171 return (err); 172 } 173 174 bparray = bpl->bpl_cached_dbuf->db_data; 175 *bp = bparray[off]; 176 (*itorp)++; 177 mutex_exit(&bpl->bpl_lock); 178 return (0); 179 } 180 181 int 182 bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx) 183 { 184 uint64_t blk, off; 185 blkptr_t *bparray; 186 int err; 187 188 ASSERT(!BP_IS_HOLE(bp)); 189 mutex_enter(&bpl->bpl_lock); 190 err = bplist_hold(bpl); 191 if (err) 192 return (err); 193 194 blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift; 195 off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift); 196 197 err = bplist_cache(bpl, blk); 198 if (err) { 199 mutex_exit(&bpl->bpl_lock); 200 return (err); 201 } 202 203 dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx); 204 bparray = bpl->bpl_cached_dbuf->db_data; 205 bparray[off] = *bp; 206 207 /* We never need the fill count. */ 208 bparray[off].blk_fill = 0; 209 210 /* The bplist will compress better if we can leave off the checksum */ 211 bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum)); 212 213 dmu_buf_will_dirty(bpl->bpl_dbuf, tx); 214 bpl->bpl_phys->bpl_entries++; 215 bpl->bpl_phys->bpl_bytes += 216 bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp); 217 if (bpl->bpl_havecomp) { 218 bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp); 219 bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp); 220 } 221 mutex_exit(&bpl->bpl_lock); 222 223 return (0); 224 } 225 226 /* 227 * Deferred entry; will be written later by bplist_sync(). 228 */ 229 void 230 bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp) 231 { 232 bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP); 233 234 ASSERT(!BP_IS_HOLE(bp)); 235 mutex_enter(&bpl->bpl_lock); 236 bpq->bpq_blk = *bp; 237 bpq->bpq_next = bpl->bpl_queue; 238 bpl->bpl_queue = bpq; 239 mutex_exit(&bpl->bpl_lock); 240 } 241 242 void 243 bplist_sync(bplist_t *bpl, dmu_tx_t *tx) 244 { 245 bplist_q_t *bpq; 246 247 mutex_enter(&bpl->bpl_lock); 248 while ((bpq = bpl->bpl_queue) != NULL) { 249 bpl->bpl_queue = bpq->bpq_next; 250 mutex_exit(&bpl->bpl_lock); 251 VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx)); 252 kmem_free(bpq, sizeof (*bpq)); 253 mutex_enter(&bpl->bpl_lock); 254 } 255 mutex_exit(&bpl->bpl_lock); 256 } 257 258 void 259 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx) 260 { 261 mutex_enter(&bpl->bpl_lock); 262 ASSERT3P(bpl->bpl_queue, ==, NULL); 263 VERIFY(0 == bplist_hold(bpl)); 264 dmu_buf_will_dirty(bpl->bpl_dbuf, tx); 265 VERIFY(0 == dmu_free_range(bpl->bpl_mos, 266 bpl->bpl_object, 0, -1ULL, tx)); 267 bpl->bpl_phys->bpl_entries = 0; 268 bpl->bpl_phys->bpl_bytes = 0; 269 if (bpl->bpl_havecomp) { 270 bpl->bpl_phys->bpl_comp = 0; 271 bpl->bpl_phys->bpl_uncomp = 0; 272 } 273 mutex_exit(&bpl->bpl_lock); 274 } 275 276 int 277 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 278 { 279 int err; 280 281 mutex_enter(&bpl->bpl_lock); 282 283 err = bplist_hold(bpl); 284 if (err) { 285 mutex_exit(&bpl->bpl_lock); 286 return (err); 287 } 288 289 *usedp = bpl->bpl_phys->bpl_bytes; 290 if (bpl->bpl_havecomp) { 291 *compp = bpl->bpl_phys->bpl_comp; 292 *uncompp = bpl->bpl_phys->bpl_uncomp; 293 } 294 mutex_exit(&bpl->bpl_lock); 295 296 if (!bpl->bpl_havecomp) { 297 uint64_t itor = 0, comp = 0, uncomp = 0; 298 blkptr_t bp; 299 300 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { 301 comp += BP_GET_PSIZE(&bp); 302 uncomp += BP_GET_UCSIZE(&bp); 303 } 304 if (err == ENOENT) 305 err = 0; 306 *compp = comp; 307 *uncompp = uncomp; 308 } 309 310 return (err); 311 } 312 313 /* 314 * Return (in *dasizep) the amount of space on the deadlist which is: 315 * mintxg < blk_birth <= maxtxg 316 */ 317 int 318 bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg, 319 uint64_t *dasizep) 320 { 321 uint64_t size = 0; 322 uint64_t itor = 0; 323 blkptr_t bp; 324 int err; 325 326 /* 327 * As an optimization, if they want the whole txg range, just 328 * get bpl_bytes rather than iterating over the bps. 329 */ 330 if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) { 331 mutex_enter(&bpl->bpl_lock); 332 err = bplist_hold(bpl); 333 if (err == 0) 334 *dasizep = bpl->bpl_phys->bpl_bytes; 335 mutex_exit(&bpl->bpl_lock); 336 return (err); 337 } 338 339 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { 340 if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) { 341 size += 342 bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), &bp); 343 } 344 } 345 if (err == ENOENT) 346 err = 0; 347 *dasizep = size; 348 return (err); 349 } 350