1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/bplist.h> 27 #include <sys/zfs_context.h> 28 29 void 30 bplist_init(bplist_t *bpl) 31 { 32 bzero(bpl, sizeof (*bpl)); 33 mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL); 34 } 35 36 void 37 bplist_fini(bplist_t *bpl) 38 { 39 ASSERT(bpl->bpl_queue == NULL); 40 mutex_destroy(&bpl->bpl_lock); 41 } 42 43 static int 44 bplist_hold(bplist_t *bpl) 45 { 46 ASSERT(MUTEX_HELD(&bpl->bpl_lock)); 47 if (bpl->bpl_dbuf == NULL) { 48 int err = dmu_bonus_hold(bpl->bpl_mos, 49 bpl->bpl_object, bpl, &bpl->bpl_dbuf); 50 if (err) 51 return (err); 52 bpl->bpl_phys = bpl->bpl_dbuf->db_data; 53 } 54 return (0); 55 } 56 57 uint64_t 58 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx) 59 { 60 int size; 61 62 size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ? 63 BPLIST_SIZE_V0 : sizeof (bplist_phys_t); 64 65 return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize, 66 DMU_OT_BPLIST_HDR, size, tx)); 67 } 68 69 void 70 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx) 71 { 72 VERIFY(dmu_object_free(mos, object, tx) == 0); 73 } 74 75 int 76 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object) 77 { 78 dmu_object_info_t doi; 79 int err; 80 81 err = dmu_object_info(mos, object, &doi); 82 if (err) 83 return (err); 84 85 mutex_enter(&bpl->bpl_lock); 86 87 ASSERT(bpl->bpl_dbuf == NULL); 88 ASSERT(bpl->bpl_phys == NULL); 89 ASSERT(bpl->bpl_cached_dbuf == NULL); 90 ASSERT(bpl->bpl_queue == NULL); 91 ASSERT(object != 0); 92 ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST); 93 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR); 94 95 bpl->bpl_mos = mos; 96 bpl->bpl_object = object; 97 bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1); 98 bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT; 99 bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t)); 100 101 mutex_exit(&bpl->bpl_lock); 102 return (0); 103 } 104 105 void 106 bplist_close(bplist_t *bpl) 107 { 108 mutex_enter(&bpl->bpl_lock); 109 110 ASSERT(bpl->bpl_queue == NULL); 111 112 if (bpl->bpl_cached_dbuf) { 113 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); 114 bpl->bpl_cached_dbuf = NULL; 115 } 116 if (bpl->bpl_dbuf) { 117 dmu_buf_rele(bpl->bpl_dbuf, bpl); 118 bpl->bpl_dbuf = NULL; 119 bpl->bpl_phys = NULL; 120 } 121 122 mutex_exit(&bpl->bpl_lock); 123 } 124 125 boolean_t 126 bplist_empty(bplist_t *bpl) 127 { 128 boolean_t rv; 129 130 if (bpl->bpl_object == 0) 131 return (B_TRUE); 132 133 mutex_enter(&bpl->bpl_lock); 134 VERIFY(0 == bplist_hold(bpl)); /* XXX */ 135 rv = (bpl->bpl_phys->bpl_entries == 0); 136 mutex_exit(&bpl->bpl_lock); 137 138 return (rv); 139 } 140 141 static int 142 bplist_cache(bplist_t *bpl, uint64_t blkid) 143 { 144 int err = 0; 145 146 if (bpl->bpl_cached_dbuf == NULL || 147 bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) { 148 if (bpl->bpl_cached_dbuf != NULL) 149 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); 150 err = dmu_buf_hold(bpl->bpl_mos, 151 bpl->bpl_object, blkid << bpl->bpl_blockshift, 152 bpl, &bpl->bpl_cached_dbuf); 153 ASSERT(err || bpl->bpl_cached_dbuf->db_size == 154 1ULL << bpl->bpl_blockshift); 155 } 156 return (err); 157 } 158 159 int 160 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp) 161 { 162 uint64_t blk, off; 163 blkptr_t *bparray; 164 int err; 165 166 mutex_enter(&bpl->bpl_lock); 167 168 err = bplist_hold(bpl); 169 if (err) { 170 mutex_exit(&bpl->bpl_lock); 171 return (err); 172 } 173 174 if (*itorp >= bpl->bpl_phys->bpl_entries) { 175 mutex_exit(&bpl->bpl_lock); 176 return (ENOENT); 177 } 178 179 blk = *itorp >> bpl->bpl_bpshift; 180 off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift); 181 182 err = bplist_cache(bpl, blk); 183 if (err) { 184 mutex_exit(&bpl->bpl_lock); 185 return (err); 186 } 187 188 bparray = bpl->bpl_cached_dbuf->db_data; 189 *bp = bparray[off]; 190 (*itorp)++; 191 mutex_exit(&bpl->bpl_lock); 192 return (0); 193 } 194 195 int 196 bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx) 197 { 198 uint64_t blk, off; 199 blkptr_t *bparray; 200 int err; 201 202 ASSERT(!BP_IS_HOLE(bp)); 203 mutex_enter(&bpl->bpl_lock); 204 err = bplist_hold(bpl); 205 if (err) 206 return (err); 207 208 blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift; 209 off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift); 210 211 err = bplist_cache(bpl, blk); 212 if (err) { 213 mutex_exit(&bpl->bpl_lock); 214 return (err); 215 } 216 217 dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx); 218 bparray = bpl->bpl_cached_dbuf->db_data; 219 bparray[off] = *bp; 220 221 /* We never need the fill count. */ 222 bparray[off].blk_fill = 0; 223 224 /* The bplist will compress better if we can leave off the checksum */ 225 if (!BP_GET_DEDUP(&bparray[off])) 226 bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum)); 227 228 dmu_buf_will_dirty(bpl->bpl_dbuf, tx); 229 bpl->bpl_phys->bpl_entries++; 230 bpl->bpl_phys->bpl_bytes += 231 bp_get_dsize_sync(dmu_objset_spa(bpl->bpl_mos), bp); 232 if (bpl->bpl_havecomp) { 233 bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp); 234 bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp); 235 } 236 mutex_exit(&bpl->bpl_lock); 237 238 return (0); 239 } 240 241 void 242 bplist_enqueue_cb(void *bpl, const blkptr_t *bp, dmu_tx_t *tx) 243 { 244 VERIFY(bplist_enqueue(bpl, bp, tx) == 0); 245 } 246 247 /* 248 * Deferred entry; will be processed later by bplist_sync(). 249 */ 250 void 251 bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp) 252 { 253 bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP); 254 255 ASSERT(!BP_IS_HOLE(bp)); 256 mutex_enter(&bpl->bpl_lock); 257 bpq->bpq_blk = *bp; 258 bpq->bpq_next = bpl->bpl_queue; 259 bpl->bpl_queue = bpq; 260 mutex_exit(&bpl->bpl_lock); 261 } 262 263 void 264 bplist_sync(bplist_t *bpl, bplist_sync_cb_t *func, void *arg, dmu_tx_t *tx) 265 { 266 bplist_q_t *bpq; 267 268 mutex_enter(&bpl->bpl_lock); 269 while ((bpq = bpl->bpl_queue) != NULL) { 270 bpl->bpl_queue = bpq->bpq_next; 271 mutex_exit(&bpl->bpl_lock); 272 func(arg, &bpq->bpq_blk, tx); 273 kmem_free(bpq, sizeof (*bpq)); 274 mutex_enter(&bpl->bpl_lock); 275 } 276 mutex_exit(&bpl->bpl_lock); 277 } 278 279 void 280 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx) 281 { 282 mutex_enter(&bpl->bpl_lock); 283 ASSERT3P(bpl->bpl_queue, ==, NULL); 284 VERIFY(0 == bplist_hold(bpl)); 285 dmu_buf_will_dirty(bpl->bpl_dbuf, tx); 286 VERIFY(0 == dmu_free_range(bpl->bpl_mos, 287 bpl->bpl_object, 0, -1ULL, tx)); 288 bpl->bpl_phys->bpl_entries = 0; 289 bpl->bpl_phys->bpl_bytes = 0; 290 if (bpl->bpl_havecomp) { 291 bpl->bpl_phys->bpl_comp = 0; 292 bpl->bpl_phys->bpl_uncomp = 0; 293 } 294 mutex_exit(&bpl->bpl_lock); 295 } 296 297 int 298 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 299 { 300 int err; 301 302 mutex_enter(&bpl->bpl_lock); 303 304 err = bplist_hold(bpl); 305 if (err) { 306 mutex_exit(&bpl->bpl_lock); 307 return (err); 308 } 309 310 *usedp = bpl->bpl_phys->bpl_bytes; 311 if (bpl->bpl_havecomp) { 312 *compp = bpl->bpl_phys->bpl_comp; 313 *uncompp = bpl->bpl_phys->bpl_uncomp; 314 } 315 mutex_exit(&bpl->bpl_lock); 316 317 if (!bpl->bpl_havecomp) { 318 uint64_t itor = 0, comp = 0, uncomp = 0; 319 blkptr_t bp; 320 321 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { 322 comp += BP_GET_PSIZE(&bp); 323 uncomp += BP_GET_UCSIZE(&bp); 324 } 325 if (err == ENOENT) 326 err = 0; 327 *compp = comp; 328 *uncompp = uncomp; 329 } 330 331 return (err); 332 } 333 334 /* 335 * Return (in *dsizep) the amount of space on the deadlist which is: 336 * mintxg < blk_birth <= maxtxg 337 */ 338 int 339 bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg, 340 uint64_t *dsizep) 341 { 342 uint64_t size = 0; 343 uint64_t itor = 0; 344 blkptr_t bp; 345 int err; 346 347 /* 348 * As an optimization, if they want the whole txg range, just 349 * get bpl_bytes rather than iterating over the bps. 350 */ 351 if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) { 352 mutex_enter(&bpl->bpl_lock); 353 err = bplist_hold(bpl); 354 if (err == 0) 355 *dsizep = bpl->bpl_phys->bpl_bytes; 356 mutex_exit(&bpl->bpl_lock); 357 return (err); 358 } 359 360 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { 361 if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) { 362 size += bp_get_dsize(dmu_objset_spa(bpl->bpl_mos), &bp); 363 } 364 } 365 if (err == ENOENT) 366 err = 0; 367 *dsizep = size; 368 return (err); 369 } 370