/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/bplist.h>
#include <sys/zfs_context.h>

void
bplist_init(bplist_t *bpl)
{
	bzero(bpl, sizeof (*bpl));
	mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&bpl->bpl_q_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&bpl->bpl_queue, sizeof (bplist_q_t),
	    offsetof(bplist_q_t, bpq_node));
}

void
bplist_fini(bplist_t *bpl)
{
	ASSERT(list_is_empty(&bpl->bpl_queue));
	list_destroy(&bpl->bpl_queue);
	mutex_destroy(&bpl->bpl_q_lock);
	mutex_destroy(&bpl->bpl_lock);
}

static int
bplist_hold(bplist_t *bpl)
{
	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
	if (bpl->bpl_dbuf == NULL) {
		int err = dmu_bonus_hold(bpl->bpl_mos,
		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
		if (err)
			return (err);
		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
	}
	return (0);
}

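/*
 * Allocate a new bplist object in the given object set.  The bonus buffer
 * size depends on the pool version: pools older than
 * SPA_VERSION_BPLIST_ACCOUNT use the smaller BPLIST_SIZE_V0 header, which
 * lacks the compressed/uncompressed space counters.
 */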
uint64_t
bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
{
	int size;

	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);

	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
	    DMU_OT_BPLIST_HDR, size, tx));
}

void
bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
{
	VERIFY(dmu_object_free(mos, object, tx) == 0);
}

int
bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
{
	dmu_object_info_t doi;
	int err;

	err = dmu_object_info(mos, object, &doi);
	if (err)
		return (err);

	mutex_enter(&bpl->bpl_lock);

	ASSERT(bpl->bpl_dbuf == NULL);
	ASSERT(bpl->bpl_phys == NULL);
	ASSERT(bpl->bpl_cached_dbuf == NULL);
	ASSERT(list_is_empty(&bpl->bpl_queue));
	ASSERT(object != 0);
	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);

	bpl->bpl_mos = mos;
	bpl->bpl_object = object;
	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));

	mutex_exit(&bpl->bpl_lock);
	return (0);
}

void
bplist_close(bplist_t *bpl)
{
	mutex_enter(&bpl->bpl_lock);

	ASSERT(list_is_empty(&bpl->bpl_queue));

	if (bpl->bpl_cached_dbuf) {
		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
		bpl->bpl_cached_dbuf = NULL;
	}
	if (bpl->bpl_dbuf) {
		dmu_buf_rele(bpl->bpl_dbuf, bpl);
		bpl->bpl_dbuf = NULL;
		bpl->bpl_phys = NULL;
	}

	mutex_exit(&bpl->bpl_lock);
}

boolean_t
bplist_empty(bplist_t *bpl)
{
	boolean_t rv;

	if (bpl->bpl_object == 0)
		return (B_TRUE);

	mutex_enter(&bpl->bpl_lock);
	VERIFY(0 == bplist_hold(bpl));		/* XXX */
	rv = (bpl->bpl_phys->bpl_entries == 0);
	mutex_exit(&bpl->bpl_lock);

	return (rv);
}

static int
bplist_cache(bplist_t *bpl, uint64_t blkid)
{
	int err = 0;

	if (bpl->bpl_cached_dbuf == NULL ||
	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
		if (bpl->bpl_cached_dbuf != NULL)
			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
		err = dmu_buf_hold(bpl->bpl_mos,
		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
		    bpl, &bpl->bpl_cached_dbuf, DMU_READ_PREFETCH);
		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
		    1ULL << bpl->bpl_blockshift);
	}
	return (err);
}

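/*
 * Copy out the next entry at or after *itorp and advance the iterator.
 * Entries whose birth txg is zero are skipped; ENOENT indicates that the
 * end of the list has been reached.
 */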
int
bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
{
	uint64_t blk, off;
	blkptr_t *bparray;
	int err;

	mutex_enter(&bpl->bpl_lock);

	err = bplist_hold(bpl);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	do {
		if (*itorp >= bpl->bpl_phys->bpl_entries) {
			mutex_exit(&bpl->bpl_lock);
			return (ENOENT);
		}

		blk = *itorp >> bpl->bpl_bpshift;
		off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);

		err = bplist_cache(bpl, blk);
		if (err) {
			mutex_exit(&bpl->bpl_lock);
			return (err);
		}

		bparray = bpl->bpl_cached_dbuf->db_data;
		*bp = bparray[off];
		(*itorp)++;
	} while (bp->blk_birth == 0);

	mutex_exit(&bpl->bpl_lock);
	return (0);
}

int
bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
{
	uint64_t blk, off;
	blkptr_t *bparray;
	int err;

	ASSERT(!BP_IS_HOLE(bp));
	mutex_enter(&bpl->bpl_lock);
	err = bplist_hold(bpl);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);

	err = bplist_cache(bpl, blk);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
	bparray = bpl->bpl_cached_dbuf->db_data;
	bparray[off] = *bp;

	/* We never need the fill count. */
	bparray[off].blk_fill = 0;

	/* The bplist will compress better if we can leave off the checksum */
	if (!BP_GET_DEDUP(&bparray[off]))
		bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));

	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
	bpl->bpl_phys->bpl_entries++;
	bpl->bpl_phys->bpl_bytes +=
	    bp_get_dsize_sync(dmu_objset_spa(bpl->bpl_mos), bp);
	if (bpl->bpl_havecomp) {
		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
	}
	mutex_exit(&bpl->bpl_lock);

	return (0);
}

void
bplist_enqueue_cb(void *bpl, const blkptr_t *bp, dmu_tx_t *tx)
{
	VERIFY(bplist_enqueue(bpl, bp, tx) == 0);
}

/*
 * Deferred entry; will be processed later by bplist_sync().
 */
void
bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
{
	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);

	ASSERT(!BP_IS_HOLE(bp));
	mutex_enter(&bpl->bpl_q_lock);
	bpq->bpq_blk = *bp;
	list_insert_tail(&bpl->bpl_queue, bpq);
	mutex_exit(&bpl->bpl_q_lock);
}

void
bplist_sync(bplist_t *bpl, bplist_sync_cb_t *func, void *arg, dmu_tx_t *tx)
{
	bplist_q_t *bpq;

	mutex_enter(&bpl->bpl_q_lock);
	while ((bpq = list_head(&bpl->bpl_queue)) != NULL) {
		list_remove(&bpl->bpl_queue, bpq);
		mutex_exit(&bpl->bpl_q_lock);
		func(arg, &bpq->bpq_blk, tx);
		kmem_free(bpq, sizeof (*bpq));
		mutex_enter(&bpl->bpl_q_lock);
	}
	mutex_exit(&bpl->bpl_q_lock);
}

void
bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
{
	mutex_enter(&bpl->bpl_lock);
	ASSERT(list_is_empty(&bpl->bpl_queue));
	VERIFY(0 == bplist_hold(bpl));
	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
	    bpl->bpl_object, 0, -1ULL, tx));
	bpl->bpl_phys->bpl_entries = 0;
	bpl->bpl_phys->bpl_bytes = 0;
	if (bpl->bpl_havecomp) {
		bpl->bpl_phys->bpl_comp = 0;
		bpl->bpl_phys->bpl_uncomp = 0;
	}
	mutex_exit(&bpl->bpl_lock);
}

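/*
 * Return the space accounted for by this bplist: total allocated bytes
 * plus compressed and uncompressed totals.  Headers that predate the
 * compression accounting compute the latter two by walking every entry.
 */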
int
bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
	int err;

	mutex_enter(&bpl->bpl_lock);

	err = bplist_hold(bpl);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	*usedp = bpl->bpl_phys->bpl_bytes;
	if (bpl->bpl_havecomp) {
		*compp = bpl->bpl_phys->bpl_comp;
		*uncompp = bpl->bpl_phys->bpl_uncomp;
	}
	mutex_exit(&bpl->bpl_lock);

	if (!bpl->bpl_havecomp) {
		uint64_t itor = 0, comp = 0, uncomp = 0;
		blkptr_t bp;

		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
			comp += BP_GET_PSIZE(&bp);
			uncomp += BP_GET_UCSIZE(&bp);
		}
		if (err == ENOENT)
			err = 0;
		*compp = comp;
		*uncompp = uncomp;
	}

	return (err);
}

/*
 * Return (in *dsizep) the amount of space on the deadlist which is:
 * mintxg < blk_birth <= maxtxg
 */
int
bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
    uint64_t *dsizep)
{
	uint64_t size = 0;
	uint64_t itor = 0;
	blkptr_t bp;
	int err;

	/*
	 * As an optimization, if they want the whole txg range, just
	 * get bpl_bytes rather than iterating over the bps.
	 */
	if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
		mutex_enter(&bpl->bpl_lock);
		err = bplist_hold(bpl);
		if (err == 0)
			*dsizep = bpl->bpl_phys->bpl_bytes;
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
		if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
			size += bp_get_dsize(dmu_objset_spa(bpl->bpl_mos), &bp);
		}
	}
	if (err == ENOENT)
		err = 0;
	*dsizep = size;
	return (err);
}
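
/*
 * Illustrative sketch: a consumer can walk an open bplist the same way
 * bplist_space() and bplist_space_birthrange() above do, treating ENOENT
 * as the normal end-of-list indication.  visit_bp() here is a hypothetical
 * callback, not part of this interface:
 *
 *	uint64_t itor = 0;
 *	blkptr_t bp;
 *	int err;
 *
 *	while ((err = bplist_iterate(bpl, &itor, &bp)) == 0)
 *		visit_bp(&bp);
 *	if (err == ENOENT)
 *		err = 0;
 */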