/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/bplist.h>
#include <sys/zfs_context.h>

void
bplist_init(bplist_t *bpl)
{
	bzero(bpl, sizeof (*bpl));
	mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&bpl->bpl_q_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&bpl->bpl_queue, sizeof (bplist_q_t),
	    offsetof(bplist_q_t, bpq_node));
}

void
bplist_fini(bplist_t *bpl)
{
	ASSERT(list_is_empty(&bpl->bpl_queue));
	list_destroy(&bpl->bpl_queue);
	mutex_destroy(&bpl->bpl_q_lock);
	mutex_destroy(&bpl->bpl_lock);
}

static int
bplist_hold(bplist_t *bpl)
{
	ASSERT(MUTEX_HELD(&bpl->bpl_lock));
	if (bpl->bpl_dbuf == NULL) {
		int err = dmu_bonus_hold(bpl->bpl_mos,
		    bpl->bpl_object, bpl, &bpl->bpl_dbuf);
		if (err)
			return (err);
		bpl->bpl_phys = bpl->bpl_dbuf->db_data;
	}
	return (0);
}

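/*
 * Allocate a new bplist object in the given MOS.  Pools that predate
 * SPA_VERSION_BPLIST_ACCOUNT get the smaller BPLIST_SIZE_V0 bonus buffer;
 * bplist_open() uses the bonus size to decide whether compressed and
 * uncompressed accounting is available (bpl_havecomp).
 */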
uint64_t
bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx)
{
	int size;

	size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ?
	    BPLIST_SIZE_V0 : sizeof (bplist_phys_t);

	return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize,
	    DMU_OT_BPLIST_HDR, size, tx));
}

void
bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx)
{
	VERIFY(dmu_object_free(mos, object, tx) == 0);
}

int
bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object)
{
	dmu_object_info_t doi;
	int err;

	err = dmu_object_info(mos, object, &doi);
	if (err)
		return (err);

	mutex_enter(&bpl->bpl_lock);

	ASSERT(bpl->bpl_dbuf == NULL);
	ASSERT(bpl->bpl_phys == NULL);
	ASSERT(bpl->bpl_cached_dbuf == NULL);
	ASSERT(list_is_empty(&bpl->bpl_queue));
	ASSERT(object != 0);
	ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST);
	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR);

	bpl->bpl_mos = mos;
	bpl->bpl_object = object;
	bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1);
	bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT;
	bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t));

	mutex_exit(&bpl->bpl_lock);
	return (0);
}

void
bplist_close(bplist_t *bpl)
{
	mutex_enter(&bpl->bpl_lock);

	ASSERT(list_is_empty(&bpl->bpl_queue));

	if (bpl->bpl_cached_dbuf) {
		dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
		bpl->bpl_cached_dbuf = NULL;
	}
	if (bpl->bpl_dbuf) {
		dmu_buf_rele(bpl->bpl_dbuf, bpl);
		bpl->bpl_dbuf = NULL;
		bpl->bpl_phys = NULL;
	}

	mutex_exit(&bpl->bpl_lock);
}

boolean_t
bplist_empty(bplist_t *bpl)
{
	boolean_t rv;

	if (bpl->bpl_object == 0)
		return (B_TRUE);

	mutex_enter(&bpl->bpl_lock);
	VERIFY(0 == bplist_hold(bpl)); /* XXX */
	rv = (bpl->bpl_phys->bpl_entries == 0);
	mutex_exit(&bpl->bpl_lock);

	return (rv);
}

static int
bplist_cache(bplist_t *bpl, uint64_t blkid)
{
	int err = 0;

	if (bpl->bpl_cached_dbuf == NULL ||
	    bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) {
		if (bpl->bpl_cached_dbuf != NULL)
			dmu_buf_rele(bpl->bpl_cached_dbuf, bpl);
		err = dmu_buf_hold(bpl->bpl_mos,
		    bpl->bpl_object, blkid << bpl->bpl_blockshift,
		    bpl, &bpl->bpl_cached_dbuf, DMU_READ_PREFETCH);
		ASSERT(err || bpl->bpl_cached_dbuf->db_size ==
		    1ULL << bpl->bpl_blockshift);
	}
	return (err);
}

int
bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp)
{
	uint64_t blk, off;
	blkptr_t *bparray;
	int err;

	mutex_enter(&bpl->bpl_lock);

	err = bplist_hold(bpl);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	if (*itorp >= bpl->bpl_phys->bpl_entries) {
		mutex_exit(&bpl->bpl_lock);
		return (ENOENT);
	}

	blk = *itorp >> bpl->bpl_bpshift;
	off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift);

	err = bplist_cache(bpl, blk);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	bparray = bpl->bpl_cached_dbuf->db_data;
	*bp = bparray[off];
	(*itorp)++;
	mutex_exit(&bpl->bpl_lock);
	return (0);
}

int
bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
{
	uint64_t blk, off;
	blkptr_t *bparray;
	int err;

	ASSERT(!BP_IS_HOLE(bp));
	mutex_enter(&bpl->bpl_lock);
	err = bplist_hold(bpl);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);

	err = bplist_cache(bpl, blk);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
	bparray = bpl->bpl_cached_dbuf->db_data;
	bparray[off] = *bp;

	/* We never need the fill count. */
	bparray[off].blk_fill = 0;

	/* The bplist will compress better if we can leave off the checksum */
	if (!BP_GET_DEDUP(&bparray[off]))
		bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));

	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
	bpl->bpl_phys->bpl_entries++;
	bpl->bpl_phys->bpl_bytes +=
	    bp_get_dsize_sync(dmu_objset_spa(bpl->bpl_mos), bp);
	if (bpl->bpl_havecomp) {
		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
	}
	mutex_exit(&bpl->bpl_lock);

	return (0);
}

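/*
 * Wrapper around bplist_enqueue() for callers that cannot tolerate
 * failure; any error is fatal via VERIFY.
 */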
void
bplist_enqueue_cb(void *bpl, const blkptr_t *bp, dmu_tx_t *tx)
{
	VERIFY(bplist_enqueue(bpl, bp, tx) == 0);
}

/*
 * Deferred entry; will be processed later by bplist_sync().
 */
void
bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
{
	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);

	ASSERT(!BP_IS_HOLE(bp));
	mutex_enter(&bpl->bpl_q_lock);
	bpq->bpq_blk = *bp;
	list_insert_tail(&bpl->bpl_queue, bpq);
	mutex_exit(&bpl->bpl_q_lock);
}

void
bplist_sync(bplist_t *bpl, bplist_sync_cb_t *func, void *arg, dmu_tx_t *tx)
{
	bplist_q_t *bpq;

	mutex_enter(&bpl->bpl_q_lock);
	while ((bpq = list_head(&bpl->bpl_queue)) != NULL) {
		list_remove(&bpl->bpl_queue, bpq);
		mutex_exit(&bpl->bpl_q_lock);
		func(arg, &bpq->bpq_blk, tx);
		kmem_free(bpq, sizeof (*bpq));
		mutex_enter(&bpl->bpl_q_lock);
	}
	mutex_exit(&bpl->bpl_q_lock);
}

void
bplist_vacate(bplist_t *bpl, dmu_tx_t *tx)
{
	mutex_enter(&bpl->bpl_lock);
	ASSERT(list_is_empty(&bpl->bpl_queue));
	VERIFY(0 == bplist_hold(bpl));
	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
	VERIFY(0 == dmu_free_range(bpl->bpl_mos,
	    bpl->bpl_object, 0, -1ULL, tx));
	bpl->bpl_phys->bpl_entries = 0;
	bpl->bpl_phys->bpl_bytes = 0;
	if (bpl->bpl_havecomp) {
		bpl->bpl_phys->bpl_comp = 0;
		bpl->bpl_phys->bpl_uncomp = 0;
	}
	mutex_exit(&bpl->bpl_lock);
}

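/*
 * Report the space consumed by the list: bytes allocated on disk and,
 * when available, the compressed and uncompressed totals.  If the bonus
 * buffer predates comp/uncomp accounting (bpl_havecomp is false), the
 * totals are computed by iterating over every entry.
 */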
int
bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
	int err;

	mutex_enter(&bpl->bpl_lock);

	err = bplist_hold(bpl);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	*usedp = bpl->bpl_phys->bpl_bytes;
	if (bpl->bpl_havecomp) {
		*compp = bpl->bpl_phys->bpl_comp;
		*uncompp = bpl->bpl_phys->bpl_uncomp;
	}
	mutex_exit(&bpl->bpl_lock);

	if (!bpl->bpl_havecomp) {
		uint64_t itor = 0, comp = 0, uncomp = 0;
		blkptr_t bp;

		while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
			comp += BP_GET_PSIZE(&bp);
			uncomp += BP_GET_UCSIZE(&bp);
		}
		if (err == ENOENT)
			err = 0;
		*compp = comp;
		*uncompp = uncomp;
	}

	return (err);
}

/*
 * Return (in *dsizep) the amount of space on the deadlist which is:
 * mintxg < blk_birth <= maxtxg
 */
int
bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg,
    uint64_t *dsizep)
{
	uint64_t size = 0;
	uint64_t itor = 0;
	blkptr_t bp;
	int err;

	/*
	 * As an optimization, if they want the whole txg range, just
	 * get bpl_bytes rather than iterating over the bps.
	 */
	if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) {
		mutex_enter(&bpl->bpl_lock);
		err = bplist_hold(bpl);
		if (err == 0)
			*dsizep = bpl->bpl_phys->bpl_bytes;
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) {
		if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) {
			size += bp_get_dsize(dmu_objset_spa(bpl->bpl_mos), &bp);
		}
	}
	if (err == ENOENT)
		err = 0;
	*dsizep = size;
	return (err);
}