1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/bplist.h> 27 #include <sys/zfs_context.h> 28 29 void 30 bplist_init(bplist_t *bpl) 31 { 32 bzero(bpl, sizeof (*bpl)); 33 mutex_init(&bpl->bpl_lock, NULL, MUTEX_DEFAULT, NULL); 34 mutex_init(&bpl->bpl_q_lock, NULL, MUTEX_DEFAULT, NULL); 35 list_create(&bpl->bpl_queue, sizeof (bplist_q_t), 36 offsetof(bplist_q_t, bpq_node)); 37 } 38 39 void 40 bplist_fini(bplist_t *bpl) 41 { 42 ASSERT(list_is_empty(&bpl->bpl_queue)); 43 list_destroy(&bpl->bpl_queue); 44 mutex_destroy(&bpl->bpl_q_lock); 45 mutex_destroy(&bpl->bpl_lock); 46 } 47 48 static int 49 bplist_hold(bplist_t *bpl) 50 { 51 ASSERT(MUTEX_HELD(&bpl->bpl_lock)); 52 if (bpl->bpl_dbuf == NULL) { 53 int err = dmu_bonus_hold(bpl->bpl_mos, 54 bpl->bpl_object, bpl, &bpl->bpl_dbuf); 55 if (err) 56 return (err); 57 bpl->bpl_phys = bpl->bpl_dbuf->db_data; 58 } 59 return (0); 60 } 61 62 uint64_t 63 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx) 64 { 65 int size; 66 67 size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ? 68 BPLIST_SIZE_V0 : sizeof (bplist_phys_t); 69 70 return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize, 71 DMU_OT_BPLIST_HDR, size, tx)); 72 } 73 74 void 75 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx) 76 { 77 VERIFY(dmu_object_free(mos, object, tx) == 0); 78 } 79 80 int 81 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object) 82 { 83 dmu_object_info_t doi; 84 int err; 85 86 err = dmu_object_info(mos, object, &doi); 87 if (err) 88 return (err); 89 90 mutex_enter(&bpl->bpl_lock); 91 92 ASSERT(bpl->bpl_dbuf == NULL); 93 ASSERT(bpl->bpl_phys == NULL); 94 ASSERT(bpl->bpl_cached_dbuf == NULL); 95 ASSERT(list_is_empty(&bpl->bpl_queue)); 96 ASSERT(object != 0); 97 ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST); 98 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR); 99 100 bpl->bpl_mos = mos; 101 bpl->bpl_object = object; 102 bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1); 103 bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT; 104 bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t)); 105 106 mutex_exit(&bpl->bpl_lock); 107 return (0); 108 } 109 110 void 111 bplist_close(bplist_t *bpl) 112 { 113 mutex_enter(&bpl->bpl_lock); 114 115 ASSERT(list_is_empty(&bpl->bpl_queue)); 116 117 if (bpl->bpl_cached_dbuf) { 118 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); 119 bpl->bpl_cached_dbuf = NULL; 120 } 121 if (bpl->bpl_dbuf) { 122 dmu_buf_rele(bpl->bpl_dbuf, bpl); 123 bpl->bpl_dbuf = NULL; 124 bpl->bpl_phys = NULL; 125 } 126 127 mutex_exit(&bpl->bpl_lock); 128 } 129 130 boolean_t 131 bplist_empty(bplist_t *bpl) 132 { 133 boolean_t rv; 134 135 if (bpl->bpl_object == 0) 136 return (B_TRUE); 137 138 mutex_enter(&bpl->bpl_lock); 139 VERIFY(0 == bplist_hold(bpl)); /* XXX */ 140 rv = (bpl->bpl_phys->bpl_entries == 0); 141 mutex_exit(&bpl->bpl_lock); 142 143 return (rv); 144 } 145 146 static int 147 bplist_cache(bplist_t *bpl, uint64_t blkid) 148 { 149 int err = 0; 150 151 if (bpl->bpl_cached_dbuf == NULL || 152 bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) { 153 if (bpl->bpl_cached_dbuf != NULL) 154 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); 155 err = dmu_buf_hold(bpl->bpl_mos, 156 bpl->bpl_object, blkid << bpl->bpl_blockshift, 157 bpl, &bpl->bpl_cached_dbuf); 158 ASSERT(err || bpl->bpl_cached_dbuf->db_size == 159 1ULL << bpl->bpl_blockshift); 160 } 161 return (err); 162 } 163 164 int 165 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp) 166 { 167 uint64_t blk, off; 168 blkptr_t *bparray; 169 int err; 170 171 mutex_enter(&bpl->bpl_lock); 172 173 err = bplist_hold(bpl); 174 if (err) { 175 mutex_exit(&bpl->bpl_lock); 176 return (err); 177 } 178 179 if (*itorp >= bpl->bpl_phys->bpl_entries) { 180 mutex_exit(&bpl->bpl_lock); 181 return (ENOENT); 182 } 183 184 blk = *itorp >> bpl->bpl_bpshift; 185 off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift); 186 187 err = bplist_cache(bpl, blk); 188 if (err) { 189 mutex_exit(&bpl->bpl_lock); 190 return (err); 191 } 192 193 bparray = bpl->bpl_cached_dbuf->db_data; 194 *bp = bparray[off]; 195 (*itorp)++; 196 mutex_exit(&bpl->bpl_lock); 197 return (0); 198 } 199 200 int 201 bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx) 202 { 203 uint64_t blk, off; 204 blkptr_t *bparray; 205 int err; 206 207 ASSERT(!BP_IS_HOLE(bp)); 208 mutex_enter(&bpl->bpl_lock); 209 err = bplist_hold(bpl); 210 if (err) 211 return (err); 212 213 blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift; 214 off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift); 215 216 err = bplist_cache(bpl, blk); 217 if (err) { 218 mutex_exit(&bpl->bpl_lock); 219 return (err); 220 } 221 222 dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx); 223 bparray = bpl->bpl_cached_dbuf->db_data; 224 bparray[off] = *bp; 225 226 /* We never need the fill count. */ 227 bparray[off].blk_fill = 0; 228 229 /* The bplist will compress better if we can leave off the checksum */ 230 if (!BP_GET_DEDUP(&bparray[off])) 231 bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum)); 232 233 dmu_buf_will_dirty(bpl->bpl_dbuf, tx); 234 bpl->bpl_phys->bpl_entries++; 235 bpl->bpl_phys->bpl_bytes += 236 bp_get_dsize_sync(dmu_objset_spa(bpl->bpl_mos), bp); 237 if (bpl->bpl_havecomp) { 238 bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp); 239 bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp); 240 } 241 mutex_exit(&bpl->bpl_lock); 242 243 return (0); 244 } 245 246 void 247 bplist_enqueue_cb(void *bpl, const blkptr_t *bp, dmu_tx_t *tx) 248 { 249 VERIFY(bplist_enqueue(bpl, bp, tx) == 0); 250 } 251 252 /* 253 * Deferred entry; will be processed later by bplist_sync(). 254 */ 255 void 256 bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp) 257 { 258 bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP); 259 260 ASSERT(!BP_IS_HOLE(bp)); 261 mutex_enter(&bpl->bpl_q_lock); 262 bpq->bpq_blk = *bp; 263 list_insert_tail(&bpl->bpl_queue, bpq); 264 mutex_exit(&bpl->bpl_q_lock); 265 } 266 267 void 268 bplist_sync(bplist_t *bpl, bplist_sync_cb_t *func, void *arg, dmu_tx_t *tx) 269 { 270 bplist_q_t *bpq; 271 272 mutex_enter(&bpl->bpl_q_lock); 273 while (bpq = list_head(&bpl->bpl_queue)) { 274 list_remove(&bpl->bpl_queue, bpq); 275 mutex_exit(&bpl->bpl_q_lock); 276 func(arg, &bpq->bpq_blk, tx); 277 kmem_free(bpq, sizeof (*bpq)); 278 mutex_enter(&bpl->bpl_q_lock); 279 } 280 mutex_exit(&bpl->bpl_q_lock); 281 } 282 283 void 284 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx) 285 { 286 mutex_enter(&bpl->bpl_lock); 287 ASSERT(list_is_empty(&bpl->bpl_queue)); 288 VERIFY(0 == bplist_hold(bpl)); 289 dmu_buf_will_dirty(bpl->bpl_dbuf, tx); 290 VERIFY(0 == dmu_free_range(bpl->bpl_mos, 291 bpl->bpl_object, 0, -1ULL, tx)); 292 bpl->bpl_phys->bpl_entries = 0; 293 bpl->bpl_phys->bpl_bytes = 0; 294 if (bpl->bpl_havecomp) { 295 bpl->bpl_phys->bpl_comp = 0; 296 bpl->bpl_phys->bpl_uncomp = 0; 297 } 298 mutex_exit(&bpl->bpl_lock); 299 } 300 301 int 302 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 303 { 304 int err; 305 306 mutex_enter(&bpl->bpl_lock); 307 308 err = bplist_hold(bpl); 309 if (err) { 310 mutex_exit(&bpl->bpl_lock); 311 return (err); 312 } 313 314 *usedp = bpl->bpl_phys->bpl_bytes; 315 if (bpl->bpl_havecomp) { 316 *compp = bpl->bpl_phys->bpl_comp; 317 *uncompp = bpl->bpl_phys->bpl_uncomp; 318 } 319 mutex_exit(&bpl->bpl_lock); 320 321 if (!bpl->bpl_havecomp) { 322 uint64_t itor = 0, comp = 0, uncomp = 0; 323 blkptr_t bp; 324 325 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { 326 comp += BP_GET_PSIZE(&bp); 327 uncomp += BP_GET_UCSIZE(&bp); 328 } 329 if (err == ENOENT) 330 err = 0; 331 *compp = comp; 332 *uncompp = uncomp; 333 } 334 335 return (err); 336 } 337 338 /* 339 * Return (in *dsizep) the amount of space on the deadlist which is: 340 * mintxg < blk_birth <= maxtxg 341 */ 342 int 343 bplist_space_birthrange(bplist_t *bpl, uint64_t mintxg, uint64_t maxtxg, 344 uint64_t *dsizep) 345 { 346 uint64_t size = 0; 347 uint64_t itor = 0; 348 blkptr_t bp; 349 int err; 350 351 /* 352 * As an optimization, if they want the whole txg range, just 353 * get bpl_bytes rather than iterating over the bps. 354 */ 355 if (mintxg < TXG_INITIAL && maxtxg == UINT64_MAX) { 356 mutex_enter(&bpl->bpl_lock); 357 err = bplist_hold(bpl); 358 if (err == 0) 359 *dsizep = bpl->bpl_phys->bpl_bytes; 360 mutex_exit(&bpl->bpl_lock); 361 return (err); 362 } 363 364 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { 365 if (bp.blk_birth > mintxg && bp.blk_birth <= maxtxg) { 366 size += bp_get_dsize(dmu_objset_spa(bpl->bpl_mos), &bp); 367 } 368 } 369 if (err == ENOENT) 370 err = 0; 371 *dsizep = size; 372 return (err); 373 } 374