1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/bplist.h> 29 #include <sys/zfs_context.h> 30 31 static int 32 bplist_hold(bplist_t *bpl) 33 { 34 ASSERT(MUTEX_HELD(&bpl->bpl_lock)); 35 if (bpl->bpl_dbuf == NULL) { 36 int err = dmu_bonus_hold(bpl->bpl_mos, 37 bpl->bpl_object, bpl, &bpl->bpl_dbuf); 38 if (err) 39 return (err); 40 bpl->bpl_phys = bpl->bpl_dbuf->db_data; 41 } 42 return (0); 43 } 44 45 uint64_t 46 bplist_create(objset_t *mos, int blocksize, dmu_tx_t *tx) 47 { 48 int size; 49 50 size = spa_version(dmu_objset_spa(mos)) < SPA_VERSION_BPLIST_ACCOUNT ? 51 BPLIST_SIZE_V0 : sizeof (bplist_phys_t); 52 53 return (dmu_object_alloc(mos, DMU_OT_BPLIST, blocksize, 54 DMU_OT_BPLIST_HDR, size, tx)); 55 } 56 57 void 58 bplist_destroy(objset_t *mos, uint64_t object, dmu_tx_t *tx) 59 { 60 VERIFY(dmu_object_free(mos, object, tx) == 0); 61 } 62 63 int 64 bplist_open(bplist_t *bpl, objset_t *mos, uint64_t object) 65 { 66 dmu_object_info_t doi; 67 int err; 68 69 err = dmu_object_info(mos, object, &doi); 70 if (err) 71 return (err); 72 73 mutex_enter(&bpl->bpl_lock); 74 75 ASSERT(bpl->bpl_dbuf == NULL); 76 ASSERT(bpl->bpl_phys == NULL); 77 ASSERT(bpl->bpl_cached_dbuf == NULL); 78 ASSERT(bpl->bpl_queue == NULL); 79 ASSERT(object != 0); 80 ASSERT3U(doi.doi_type, ==, DMU_OT_BPLIST); 81 ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_BPLIST_HDR); 82 83 bpl->bpl_mos = mos; 84 bpl->bpl_object = object; 85 bpl->bpl_blockshift = highbit(doi.doi_data_block_size - 1); 86 bpl->bpl_bpshift = bpl->bpl_blockshift - SPA_BLKPTRSHIFT; 87 bpl->bpl_havecomp = (doi.doi_bonus_size == sizeof (bplist_phys_t)); 88 89 mutex_exit(&bpl->bpl_lock); 90 return (0); 91 } 92 93 void 94 bplist_close(bplist_t *bpl) 95 { 96 mutex_enter(&bpl->bpl_lock); 97 98 ASSERT(bpl->bpl_queue == NULL); 99 100 if (bpl->bpl_cached_dbuf) { 101 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); 102 bpl->bpl_cached_dbuf = NULL; 103 } 104 if (bpl->bpl_dbuf) { 105 dmu_buf_rele(bpl->bpl_dbuf, bpl); 106 bpl->bpl_dbuf = NULL; 107 bpl->bpl_phys = NULL; 108 } 109 110 mutex_exit(&bpl->bpl_lock); 111 } 112 113 boolean_t 114 bplist_empty(bplist_t *bpl) 115 { 116 boolean_t rv; 117 118 if (bpl->bpl_object == 0) 119 return (B_TRUE); 120 121 mutex_enter(&bpl->bpl_lock); 122 VERIFY(0 == bplist_hold(bpl)); /* XXX */ 123 rv = (bpl->bpl_phys->bpl_entries == 0); 124 mutex_exit(&bpl->bpl_lock); 125 126 return (rv); 127 } 128 129 static int 130 bplist_cache(bplist_t *bpl, uint64_t blkid) 131 { 132 int err = 0; 133 134 if (bpl->bpl_cached_dbuf == NULL || 135 bpl->bpl_cached_dbuf->db_offset != (blkid << bpl->bpl_blockshift)) { 136 if (bpl->bpl_cached_dbuf != NULL) 137 dmu_buf_rele(bpl->bpl_cached_dbuf, bpl); 138 err = dmu_buf_hold(bpl->bpl_mos, 139 bpl->bpl_object, blkid << bpl->bpl_blockshift, 140 bpl, &bpl->bpl_cached_dbuf); 141 ASSERT(err || bpl->bpl_cached_dbuf->db_size == 142 1ULL << bpl->bpl_blockshift); 143 } 144 return (err); 145 } 146 147 int 148 bplist_iterate(bplist_t *bpl, uint64_t *itorp, blkptr_t *bp) 149 { 150 uint64_t blk, off; 151 blkptr_t *bparray; 152 int err; 153 154 mutex_enter(&bpl->bpl_lock); 155 156 err = bplist_hold(bpl); 157 if (err) { 158 mutex_exit(&bpl->bpl_lock); 159 return (err); 160 } 161 162 if (*itorp >= bpl->bpl_phys->bpl_entries) { 163 mutex_exit(&bpl->bpl_lock); 164 return (ENOENT); 165 } 166 167 blk = *itorp >> bpl->bpl_bpshift; 168 off = P2PHASE(*itorp, 1ULL << bpl->bpl_bpshift); 169 170 err = bplist_cache(bpl, blk); 171 if (err) { 172 mutex_exit(&bpl->bpl_lock); 173 return (err); 174 } 175 176 bparray = bpl->bpl_cached_dbuf->db_data; 177 *bp = bparray[off]; 178 (*itorp)++; 179 mutex_exit(&bpl->bpl_lock); 180 return (0); 181 } 182 183 int 184 bplist_enqueue(bplist_t *bpl, blkptr_t *bp, dmu_tx_t *tx) 185 { 186 uint64_t blk, off; 187 blkptr_t *bparray; 188 int err; 189 190 ASSERT(!BP_IS_HOLE(bp)); 191 mutex_enter(&bpl->bpl_lock); 192 err = bplist_hold(bpl); 193 if (err) 194 return (err); 195 196 blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift; 197 off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift); 198 199 err = bplist_cache(bpl, blk); 200 if (err) { 201 mutex_exit(&bpl->bpl_lock); 202 return (err); 203 } 204 205 dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx); 206 bparray = bpl->bpl_cached_dbuf->db_data; 207 bparray[off] = *bp; 208 209 /* We never need the fill count. */ 210 bparray[off].blk_fill = 0; 211 212 /* The bplist will compress better if we can leave off the checksum */ 213 bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum)); 214 215 dmu_buf_will_dirty(bpl->bpl_dbuf, tx); 216 bpl->bpl_phys->bpl_entries++; 217 bpl->bpl_phys->bpl_bytes += 218 bp_get_dasize(dmu_objset_spa(bpl->bpl_mos), bp); 219 if (bpl->bpl_havecomp) { 220 bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp); 221 bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp); 222 } 223 mutex_exit(&bpl->bpl_lock); 224 225 return (0); 226 } 227 228 /* 229 * Deferred entry; will be written later by bplist_sync(). 230 */ 231 void 232 bplist_enqueue_deferred(bplist_t *bpl, blkptr_t *bp) 233 { 234 bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP); 235 236 ASSERT(!BP_IS_HOLE(bp)); 237 mutex_enter(&bpl->bpl_lock); 238 bpq->bpq_blk = *bp; 239 bpq->bpq_next = bpl->bpl_queue; 240 bpl->bpl_queue = bpq; 241 mutex_exit(&bpl->bpl_lock); 242 } 243 244 void 245 bplist_sync(bplist_t *bpl, dmu_tx_t *tx) 246 { 247 bplist_q_t *bpq; 248 249 mutex_enter(&bpl->bpl_lock); 250 while ((bpq = bpl->bpl_queue) != NULL) { 251 bpl->bpl_queue = bpq->bpq_next; 252 mutex_exit(&bpl->bpl_lock); 253 VERIFY(0 == bplist_enqueue(bpl, &bpq->bpq_blk, tx)); 254 kmem_free(bpq, sizeof (*bpq)); 255 mutex_enter(&bpl->bpl_lock); 256 } 257 mutex_exit(&bpl->bpl_lock); 258 } 259 260 void 261 bplist_vacate(bplist_t *bpl, dmu_tx_t *tx) 262 { 263 mutex_enter(&bpl->bpl_lock); 264 ASSERT3P(bpl->bpl_queue, ==, NULL); 265 VERIFY(0 == bplist_hold(bpl)); 266 dmu_buf_will_dirty(bpl->bpl_dbuf, tx); 267 VERIFY(0 == dmu_free_range(bpl->bpl_mos, 268 bpl->bpl_object, 0, -1ULL, tx)); 269 bpl->bpl_phys->bpl_entries = 0; 270 bpl->bpl_phys->bpl_bytes = 0; 271 if (bpl->bpl_havecomp) { 272 bpl->bpl_phys->bpl_comp = 0; 273 bpl->bpl_phys->bpl_uncomp = 0; 274 } 275 mutex_exit(&bpl->bpl_lock); 276 } 277 278 int 279 bplist_space(bplist_t *bpl, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 280 { 281 uint64_t itor = 0, comp = 0, uncomp = 0; 282 int err; 283 blkptr_t bp; 284 285 mutex_enter(&bpl->bpl_lock); 286 287 err = bplist_hold(bpl); 288 if (err) { 289 mutex_exit(&bpl->bpl_lock); 290 return (err); 291 } 292 293 *usedp = bpl->bpl_phys->bpl_bytes; 294 if (bpl->bpl_havecomp) { 295 *compp = bpl->bpl_phys->bpl_comp; 296 *uncompp = bpl->bpl_phys->bpl_uncomp; 297 } 298 mutex_exit(&bpl->bpl_lock); 299 300 if (!bpl->bpl_havecomp) { 301 while ((err = bplist_iterate(bpl, &itor, &bp)) == 0) { 302 comp += BP_GET_PSIZE(&bp); 303 uncomp += BP_GET_UCSIZE(&bp); 304 } 305 if (err == ENOENT) 306 err = 0; 307 *compp = comp; 308 *uncompp = uncomp; 309 } 310 311 return (err); 312 } 313