1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013 by Delphix. All rights reserved. 24 */ 25 26 #include <sys/dmu.h> 27 #include <sys/dmu_objset.h> 28 #include <sys/dmu_tx.h> 29 #include <sys/dnode.h> 30 #include <sys/zap.h> 31 #include <sys/zfeature.h> 32 33 uint64_t 34 dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, 35 dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 36 { 37 uint64_t object; 38 uint64_t L2_dnode_count = DNODES_PER_BLOCK << 39 (DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT); 40 dnode_t *dn = NULL; 41 int restarted = B_FALSE; 42 43 mutex_enter(&os->os_obj_lock); 44 for (;;) { 45 object = os->os_obj_next; 46 /* 47 * Each time we polish off an L2 bp worth of dnodes 48 * (2^13 objects), move to another L2 bp that's still 49 * reasonably sparse (at most 1/4 full). Look from the 50 * beginning once, but after that keep looking from here. 51 * If we can't find one, just keep going from here. 52 */ 53 if (P2PHASE(object, L2_dnode_count) == 0) { 54 uint64_t offset = restarted ? object << DNODE_SHIFT : 0; 55 int error = dnode_next_offset(DMU_META_DNODE(os), 56 DNODE_FIND_HOLE, 57 &offset, 2, DNODES_PER_BLOCK >> 2, 0); 58 restarted = B_TRUE; 59 if (error == 0) 60 object = offset >> DNODE_SHIFT; 61 } 62 os->os_obj_next = ++object; 63 64 /* 65 * XXX We should check for an i/o error here and return 66 * up to our caller. Actually we should pre-read it in 67 * dmu_tx_assign(), but there is currently no mechanism 68 * to do so. 69 */ 70 (void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, 71 FTAG, &dn); 72 if (dn) 73 break; 74 75 if (dmu_object_next(os, &object, B_TRUE, 0) == 0) 76 os->os_obj_next = object - 1; 77 } 78 79 dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx); 80 dnode_rele(dn, FTAG); 81 82 mutex_exit(&os->os_obj_lock); 83 84 dmu_tx_add_new_object(tx, os, object); 85 return (object); 86 } 87 88 int 89 dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, 90 int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 91 { 92 dnode_t *dn; 93 int err; 94 95 if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx)) 96 return (SET_ERROR(EBADF)); 97 98 err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn); 99 if (err) 100 return (err); 101 dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx); 102 dnode_rele(dn, FTAG); 103 104 dmu_tx_add_new_object(tx, os, object); 105 return (0); 106 } 107 108 int 109 dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot, 110 int blocksize, dmu_object_type_t bonustype, int bonuslen) 111 { 112 dnode_t *dn; 113 dmu_tx_t *tx; 114 int nblkptr; 115 int err; 116 117 if (object == DMU_META_DNODE_OBJECT) 118 return (SET_ERROR(EBADF)); 119 120 err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 121 FTAG, &dn); 122 if (err) 123 return (err); 124 125 if (dn->dn_type == ot && dn->dn_datablksz == blocksize && 126 dn->dn_bonustype == bonustype && dn->dn_bonuslen == bonuslen) { 127 /* nothing is changing, this is a noop */ 128 dnode_rele(dn, FTAG); 129 return (0); 130 } 131 132 if (bonustype == DMU_OT_SA) { 133 nblkptr = 1; 134 } else { 135 nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT); 136 } 137 138 /* 139 * If we are losing blkptrs or changing the block size this must 140 * be a new file instance. We must clear out the previous file 141 * contents before we can change this type of metadata in the dnode. 142 */ 143 if (dn->dn_nblkptr > nblkptr || dn->dn_datablksz != blocksize) { 144 err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END); 145 if (err) 146 goto out; 147 } 148 149 tx = dmu_tx_create(os); 150 dmu_tx_hold_bonus(tx, object); 151 err = dmu_tx_assign(tx, TXG_WAIT); 152 if (err) { 153 dmu_tx_abort(tx); 154 goto out; 155 } 156 157 dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx); 158 159 dmu_tx_commit(tx); 160 out: 161 dnode_rele(dn, FTAG); 162 163 return (err); 164 } 165 166 int 167 dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx) 168 { 169 dnode_t *dn; 170 int err; 171 172 ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx)); 173 174 err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 175 FTAG, &dn); 176 if (err) 177 return (err); 178 179 ASSERT(dn->dn_type != DMU_OT_NONE); 180 dnode_free_range(dn, 0, DMU_OBJECT_END, tx); 181 dnode_free(dn, tx); 182 dnode_rele(dn, FTAG); 183 184 return (0); 185 } 186 187 int 188 dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg) 189 { 190 uint64_t offset = (*objectp + 1) << DNODE_SHIFT; 191 int error; 192 193 error = dnode_next_offset(DMU_META_DNODE(os), 194 (hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg); 195 196 *objectp = offset >> DNODE_SHIFT; 197 198 return (error); 199 } 200 201 /* 202 * Turn this object from old_type into DMU_OTN_ZAP_METADATA, and bump the 203 * refcount on SPA_FEATURE_EXTENSIBLE_DATASET. 204 * 205 * Only for use from syncing context, on MOS objects. 206 */ 207 void 208 dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type, 209 dmu_tx_t *tx) 210 { 211 dnode_t *dn; 212 213 ASSERT(dmu_tx_is_syncing(tx)); 214 215 VERIFY0(dnode_hold(mos, object, FTAG, &dn)); 216 if (dn->dn_type == DMU_OTN_ZAP_METADATA) { 217 dnode_rele(dn, FTAG); 218 return; 219 } 220 ASSERT3U(dn->dn_type, ==, old_type); 221 ASSERT0(dn->dn_maxblkid); 222 dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type = 223 DMU_OTN_ZAP_METADATA; 224 dnode_setdirty(dn, tx); 225 dnode_rele(dn, FTAG); 226 227 mzap_create_impl(mos, object, 0, 0, tx); 228 229 spa_feature_incr(dmu_objset_spa(mos), 230 SPA_FEATURE_EXTENSIBLE_DATASET, tx); 231 } 232 233 void 234 dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx) 235 { 236 dnode_t *dn; 237 dmu_object_type_t t; 238 239 ASSERT(dmu_tx_is_syncing(tx)); 240 241 VERIFY0(dnode_hold(mos, object, FTAG, &dn)); 242 t = dn->dn_type; 243 dnode_rele(dn, FTAG); 244 245 if (t == DMU_OTN_ZAP_METADATA) { 246 spa_feature_decr(dmu_objset_spa(mos), 247 SPA_FEATURE_EXTENSIBLE_DATASET, tx); 248 } 249 VERIFY0(dmu_object_free(mos, object, tx)); 250 } 251