/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/dsl_pool.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
#include <sys/arc.h>
#include <sys/zap.h>
#include <sys/zfs_context.h>
#include <sys/fs/zfs.h>

static int
dsl_pool_open_mos_dir(dsl_pool_t *dp, dsl_dir_t **ddp)
{
	uint64_t obj;
	int err;

	err = zap_lookup(dp->dp_meta_objset,
	    dp->dp_root_dir->dd_phys->dd_child_dir_zapobj,
	    MOS_DIR_NAME, sizeof (obj), 1, &obj);
	if (err)
		return (err);

	return (dsl_dir_open_obj(dp, obj, MOS_DIR_NAME, dp, ddp));
}

static dsl_pool_t *
dsl_pool_open_impl(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp;
	blkptr_t *bp = spa_get_rootblkptr(spa);

	dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
	dp->dp_spa = spa;
	dp->dp_meta_rootbp = *bp;
	rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL);
	txg_init(dp, txg);

	txg_list_create(&dp->dp_dirty_datasets,
	    offsetof(dsl_dataset_t, ds_dirty_link));
	txg_list_create(&dp->dp_dirty_dirs,
	    offsetof(dsl_dir_t, dd_dirty_link));
	txg_list_create(&dp->dp_sync_tasks,
	    offsetof(dsl_sync_task_group_t, dstg_node));
	list_create(&dp->dp_synced_objsets, sizeof (dsl_dataset_t),
	    offsetof(dsl_dataset_t, ds_synced_link));

	return (dp);
}

int
dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
{
	int err;
	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
	objset_impl_t *osi;

	rw_enter(&dp->dp_config_rwlock, RW_READER);
	err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi);
	if (err)
		goto out;
	dp->dp_meta_objset = &osi->os;

	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
	    &dp->dp_root_dir_obj);
	if (err)
		goto out;

	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
	    NULL, dp, &dp->dp_root_dir);
	if (err)
		goto out;

	err = dsl_pool_open_mos_dir(dp, &dp->dp_mos_dir);
	if (err)
		goto out;

out:
	rw_exit(&dp->dp_config_rwlock);
	if (err)
		dsl_pool_close(dp);
	else
		*dpp = dp;

	return (err);
}

void
dsl_pool_close(dsl_pool_t *dp)
{
	/* drop our reference from dsl_pool_open() */
	if (dp->dp_mos_dir)
		dsl_dir_close(dp->dp_mos_dir, dp);
	if (dp->dp_root_dir)
		dsl_dir_close(dp->dp_root_dir, dp);

	/* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */
	if (dp->dp_meta_objset)
		dmu_objset_evict(NULL, dp->dp_meta_objset->os);

	txg_list_destroy(&dp->dp_dirty_datasets);
	txg_list_destroy(&dp->dp_dirty_dirs);
	list_destroy(&dp->dp_synced_objsets);

	arc_flush();
	txg_fini(dp);
	rw_destroy(&dp->dp_config_rwlock);
	kmem_free(dp, sizeof (dsl_pool_t));
}

dsl_pool_t *
dsl_pool_create(spa_t *spa, uint64_t txg)
{
	int err;
	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
	dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
	dp->dp_meta_objset = &dmu_objset_create_impl(spa,
	    NULL, DMU_OST_META, tx)->os;

	/* create the pool directory */
	err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
	ASSERT3U(err, ==, 0);

	/* create and open the root dir */
	dsl_dataset_create_root(dp, &dp->dp_root_dir_obj, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
	    NULL, dp, &dp->dp_root_dir));

	/* create and open the meta-objset dir */
	(void) dsl_dir_create_sync(dp->dp_root_dir, MOS_DIR_NAME, tx);
	VERIFY(0 == dsl_pool_open_mos_dir(dp, &dp->dp_mos_dir));

	dmu_tx_commit(tx);

	return (dp);
}

void
dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
{
	dmu_tx_t *tx;
	objset_impl_t *mosi = dp->dp_meta_objset->os;

	tx = dmu_tx_create_assigned(dp, txg);

	do {
		dsl_dir_t *dd;
		dsl_dataset_t *ds;
		dsl_sync_task_group_t *dstg;

		while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
			if (!list_link_active(&ds->ds_synced_link))
				list_insert_tail(&dp->dp_synced_objsets, ds);
			dsl_dataset_sync(ds, tx);
		}
		while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
			dsl_sync_task_group_sync(dstg, tx);
		while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg))
			dsl_dir_sync(dd, tx);
		/*
		 * We need to loop since dsl_sync_task_group_sync()
		 * could create a new (dirty) objset.
		 * XXX - isn't this taken care of by the spa's sync to
		 * convergence loop?
		 */
	} while (!txg_list_empty(&dp->dp_dirty_datasets, txg));

	if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
	    list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) {
		dmu_objset_sync(mosi, tx);
		dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
		spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
	}

	dmu_tx_commit(tx);
}

void
dsl_pool_zil_clean(dsl_pool_t *dp)
{
	dsl_dataset_t *ds;

	while (ds = list_head(&dp->dp_synced_objsets)) {
		list_remove(&dp->dp_synced_objsets, ds);
		ASSERT(ds->ds_user_ptr != NULL);
		zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil);
	}
}

int
dsl_pool_sync_context(dsl_pool_t *dp)
{
	/*
	 * Yeah, this is cheesy.  But the SPA needs some way to let
	 * the sync threads invoke spa_open() and spa_close() while
	 * it holds the namespace lock.  I'm certainly open to better
	 * ideas for how to determine whether the current thread is
	 * operating on behalf of spa_sync().  This works for now.
	 */
	return (curthread == dp->dp_tx.tx_sync_thread ||
	    BP_IS_HOLE(&dp->dp_meta_rootbp));
}

uint64_t
dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree)
{
	uint64_t space, resv;

	/*
	 * Reserve about 1.6% (1/64), or at least 32MB, for allocation
	 * efficiency.
	 * XXX The intent log is not accounted for, so it must fit
	 * within this slop.
	 *
	 * If we're trying to assess whether it's OK to do a free,
	 * cut the reservation in half to allow forward progress
	 * (e.g. make it possible to rm(1) files from a full pool).
	 */
	space = spa_get_dspace(dp->dp_spa);
	resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1);
	if (netfree)
		resv >>= 1;

	return (space - resv);
}
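
/*
 * Editorial note, not part of the original file: a worked example of the
 * dsl_pool_adjustedsize() reservation arithmetic above, assuming
 * SPA_MINDEVSIZE is 64MB (so SPA_MINDEVSIZE >> 1 is the 32MB floor the
 * comment mentions):
 *
 *	space = 1TB:	resv = MAX(1TB >> 6, 32MB) = 16GB
 *			(halved to 8GB if netfree); the function returns
 *			space - resv.
 *	space = 1GB:	resv = MAX(1GB >> 6, 32MB) = MAX(16MB, 32MB) = 32MB
 *			(halved to 16MB if netfree); the function returns
 *			space - resv.
 */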