1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dsl_pool.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_synctask.h> 32 #include <sys/dmu_tx.h> 33 #include <sys/dmu_objset.h> 34 #include <sys/arc.h> 35 #include <sys/zap.h> 36 #include <sys/zio.h> 37 #include <sys/zfs_context.h> 38 #include <sys/fs/zfs.h> 39 40 static int 41 dsl_pool_open_mos_dir(dsl_pool_t *dp, dsl_dir_t **ddp) 42 { 43 uint64_t obj; 44 int err; 45 46 err = zap_lookup(dp->dp_meta_objset, 47 dp->dp_root_dir->dd_phys->dd_child_dir_zapobj, 48 MOS_DIR_NAME, sizeof (obj), 1, &obj); 49 if (err) 50 return (err); 51 52 return (dsl_dir_open_obj(dp, obj, MOS_DIR_NAME, dp, ddp)); 53 } 54 55 static dsl_pool_t * 56 dsl_pool_open_impl(spa_t *spa, uint64_t txg) 57 { 58 dsl_pool_t *dp; 59 blkptr_t *bp = spa_get_rootblkptr(spa); 60 61 dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP); 62 dp->dp_spa = spa; 63 dp->dp_meta_rootbp = *bp; 64 rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL); 65 txg_init(dp, txg); 66 67 txg_list_create(&dp->dp_dirty_datasets, 68 offsetof(dsl_dataset_t, ds_dirty_link)); 69 txg_list_create(&dp->dp_dirty_dirs, 70 offsetof(dsl_dir_t, dd_dirty_link)); 71 txg_list_create(&dp->dp_sync_tasks, 72 offsetof(dsl_sync_task_group_t, dstg_node)); 73 list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t), 74 offsetof(dsl_dataset_t, ds_synced_link)); 75 76 return (dp); 77 } 78 79 int 80 dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp) 81 { 82 int err; 83 dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); 84 objset_impl_t *osi; 85 86 rw_enter(&dp->dp_config_rwlock, RW_READER); 87 err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp, &osi); 88 if (err) 89 goto out; 90 dp->dp_meta_objset = &osi->os; 91 92 err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 93 DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, 94 &dp->dp_root_dir_obj); 95 if (err) 96 goto out; 97 98 err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, 99 NULL, dp, &dp->dp_root_dir); 100 if (err) 101 goto out; 102 103 err = dsl_pool_open_mos_dir(dp, &dp->dp_mos_dir); 104 if (err) 105 goto out; 106 107 out: 108 rw_exit(&dp->dp_config_rwlock); 109 if (err) 110 dsl_pool_close(dp); 111 else 112 *dpp = dp; 113 114 return (err); 115 } 116 117 void 118 dsl_pool_close(dsl_pool_t *dp) 119 { 120 /* drop our reference from dsl_pool_open() */ 121 if (dp->dp_mos_dir) 122 dsl_dir_close(dp->dp_mos_dir, dp); 123 if (dp->dp_root_dir) 124 dsl_dir_close(dp->dp_root_dir, dp); 125 126 /* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */ 127 if (dp->dp_meta_objset) 128 dmu_objset_evict(NULL, dp->dp_meta_objset->os); 129 130 txg_list_destroy(&dp->dp_dirty_datasets); 131 txg_list_destroy(&dp->dp_dirty_dirs); 132 list_destroy(&dp->dp_synced_datasets); 133 134 arc_flush(); 135 txg_fini(dp); 136 rw_destroy(&dp->dp_config_rwlock); 137 kmem_free(dp, sizeof (dsl_pool_t)); 138 } 139 140 dsl_pool_t * 141 dsl_pool_create(spa_t *spa, uint64_t txg) 142 { 143 int err; 144 dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); 145 dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); 146 dp->dp_meta_objset = &dmu_objset_create_impl(spa, 147 NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx)->os; 148 149 /* create the pool directory */ 150 err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, 151 DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); 152 ASSERT3U(err, ==, 0); 153 154 /* create and open the root dir */ 155 dsl_dataset_create_root(dp, &dp->dp_root_dir_obj, tx); 156 VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj, 157 NULL, dp, &dp->dp_root_dir)); 158 159 /* create and open the meta-objset dir */ 160 (void) dsl_dir_create_sync(dp->dp_root_dir, MOS_DIR_NAME, tx); 161 VERIFY(0 == dsl_pool_open_mos_dir(dp, &dp->dp_mos_dir)); 162 163 dmu_tx_commit(tx); 164 165 return (dp); 166 } 167 168 void 169 dsl_pool_sync(dsl_pool_t *dp, uint64_t txg) 170 { 171 zio_t *zio; 172 dmu_tx_t *tx; 173 dsl_dir_t *dd; 174 dsl_dataset_t *ds; 175 dsl_sync_task_group_t *dstg; 176 objset_impl_t *mosi = dp->dp_meta_objset->os; 177 int err; 178 179 tx = dmu_tx_create_assigned(dp, txg); 180 181 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 182 while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) { 183 if (!list_link_active(&ds->ds_synced_link)) 184 list_insert_tail(&dp->dp_synced_datasets, ds); 185 else 186 dmu_buf_rele(ds->ds_dbuf, ds); 187 dsl_dataset_sync(ds, zio, tx); 188 } 189 err = zio_wait(zio); 190 ASSERT(err == 0); 191 192 while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) 193 dsl_sync_task_group_sync(dstg, tx); 194 while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) 195 dsl_dir_sync(dd, tx); 196 197 if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL || 198 list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) { 199 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 200 dmu_objset_sync(mosi, zio, tx); 201 err = zio_wait(zio); 202 ASSERT(err == 0); 203 dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", ""); 204 spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp); 205 } 206 207 dmu_tx_commit(tx); 208 } 209 210 void 211 dsl_pool_zil_clean(dsl_pool_t *dp) 212 { 213 dsl_dataset_t *ds; 214 215 while (ds = list_head(&dp->dp_synced_datasets)) { 216 list_remove(&dp->dp_synced_datasets, ds); 217 ASSERT(ds->ds_user_ptr != NULL); 218 zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil); 219 dmu_buf_rele(ds->ds_dbuf, ds); 220 } 221 } 222 223 /* 224 * TRUE if the current thread is the tx_sync_thread or if we 225 * are being called from SPA context during pool initialization. 226 */ 227 int 228 dsl_pool_sync_context(dsl_pool_t *dp) 229 { 230 return (curthread == dp->dp_tx.tx_sync_thread || 231 spa_get_dsl(dp->dp_spa) == NULL); 232 } 233 234 uint64_t 235 dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree) 236 { 237 uint64_t space, resv; 238 239 /* 240 * Reserve about 1.6% (1/64), or at least 32MB, for allocation 241 * efficiency. 242 * XXX The intent log is not accounted for, so it must fit 243 * within this slop. 244 * 245 * If we're trying to assess whether it's OK to do a free, 246 * cut the reservation in half to allow forward progress 247 * (e.g. make it possible to rm(1) files from a full pool). 248 */ 249 space = spa_get_dspace(dp->dp_spa); 250 resv = MAX(space >> 6, SPA_MINDEVSIZE >> 1); 251 if (netfree) 252 resv >>= 1; 253 254 return (space - resv); 255 } 256