1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dmu_traverse.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/unique.h> 39 #include <sys/zfs_context.h> 40 41 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 42 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 43 static dsl_checkfunc_t dsl_dataset_rollback_check; 44 static dsl_syncfunc_t dsl_dataset_rollback_sync; 45 static dsl_checkfunc_t dsl_dataset_destroy_check; 46 static dsl_syncfunc_t dsl_dataset_destroy_sync; 47 48 #define DOS_REF_MAX (1ULL << 62) 49 50 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 51 52 /* 53 * We use weighted reference counts to express the various forms of exclusion 54 * between different open modes. 
A STANDARD open is 1 point, an EXCLUSIVE open
 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
 * This makes the exclusion logic simple: the total refcnt for all opens cannot
 * exceed DOS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their
 * weight (DOS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume
 * just over half of the refcnt space, so there can't be more than one, but it
 * can peacefully coexist with any number of STANDARD opens.
 */
static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
	0,			/* DOS_MODE_NONE - invalid */
	1,			/* DOS_MODE_STANDARD - unlimited number */
	(DOS_REF_MAX >> 1) + 1,	/* DOS_MODE_PRIMARY - only one of these */
	DOS_REF_MAX		/* DOS_MODE_EXCLUSIVE - no other opens */
};


/*
 * Called in syncing context when a new block has been written to this
 * dataset: charge its space to the dataset and to its dsl_dir.  A NULL
 * ds means the block belongs to the meta-objset, whose space is
 * accounted in the pool's placeholder dsl_dir (dp_mos_dir).
 */
void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	/* ds_lock protects the accounting fields in ds_phys */
	mutex_enter(&ds->ds_lock);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	/* a newly born block is (so far) referenced only by this dataset */
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir,
	    used, compressed, uncompressed, tx);
}

/*
 * Called in syncing context when a block is freed from this dataset.
 * If the block was born after the most recent snapshot it can be freed
 * immediately (via arc_free); otherwise a snapshot still references it
 * and it goes on the dataset's deadlist.  In either case the bp is
 * zeroed and the space accounting is updated.  A NULL ds means the
 * block belongs to the meta-objset.
 */
void
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(dmu_tx_is_syncing(tx));
	if (BP_IS_HOLE(bp))
		return;

	ASSERT(used > 0);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		/* XXX this can fail, what do we do when it does? */
		(void) arc_free(NULL, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
		bzero(bp, sizeof (blkptr_t));

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		/* born after the last snapshot: no one else references it */
		dprintf_bp(bp, "freeing: %s", "");
		/* XXX check return code? */
		(void) arc_free(NULL, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);

		mutex_enter(&ds->ds_lock);
		/* XXX unique_bytes is not accurate for head datasets */
		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	} else {
		/* a snapshot still references it: defer the free */
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_phys->ds_prev_snap_obj != 0) {
			ASSERT3U(ds->ds_prev->ds_object, ==,
			    ds->ds_phys->ds_prev_snap_obj);
			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
			    ds->ds_object && bp->blk_birth >
			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
				/*
				 * The block is now unique to the previous
				 * snapshot; credit its space there.
				 */
				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
				mutex_enter(&ds->ds_prev->ds_lock);
				ds->ds_prev->ds_phys->ds_unique_bytes +=
				    used;
				mutex_exit(&ds->ds_prev->ds_lock);
			}
		}
	}
	bzero(bp, sizeof (blkptr_t));
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);
}

/*
 * Return a conservative guess at the txg of this dataset's most recent
 * snapshot, including any snapshot creation that may still be pending
 * (ds_trysnap_txg).  A NULL ds (the meta-objset) returns 0.
 */
uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase. So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

/*
 * Return nonzero if a block born at blk_birth is not referenced by any
 * snapshot and could therefore be freed right away.  This is only a
 * guess -- see dsl_dataset_prev_snap_txg() above.
 */
int
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/*
 * dmu_buf eviction callback: tear down the in-core dsl_dataset_t when
 * its bonus buffer is released for the last time.  Undoes everything
 * set up by dsl_dataset_open_obj().
 */
/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* open_refcount == DOS_REF_MAX when deleting */
	ASSERT(ds->ds_open_refcount == 0 ||
	    ds->ds_open_refcount == DOS_REF_MAX);

	dprintf_ds(ds, "evicting %s\n", "");

	unique_remove(ds->ds_phys->ds_fsid_guid);

	/* let the objset layer (or other user) release its state first */
	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	dsl_dir_close(ds->ds_dir, ds);

	if (list_link_active(&ds->ds_synced_link))
		list_remove(&dp->dp_synced_objsets, ds);

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_deadlist.bpl_lock);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

/*
 * Fill in ds_snapname for a snapshot dataset by searching the head
 * dataset's snapnames zap for our object number.  A no-op if the name
 * is already cached, or if this dataset is not a snapshot.
 */
static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

/*
 * Open the dataset with the given object number, taking an open of the
 * given mode.  If no in-core dsl_dataset_t exists yet one is
 * constructed here; concurrent constructors are resolved through
 * dmu_buf_set_user_ie(), and the loser tears its copy down.  On
 * success *dsp holds the dataset; returns EBUSY if the requested mode
 * conflicts with existing opens (weighted refcount would exceed
 * DOS_REF_MAX) or if the dataset is inconsistent and the caller did
 * not ask for DS_MODE_INCONSISTENT.
 */
int
dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
    int mode, void *tag, dsl_dataset_t **dsp)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the blist if we
			 * just opened it.
			 */
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
			/* head dataset: no snapname; cache its prev snap */
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds, &ds->ds_prev);
			}
		} else {
			if (snapname) {
#ifdef ZFS_DEBUG
				/*
				 * Verify that the given snapname really
				 * maps to this object in the head
				 * dataset's snapnames zap.
				 */
				dsl_dataset_phys_t *headphys;
				dmu_buf_t *headdbuf;
				err = dmu_bonus_hold(mos,
				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
				    FTAG, &headdbuf);
				if (err == 0) {
					headphys = headdbuf->db_data;
					uint64_t foundobj;
					err = zap_lookup(dp->dp_meta_objset,
					    headphys->ds_snapnames_zapobj,
					    snapname, sizeof (foundobj), 1,
					    &foundobj);
					ASSERT3U(foundobj, ==, dsobj);
					dmu_buf_rele(headdbuf, FTAG);
				}
#endif
				/*
				 * NOTE(review): unbounded strcat into
				 * ds_snapname -- presumably callers pass an
				 * already length-validated name; confirm.
				 */
				(void) strcat(ds->ds_snapname, snapname);
			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
				err = dsl_dataset_get_snapname(ds);
			}
		}

		if (err == 0) {
			/* publish; winner != NULL means we lost the race */
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			/* tear down our copy (failed, or lost the race) */
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev) {
				dsl_dataset_close(ds->ds_prev,
				    DS_MODE_NONE, ds);
			}
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			uint64_t new =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
			if (new != ds->ds_phys->ds_fsid_guid) {
				/* XXX it won't necessarily be synced... */
				ds->ds_phys->ds_fsid_guid = new;
			}
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);

	/*
	 * Admission check: reject a PRIMARY open of an inconsistent
	 * dataset (unless DS_MODE_INCONSISTENT was requested), or any
	 * open whose weight would push the refcount past DOS_REF_MAX.
	 */
	mutex_enter(&ds->ds_lock);
	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
	    !DS_MODE_IS_INCONSISTENT(mode)) ||
	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
		mutex_exit(&ds->ds_lock);
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		return (EBUSY);
	}
	ds->ds_open_refcount += weight;
	mutex_exit(&ds->ds_lock);

	*dsp = ds;
	return (0);
}

/*
 * Open a dataset by name within the given spa; "name" may contain an
 * @snapname component.  Snapshots may only be opened read-only (EROFS
 * otherwise).  On success *dsp holds the dataset opened with "mode".
 */
int
dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
    void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *tail;
	uint64_t obj;
	dsl_dataset_t *ds = NULL;
	int err = 0;

	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj == 0) {
		/* A dataset with no associated objset */
		err = ENOENT;
		goto out;
	}

	if (tail != NULL) {
		/* leftover name component: must be an @snapname */
		objset_t *mos = dp->dp_meta_objset;

		err = dsl_dataset_open_obj(dp, obj, NULL,
		    DS_MODE_NONE, tag, &ds);
		if (err)
			goto out;
		obj = ds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		ds = NULL;

		if (tail[0] != '@') {
			err = ENOENT;
			goto out;
		}
		tail++;

		/* Look for a snapshot */
		if (!DS_MODE_IS_READONLY(mode)) {
			err = EROFS;
			goto out;
		}
		dprintf("looking for snapshot '%s'\n", tail);
		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
		if (err)
			goto out;
	}
	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);

out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);

	ASSERT3U((err == 0), ==, (ds != NULL));
	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */

	*dsp = ds;
	return (err);
}

/* Open a dataset by name; convenience wrapper for dsl_dataset_open_spa(). */
int
dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
}

/*
 * Write the dataset's full name ("dir/path@snap", or "mos" for the
 * meta-objset) into the caller-supplied buffer.  NOTE(review): no
 * buffer length is passed -- presumably callers supply a
 * maximum-length name buffer; confirm at call sites.
 */
void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			if (!MUTEX_HELD(&ds->ds_lock)) {
				/*
				 * We use a "recursive" mutex so that we
				 * can call dprintf_ds() with ds_lock held.
				 */
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

/*
 * Drop an open of the given mode: subtract its weight from
 * ds_open_refcount and release the bonus-buffer hold (the last release
 * triggers dsl_dataset_evict()).
 */
void
dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_open_refcount, >=, weight);
	ds->ds_open_refcount -= weight;
	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
	    mode, ds->ds_open_refcount);
	mutex_exit(&ds->ds_lock);

	dmu_buf_rele(ds->ds_dbuf, tag);
}

/*
 * At pool creation, create the root dsl_dir and its head dataset with
 * an empty ZFS objset.  *ddobjp returns the root dir's object number.
 */
void
dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	dsl_dataset_t *ds;
	uint64_t dsobj;
	dsl_dir_t *dd;

	dsl_dir_create_root(mos, ddobjp, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));

	/* allocate the dataset object and initialize its phys struct */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	VERIFY(0 ==
	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
	(void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
}

/*
 * Create a new dataset (in syncing context) named "lastname" under
 * parent dir pdd.  If clone_parent is non-NULL the new dataset is a
 * clone of that snapshot and starts out sharing its contents and
 * space accounting.  Returns the new dataset's object number.
 */
uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd,
    const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, ddobj;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dir_t *dd;

	ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
	ASSERT(clone_parent == NULL ||
	    clone_parent->ds_phys->ds_num_children > 0);
	ASSERT(lastname[0] != '@');
	ASSERT(dmu_tx_is_syncing(tx));

	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	if (clone_parent) {
		/* inherit the clone origin's contents and accounting */
		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
		dsphys->ds_prev_snap_txg =
		    clone_parent->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    clone_parent->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    clone_parent->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    clone_parent->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;

		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
		clone_parent->ds_phys->ds_num_children++;

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
	}
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	return (dsobj);
}

/* State shared by dsl_snapshots_destroy() and its per-dataset callback. */
struct destroyarg {
	dsl_sync_task_group_t *dstg;	/* group collecting destroy tasks */
	char *snapname;			/* snapshot name to destroy */
	void *tag;			/* open tag for the datasets */
	char *failed;			/* buffer for the name that failed */
};

/*
 * dmu_objset_find() callback: open "name@snapname" exclusively and add
 * a destroy sync-task for it to the group.  A missing snapshot
 * (ENOENT) is not an error; any other failure records the dataset
 * name in da->failed.  Note that "name" is temporarily extended with
 * the snapshot suffix and then restored in place.
 */
static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	char *cp;
	int err;

	(void) strcat(name, "@");
	(void) strcat(name, da->snapname);
	err = dsl_dataset_open(name,
	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    da->tag, &ds);
	cp = strchr(name, '@');
	*cp = '\0';
	if (err == ENOENT)
		return (0);
	if (err) {
		(void) strcpy(da->failed, name);
		return (err);
	}

	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, da->tag, 0);
	return (0);
}

/*
 * Destroy 'snapname' in all descendants of 'fsname'.
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp;

	/* spa_open() wants only the pool name: clip fsname at the first '/' */
	cp = strchr(fsname, '/');
	if (cp) {
		*cp = '\0';
		err = spa_open(fsname, &spa, FTAG);
		*cp = '/';
	} else {
		err = spa_open(fsname, &spa, FTAG);
	}
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.tag = FTAG;
	da.failed = fsname;

	/* collect one destroy task per descendant that has the snapshot */
	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			/* report (in fsname) which dataset failed */
			dsl_dataset_name(ds, fsname);
			cp = strchr(fsname, '@');
			*cp = '\0';
		}
		/*
		 * If it was successful, destroy_sync would have
		 * closed the ds
		 */
		if (err)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}

/*
 * Destroy the named dataset.  For a snapshot this is a single sync
 * task.  For a head dataset we first mark it inconsistent on disk,
 * free its objects in open context (so syncing context stays cheap),
 * then blow away the dataset and its dsl_dir in one sync-task group.
 */
int
dsl_dataset_destroy(const char *name)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	uint64_t obj;

	if (strchr(name, '@')) {
		/* Destroying a snapshot is simpler */
		err = dsl_dataset_open(name,
		    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
		    FTAG, &ds);
		if (err)
			return (err);
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    ds, FTAG, 0);
		/* on success, destroy_sync closed the ds for us */
		if (err)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		return (err);
	}

	err = dmu_objset_open(name, DMU_OST_ANY,
	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
	if (err)
		return (err);
	ds = os->os->os_dsl_dataset;
	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err) {
		dmu_objset_close(os);
		return (err);
	}

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0;
	    err = dmu_object_next(os, &obj, FALSE)) {
		dmu_tx_t *tx = dmu_tx_create(os);
		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
		dmu_tx_hold_bonus(tx, obj);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err) {
			/*
			 * Perhaps there is not enough disk
			 * space. Just deal with it from
			 * dsl_dataset_destroy_sync().
			 */
			dmu_tx_abort(tx);
			continue;
		}
		VERIFY(0 == dmu_object_free(os, obj, tx));
		dmu_tx_commit(tx);
	}
	/* Make sure it's not dirty before we finish destroying it. */
	txg_wait_synced(dd->dd_pool, 0);

	dmu_objset_close(os);
	/* dmu_object_next() reports ESRCH once it runs out of objects */
	if (err != ESRCH)
		return (err);

	err = dsl_dataset_open(name,
	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    FTAG, &ds);
	if (err)
		return (err);

	err = dsl_dir_open(name, FTAG, &dd, NULL);
	if (err) {
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		return (err);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, FTAG, 0);
	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
	    dsl_dir_destroy_sync, dd, FTAG, 0);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	/* if it is successful, *destroy_sync will close the ds+dd */
	if (err) {
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		dsl_dir_close(dd, FTAG);
	}
	return (err);
}

/*
 * Roll the dataset back to its most recent snapshot.  The caller must
 * hold the dataset open EXCLUSIVE (open_refcount == DOS_REF_MAX).
 */
int
dsl_dataset_rollback(dsl_dataset_t *ds)
{
	ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX);
	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
	    ds, NULL, 0));
}

/*
 * Atomically install a user pointer and its eviction callback, but
 * only if none is set yet.  Returns the previous user pointer (NULL
 * means ours was installed).
 */
void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

/* Return the user pointer installed by dsl_dataset_set_user_ptr(). */
void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}


/* Copy out the dataset's root block pointer. */
void
dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
{
	*bp = ds->ds_phys->ds_bp;
}

/*
 * Set the dataset's root block pointer (in syncing context).  A NULL
 * ds means the meta-objset, whose root bp lives in the dsl_pool.
 */
void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

/* Return the spa this dataset belongs to. */
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

/*
 * Mark the dataset dirty for this txg, taking an extra hold on its
 * bonus buffer until it has been written out.  Snapshots must never
 * be dirtied; a NULL ds (the meta-objset) needs no tracking here.
 */
void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

/* Accounting accumulators and free context for kill_blkptr(). */
struct killarg {
	uint64_t *usedp;
	uint64_t *compressedp;
	uint64_t *uncompressedp;
	zio_t *zio;		/* parent zio for the async frees */
	dmu_tx_t *tx;
};

/*
 * traverse_dsl_dataset() callback: tally the block's space into the
 * killarg accumulators and free it asynchronously under ka->zio.
 */
static int
kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
{
	struct killarg *ka = arg;
	blkptr_t *bp = &bc->bc_blkptr;

	ASSERT3U(bc->bc_errno, ==, 0);

	/*
	 * Since this callback is not called concurrently, no lock is
	 * needed on the accounting values.
	 */
	*ka->usedp += bp_get_dasize(spa, bp);
	*ka->compressedp += BP_GET_PSIZE(bp);
	*ka->uncompressedp += BP_GET_UCSIZE(bp);
	/* XXX check for EIO? */
	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
	    ARC_NOWAIT);
	return (0);
}

/*
 * Sync-task check for rollback: fail unless there is a previous
 * snapshot, this dataset is not itself a snapshot, and nothing was
 * modified in this very txg.
 */
/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/*
	 * There must be a previous snapshot. I suppose we could roll
	 * it back to being empty (and re-initialize the upper (ZPL)
	 * layer). But for now there's no way to do this via the user
	 * interface.
	 */
	if (ds->ds_phys->ds_prev_snap_txg == 0)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them. Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}

/*
 * Sync-task function for rollback: free everything born since the
 * previous snapshot, reset the deadlist, and make the dataset's
 * contents and accounting match that snapshot again.
 */
/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/* Free blkptrs that we gave birth to */
		zio_t *zio;
		uint64_t used = 0, compressed = 0, uncompressed = 0;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);

		/* give back the space the freed blocks occupied */
		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	}

	/* Change our contents to that of the prev snapshot */
	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
	ds->ds_phys->ds_compressed_bytes =
	    ds->ds_prev->ds_phys->ds_compressed_bytes;
	ds->ds_phys->ds_uncompressed_bytes =
	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
	ds->ds_phys->ds_unique_bytes = 0;

	if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
		/* we now share everything with the prev snapshot */
		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		ds->ds_prev->ds_phys->ds_unique_bytes = 0;
	}
}

/*
 * Sync-task check for the first phase of head-dataset destruction
 * (marking it inconsistent).
 */
/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	return (0);
}

/*
 * Sync-task function for the first phase of head-dataset destruction:
 * persist DS_FLAG_INCONSISTENT so a crash mid-destroy is detectable.
 */
/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
}

/*
 * Sync-task check for the final destroy: refuse branch points, heads
 * that still have their own snapshots, and datasets modified in this
 * same txg (the traversal would miss those changes).
 */
/* ARGSUSED */
static int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them. Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking...
 */
	return (0);
}

/*
 * Sync task: destroy the dataset (head or snapshot) in 'arg1'.
 *
 * Unlinks the dataset from its snapshot chain, transfers or frees its
 * deadlist entries, adjusts space accounting on the containing dsl_dir,
 * removes its snapshot-namespace entry (or the dd head link), and finally
 * frees the on-disk object.  Runs in syncing context with the pool config
 * lock held for writer; the dataset must be held EXCLUSIVE (open_refcount
 * at DOS_REF_MAX) and have at most one child.
 */
static void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t used = 0, compressed = 0, uncompressed = 0;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		/* Use the cached prev snapshot if we have one open. */
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_prev));
		}
		/*
		 * If prev's next pointer doesn't name us, we hang off a
		 * branch point (i.e. we are, or descend from, a clone).
		 */
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			/* Splice ourselves out of the snapshot chain. */
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	/* Parent zio to gather all the frees issued below. */
	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		/* This is a snapshot with a next snapshot. */
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;

		spa_scrub_restart(dp->dp_spa, tx->tx_txg);

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
		    &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				/*
				 * Block becomes unique to prev once we
				 * go away (only meaningful when prev is
				 * on our direct chain, not a branch).
				 */
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				/* Born after prev: actually free it. */
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;

			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_after_next));
			itor = 0;
			while (bplist_iterate(&ds_after_next->ds_deadlist,
			    &itor, &bp) == 0) {
				if (bp.blk_birth >
				    ds->ds_phys->ds_prev_snap_txg &&
				    bp.blk_birth <=
				    ds->ds_phys->ds_creation_txg) {
					ds_next->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			}

			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			/*
			 * It would be nice to update the head dataset's
			 * unique.  To do so we would have to traverse
			 * it for blocks born after ds_prev, which is
			 * pretty expensive just to maintain something
			 * for debugging purposes.
			 */
			ASSERT3P(ds_next->ds_prev, ==, ds);
			/* Repoint next's cached prev from us to our prev. */
			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
			    ds_next);
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
			} else {
				ds_next->ds_prev = NULL;
			}
		}
		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);

		/*
		 * NB: unique_bytes is not accurate for head objsets
		 * because we don't update it when we delete the most
		 * recent snapshot -- see above comment.
		 */
		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
	}

	/* Wait for all the frees issued above to complete. */
	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);

	if (ds->ds_phys->ds_snapnames_zapobj) {
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	}

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dataset */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		/*
		 * dsl_dir_sync_destroy() called us, they'll destroy
		 * the dataset.
		 */
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			/* Verify the name really maps to this object. */
			uint64_t val;
			err = zap_lookup(mos,
			    ds_head->ds_phys->ds_snapnames_zapobj,
			    ds->ds_snapname, 8, 1, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
		    ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
	}

	/* Drop prev only if we opened it ourselves above. */
	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);

	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));
}

/*
 * Sync-task check for snapshot creation: verify the snapshot name is
 * not already taken and that no snapshot of this dataset exists yet in
 * this (or a later) txg.  arg1 is the objset, arg2 the snapshot name.
 */
/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	dsl_dataset_t *ds = os->os->os_dsl_dataset;
	const char *snapname = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

/*
 * Sync task: create a snapshot of the objset in 'arg1' named 'arg2'.
 * Allocates a new dataset object that inherits the head's current bp,
 * deadlist and space totals, links it into the snapshot chain, gives
 * the head a fresh empty deadlist, and records the name in the head's
 * snapnames ZAP.  Runs with the pool config lock held for writer.
 */
void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	dsl_dataset_t *ds = os->os->os_dsl_dataset;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	/* The snapshot inherits the head's current state and totals. */
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		/* Only relink prev if we're on its direct chain. */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		}
	}

	/* The head hands its deadlist to the snapshot and starts fresh. */
	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
	ds->ds_phys->ds_unique_bytes = 0;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	/* Re-cache ds_prev to point at the new snapshot. */
	if (ds->ds_prev)
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
	VERIFY(0 == dsl_dataset_open_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, snapname,
	    DS_MODE_NONE, ds, &ds->ds_prev));
}

/*
 * Write out this (head) dataset's dirty data for the current txg and
 * release the per-sync hold on its dbuf.
 */
void
dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	dmu_objset_sync(ds->ds_user_ptr, tx);
	dsl_dir_dirty(ds->ds_dir, tx);
	bplist_close(&ds->ds_deadlist);

	dmu_buf_rele(ds->ds_dbuf, ds);
}

/*
 * Populate 'nv' with this dataset's property statistics (on top of the
 * containing dsl_dir's stats).
 */
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	dsl_dir_stats(ds->ds_dir, nv);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    ds->ds_phys->ds_creation_time);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    ds->ds_phys->ds_creation_txg);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
	    ds->ds_phys->ds_used_bytes);

	if (ds->ds_phys->ds_next_snap_obj) {
		/*
		 * This is a snapshot; override the dd's space used with
		 * our unique space and compression ratio.
		 */
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
		    ds->ds_phys->ds_unique_bytes);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
		    ds->ds_phys->ds_compressed_bytes));
	}
}

/*
 * Fill in the cheap-to-compute fields of 'stat' for this dataset.
 * Takes the config lock as reader only if a clone origin must be named.
 */
void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	if (ds->ds_phys->ds_next_snap_obj) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	}

	/* clone origin is really a dsl_dir thing... */
	if (ds->ds_dir->dd_phys->dd_clone_parent_obj) {
		dsl_dataset_t *ods;

		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
		    ds->ds_dir->dd_phys->dd_clone_parent_obj,
		    NULL, DS_MODE_NONE, FTAG, &ods));
		dsl_dataset_name(ods, stat->dds_clone_of);
		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
	}
}

/*
 * Return the dataset's fsid guid as recorded on disk.
 */
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_phys->ds_fsid_guid);
}

/*
 * Report referenced/available space and used/available object counts
 * for this dataset.
 */
void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_used_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	/* blk_fill of the root bp counts the objects in use. */
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}

/*
 * Sync-task check for snapshot rename: fail with EEXIST if the new
 * name is already present in the head's snapshot namespace.
 */
/* ARGSUSED */
static int
dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	uint64_t val;
	int err;

	err = dsl_dataset_open_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
	if (err)
		return (err);

	/* new name better not be in use */
	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
	    newsnapname, 8, 1, &val);
	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);

	if (err == 0)
		err = EEXIST;
	else if (err == ENOENT)
		err = 0;
	return (err);
}

/*
 * Sync task: rename snapshot 'arg1' to 'arg2' by moving its entry in
 * the head dataset's snapnames ZAP and updating the cached ds_snapname.
 */
static void
dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	int err;

	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);

	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));

	VERIFY(0 == dsl_dataset_get_snapname(ds));
	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, tx);
	ASSERT3U(err, ==, 0);
	mutex_enter(&ds->ds_lock);
	(void) strcpy(ds->ds_snapname, newsnapname);
	mutex_exit(&ds->ds_lock);
	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
	ASSERT3U(err, ==, 0);

	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
}

/*
 * Rename a dataset.  Filesystem renames are delegated to
 * dsl_dir_rename(); snapshot renames are performed here as a sync task
 * and must stay within the same filesystem (EXDEV otherwise).
 */
#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(const char *oldname, const char *newname)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	const char *tail;
	int err;

	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		/* Whole filesystem rename. */
		err = dsl_dir_rename(dd, newname);
		dsl_dir_close(dd, FTAG);
		return (err);
	}
	if (tail[0] != '@') {
		/* the name ended in a nonexistent component */
		dsl_dir_close(dd, FTAG);
		return (ENOENT);
	}

	dsl_dir_close(dd, FTAG);

	/* new name must be snapshot in same filesystem */
	tail = strchr(newname, '@');
	if (tail == NULL)
		return (EINVAL);
	tail++;
	if (strncmp(oldname, newname, tail - newname) != 0)
		return (EXDEV);

	err = dsl_dataset_open(oldname,
	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
	if (err)
		return (err);

	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_snapshot_rename_check,
	    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);

	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);

	return (err);
}

/*
 * Results computed by dsl_dataset_promote_check() and consumed by
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	uint64_t used, comp, uncomp, unique;	/* space to transfer */
	uint64_t newnext_obj, snapnames_obj;	/* pivot's new next; old
						   head's snapnames ZAP */
};

/*
 * Sync-task check for promotion of clone 'arg1': verify it really is a
 * clone, that no snapshot names collide with the promoted-to namespace,
 * and precompute the space that will move between the two dsl_dirs.
 * Because this walk is expensive it is only done in syncing context.
 */
static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *pdd = NULL;
	dsl_dataset_t *ds = NULL;
	dsl_dataset_t *pivot_ds = NULL;
	dsl_dataset_t *newnext_ds = NULL;
	int err;
	char *name = NULL;
	uint64_t itor = 0;
	blkptr_t bp;

	bzero(pa, sizeof (*pa));

	/* Check that it is a clone */
	if (dd->dd_phys->dd_clone_parent_obj == 0)
		return (EINVAL);

	/* Since this is so expensive, don't do the preliminary check */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (err = dsl_dataset_open_obj(dp,
	    dd->dd_phys->dd_clone_parent_obj,
	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
		goto out;
	pdd = pivot_ds->ds_dir;

	{
		dsl_dataset_t *phds;
		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    pdd->dd_phys->dd_head_dataset_obj,
		    NULL, DS_MODE_NONE, FTAG, &phds))
			goto out;
		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
	}

	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
		err = EXDEV;
		goto out;
	}

	/* find pivot point's new next ds */
	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
	while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
		dsl_dataset_t *prev;

		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    newnext_ds->ds_phys->ds_prev_snap_obj,
		    NULL, DS_MODE_NONE, FTAG, &prev))
			goto out;
		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
		newnext_ds = prev;
	}
	pa->newnext_obj = newnext_ds->ds_object;

	/* compute pivot point's new unique space */
	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
	    &itor, &bp)) == 0) {
		if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
	}
	if (err != ENOENT)
		goto out;

	/* Walk the snapshots that we are moving */
	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	ds = pivot_ds;
	/* CONSTCOND */
	while (TRUE) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *prev;

		/* Check that the snapshot name does not conflict */
		dsl_dataset_name(ds, name);
		err = zap_lookup(dd->dd_pool->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &val);
		if (err != ENOENT) {
			if (err == 0)
				err = EEXIST;
			goto out;
		}

		/*
		 * compute space to transfer.  Each snapshot gave birth to:
		 * (my used) - (prev's used) + (deadlist's used)
		 */
		pa->used += ds->ds_phys->ds_used_bytes;
		pa->comp += ds->ds_phys->ds_compressed_bytes;
		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;

		/* If we reach the first snapshot, we're done. */
		if (ds->ds_phys->ds_prev_snap_obj == 0)
			break;

		if (err = bplist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp))
			goto out;
		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
		    FTAG, &prev))
			goto out;
		pa->used += dlused - prev->ds_phys->ds_used_bytes;
		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;

		/*
		 * We could be a clone of a clone.  If we reach our
		 * parent's branch point, we're done.
		 */
		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
			break;
		}
		if (ds != pivot_ds)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		ds = prev;
	}

	/* Check that there is enough space here */
	err = dsl_dir_transfer_possible(pdd, dd, pa->used);

out:
	if (ds && ds != pivot_ds)
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
	if (pivot_ds)
		dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
	if (newnext_ds)
		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
	if (name)
		kmem_free(name, MAXPATHLEN);
	return (err);
}

/*
 * Sync task: promote clone 'arg1'.  Moves the shared snapshots (their
 * snapnames entries and containing dsl_dir) from the origin's dir into
 * the clone's, swaps the clone-origin linkage between the two dirs,
 * and transfers the space accounting computed by the check function.
 */
static void
dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *pdd = NULL;
	dsl_dataset_t *ds, *pivot_ds;
	char *name;

	ASSERT(dd->dd_phys->dd_clone_parent_obj != 0);
	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));

	VERIFY(0 == dsl_dataset_open_obj(dp,
	    dd->dd_phys->dd_clone_parent_obj,
	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds));
	/*
	 * We need to explicitly open pdd, since pivot_ds's pdd will be
	 * changing.
	 */
	VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object,
	    NULL, FTAG, &pdd));

	/* move snapshots to this dir */
	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	ds = pivot_ds;
	/* CONSTCOND */
	while (TRUE) {
		dsl_dataset_t *prev;

		/* move snap name entry */
		dsl_dataset_name(ds, name);
		VERIFY(0 == zap_remove(dp->dp_meta_objset,
		    pa->snapnames_obj, ds->ds_snapname, tx));
		VERIFY(0 == zap_add(dp->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));

		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
		ds->ds_phys->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, pdd);
		dsl_dir_close(ds->ds_dir, ds);
		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		ASSERT3U(dsl_prop_numcb(ds), ==, 0);

		if (ds->ds_phys->ds_prev_snap_obj == 0)
			break;

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
		    FTAG, &prev));

		/* Stop at our parent's branch point (clone of a clone). */
		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
			break;
		}
		if (ds != pivot_ds)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		ds = prev;
	}
	if (ds != pivot_ds)
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);

	/* change pivot point's next snap */
	dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
	pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;

	/* change clone parentage: swap the origin linkage between dirs */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
	dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
	dmu_buf_will_dirty(pdd->dd_dbuf, tx);
	pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;

	/* change space accounting */
	dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx);
	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
	pivot_ds->ds_phys->ds_unique_bytes = pa->unique;

	dsl_dir_close(pdd, FTAG);
	dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
	kmem_free(name, MAXPATHLEN);
}

/*
 * Promote the clone named 'name': make it the "real" parent of its
 * origin's snapshots by running the promote check/sync task pair.
 */
int
dsl_dataset_promote(const char *name)
{
	dsl_dataset_t *ds;
	int err;
	dmu_object_info_t doi;
	struct promotearg pa;

	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
	if (err)
		return (err);

	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, &doi);
	if (err) {
		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
		return (err);
	}

	/*
	 * Add in 128x the snapnames zapobj size, since we will be moving
	 * a bunch of snapnames to the promoted ds, and dirtying their
	 * bonus buffers.
	 */
	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_promote_check,
	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
	return (err);
}