/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_checkfunc_t dsl_dataset_destroy_check;
static dsl_syncfunc_t dsl_dataset_destroy_sync;

#define	DOS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * We use weighted reference counts to express the various forms of exclusion
 * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
 * This makes the exclusion logic simple: the total refcnt for all opens cannot
 * exceed DOS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
 * weight (DOS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
 * just over half of the refcnt space, so there can't be more than one, but it
 * can peacefully coexist with any number of STANDARD opens.
 */
static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
	0,			/* DOS_MODE_NONE - invalid */
	1,			/* DOS_MODE_STANDARD - unlimited number */
	(DOS_REF_MAX >> 1) + 1,	/* DOS_MODE_PRIMARY - only one of these */
	DOS_REF_MAX		/* DOS_MODE_EXCLUSIVE - no other opens */
};


void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_lock);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir,
	    used, compressed, uncompressed, tx);
}

void
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(dmu_tx_is_syncing(tx));
	if (BP_IS_HOLE(bp))
		return;

	ASSERT(used > 0);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		/* XXX this can fail, what do we do when it does? */
		(void) arc_free(NULL, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
		bzero(bp, sizeof (blkptr_t));

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		dprintf_bp(bp, "freeing: %s", "");
		/* XXX check return code? */
		(void) arc_free(NULL, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);

		mutex_enter(&ds->ds_lock);
		/* XXX unique_bytes is not accurate for head datasets */
		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_phys->ds_prev_snap_obj != 0) {
			ASSERT3U(ds->ds_prev->ds_object, ==,
			    ds->ds_phys->ds_prev_snap_obj);
			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
			    ds->ds_object && bp->blk_birth >
			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
				mutex_enter(&ds->ds_prev->ds_lock);
				ds->ds_prev->ds_phys->ds_unique_bytes +=
				    used;
				mutex_exit(&ds->ds_prev->ds_lock);
			}
		}
	}
	bzero(bp, sizeof (blkptr_t));
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	return (MAX(ds->ds_phys->ds_prev_snap_txg, ds->ds_trysnap_txg));
}

int
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* open_refcount == DOS_REF_MAX when deleting */
	ASSERT(ds->ds_open_refcount == 0 ||
	    ds->ds_open_refcount == DOS_REF_MAX);

	dprintf_ds(ds, "evicting %s\n", "");

	unique_remove(ds->ds_phys->ds_fsid_guid);

	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	dsl_dir_close(ds->ds_dir, ds);

	if (list_link_active(&ds->ds_synced_link))
		list_remove(&dp->dp_synced_objsets, ds);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

int
dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
    int mode, void *tag, dsl_dataset_t **dsp)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the blist if we
			 * just opened it.
			 */
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds, &ds->ds_prev);
			}
		} else {
			if (snapname) {
#ifdef ZFS_DEBUG
				dsl_dataset_phys_t *headphys;
				dmu_buf_t *headdbuf;
				err = dmu_bonus_hold(mos,
				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
				    FTAG, &headdbuf);
				if (err == 0) {
					headphys = headdbuf->db_data;
					uint64_t foundobj;
					err = zap_lookup(dp->dp_meta_objset,
					    headphys->ds_snapnames_zapobj,
					    snapname, sizeof (foundobj), 1,
					    &foundobj);
					ASSERT3U(foundobj, ==, dsobj);
					dmu_buf_rele(headdbuf, FTAG);
				}
#endif
				(void) strcat(ds->ds_snapname, snapname);
			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
				err = dsl_dataset_get_snapname(ds);
			}
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev) {
				dsl_dataset_close(ds->ds_prev,
				    DS_MODE_NONE, ds);
			}
			dsl_dir_close(ds->ds_dir, ds);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			uint64_t new =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
			if (new != ds->ds_phys->ds_fsid_guid) {
				/* XXX it won't necessarily be synced... */
				ds->ds_phys->ds_fsid_guid = new;
			}
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);

	mutex_enter(&ds->ds_lock);
	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
	    !DS_MODE_IS_INCONSISTENT(mode)) ||
	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
		mutex_exit(&ds->ds_lock);
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		return (EBUSY);
	}
	ds->ds_open_refcount += weight;
	mutex_exit(&ds->ds_lock);

	*dsp = ds;
	return (0);
}

int
dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
    void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *tail;
	uint64_t obj;
	dsl_dataset_t *ds = NULL;
	int err = 0;

	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj == 0) {
		/* A dataset with no associated objset */
		err = ENOENT;
		goto out;
	}

	if (tail != NULL) {
		objset_t *mos = dp->dp_meta_objset;

		err = dsl_dataset_open_obj(dp, obj, NULL,
		    DS_MODE_NONE, tag, &ds);
		if (err)
			goto out;
		obj = ds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		ds = NULL;

		if (tail[0] != '@') {
			err = ENOENT;
			goto out;
		}
		tail++;

		/* Look for a snapshot */
		if (!DS_MODE_IS_READONLY(mode)) {
			err = EROFS;
			goto out;
		}
		dprintf("looking for snapshot '%s'\n", tail);
		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
		if (err)
			goto out;
	}
	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);

out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);

	ASSERT3U((err == 0), ==, (ds != NULL));
	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */

	*dsp = ds;
	return (err);
}

int
dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
}

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			if (!MUTEX_HELD(&ds->ds_lock)) {
				/*
				 * We use a "recursive" mutex so that we
				 * can call dprintf_ds() with ds_lock held.
				 */
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

void
dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_open_refcount, >=, weight);
	ds->ds_open_refcount -= weight;
	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
	    mode, ds->ds_open_refcount);
	mutex_exit(&ds->ds_lock);

	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	dsl_dataset_t *ds;
	uint64_t dsobj;
	dsl_dir_t *dd;

	dsl_dir_create_root(mos, ddobjp, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	VERIFY(0 ==
	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
	(void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd,
    const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, ddobj;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dir_t *dd;

	ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
	ASSERT(clone_parent == NULL ||
	    clone_parent->ds_phys->ds_num_children > 0);
	ASSERT(lastname[0] != '@');
	ASSERT(dmu_tx_is_syncing(tx));

	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	if (clone_parent) {
		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
		dsphys->ds_prev_snap_txg =
		    clone_parent->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    clone_parent->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    clone_parent->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    clone_parent->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;

		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
		clone_parent->ds_phys->ds_num_children++;

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
	}
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	return (dsobj);
}

struct destroyarg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	void *tag;
	char *failed;
};

static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	char *cp;
	int err;

	(void) strcat(name, "@");
	(void) strcat(name, da->snapname);
	err = dsl_dataset_open(name,
	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    da->tag, &ds);
	cp = strchr(name, '@');
	*cp = '\0';
	if (err == ENOENT)
		return (0);
	if (err) {
		(void) strcpy(da->failed, name);
		return (err);
	}

	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, da->tag, 0);
	return (0);
}

/*
 * Destroy 'snapname' in all descendants of 'fsname'.
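 * (One destroy sync task is created per filesystem that has the named
 * snapshot, via dmu_objset_find(), and the tasks are then executed
 * together as a single sync task group.)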
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp;

	cp = strchr(fsname, '/');
	if (cp) {
		*cp = '\0';
		err = spa_open(fsname, &spa, FTAG);
		*cp = '/';
	} else {
		err = spa_open(fsname, &spa, FTAG);
	}
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.tag = FTAG;
	da.failed = fsname;

	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, 0);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			dsl_dataset_name(ds, fsname);
			cp = strchr(fsname, '@');
			*cp = '\0';
		}
		/*
		 * If it was successful, destroy_sync would have
		 * closed the ds
		 */
		if (err)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}

int
dsl_dataset_destroy(const char *name)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	uint64_t obj;

	if (strchr(name, '@')) {
		/* Destroying a snapshot is simpler */
		err = dsl_dataset_open(name,
		    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
		    FTAG, &ds);
		if (err)
			return (err);
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    ds, FTAG, 0);
		if (err)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		return (err);
	}

	err = dmu_objset_open(name, DMU_OST_ANY,
	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
	if (err)
		return (err);
	ds = os->os->os_dsl_dataset;
	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err) {
		dmu_objset_close(os);
		return (err);
	}

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0;
	    err = dmu_object_next(os, &obj, FALSE)) {
		dmu_tx_t *tx = dmu_tx_create(os);
		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
		dmu_tx_hold_bonus(tx, obj);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err) {
			/*
			 * Perhaps there is not enough disk
			 * space.  Just deal with it from
			 * dsl_dataset_destroy_sync().
			 */
			dmu_tx_abort(tx);
			continue;
		}
		VERIFY(0 == dmu_object_free(os, obj, tx));
		dmu_tx_commit(tx);
	}
	/* Make sure it's not dirty before we finish destroying it. */
	txg_wait_synced(dd->dd_pool, 0);

	dmu_objset_close(os);
	if (err != ESRCH)
		return (err);

	err = dsl_dataset_open(name,
	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    FTAG, &ds);
	if (err)
		return (err);

	err = dsl_dir_open(name, FTAG, &dd, NULL);
	if (err) {
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		return (err);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
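	 * (Both destroys are queued in one sync task group so the dataset
	 * and its dsl_dir are removed in the same txg.)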
	 */
	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, FTAG, 0);
	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
	    dsl_dir_destroy_sync, dd, FTAG, 0);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	/* if it is successful, *destroy_sync will close the ds+dd */
	if (err) {
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		dsl_dir_close(dd, FTAG);
	}
	return (err);
}

int
dsl_dataset_rollback(dsl_dataset_t *ds)
{
	ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX);
	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
	    ds, NULL, 0));
}

void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}


void
dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
{
	*bp = ds->ds_phys->ds_bp;
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

struct killarg {
	uint64_t *usedp;
	uint64_t *compressedp;
	uint64_t *uncompressedp;
	zio_t *zio;
	dmu_tx_t *tx;
};

static int
kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
{
	struct killarg *ka = arg;
	blkptr_t *bp = &bc->bc_blkptr;

	ASSERT3U(bc->bc_errno, ==, 0);

	/*
	 * Since this callback is not called concurrently, no lock is
	 * needed on the accounting values.
	 */
	*ka->usedp += bp_get_dasize(spa, bp);
	*ka->compressedp += BP_GET_PSIZE(bp);
	*ka->uncompressedp += BP_GET_UCSIZE(bp);
	/* XXX check for EIO? */
	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
	    ARC_NOWAIT);
	return (0);
}

/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/*
	 * There must be a previous snapshot.  I suppose we could roll
	 * it back to being empty (and re-initialize the upper (ZPL)
	 * layer).  But for now there's no way to do this via the user
	 * interface.
	 */
	if (ds->ds_phys->ds_prev_snap_txg == 0)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
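	 * (Snapshots are read-only, so rolling one back is meaningless.)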
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/* Free blkptrs that we gave birth to */
		zio_t *zio;
		uint64_t used = 0, compressed = 0, uncompressed = 0;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);

		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	}

	/* Change our contents to that of the prev snapshot */
	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
	ds->ds_phys->ds_compressed_bytes =
	    ds->ds_prev->ds_phys->ds_compressed_bytes;
	ds->ds_phys->ds_uncompressed_bytes =
	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
	ds->ds_phys->ds_unique_bytes = 0;

	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
	ds->ds_prev->ds_phys->ds_unique_bytes = 0;
}

/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
}

/* ARGSUSED */
static int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
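	 * (An EAGAIN return makes the sync task framework wait for the
	 * current txg to sync and then retry this task.)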
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking... */
	return (0);
}

static void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t used = 0, compressed = 0, uncompressed = 0;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;

		spa_scrub_restart(dp->dp_spa, tx->tx_txg);

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
		    &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;

			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_after_next));
			itor = 0;
			while (bplist_iterate(&ds_after_next->ds_deadlist,
			    &itor, &bp) == 0) {
				if (bp.blk_birth >
				    ds->ds_phys->ds_prev_snap_txg &&
				    bp.blk_birth <=
				    ds->ds_phys->ds_creation_txg) {
					ds_next->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			}

			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			/*
			 * It would be nice to update the head dataset's
			 * unique.  To do so we would have to traverse
			 * it for blocks born after ds_prev, which is
			 * pretty expensive just to maintain something
			 * for debugging purposes.
			 */
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
			    ds_next);
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
			} else {
				ds_next->ds_prev = NULL;
			}
		}
		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);

		/*
		 * NB: unique_bytes is not accurate for head objsets
		 * because we don't update it when we delete the most
		 * recent snapshot -- see above comment.
		 */
		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
	}

	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);

	if (ds->ds_phys->ds_snapnames_zapobj) {
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	}

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dataset */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		/*
		 * dsl_dir_sync_destroy() called us, they'll destroy
		 * the dataset.
		 */
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;
			err = zap_lookup(mos,
			    ds_head->ds_phys->ds_snapnames_zapobj,
			    ds->ds_snapname, 8, 1, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
		    ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
	}

	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);

	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));
}

/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	dsl_dataset_t *ds = os->os->os_dsl_dataset;
	const char *snapname = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	dsl_dataset_t *ds = os->os->os_dsl_dataset;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		}
	}

	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
	ds->ds_phys->ds_unique_bytes = 0;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	if (ds->ds_prev)
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
	VERIFY(0 == dsl_dataset_open_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, snapname,
	    DS_MODE_NONE, ds, &ds->ds_prev));
}

void
dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	dmu_objset_sync(ds->ds_user_ptr, tx);
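	/*
	 * Dirty the containing dsl_dir so its space accounting is also
	 * synced, then close the deadlist and drop the hold taken in
	 * dsl_dataset_dirty().
	 */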
	dsl_dir_dirty(ds->ds_dir, tx);
	bplist_close(&ds->ds_deadlist);

	dmu_buf_rele(ds->ds_dbuf, ds);
}

void
dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
{
	/* fill in properties crap */
	dsl_dir_stats(ds->ds_dir, dds);

	if (ds->ds_phys->ds_num_children != 0) {
		dds->dds_is_snapshot = TRUE;
		dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	}

	dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;

	dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
	dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used;

	/* We override the dataset's creation time... they should be the same */
	dds->dds_creation_time = ds->ds_phys->ds_creation_time;
	dds->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	dds->dds_space_refd = ds->ds_phys->ds_used_bytes;
	dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid;

	if (ds->ds_phys->ds_next_snap_obj) {
		/*
		 * This is a snapshot; override the dd's space used with
		 * our unique space
		 */
		dds->dds_space_used = ds->ds_phys->ds_unique_bytes;
		dds->dds_compressed_bytes =
		    ds->ds_phys->ds_compressed_bytes;
		dds->dds_uncompressed_bytes =
		    ds->ds_phys->ds_uncompressed_bytes;
	}
}

dsl_pool_t *
dsl_dataset_pool(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool);
}

/* ARGSUSED */
static int
dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	uint64_t val;
	int err;

	err = dsl_dataset_open_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
	if (err)
		return (err);

	/* new name better not be in use */
	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
	    newsnapname, 8, 1, &val);
	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);

	if (err == 0)
		err = EEXIST;
	else if (err == ENOENT)
		err = 0;
	return (err);
}

static void
dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	int err;

	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);

	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));

	VERIFY(0 == dsl_dataset_get_snapname(ds));
	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, tx);
	ASSERT3U(err, ==, 0);
	mutex_enter(&ds->ds_lock);
	(void) strcpy(ds->ds_snapname, newsnapname);
	mutex_exit(&ds->ds_lock);
	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
	ASSERT3U(err, ==, 0);

	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
}

#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(const char *oldname, const char *newname)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	const char *tail;
	int err;

	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		err = dsl_dir_rename(dd, newname);
		dsl_dir_close(dd, FTAG);
		return (err);
	}
	if (tail[0] != '@') {
		/* the name ended in a nonexistent component */
		dsl_dir_close(dd, FTAG);
		return (ENOENT);
	}

	dsl_dir_close(dd, FTAG);

	/* new name must be a snapshot in the same filesystem */
	tail = strchr(newname, '@');
	if (tail == NULL)
		return (EINVAL);
	tail++;
	if (strncmp(oldname, newname, tail - newname) != 0)
		return (EXDEV);

	err = dsl_dataset_open(oldname,
	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
	if (err)
		return (err);

	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_snapshot_rename_check,
	    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);

	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);

	return (err);
}

struct promotearg {
	uint64_t used, comp, uncomp, unique;
	uint64_t newnext_obj, snapnames_obj;
};

static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *pdd = NULL;
	dsl_dataset_t *ds = NULL;
	dsl_dataset_t *pivot_ds = NULL;
	dsl_dataset_t *newnext_ds = NULL;
	int err;
	char *name = NULL;
	uint64_t itor = 0;
	blkptr_t bp;

	bzero(pa, sizeof (*pa));

	/* Check that it is a clone */
	if (dd->dd_phys->dd_clone_parent_obj == 0)
		return (EINVAL);

	/* Since this is so expensive, don't do the preliminary check */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (err = dsl_dataset_open_obj(dp,
	    dd->dd_phys->dd_clone_parent_obj,
	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
		goto out;
	pdd = pivot_ds->ds_dir;

	{
		dsl_dataset_t *phds;
		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    pdd->dd_phys->dd_head_dataset_obj,
		    NULL, DS_MODE_NONE, FTAG, &phds))
			goto out;
		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
	}

	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
		err = EXDEV;
		goto out;
	}

	/* find pivot point's new next ds */
	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
	while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
		dsl_dataset_t *prev;

		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    newnext_ds->ds_phys->ds_prev_snap_obj,
		    NULL, DS_MODE_NONE, FTAG, &prev))
			goto out;
		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
		newnext_ds = prev;
	}
	pa->newnext_obj = newnext_ds->ds_object;

	/* compute pivot point's new unique space */
	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
	    &itor, &bp)) == 0) {
		if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
	}
	if (err != ENOENT)
		goto out;

	/* Walk the snapshots that we are moving */
	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	ds = pivot_ds;
	/* CONSTCOND */
	while (TRUE) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *prev;

		/* Check that the snapshot name does not conflict */
		dsl_dataset_name(ds, name);
		err = zap_lookup(dd->dd_pool->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &val);
		if (err != ENOENT) {
			if (err == 0)
				err = EEXIST;
			goto out;
		}

		/*
		 * compute space to transfer.  Each snapshot gave birth to:
		 * (my used) - (prev's used) + (deadlist's used)
		 */
		pa->used += ds->ds_phys->ds_used_bytes;
		pa->comp += ds->ds_phys->ds_compressed_bytes;
		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;

		/* If we reach the first snapshot, we're done. */
		if (ds->ds_phys->ds_prev_snap_obj == 0)
			break;

		if (err = bplist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp))
			goto out;
		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
		    FTAG, &prev))
			goto out;
		pa->used += dlused - prev->ds_phys->ds_used_bytes;
		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;

		/*
		 * We could be a clone of a clone.  If we reach our
		 * parent's branch point, we're done.
		 */
		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
			break;
		}
		if (ds != pivot_ds)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		ds = prev;
	}

	/* Check that there is enough space here */
	err = dsl_dir_transfer_possible(pdd, dd, pa->used);

out:
	if (ds && ds != pivot_ds)
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
	if (pivot_ds)
		dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
	if (newnext_ds)
		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
	if (name)
		kmem_free(name, MAXPATHLEN);
	return (err);
}

static void
dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *pdd = NULL;
	dsl_dataset_t *ds, *pivot_ds;
	char *name;

	ASSERT(dd->dd_phys->dd_clone_parent_obj != 0);
	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));

	VERIFY(0 == dsl_dataset_open_obj(dp,
	    dd->dd_phys->dd_clone_parent_obj,
	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds));
	pdd = pivot_ds->ds_dir;

	/* move snapshots to this dir */
	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	ds = pivot_ds;
	/* CONSTCOND */
	while (TRUE) {
		dsl_dataset_t *prev;

		/* move snap name entry */
		dsl_dataset_name(ds, name);
		VERIFY(0 == zap_remove(dp->dp_meta_objset,
		    pa->snapnames_obj, ds->ds_snapname, tx));
		VERIFY(0 == zap_add(dp->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));

		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
		ds->ds_phys->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, pdd);
		dsl_dir_close(ds->ds_dir, ds);
		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		ASSERT3U(dsl_prop_numcb(ds), ==, 0);

		if (ds->ds_phys->ds_prev_snap_obj == 0)
			break;

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
		    FTAG, &prev));

		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
			break;
		}
		if (ds != pivot_ds)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		ds = prev;
	}
	if (ds != pivot_ds)
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);

	/* change pivot point's next snap */
	dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
	pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;

	/* change clone_parent-age */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
	dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
	dmu_buf_will_dirty(pdd->dd_dbuf, tx);
	pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;

	/* change space accounting */
	dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx);
	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
	pivot_ds->ds_phys->ds_unique_bytes = pa->unique;

	dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
	kmem_free(name, MAXPATHLEN);
}

int
dsl_dataset_promote(const char *name)
{
	dsl_dataset_t *ds;
	int err;
	dmu_object_info_t doi;
	struct promotearg pa;

	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
	if (err)
		return (err);

	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, &doi);
	if (err) {
		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
		return (err);
	}

	/*
	 * Add in 128x the snapnames zapobj size, since we will be moving
	 * a bunch of snapnames to the promoted ds, and dirtying their
	 * bonus buffers.
	 */
	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_promote_check,
	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
	return (err);
}