/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_checkfunc_t dsl_dataset_destroy_check;
static dsl_syncfunc_t dsl_dataset_destroy_sync;

#define	DOS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * We use weighted reference counts to express the various forms of exclusion
 * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
 * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
 * This makes the exclusion logic simple: the total refcnt for all opens cannot
 * exceed DOS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because
 * their weight (DOS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens
 * consume just over half of the refcnt space, so there can't be more than one,
 * but it can peacefully coexist with any number of STANDARD opens.
 */
static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
	0,			/* DOS_MODE_NONE - invalid */
	1,			/* DOS_MODE_STANDARD - unlimited number */
	(DOS_REF_MAX >> 1) + 1,	/* DOS_MODE_PRIMARY - only one of these */
	DOS_REF_MAX		/* DOS_MODE_EXCLUSIVE - no other opens */
};

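/*
 * To make the arithmetic above concrete (illustrative only): with
 * DOS_REF_MAX = 2^62,
 *
 *	one EXCLUSIVE open:		2^62		== DOS_REF_MAX
 *	two PRIMARY opens:		2 * (2^61 + 1)	>  DOS_REF_MAX
 *	one PRIMARY + N STANDARD:	2^61 + 1 + N	<= DOS_REF_MAX
 *
 * so a second PRIMARY or anything alongside an EXCLUSIVE is refused.
 * dsl_dataset_open_obj() below enforces exactly this rule: it fails an
 * open if ds_open_refcount + weight would exceed DOS_REF_MAX.
 */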

void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_lock);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir,
	    used, compressed, uncompressed, tx);
}

void
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(dmu_tx_is_syncing(tx));
	if (BP_IS_HOLE(bp))
		return;

	ASSERT(used > 0);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		/* XXX this can fail, what do we do when it does? */
		(void) arc_free(NULL, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
		bzero(bp, sizeof (blkptr_t));

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		dprintf_bp(bp, "freeing: %s", "");
		/* XXX check return code? */
		(void) arc_free(NULL, tx->tx_pool->dp_spa,
		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);

		mutex_enter(&ds->ds_lock);
		/* XXX unique_bytes is not accurate for head datasets */
		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_phys->ds_prev_snap_obj != 0) {
			ASSERT3U(ds->ds_prev->ds_object, ==,
			    ds->ds_phys->ds_prev_snap_obj);
			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
			    ds->ds_object && bp->blk_birth >
			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
				mutex_enter(&ds->ds_prev->ds_lock);
				ds->ds_prev->ds_phys->ds_unique_bytes +=
				    used;
				mutex_exit(&ds->ds_prev->ds_lock);
			}
		}
	}
	bzero(bp, sizeof (blkptr_t));
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	return (MAX(ds->ds_phys->ds_prev_snap_txg, ds->ds_trysnap_txg));
}

int
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}
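/*
 * Illustrative example of the guess above: if the most recent snapshot
 * was taken at txg 100 and a snapshot attempt is pending at txg 105,
 * dsl_dataset_prev_snap_txg() returns 105, so only blocks born at txg
 * 106 or later are reported freeable here; killing an older block goes
 * through the deadlist path in dsl_dataset_block_kill() instead.
 */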

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* open_refcount == DOS_REF_MAX when deleting */
	ASSERT(ds->ds_open_refcount == 0 ||
	    ds->ds_open_refcount == DOS_REF_MAX);

	dprintf_ds(ds, "evicting %s\n", "");

	unique_remove(ds->ds_phys->ds_fsid_guid);

	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	dsl_dir_close(ds->ds_dir, ds);

	if (list_link_active(&ds->ds_synced_link))
		list_remove(&dp->dp_synced_objsets, ds);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

int
dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
    int mode, void *tag, dsl_dataset_t **dsp)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the blist if we
			 * just opened it.
			 */
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds, &ds->ds_prev);
			}
		} else {
			if (snapname) {
#ifdef ZFS_DEBUG
				dsl_dataset_phys_t *headphys;
				dmu_buf_t *headdbuf;
				err = dmu_bonus_hold(mos,
				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
				    FTAG, &headdbuf);
				if (err == 0) {
					headphys = headdbuf->db_data;
					uint64_t foundobj;
					err = zap_lookup(dp->dp_meta_objset,
					    headphys->ds_snapnames_zapobj,
					    snapname, sizeof (foundobj), 1,
					    &foundobj);
					ASSERT3U(foundobj, ==, dsobj);
					dmu_buf_rele(headdbuf, FTAG);
				}
#endif
				(void) strcat(ds->ds_snapname, snapname);
			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
				err = dsl_dataset_get_snapname(ds);
			}
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev) {
				dsl_dataset_close(ds->ds_prev,
				    DS_MODE_NONE, ds);
			}
			dsl_dir_close(ds->ds_dir, ds);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			uint64_t new =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
			if (new != ds->ds_phys->ds_fsid_guid) {
				/* XXX it won't necessarily be synced... */
				ds->ds_phys->ds_fsid_guid = new;
			}
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);

	mutex_enter(&ds->ds_lock);
	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
	    !DS_MODE_IS_INCONSISTENT(mode)) ||
	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
		mutex_exit(&ds->ds_lock);
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		return (EBUSY);
	}
	ds->ds_open_refcount += weight;
	mutex_exit(&ds->ds_lock);

	*dsp = ds;
	return (0);
}

int
dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
    void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *tail;
	uint64_t obj;
	dsl_dataset_t *ds = NULL;
	int err = 0;

	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj == 0) {
		/* A dataset with no associated objset */
		err = ENOENT;
		goto out;
	}

	if (tail != NULL) {
		objset_t *mos = dp->dp_meta_objset;

		err = dsl_dataset_open_obj(dp, obj, NULL,
		    DS_MODE_NONE, tag, &ds);
		if (err)
			goto out;
		obj = ds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(ds, DS_MODE_NONE, tag);
		ds = NULL;

		if (tail[0] != '@') {
			err = ENOENT;
			goto out;
		}
		tail++;

		/* Look for a snapshot */
		if (!DS_MODE_IS_READONLY(mode)) {
			err = EROFS;
			goto out;
		}
		dprintf("looking for snapshot '%s'\n", tail);
		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
		if (err)
			goto out;
	}
	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);

out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);

	ASSERT3U((err == 0), ==, (ds != NULL));
	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */

	*dsp = ds;
	return (err);
}

int
dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
{
	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
}

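/*
 * Sketch of the usual open/close pairing, based on the callers later in
 * this file (e.g. dsl_dataset_rename(), dsl_dataset_promote()); shown
 * for illustration only:
 *
 *	dsl_dataset_t *ds;
 *	int err;
 *
 *	err = dsl_dataset_open(name,
 *	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
 *	if (err)
 *		return (err);
 *	...
 *	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
 *
 * The close must pass the same mode level and tag as the open so that
 * the weighted refcount and the dbuf hold taken in dsl_dataset_open_obj()
 * are dropped symmetrically.
 */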
void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			if (!MUTEX_HELD(&ds->ds_lock)) {
				/*
				 * We use a "recursive" mutex so that we
				 * can call dprintf_ds() with ds_lock held.
				 */
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

void
dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
{
	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_open_refcount, >=, weight);
	ds->ds_open_refcount -= weight;
	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
	    mode, ds->ds_open_refcount);
	mutex_exit(&ds->ds_lock);

	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	dsl_dataset_t *ds;
	uint64_t dsobj;
	dsl_dir_t *dd;

	dsl_dir_create_root(mos, ddobjp, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	VERIFY(0 ==
	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
	(void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd,
    const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, ddobj;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dir_t *dd;

	ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
	ASSERT(clone_parent == NULL ||
	    clone_parent->ds_phys->ds_num_children > 0);
	ASSERT(lastname[0] != '@');
	ASSERT(dmu_tx_is_syncing(tx));

	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	if (clone_parent) {
		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
		dsphys->ds_prev_snap_txg =
		    clone_parent->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    clone_parent->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    clone_parent->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    clone_parent->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;

		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
		clone_parent->ds_phys->ds_num_children++;

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
	}
	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;
	dsl_dir_close(dd, FTAG);

	return (dsobj);
}

struct destroyarg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	void *tag;
	char *failed;
};

static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	char *cp;
	int err;

	(void) strcat(name, "@");
	(void) strcat(name, da->snapname);
	err = dsl_dataset_open(name,
	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    da->tag, &ds);
	cp = strchr(name, '@');
	*cp = '\0';
	if (err == ENOENT)
		return (0);
	if (err) {
		(void) strcpy(da->failed, name);
		return (err);
	}

	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, da->tag, 0);
	return (0);
}

/*
 * Destroy 'snapname' in all descendants of 'fsname'.
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp;

	cp = strchr(fsname, '/');
	if (cp) {
		*cp = '\0';
		err = spa_open(fsname, &spa, FTAG);
		*cp = '/';
	} else {
		err = spa_open(fsname, &spa, FTAG);
	}
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.tag = FTAG;
	da.failed = fsname;

	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			dsl_dataset_name(ds, fsname);
			cp = strchr(fsname, '@');
			*cp = '\0';
		}
		/*
		 * If it was successful, destroy_sync would have
		 * closed the ds
		 */
		if (err)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}

int
dsl_dataset_destroy(const char *name)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	uint64_t obj;

	if (strchr(name, '@')) {
		/* Destroying a snapshot is simpler */
		err = dsl_dataset_open(name,
		    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
		    FTAG, &ds);
		if (err)
			return (err);
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    ds, FTAG, 0);
		if (err)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		return (err);
	}

	err = dmu_objset_open(name, DMU_OST_ANY,
	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
	if (err)
		return (err);
	ds = os->os->os_dsl_dataset;
	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err) {
		dmu_objset_close(os);
		return (err);
	}

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0;
	    err = dmu_object_next(os, &obj, FALSE)) {
		dmu_tx_t *tx = dmu_tx_create(os);
		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
		dmu_tx_hold_bonus(tx, obj);
		err = dmu_tx_assign(tx, TXG_WAIT);
		if (err) {
			/*
			 * Perhaps there is not enough disk
			 * space.  Just deal with it from
			 * dsl_dataset_destroy_sync().
			 */
			dmu_tx_abort(tx);
			continue;
		}
		VERIFY(0 == dmu_object_free(os, obj, tx));
		dmu_tx_commit(tx);
	}
	/* Make sure it's not dirty before we finish destroying it. */
	txg_wait_synced(dd->dd_pool, 0);

	dmu_objset_close(os);
	if (err != ESRCH)
		return (err);

	err = dsl_dataset_open(name,
	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    FTAG, &ds);
	if (err)
		return (err);

	err = dsl_dir_open(name, FTAG, &dd, NULL);
	if (err) {
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		return (err);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, FTAG, 0);
	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
	    dsl_dir_destroy_sync, dd, FTAG, 0);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	/* if it is successful, *destroy_sync will close the ds+dd */
	if (err) {
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		dsl_dir_close(dd, FTAG);
	}
	return (err);
}

int
dsl_dataset_rollback(dsl_dataset_t *ds)
{
	ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX);
	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
	    ds, NULL, 0));
}

void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}


void
dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
{
	*bp = ds->ds_phys->ds_bp;
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

struct killarg {
	uint64_t *usedp;
	uint64_t *compressedp;
	uint64_t *uncompressedp;
	zio_t *zio;
	dmu_tx_t *tx;
};

static int
kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
{
	struct killarg *ka = arg;
	blkptr_t *bp = &bc->bc_blkptr;

	ASSERT3U(bc->bc_errno, ==, 0);

	/*
	 * Since this callback is not called concurrently, no lock is
	 * needed on the accounting values.
	 */
	*ka->usedp += bp_get_dasize(spa, bp);
	*ka->compressedp += BP_GET_PSIZE(bp);
	*ka->uncompressedp += BP_GET_UCSIZE(bp);
	/* XXX check for EIO? */
	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
	    ARC_NOWAIT);
	return (0);
}

/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/*
	 * There must be a previous snapshot.  I suppose we could roll
	 * it back to being empty (and re-initialize the upper (ZPL)
	 * layer).  But for now there's no way to do this via the user
	 * interface.
	 */
	if (ds->ds_phys->ds_prev_snap_txg == 0)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/* Free blkptrs that we gave birth to */
		zio_t *zio;
		uint64_t used = 0, compressed = 0, uncompressed = 0;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);

		dsl_dir_diduse_space(ds->ds_dir,
		    -used, -compressed, -uncompressed, tx);
	}

	/* Change our contents to that of the prev snapshot */
	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
	ds->ds_phys->ds_compressed_bytes =
	    ds->ds_prev->ds_phys->ds_compressed_bytes;
	ds->ds_phys->ds_uncompressed_bytes =
	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
	ds->ds_phys->ds_unique_bytes = 0;

	if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		ds->ds_prev->ds_phys->ds_unique_bytes = 0;
	}
}

/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
}

/* ARGSUSED */
static int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking... */
	return (0);
}

static void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t used = 0, compressed = 0, uncompressed = 0;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;

		spa_scrub_restart(dp->dp_spa, tx->tx_txg);

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
		    &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;

			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_after_next));
			itor = 0;
			while (bplist_iterate(&ds_after_next->ds_deadlist,
			    &itor, &bp) == 0) {
				if (bp.blk_birth >
				    ds->ds_phys->ds_prev_snap_txg &&
				    bp.blk_birth <=
				    ds->ds_phys->ds_creation_txg) {
					ds_next->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			}

			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			/*
			 * It would be nice to update the head dataset's
			 * unique.  To do so we would have to traverse
			 * it for blocks born after ds_prev, which is
			 * pretty expensive just to maintain something
			 * for debugging purposes.
			 */
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
			    ds_next);
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
			} else {
				ds_next->ds_prev = NULL;
			}
		}
		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);

		/*
		 * NB: unique_bytes is not accurate for head objsets
		 * because we don't update it when we delete the most
		 * recent snapshot -- see above comment.
		 */
		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
	}

	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);

	if (ds->ds_phys->ds_snapnames_zapobj) {
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	}

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dataset */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		/*
		 * dsl_dir_sync_destroy() called us, they'll destroy
		 * the dataset.
		 */
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;
			err = zap_lookup(mos,
			    ds_head->ds_phys->ds_snapnames_zapobj,
			    ds->ds_snapname, 8, 1, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
		    ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
	}

	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);

	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));
}

/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	dsl_dataset_t *ds = os->os->os_dsl_dataset;
	const char *snapname = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	objset_t *os = arg1;
	dsl_dataset_t *ds = os->os->os_dsl_dataset;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		}
	}

	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
	ds->ds_phys->ds_unique_bytes = 0;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	if (ds->ds_prev)
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
	VERIFY(0 == dsl_dataset_open_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, snapname,
	    DS_MODE_NONE, ds, &ds->ds_prev));
}

void
dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	dmu_objset_sync(ds->ds_user_ptr, tx);
	dsl_dir_dirty(ds->ds_dir, tx);
	bplist_close(&ds->ds_deadlist);

	dmu_buf_rele(ds->ds_dbuf, ds);
}

void
dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
{
	/* fill in properties crap */
	dsl_dir_stats(ds->ds_dir, dds);

	if (ds->ds_phys->ds_num_children != 0) {
		dds->dds_is_snapshot = TRUE;
		dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	}

	dds->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;

	dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
	dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used;

	/* We override the dataset's creation time... they should be the same */
	dds->dds_creation_time = ds->ds_phys->ds_creation_time;
	dds->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	dds->dds_space_refd = ds->ds_phys->ds_used_bytes;
	dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid;

	if (ds->ds_phys->ds_next_snap_obj) {
		/*
		 * This is a snapshot; override the dd's space used with
		 * our unique space
		 */
		dds->dds_space_used = ds->ds_phys->ds_unique_bytes;
		dds->dds_compressed_bytes =
		    ds->ds_phys->ds_compressed_bytes;
		dds->dds_uncompressed_bytes =
		    ds->ds_phys->ds_uncompressed_bytes;
	}
}

dsl_pool_t *
dsl_dataset_pool(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool);
}

/* ARGSUSED */
static int
dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	uint64_t val;
	int err;

	err = dsl_dataset_open_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
	if (err)
		return (err);

	/* new name better not be in use */
	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
	    newsnapname, 8, 1, &val);
	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);

	if (err == 0)
		err = EEXIST;
	else if (err == ENOENT)
		err = 0;
	return (err);
}

static void
dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	int err;

	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);

	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));

	VERIFY(0 == dsl_dataset_get_snapname(ds));
	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, tx);
	ASSERT3U(err, ==, 0);
	mutex_enter(&ds->ds_lock);
	(void) strcpy(ds->ds_snapname, newsnapname);
	mutex_exit(&ds->ds_lock);
	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
	ASSERT3U(err, ==, 0);

	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
}

#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(const char *oldname, const char *newname)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	const char *tail;
	int err;

	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
	if (err)
		return (err);
	if (tail == NULL) {
		err = dsl_dir_rename(dd, newname);
		dsl_dir_close(dd, FTAG);
		return (err);
	}
	if (tail[0] != '@') {
		/* the name ended in a nonexistent component */
		dsl_dir_close(dd, FTAG);
		return (ENOENT);
	}

	dsl_dir_close(dd, FTAG);

	/* new name must be snapshot in same filesystem */
	tail = strchr(newname, '@');
	if (tail == NULL)
		return (EINVAL);
	tail++;
	if (strncmp(oldname, newname, tail - newname) != 0)
		return (EXDEV);

	err = dsl_dataset_open(oldname,
	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
	if (err)
		return (err);

	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_snapshot_rename_check,
	    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);

	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);

	return (err);
}

struct promotearg {
	uint64_t used, comp, uncomp, unique;
	uint64_t newnext_obj, snapnames_obj;
};

static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *pdd = NULL;
	dsl_dataset_t *ds = NULL;
	dsl_dataset_t *pivot_ds = NULL;
	dsl_dataset_t *newnext_ds = NULL;
	int err;
	char *name = NULL;
	uint64_t itor = 0;
	blkptr_t bp;

	bzero(pa, sizeof (*pa));

	/* Check that it is a clone */
	if (dd->dd_phys->dd_clone_parent_obj == 0)
		return (EINVAL);

	/* Since this is so expensive, don't do the preliminary check */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (err = dsl_dataset_open_obj(dp,
	    dd->dd_phys->dd_clone_parent_obj,
	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
		goto out;
	pdd = pivot_ds->ds_dir;

	{
		dsl_dataset_t *phds;
		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    pdd->dd_phys->dd_head_dataset_obj,
		    NULL, DS_MODE_NONE, FTAG, &phds))
			goto out;
		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
	}

	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
		err = EXDEV;
		goto out;
	}

	/* find pivot point's new next ds */
	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
	while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
		dsl_dataset_t *prev;

		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    newnext_ds->ds_phys->ds_prev_snap_obj,
		    NULL, DS_MODE_NONE, FTAG, &prev))
			goto out;
		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
		newnext_ds = prev;
	}
	pa->newnext_obj = newnext_ds->ds_object;

	/* compute pivot point's new unique space */
	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
	    &itor, &bp)) == 0) {
		if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
	}
	if (err != ENOENT)
		goto out;

	/* Walk the snapshots that we are moving */
	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	ds = pivot_ds;
	/* CONSTCOND */
	while (TRUE) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *prev;

		/* Check that the snapshot name does not conflict */
		dsl_dataset_name(ds, name);
		err = zap_lookup(dd->dd_pool->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &val);
		if (err != ENOENT) {
			if (err == 0)
				err = EEXIST;
			goto out;
		}

		/*
		 * compute space to transfer.  Each snapshot gave birth to:
		 * (my used) - (prev's used) + (deadlist's used)
		 */
		pa->used += ds->ds_phys->ds_used_bytes;
		pa->comp += ds->ds_phys->ds_compressed_bytes;
		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;

		/* If we reach the first snapshot, we're done. */
		if (ds->ds_phys->ds_prev_snap_obj == 0)
			break;

		if (err = bplist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp))
			goto out;
		if (err = dsl_dataset_open_obj(dd->dd_pool,
		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
		    FTAG, &prev))
			goto out;
		pa->used += dlused - prev->ds_phys->ds_used_bytes;
		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;

		/*
		 * We could be a clone of a clone.  If we reach our
		 * parent's branch point, we're done.
		 */
		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
			break;
		}
		if (ds != pivot_ds)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		ds = prev;
	}

	/* Check that there is enough space here */
	err = dsl_dir_transfer_possible(pdd, dd, pa->used);

out:
	if (ds && ds != pivot_ds)
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
	if (pivot_ds)
		dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
	if (newnext_ds)
		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
	if (name)
		kmem_free(name, MAXPATHLEN);
	return (err);
}

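/*
 * Worked example of the space-transfer computation above (numbers are
 * illustrative only): if a snapshot's ds_used_bytes is 10G, its previous
 * snapshot's is 8G, and its deadlist accounts for 1G, the loop charges
 * 10G - 8G + 1G = 3G to pa->used for that snapshot.  The accumulated
 * pa->used/comp/uncomp is what dsl_dataset_promote_sync() below moves
 * between the two dsl_dirs via dsl_dir_diduse_space().
 */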
static void
dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *pdd = NULL;
	dsl_dataset_t *ds, *pivot_ds;
	char *name;

	ASSERT(dd->dd_phys->dd_clone_parent_obj != 0);
	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));

	VERIFY(0 == dsl_dataset_open_obj(dp,
	    dd->dd_phys->dd_clone_parent_obj,
	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds));
	/*
	 * We need to explicitly open pdd, since pivot_ds's pdd will be
	 * changing.
	 */
	VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object,
	    NULL, FTAG, &pdd));

	/* move snapshots to this dir */
	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	ds = pivot_ds;
	/* CONSTCOND */
	while (TRUE) {
		dsl_dataset_t *prev;

		/* move snap name entry */
		dsl_dataset_name(ds, name);
		VERIFY(0 == zap_remove(dp->dp_meta_objset,
		    pa->snapnames_obj, ds->ds_snapname, tx));
		VERIFY(0 == zap_add(dp->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));

		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
		ds->ds_phys->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, pdd);
		dsl_dir_close(ds->ds_dir, ds);
		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		ASSERT3U(dsl_prop_numcb(ds), ==, 0);

		if (ds->ds_phys->ds_prev_snap_obj == 0)
			break;

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
		    FTAG, &prev));

		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
			break;
		}
		if (ds != pivot_ds)
			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
		ds = prev;
	}
	if (ds != pivot_ds)
		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);

	/* change pivot point's next snap */
	dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
	pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;

	/* change clone_parent-age */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
	dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
	dmu_buf_will_dirty(pdd->dd_dbuf, tx);
	pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;

	/* change space accounting */
	dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx);
	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
	pivot_ds->ds_phys->ds_unique_bytes = pa->unique;

	dsl_dir_close(pdd, FTAG);
	dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
	kmem_free(name, MAXPATHLEN);
}

int
dsl_dataset_promote(const char *name)
{
	dsl_dataset_t *ds;
	int err;
	dmu_object_info_t doi;
	struct promotearg pa;

	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
	if (err)
		return (err);

	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, &doi);
	if (err) {
		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
		return (err);
	}

	/*
	 * Add in 128x the snapnames zapobj size, since we will be moving
	 * a bunch of snapnames to the promoted ds, and dirtying their
	 * bonus buffers.
	 */
	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_promote_check,
	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
	return (err);
}