1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/dmu_objset.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_dir.h> 31 #include <sys/dsl_prop.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dmu_traverse.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/unique.h> 39 #include <sys/zfs_context.h> 40 41 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 42 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 43 static dsl_checkfunc_t dsl_dataset_rollback_check; 44 static dsl_syncfunc_t dsl_dataset_rollback_sync; 45 static dsl_checkfunc_t dsl_dataset_destroy_check; 46 static dsl_syncfunc_t dsl_dataset_destroy_sync; 47 48 #define DS_REF_MAX (1ULL << 62) 49 50 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 51 52 /* 53 * We use weighted reference counts to express the various forms of exclusion 54 * between different open modes. A STANDARD open is 1 point, an EXCLUSIVE open 55 * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE. 56 * This makes the exclusion logic simple: the total refcnt for all opens cannot 57 * exceed DS_REF_MAX. For example, EXCLUSIVE opens are exclusive because their 58 * weight (DS_REF_MAX) consumes the entire refcnt space. PRIMARY opens consume 59 * just over half of the refcnt space, so there can't be more than one, but it 60 * can peacefully coexist with any number of STANDARD opens. 61 */ 62 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = { 63 0, /* DS_MODE_NONE - invalid */ 64 1, /* DS_MODE_STANDARD - unlimited number */ 65 (DS_REF_MAX >> 1) + 1, /* DS_MODE_PRIMARY - only one of these */ 66 DS_REF_MAX /* DS_MODE_EXCLUSIVE - no other opens */ 67 }; 68 69 70 void 71 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 72 { 73 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 74 int compressed = BP_GET_PSIZE(bp); 75 int uncompressed = BP_GET_UCSIZE(bp); 76 77 dprintf_bp(bp, "born, ds=%p\n", ds); 78 79 ASSERT(dmu_tx_is_syncing(tx)); 80 /* It could have been compressed away to nothing */ 81 if (BP_IS_HOLE(bp)) 82 return; 83 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 84 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 85 if (ds == NULL) { 86 /* 87 * Account for the meta-objset space in its placeholder 88 * dsl_dir. 89 */ 90 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 91 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 92 used, compressed, uncompressed, tx); 93 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 94 return; 95 } 96 dmu_buf_will_dirty(ds->ds_dbuf, tx); 97 mutex_enter(&ds->ds_lock); 98 ds->ds_phys->ds_used_bytes += used; 99 ds->ds_phys->ds_compressed_bytes += compressed; 100 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 101 ds->ds_phys->ds_unique_bytes += used; 102 mutex_exit(&ds->ds_lock); 103 dsl_dir_diduse_space(ds->ds_dir, 104 used, compressed, uncompressed, tx); 105 } 106 107 void 108 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 109 dmu_tx_t *tx) 110 { 111 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 112 int compressed = BP_GET_PSIZE(bp); 113 int uncompressed = BP_GET_UCSIZE(bp); 114 115 ASSERT(dmu_tx_is_syncing(tx)); 116 /* No block pointer => nothing to free */ 117 if (BP_IS_HOLE(bp)) 118 return; 119 120 ASSERT(used > 0); 121 if (ds == NULL) { 122 int err; 123 /* 124 * Account for the meta-objset space in its placeholder 125 * dataset. 126 */ 127 err = arc_free(pio, tx->tx_pool->dp_spa, 128 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 129 ASSERT(err == 0); 130 131 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, 132 -used, -compressed, -uncompressed, tx); 133 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 134 return; 135 } 136 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 137 138 dmu_buf_will_dirty(ds->ds_dbuf, tx); 139 140 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 141 int err; 142 143 dprintf_bp(bp, "freeing: %s", ""); 144 err = arc_free(pio, tx->tx_pool->dp_spa, 145 tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT); 146 ASSERT(err == 0); 147 148 mutex_enter(&ds->ds_lock); 149 /* XXX unique_bytes is not accurate for head datasets */ 150 /* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */ 151 ds->ds_phys->ds_unique_bytes -= used; 152 mutex_exit(&ds->ds_lock); 153 dsl_dir_diduse_space(ds->ds_dir, 154 -used, -compressed, -uncompressed, tx); 155 } else { 156 dprintf_bp(bp, "putting on dead list: %s", ""); 157 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 158 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 159 if (ds->ds_phys->ds_prev_snap_obj != 0) { 160 ASSERT3U(ds->ds_prev->ds_object, ==, 161 ds->ds_phys->ds_prev_snap_obj); 162 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 163 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 164 ds->ds_object && bp->blk_birth > 165 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 166 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 167 mutex_enter(&ds->ds_prev->ds_lock); 168 ds->ds_prev->ds_phys->ds_unique_bytes += 169 used; 170 mutex_exit(&ds->ds_prev->ds_lock); 171 } 172 } 173 } 174 mutex_enter(&ds->ds_lock); 175 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 176 ds->ds_phys->ds_used_bytes -= used; 177 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 178 ds->ds_phys->ds_compressed_bytes -= compressed; 179 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 180 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 181 mutex_exit(&ds->ds_lock); 182 } 183 184 uint64_t 185 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 186 { 187 uint64_t trysnap = 0; 188 189 if (ds == NULL) 190 return (0); 191 /* 192 * The snapshot creation could fail, but that would cause an 193 * incorrect FALSE return, which would only result in an 194 * overestimation of the amount of space that an operation would 195 * consume, which is OK. 196 * 197 * There's also a small window where we could miss a pending 198 * snapshot, because we could set the sync task in the quiescing 199 * phase. So this should only be used as a guess. 200 */ 201 if (ds->ds_trysnap_txg > 202 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 203 trysnap = ds->ds_trysnap_txg; 204 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 205 } 206 207 int 208 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 209 { 210 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 211 } 212 213 /* ARGSUSED */ 214 static void 215 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 216 { 217 dsl_dataset_t *ds = dsv; 218 dsl_pool_t *dp = ds->ds_dir->dd_pool; 219 220 /* open_refcount == DS_REF_MAX when deleting */ 221 ASSERT(ds->ds_open_refcount == 0 || 222 ds->ds_open_refcount == DS_REF_MAX); 223 224 dprintf_ds(ds, "evicting %s\n", ""); 225 226 unique_remove(ds->ds_phys->ds_fsid_guid); 227 228 if (ds->ds_user_ptr != NULL) 229 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 230 231 if (ds->ds_prev) { 232 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 233 ds->ds_prev = NULL; 234 } 235 236 bplist_close(&ds->ds_deadlist); 237 dsl_dir_close(ds->ds_dir, ds); 238 239 if (list_link_active(&ds->ds_synced_link)) 240 list_remove(&dp->dp_synced_objsets, ds); 241 242 mutex_destroy(&ds->ds_lock); 243 mutex_destroy(&ds->ds_deadlist.bpl_lock); 244 245 kmem_free(ds, sizeof (dsl_dataset_t)); 246 } 247 248 static int 249 dsl_dataset_get_snapname(dsl_dataset_t *ds) 250 { 251 dsl_dataset_phys_t *headphys; 252 int err; 253 dmu_buf_t *headdbuf; 254 dsl_pool_t *dp = ds->ds_dir->dd_pool; 255 objset_t *mos = dp->dp_meta_objset; 256 257 if (ds->ds_snapname[0]) 258 return (0); 259 if (ds->ds_phys->ds_next_snap_obj == 0) 260 return (0); 261 262 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 263 FTAG, &headdbuf); 264 if (err) 265 return (err); 266 headphys = headdbuf->db_data; 267 err = zap_value_search(dp->dp_meta_objset, 268 headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname); 269 dmu_buf_rele(headdbuf, FTAG); 270 return (err); 271 } 272 273 int 274 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname, 275 int mode, void *tag, dsl_dataset_t **dsp) 276 { 277 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 278 objset_t *mos = dp->dp_meta_objset; 279 dmu_buf_t *dbuf; 280 dsl_dataset_t *ds; 281 int err; 282 283 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 284 dsl_pool_sync_context(dp)); 285 286 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 287 if (err) 288 return (err); 289 ds = dmu_buf_get_user(dbuf); 290 if (ds == NULL) { 291 dsl_dataset_t *winner; 292 293 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 294 ds->ds_dbuf = dbuf; 295 ds->ds_object = dsobj; 296 ds->ds_phys = dbuf->db_data; 297 298 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 299 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 300 NULL); 301 302 err = bplist_open(&ds->ds_deadlist, 303 mos, ds->ds_phys->ds_deadlist_obj); 304 if (err == 0) { 305 err = dsl_dir_open_obj(dp, 306 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 307 } 308 if (err) { 309 /* 310 * we don't really need to close the blist if we 311 * just opened it. 312 */ 313 mutex_destroy(&ds->ds_lock); 314 mutex_destroy(&ds->ds_deadlist.bpl_lock); 315 kmem_free(ds, sizeof (dsl_dataset_t)); 316 dmu_buf_rele(dbuf, tag); 317 return (err); 318 } 319 320 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) { 321 ds->ds_snapname[0] = '\0'; 322 if (ds->ds_phys->ds_prev_snap_obj) { 323 err = dsl_dataset_open_obj(dp, 324 ds->ds_phys->ds_prev_snap_obj, NULL, 325 DS_MODE_NONE, ds, &ds->ds_prev); 326 } 327 } else { 328 if (snapname) { 329 #ifdef ZFS_DEBUG 330 dsl_dataset_phys_t *headphys; 331 dmu_buf_t *headdbuf; 332 err = dmu_bonus_hold(mos, 333 ds->ds_dir->dd_phys->dd_head_dataset_obj, 334 FTAG, &headdbuf); 335 if (err == 0) { 336 headphys = headdbuf->db_data; 337 uint64_t foundobj; 338 err = zap_lookup(dp->dp_meta_objset, 339 headphys->ds_snapnames_zapobj, 340 snapname, sizeof (foundobj), 1, 341 &foundobj); 342 ASSERT3U(foundobj, ==, dsobj); 343 dmu_buf_rele(headdbuf, FTAG); 344 } 345 #endif 346 (void) strcat(ds->ds_snapname, snapname); 347 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 348 err = dsl_dataset_get_snapname(ds); 349 } 350 } 351 352 if (err == 0) { 353 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 354 dsl_dataset_evict); 355 } 356 if (err || winner) { 357 bplist_close(&ds->ds_deadlist); 358 if (ds->ds_prev) { 359 dsl_dataset_close(ds->ds_prev, 360 DS_MODE_NONE, ds); 361 } 362 dsl_dir_close(ds->ds_dir, ds); 363 mutex_destroy(&ds->ds_lock); 364 mutex_destroy(&ds->ds_deadlist.bpl_lock); 365 kmem_free(ds, sizeof (dsl_dataset_t)); 366 if (err) { 367 dmu_buf_rele(dbuf, tag); 368 return (err); 369 } 370 ds = winner; 371 } else { 372 uint64_t new = 373 unique_insert(ds->ds_phys->ds_fsid_guid); 374 if (new != ds->ds_phys->ds_fsid_guid) { 375 /* XXX it won't necessarily be synced... */ 376 ds->ds_phys->ds_fsid_guid = new; 377 } 378 } 379 } 380 ASSERT3P(ds->ds_dbuf, ==, dbuf); 381 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 382 383 mutex_enter(&ds->ds_lock); 384 if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY && 385 (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) && 386 !DS_MODE_IS_INCONSISTENT(mode)) || 387 (ds->ds_open_refcount + weight > DS_REF_MAX)) { 388 mutex_exit(&ds->ds_lock); 389 dsl_dataset_close(ds, DS_MODE_NONE, tag); 390 return (EBUSY); 391 } 392 ds->ds_open_refcount += weight; 393 mutex_exit(&ds->ds_lock); 394 395 *dsp = ds; 396 return (0); 397 } 398 399 int 400 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode, 401 void *tag, dsl_dataset_t **dsp) 402 { 403 dsl_dir_t *dd; 404 dsl_pool_t *dp; 405 const char *tail; 406 uint64_t obj; 407 dsl_dataset_t *ds = NULL; 408 int err = 0; 409 410 err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail); 411 if (err) 412 return (err); 413 414 dp = dd->dd_pool; 415 obj = dd->dd_phys->dd_head_dataset_obj; 416 rw_enter(&dp->dp_config_rwlock, RW_READER); 417 if (obj == 0) { 418 /* A dataset with no associated objset */ 419 err = ENOENT; 420 goto out; 421 } 422 423 if (tail != NULL) { 424 objset_t *mos = dp->dp_meta_objset; 425 426 err = dsl_dataset_open_obj(dp, obj, NULL, 427 DS_MODE_NONE, tag, &ds); 428 if (err) 429 goto out; 430 obj = ds->ds_phys->ds_snapnames_zapobj; 431 dsl_dataset_close(ds, DS_MODE_NONE, tag); 432 ds = NULL; 433 434 if (tail[0] != '@') { 435 err = ENOENT; 436 goto out; 437 } 438 tail++; 439 440 /* Look for a snapshot */ 441 if (!DS_MODE_IS_READONLY(mode)) { 442 err = EROFS; 443 goto out; 444 } 445 dprintf("looking for snapshot '%s'\n", tail); 446 err = zap_lookup(mos, obj, tail, 8, 1, &obj); 447 if (err) 448 goto out; 449 } 450 err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds); 451 452 out: 453 rw_exit(&dp->dp_config_rwlock); 454 dsl_dir_close(dd, FTAG); 455 456 ASSERT3U((err == 0), ==, (ds != NULL)); 457 /* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */ 458 459 *dsp = ds; 460 return (err); 461 } 462 463 int 464 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp) 465 { 466 return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp)); 467 } 468 469 void 470 dsl_dataset_name(dsl_dataset_t *ds, char *name) 471 { 472 if (ds == NULL) { 473 (void) strcpy(name, "mos"); 474 } else { 475 dsl_dir_name(ds->ds_dir, name); 476 VERIFY(0 == dsl_dataset_get_snapname(ds)); 477 if (ds->ds_snapname[0]) { 478 (void) strcat(name, "@"); 479 if (!MUTEX_HELD(&ds->ds_lock)) { 480 /* 481 * We use a "recursive" mutex so that we 482 * can call dprintf_ds() with ds_lock held. 483 */ 484 mutex_enter(&ds->ds_lock); 485 (void) strcat(name, ds->ds_snapname); 486 mutex_exit(&ds->ds_lock); 487 } else { 488 (void) strcat(name, ds->ds_snapname); 489 } 490 } 491 } 492 } 493 494 void 495 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag) 496 { 497 uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)]; 498 mutex_enter(&ds->ds_lock); 499 ASSERT3U(ds->ds_open_refcount, >=, weight); 500 ds->ds_open_refcount -= weight; 501 dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n", 502 mode, ds->ds_open_refcount); 503 mutex_exit(&ds->ds_lock); 504 505 dmu_buf_rele(ds->ds_dbuf, tag); 506 } 507 508 void 509 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx) 510 { 511 objset_t *mos = dp->dp_meta_objset; 512 dmu_buf_t *dbuf; 513 dsl_dataset_phys_t *dsphys; 514 dsl_dataset_t *ds; 515 uint64_t dsobj; 516 dsl_dir_t *dd; 517 518 dsl_dir_create_root(mos, ddobjp, tx); 519 VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd)); 520 521 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 522 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 523 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 524 dmu_buf_will_dirty(dbuf, tx); 525 dsphys = dbuf->db_data; 526 dsphys->ds_dir_obj = dd->dd_object; 527 dsphys->ds_fsid_guid = unique_create(); 528 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 529 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 530 sizeof (dsphys->ds_guid)); 531 dsphys->ds_snapnames_zapobj = 532 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 533 dsphys->ds_creation_time = gethrestime_sec(); 534 dsphys->ds_creation_txg = tx->tx_txg; 535 dsphys->ds_deadlist_obj = 536 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 537 dmu_buf_rele(dbuf, FTAG); 538 539 dmu_buf_will_dirty(dd->dd_dbuf, tx); 540 dd->dd_phys->dd_head_dataset_obj = dsobj; 541 dsl_dir_close(dd, FTAG); 542 543 VERIFY(0 == 544 dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds)); 545 (void) dmu_objset_create_impl(dp->dp_spa, ds, 546 &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx); 547 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 548 } 549 550 uint64_t 551 dsl_dataset_create_sync(dsl_dir_t *pdd, 552 const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx) 553 { 554 dsl_pool_t *dp = pdd->dd_pool; 555 dmu_buf_t *dbuf; 556 dsl_dataset_phys_t *dsphys; 557 uint64_t dsobj, ddobj; 558 objset_t *mos = dp->dp_meta_objset; 559 dsl_dir_t *dd; 560 561 ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp); 562 ASSERT(clone_parent == NULL || 563 clone_parent->ds_phys->ds_num_children > 0); 564 ASSERT(lastname[0] != '@'); 565 ASSERT(dmu_tx_is_syncing(tx)); 566 567 ddobj = dsl_dir_create_sync(pdd, lastname, tx); 568 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 569 570 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 571 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 572 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 573 dmu_buf_will_dirty(dbuf, tx); 574 dsphys = dbuf->db_data; 575 dsphys->ds_dir_obj = dd->dd_object; 576 dsphys->ds_fsid_guid = unique_create(); 577 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 578 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 579 sizeof (dsphys->ds_guid)); 580 dsphys->ds_snapnames_zapobj = 581 zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx); 582 dsphys->ds_creation_time = gethrestime_sec(); 583 dsphys->ds_creation_txg = tx->tx_txg; 584 dsphys->ds_deadlist_obj = 585 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 586 if (clone_parent) { 587 dsphys->ds_prev_snap_obj = clone_parent->ds_object; 588 dsphys->ds_prev_snap_txg = 589 clone_parent->ds_phys->ds_creation_txg; 590 dsphys->ds_used_bytes = 591 clone_parent->ds_phys->ds_used_bytes; 592 dsphys->ds_compressed_bytes = 593 clone_parent->ds_phys->ds_compressed_bytes; 594 dsphys->ds_uncompressed_bytes = 595 clone_parent->ds_phys->ds_uncompressed_bytes; 596 dsphys->ds_bp = clone_parent->ds_phys->ds_bp; 597 598 dmu_buf_will_dirty(clone_parent->ds_dbuf, tx); 599 clone_parent->ds_phys->ds_num_children++; 600 601 dmu_buf_will_dirty(dd->dd_dbuf, tx); 602 dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object; 603 } 604 dmu_buf_rele(dbuf, FTAG); 605 606 dmu_buf_will_dirty(dd->dd_dbuf, tx); 607 dd->dd_phys->dd_head_dataset_obj = dsobj; 608 dsl_dir_close(dd, FTAG); 609 610 return (dsobj); 611 } 612 613 struct destroyarg { 614 dsl_sync_task_group_t *dstg; 615 char *snapname; 616 void *tag; 617 char *failed; 618 }; 619 620 static int 621 dsl_snapshot_destroy_one(char *name, void *arg) 622 { 623 struct destroyarg *da = arg; 624 dsl_dataset_t *ds; 625 char *cp; 626 int err; 627 628 (void) strcat(name, "@"); 629 (void) strcat(name, da->snapname); 630 err = dsl_dataset_open(name, 631 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 632 da->tag, &ds); 633 cp = strchr(name, '@'); 634 *cp = '\0'; 635 if (err == ENOENT) 636 return (0); 637 if (err) { 638 (void) strcpy(da->failed, name); 639 return (err); 640 } 641 642 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 643 dsl_dataset_destroy_sync, ds, da->tag, 0); 644 return (0); 645 } 646 647 /* 648 * Destroy 'snapname' in all descendants of 'fsname'. 649 */ 650 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 651 int 652 dsl_snapshots_destroy(char *fsname, char *snapname) 653 { 654 int err; 655 struct destroyarg da; 656 dsl_sync_task_t *dst; 657 spa_t *spa; 658 char *cp; 659 660 cp = strchr(fsname, '/'); 661 if (cp) { 662 *cp = '\0'; 663 err = spa_open(fsname, &spa, FTAG); 664 *cp = '/'; 665 } else { 666 err = spa_open(fsname, &spa, FTAG); 667 } 668 if (err) 669 return (err); 670 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 671 da.snapname = snapname; 672 da.tag = FTAG; 673 da.failed = fsname; 674 675 err = dmu_objset_find(fsname, 676 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 677 678 if (err == 0) 679 err = dsl_sync_task_group_wait(da.dstg); 680 681 for (dst = list_head(&da.dstg->dstg_tasks); dst; 682 dst = list_next(&da.dstg->dstg_tasks, dst)) { 683 dsl_dataset_t *ds = dst->dst_arg1; 684 if (dst->dst_err) { 685 dsl_dataset_name(ds, fsname); 686 cp = strchr(fsname, '@'); 687 *cp = '\0'; 688 } 689 /* 690 * If it was successful, destroy_sync would have 691 * closed the ds 692 */ 693 if (err) 694 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 695 } 696 697 dsl_sync_task_group_destroy(da.dstg); 698 spa_close(spa, FTAG); 699 return (err); 700 } 701 702 int 703 dsl_dataset_destroy(const char *name) 704 { 705 int err; 706 dsl_sync_task_group_t *dstg; 707 objset_t *os; 708 dsl_dataset_t *ds; 709 dsl_dir_t *dd; 710 uint64_t obj; 711 712 if (strchr(name, '@')) { 713 /* Destroying a snapshot is simpler */ 714 err = dsl_dataset_open(name, 715 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 716 FTAG, &ds); 717 if (err) 718 return (err); 719 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 720 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 721 ds, FTAG, 0); 722 if (err) 723 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 724 return (err); 725 } 726 727 err = dmu_objset_open(name, DMU_OST_ANY, 728 DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os); 729 if (err) 730 return (err); 731 ds = os->os->os_dsl_dataset; 732 dd = ds->ds_dir; 733 734 /* 735 * Check for errors and mark this ds as inconsistent, in 736 * case we crash while freeing the objects. 737 */ 738 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 739 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 740 if (err) { 741 dmu_objset_close(os); 742 return (err); 743 } 744 745 /* 746 * remove the objects in open context, so that we won't 747 * have too much to do in syncing context. 748 */ 749 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 750 ds->ds_phys->ds_prev_snap_txg)) { 751 dmu_tx_t *tx = dmu_tx_create(os); 752 dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END); 753 dmu_tx_hold_bonus(tx, obj); 754 err = dmu_tx_assign(tx, TXG_WAIT); 755 if (err) { 756 /* 757 * Perhaps there is not enough disk 758 * space. Just deal with it from 759 * dsl_dataset_destroy_sync(). 760 */ 761 dmu_tx_abort(tx); 762 continue; 763 } 764 VERIFY(0 == dmu_object_free(os, obj, tx)); 765 dmu_tx_commit(tx); 766 } 767 /* Make sure it's not dirty before we finish destroying it. */ 768 txg_wait_synced(dd->dd_pool, 0); 769 770 dmu_objset_close(os); 771 if (err != ESRCH) 772 return (err); 773 774 err = dsl_dataset_open(name, 775 DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT, 776 FTAG, &ds); 777 if (err) 778 return (err); 779 780 err = dsl_dir_open(name, FTAG, &dd, NULL); 781 if (err) { 782 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 783 return (err); 784 } 785 786 /* 787 * Blow away the dsl_dir + head dataset. 788 */ 789 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 790 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 791 dsl_dataset_destroy_sync, ds, FTAG, 0); 792 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 793 dsl_dir_destroy_sync, dd, FTAG, 0); 794 err = dsl_sync_task_group_wait(dstg); 795 dsl_sync_task_group_destroy(dstg); 796 /* if it is successful, *destroy_sync will close the ds+dd */ 797 if (err) { 798 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 799 dsl_dir_close(dd, FTAG); 800 } 801 return (err); 802 } 803 804 int 805 dsl_dataset_rollback(dsl_dataset_t *ds) 806 { 807 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 808 return (dsl_sync_task_do(ds->ds_dir->dd_pool, 809 dsl_dataset_rollback_check, dsl_dataset_rollback_sync, 810 ds, NULL, 0)); 811 } 812 813 void * 814 dsl_dataset_set_user_ptr(dsl_dataset_t *ds, 815 void *p, dsl_dataset_evict_func_t func) 816 { 817 void *old; 818 819 mutex_enter(&ds->ds_lock); 820 old = ds->ds_user_ptr; 821 if (old == NULL) { 822 ds->ds_user_ptr = p; 823 ds->ds_user_evict_func = func; 824 } 825 mutex_exit(&ds->ds_lock); 826 return (old); 827 } 828 829 void * 830 dsl_dataset_get_user_ptr(dsl_dataset_t *ds) 831 { 832 return (ds->ds_user_ptr); 833 } 834 835 836 blkptr_t * 837 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 838 { 839 return (&ds->ds_phys->ds_bp); 840 } 841 842 void 843 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 844 { 845 ASSERT(dmu_tx_is_syncing(tx)); 846 /* If it's the meta-objset, set dp_meta_rootbp */ 847 if (ds == NULL) { 848 tx->tx_pool->dp_meta_rootbp = *bp; 849 } else { 850 dmu_buf_will_dirty(ds->ds_dbuf, tx); 851 ds->ds_phys->ds_bp = *bp; 852 } 853 } 854 855 spa_t * 856 dsl_dataset_get_spa(dsl_dataset_t *ds) 857 { 858 return (ds->ds_dir->dd_pool->dp_spa); 859 } 860 861 void 862 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 863 { 864 dsl_pool_t *dp; 865 866 if (ds == NULL) /* this is the meta-objset */ 867 return; 868 869 ASSERT(ds->ds_user_ptr != NULL); 870 871 if (ds->ds_phys->ds_next_snap_obj != 0) 872 panic("dirtying snapshot!"); 873 874 dp = ds->ds_dir->dd_pool; 875 876 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 877 /* up the hold count until we can be written out */ 878 dmu_buf_add_ref(ds->ds_dbuf, ds); 879 } 880 } 881 882 struct killarg { 883 uint64_t *usedp; 884 uint64_t *compressedp; 885 uint64_t *uncompressedp; 886 zio_t *zio; 887 dmu_tx_t *tx; 888 }; 889 890 static int 891 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg) 892 { 893 struct killarg *ka = arg; 894 blkptr_t *bp = &bc->bc_blkptr; 895 896 ASSERT3U(bc->bc_errno, ==, 0); 897 898 /* 899 * Since this callback is not called concurrently, no lock is 900 * needed on the accounting values. 901 */ 902 *ka->usedp += bp_get_dasize(spa, bp); 903 *ka->compressedp += BP_GET_PSIZE(bp); 904 *ka->uncompressedp += BP_GET_UCSIZE(bp); 905 /* XXX check for EIO? */ 906 (void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL, 907 ARC_NOWAIT); 908 return (0); 909 } 910 911 /* ARGSUSED */ 912 static int 913 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx) 914 { 915 dsl_dataset_t *ds = arg1; 916 917 /* 918 * There must be a previous snapshot. I suppose we could roll 919 * it back to being empty (and re-initialize the upper (ZPL) 920 * layer). But for now there's no way to do this via the user 921 * interface. 922 */ 923 if (ds->ds_phys->ds_prev_snap_txg == 0) 924 return (EINVAL); 925 926 /* 927 * This must not be a snapshot. 928 */ 929 if (ds->ds_phys->ds_next_snap_obj != 0) 930 return (EINVAL); 931 932 /* 933 * If we made changes this txg, traverse_dsl_dataset won't find 934 * them. Try again. 935 */ 936 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 937 return (EAGAIN); 938 939 return (0); 940 } 941 942 /* ARGSUSED */ 943 static void 944 dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx) 945 { 946 dsl_dataset_t *ds = arg1; 947 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 948 949 dmu_buf_will_dirty(ds->ds_dbuf, tx); 950 951 /* Zero out the deadlist. */ 952 bplist_close(&ds->ds_deadlist); 953 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 954 ds->ds_phys->ds_deadlist_obj = 955 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 956 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 957 ds->ds_phys->ds_deadlist_obj)); 958 959 { 960 /* Free blkptrs that we gave birth to */ 961 zio_t *zio; 962 uint64_t used = 0, compressed = 0, uncompressed = 0; 963 struct killarg ka; 964 965 zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL, 966 ZIO_FLAG_MUSTSUCCEED); 967 ka.usedp = &used; 968 ka.compressedp = &compressed; 969 ka.uncompressedp = &uncompressed; 970 ka.zio = zio; 971 ka.tx = tx; 972 (void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 973 ADVANCE_POST, kill_blkptr, &ka); 974 (void) zio_wait(zio); 975 976 dsl_dir_diduse_space(ds->ds_dir, 977 -used, -compressed, -uncompressed, tx); 978 } 979 980 /* Change our contents to that of the prev snapshot */ 981 ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj); 982 ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp; 983 ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes; 984 ds->ds_phys->ds_compressed_bytes = 985 ds->ds_prev->ds_phys->ds_compressed_bytes; 986 ds->ds_phys->ds_uncompressed_bytes = 987 ds->ds_prev->ds_phys->ds_uncompressed_bytes; 988 ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags; 989 ds->ds_phys->ds_unique_bytes = 0; 990 991 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 992 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 993 ds->ds_prev->ds_phys->ds_unique_bytes = 0; 994 } 995 } 996 997 /* ARGSUSED */ 998 static int 999 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1000 { 1001 dsl_dataset_t *ds = arg1; 1002 1003 /* 1004 * Can't delete a head dataset if there are snapshots of it. 1005 * (Except if the only snapshots are from the branch we cloned 1006 * from.) 1007 */ 1008 if (ds->ds_prev != NULL && 1009 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1010 return (EINVAL); 1011 1012 return (0); 1013 } 1014 1015 /* ARGSUSED */ 1016 static void 1017 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1018 { 1019 dsl_dataset_t *ds = arg1; 1020 1021 /* Mark it as inconsistent on-disk, in case we crash */ 1022 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1023 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1024 } 1025 1026 /* ARGSUSED */ 1027 static int 1028 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1029 { 1030 dsl_dataset_t *ds = arg1; 1031 1032 /* Can't delete a branch point. */ 1033 if (ds->ds_phys->ds_num_children > 1) 1034 return (EEXIST); 1035 1036 /* 1037 * Can't delete a head dataset if there are snapshots of it. 1038 * (Except if the only snapshots are from the branch we cloned 1039 * from.) 1040 */ 1041 if (ds->ds_prev != NULL && 1042 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1043 return (EINVAL); 1044 1045 /* 1046 * If we made changes this txg, traverse_dsl_dataset won't find 1047 * them. Try again. 1048 */ 1049 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1050 return (EAGAIN); 1051 1052 /* XXX we should do some i/o error checking... */ 1053 return (0); 1054 } 1055 1056 static void 1057 dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) 1058 { 1059 dsl_dataset_t *ds = arg1; 1060 uint64_t used = 0, compressed = 0, uncompressed = 0; 1061 zio_t *zio; 1062 int err; 1063 int after_branch_point = FALSE; 1064 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1065 objset_t *mos = dp->dp_meta_objset; 1066 dsl_dataset_t *ds_prev = NULL; 1067 uint64_t obj; 1068 1069 ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX); 1070 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 1071 ASSERT(ds->ds_prev == NULL || 1072 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1073 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1074 1075 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1076 1077 obj = ds->ds_object; 1078 1079 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1080 if (ds->ds_prev) { 1081 ds_prev = ds->ds_prev; 1082 } else { 1083 VERIFY(0 == dsl_dataset_open_obj(dp, 1084 ds->ds_phys->ds_prev_snap_obj, NULL, 1085 DS_MODE_NONE, FTAG, &ds_prev)); 1086 } 1087 after_branch_point = 1088 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1089 1090 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1091 if (after_branch_point && 1092 ds->ds_phys->ds_next_snap_obj == 0) { 1093 /* This clone is toast. */ 1094 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1095 ds_prev->ds_phys->ds_num_children--; 1096 } else if (!after_branch_point) { 1097 ds_prev->ds_phys->ds_next_snap_obj = 1098 ds->ds_phys->ds_next_snap_obj; 1099 } 1100 } 1101 1102 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1103 1104 if (ds->ds_phys->ds_next_snap_obj != 0) { 1105 blkptr_t bp; 1106 dsl_dataset_t *ds_next; 1107 uint64_t itor = 0; 1108 1109 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1110 1111 VERIFY(0 == dsl_dataset_open_obj(dp, 1112 ds->ds_phys->ds_next_snap_obj, NULL, 1113 DS_MODE_NONE, FTAG, &ds_next)); 1114 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1115 1116 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1117 ds_next->ds_phys->ds_prev_snap_obj = 1118 ds->ds_phys->ds_prev_snap_obj; 1119 ds_next->ds_phys->ds_prev_snap_txg = 1120 ds->ds_phys->ds_prev_snap_txg; 1121 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1122 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1123 1124 /* 1125 * Transfer to our deadlist (which will become next's 1126 * new deadlist) any entries from next's current 1127 * deadlist which were born before prev, and free the 1128 * other entries. 1129 * 1130 * XXX we're doing this long task with the config lock held 1131 */ 1132 while (bplist_iterate(&ds_next->ds_deadlist, &itor, 1133 &bp) == 0) { 1134 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1135 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1136 &bp, tx)); 1137 if (ds_prev && !after_branch_point && 1138 bp.blk_birth > 1139 ds_prev->ds_phys->ds_prev_snap_txg) { 1140 ds_prev->ds_phys->ds_unique_bytes += 1141 bp_get_dasize(dp->dp_spa, &bp); 1142 } 1143 } else { 1144 used += bp_get_dasize(dp->dp_spa, &bp); 1145 compressed += BP_GET_PSIZE(&bp); 1146 uncompressed += BP_GET_UCSIZE(&bp); 1147 /* XXX check return value? */ 1148 (void) arc_free(zio, dp->dp_spa, tx->tx_txg, 1149 &bp, NULL, NULL, ARC_NOWAIT); 1150 } 1151 } 1152 1153 /* free next's deadlist */ 1154 bplist_close(&ds_next->ds_deadlist); 1155 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1156 1157 /* set next's deadlist to our deadlist */ 1158 ds_next->ds_phys->ds_deadlist_obj = 1159 ds->ds_phys->ds_deadlist_obj; 1160 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1161 ds_next->ds_phys->ds_deadlist_obj)); 1162 ds->ds_phys->ds_deadlist_obj = 0; 1163 1164 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1165 /* 1166 * Update next's unique to include blocks which 1167 * were previously shared by only this snapshot 1168 * and it. Those blocks will be born after the 1169 * prev snap and before this snap, and will have 1170 * died after the next snap and before the one 1171 * after that (ie. be on the snap after next's 1172 * deadlist). 1173 * 1174 * XXX we're doing this long task with the 1175 * config lock held 1176 */ 1177 dsl_dataset_t *ds_after_next; 1178 1179 VERIFY(0 == dsl_dataset_open_obj(dp, 1180 ds_next->ds_phys->ds_next_snap_obj, NULL, 1181 DS_MODE_NONE, FTAG, &ds_after_next)); 1182 itor = 0; 1183 while (bplist_iterate(&ds_after_next->ds_deadlist, 1184 &itor, &bp) == 0) { 1185 if (bp.blk_birth > 1186 ds->ds_phys->ds_prev_snap_txg && 1187 bp.blk_birth <= 1188 ds->ds_phys->ds_creation_txg) { 1189 ds_next->ds_phys->ds_unique_bytes += 1190 bp_get_dasize(dp->dp_spa, &bp); 1191 } 1192 } 1193 1194 dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG); 1195 ASSERT3P(ds_next->ds_prev, ==, NULL); 1196 } else { 1197 /* 1198 * It would be nice to update the head dataset's 1199 * unique. To do so we would have to traverse 1200 * it for blocks born after ds_prev, which is 1201 * pretty expensive just to maintain something 1202 * for debugging purposes. 1203 */ 1204 ASSERT3P(ds_next->ds_prev, ==, ds); 1205 dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE, 1206 ds_next); 1207 if (ds_prev) { 1208 VERIFY(0 == dsl_dataset_open_obj(dp, 1209 ds->ds_phys->ds_prev_snap_obj, NULL, 1210 DS_MODE_NONE, ds_next, &ds_next->ds_prev)); 1211 } else { 1212 ds_next->ds_prev = NULL; 1213 } 1214 } 1215 dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG); 1216 1217 /* 1218 * NB: unique_bytes is not accurate for head objsets 1219 * because we don't update it when we delete the most 1220 * recent snapshot -- see above comment. 1221 */ 1222 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1223 } else { 1224 /* 1225 * There's no next snapshot, so this is a head dataset. 1226 * Destroy the deadlist. Unless it's a clone, the 1227 * deadlist should be empty. (If it's a clone, it's 1228 * safe to ignore the deadlist contents.) 1229 */ 1230 struct killarg ka; 1231 1232 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1233 bplist_close(&ds->ds_deadlist); 1234 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1235 ds->ds_phys->ds_deadlist_obj = 0; 1236 1237 /* 1238 * Free everything that we point to (that's born after 1239 * the previous snapshot, if we are a clone) 1240 * 1241 * XXX we're doing this long task with the config lock held 1242 */ 1243 ka.usedp = &used; 1244 ka.compressedp = &compressed; 1245 ka.uncompressedp = &uncompressed; 1246 ka.zio = zio; 1247 ka.tx = tx; 1248 err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1249 ADVANCE_POST, kill_blkptr, &ka); 1250 ASSERT3U(err, ==, 0); 1251 } 1252 1253 err = zio_wait(zio); 1254 ASSERT3U(err, ==, 0); 1255 1256 dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx); 1257 1258 if (ds->ds_phys->ds_snapnames_zapobj) { 1259 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1260 ASSERT(err == 0); 1261 } 1262 1263 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1264 /* Erase the link in the dataset */ 1265 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1266 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1267 /* 1268 * dsl_dir_sync_destroy() called us, they'll destroy 1269 * the dataset. 1270 */ 1271 } else { 1272 /* remove from snapshot namespace */ 1273 dsl_dataset_t *ds_head; 1274 VERIFY(0 == dsl_dataset_open_obj(dp, 1275 ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL, 1276 DS_MODE_NONE, FTAG, &ds_head)); 1277 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1278 #ifdef ZFS_DEBUG 1279 { 1280 uint64_t val; 1281 err = zap_lookup(mos, 1282 ds_head->ds_phys->ds_snapnames_zapobj, 1283 ds->ds_snapname, 8, 1, &val); 1284 ASSERT3U(err, ==, 0); 1285 ASSERT3U(val, ==, obj); 1286 } 1287 #endif 1288 err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj, 1289 ds->ds_snapname, tx); 1290 ASSERT(err == 0); 1291 dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG); 1292 } 1293 1294 if (ds_prev && ds->ds_prev != ds_prev) 1295 dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG); 1296 1297 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag); 1298 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1299 } 1300 1301 /* ARGSUSED */ 1302 int 1303 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1304 { 1305 objset_t *os = arg1; 1306 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1307 const char *snapname = arg2; 1308 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1309 int err; 1310 uint64_t value; 1311 1312 /* 1313 * We don't allow multiple snapshots of the same txg. If there 1314 * is already one, try again. 1315 */ 1316 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1317 return (EAGAIN); 1318 1319 /* 1320 * Check for conflicting name snapshot name. 1321 */ 1322 err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj, 1323 snapname, 8, 1, &value); 1324 if (err == 0) 1325 return (EEXIST); 1326 if (err != ENOENT) 1327 return (err); 1328 1329 ds->ds_trysnap_txg = tx->tx_txg; 1330 return (0); 1331 } 1332 1333 void 1334 dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1335 { 1336 objset_t *os = arg1; 1337 dsl_dataset_t *ds = os->os->os_dsl_dataset; 1338 const char *snapname = arg2; 1339 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1340 dmu_buf_t *dbuf; 1341 dsl_dataset_phys_t *dsphys; 1342 uint64_t dsobj; 1343 objset_t *mos = dp->dp_meta_objset; 1344 int err; 1345 1346 spa_scrub_restart(dp->dp_spa, tx->tx_txg); 1347 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1348 1349 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1350 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1351 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1352 dmu_buf_will_dirty(dbuf, tx); 1353 dsphys = dbuf->db_data; 1354 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1355 dsphys->ds_fsid_guid = unique_create(); 1356 unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */ 1357 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1358 sizeof (dsphys->ds_guid)); 1359 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1360 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1361 dsphys->ds_next_snap_obj = ds->ds_object; 1362 dsphys->ds_num_children = 1; 1363 dsphys->ds_creation_time = gethrestime_sec(); 1364 dsphys->ds_creation_txg = tx->tx_txg; 1365 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1366 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1367 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1368 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1369 dsphys->ds_flags = ds->ds_phys->ds_flags; 1370 dsphys->ds_bp = ds->ds_phys->ds_bp; 1371 dmu_buf_rele(dbuf, FTAG); 1372 1373 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1374 if (ds->ds_prev) { 1375 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1376 ds->ds_object || 1377 ds->ds_prev->ds_phys->ds_num_children > 1); 1378 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1379 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1380 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1381 ds->ds_prev->ds_phys->ds_creation_txg); 1382 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1383 } 1384 } 1385 1386 bplist_close(&ds->ds_deadlist); 1387 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1388 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg); 1389 ds->ds_phys->ds_prev_snap_obj = dsobj; 1390 ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg; 1391 ds->ds_phys->ds_unique_bytes = 0; 1392 ds->ds_phys->ds_deadlist_obj = 1393 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1394 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1395 ds->ds_phys->ds_deadlist_obj)); 1396 1397 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1398 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1399 snapname, 8, 1, &dsobj, tx); 1400 ASSERT(err == 0); 1401 1402 if (ds->ds_prev) 1403 dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds); 1404 VERIFY(0 == dsl_dataset_open_obj(dp, 1405 ds->ds_phys->ds_prev_snap_obj, snapname, 1406 DS_MODE_NONE, ds, &ds->ds_prev)); 1407 } 1408 1409 void 1410 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1411 { 1412 ASSERT(dmu_tx_is_syncing(tx)); 1413 ASSERT(ds->ds_user_ptr != NULL); 1414 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1415 1416 dsl_dir_dirty(ds->ds_dir, tx); 1417 dmu_objset_sync(ds->ds_user_ptr, zio, tx); 1418 /* Unneeded? bplist_close(&ds->ds_deadlist); */ 1419 } 1420 1421 void 1422 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1423 { 1424 dsl_dir_stats(ds->ds_dir, nv); 1425 1426 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1427 ds->ds_phys->ds_creation_time); 1428 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1429 ds->ds_phys->ds_creation_txg); 1430 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, 1431 ds->ds_phys->ds_used_bytes); 1432 1433 if (ds->ds_phys->ds_next_snap_obj) { 1434 /* 1435 * This is a snapshot; override the dd's space used with 1436 * our unique space and compression ratio. 1437 */ 1438 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1439 ds->ds_phys->ds_unique_bytes); 1440 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 1441 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1442 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1443 ds->ds_phys->ds_compressed_bytes)); 1444 } 1445 } 1446 1447 void 1448 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1449 { 1450 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1451 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1452 if (ds->ds_phys->ds_next_snap_obj) { 1453 stat->dds_is_snapshot = B_TRUE; 1454 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1455 } 1456 1457 /* clone origin is really a dsl_dir thing... */ 1458 if (ds->ds_dir->dd_phys->dd_clone_parent_obj) { 1459 dsl_dataset_t *ods; 1460 1461 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1462 VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool, 1463 ds->ds_dir->dd_phys->dd_clone_parent_obj, 1464 NULL, DS_MODE_NONE, FTAG, &ods)); 1465 dsl_dataset_name(ods, stat->dds_clone_of); 1466 dsl_dataset_close(ods, DS_MODE_NONE, FTAG); 1467 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1468 } 1469 } 1470 1471 uint64_t 1472 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1473 { 1474 return (ds->ds_phys->ds_fsid_guid); 1475 } 1476 1477 void 1478 dsl_dataset_space(dsl_dataset_t *ds, 1479 uint64_t *refdbytesp, uint64_t *availbytesp, 1480 uint64_t *usedobjsp, uint64_t *availobjsp) 1481 { 1482 *refdbytesp = ds->ds_phys->ds_used_bytes; 1483 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1484 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1485 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1486 } 1487 1488 /* ARGSUSED */ 1489 static int 1490 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 1491 { 1492 dsl_dataset_t *ds = arg1; 1493 char *newsnapname = arg2; 1494 dsl_dir_t *dd = ds->ds_dir; 1495 objset_t *mos = dd->dd_pool->dp_meta_objset; 1496 dsl_dataset_t *hds; 1497 uint64_t val; 1498 int err; 1499 1500 err = dsl_dataset_open_obj(dd->dd_pool, 1501 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds); 1502 if (err) 1503 return (err); 1504 1505 /* new name better not be in use */ 1506 err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj, 1507 newsnapname, 8, 1, &val); 1508 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1509 1510 if (err == 0) 1511 err = EEXIST; 1512 else if (err == ENOENT) 1513 err = 0; 1514 return (err); 1515 } 1516 1517 static void 1518 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1519 { 1520 dsl_dataset_t *ds = arg1; 1521 char *newsnapname = arg2; 1522 dsl_dir_t *dd = ds->ds_dir; 1523 objset_t *mos = dd->dd_pool->dp_meta_objset; 1524 dsl_dataset_t *hds; 1525 int err; 1526 1527 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 1528 1529 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 1530 dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds)); 1531 1532 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1533 err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj, 1534 ds->ds_snapname, tx); 1535 ASSERT3U(err, ==, 0); 1536 mutex_enter(&ds->ds_lock); 1537 (void) strcpy(ds->ds_snapname, newsnapname); 1538 mutex_exit(&ds->ds_lock); 1539 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 1540 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 1541 ASSERT3U(err, ==, 0); 1542 1543 dsl_dataset_close(hds, DS_MODE_NONE, FTAG); 1544 } 1545 1546 #pragma weak dmu_objset_rename = dsl_dataset_rename 1547 int 1548 dsl_dataset_rename(const char *oldname, const char *newname) 1549 { 1550 dsl_dir_t *dd; 1551 dsl_dataset_t *ds; 1552 const char *tail; 1553 int err; 1554 1555 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 1556 if (err) 1557 return (err); 1558 if (tail == NULL) { 1559 err = dsl_dir_rename(dd, newname); 1560 dsl_dir_close(dd, FTAG); 1561 return (err); 1562 } 1563 if (tail[0] != '@') { 1564 /* the name ended in a nonexistant component */ 1565 dsl_dir_close(dd, FTAG); 1566 return (ENOENT); 1567 } 1568 1569 dsl_dir_close(dd, FTAG); 1570 1571 /* new name must be snapshot in same filesystem */ 1572 tail = strchr(newname, '@'); 1573 if (tail == NULL) 1574 return (EINVAL); 1575 tail++; 1576 if (strncmp(oldname, newname, tail - newname) != 0) 1577 return (EXDEV); 1578 1579 err = dsl_dataset_open(oldname, 1580 DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds); 1581 if (err) 1582 return (err); 1583 1584 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1585 dsl_dataset_snapshot_rename_check, 1586 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 1587 1588 dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG); 1589 1590 return (err); 1591 } 1592 1593 struct promotearg { 1594 uint64_t used, comp, uncomp, unique; 1595 uint64_t newnext_obj, snapnames_obj; 1596 }; 1597 1598 static int 1599 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 1600 { 1601 dsl_dataset_t *hds = arg1; 1602 struct promotearg *pa = arg2; 1603 dsl_dir_t *dd = hds->ds_dir; 1604 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1605 dsl_dir_t *pdd = NULL; 1606 dsl_dataset_t *ds = NULL; 1607 dsl_dataset_t *pivot_ds = NULL; 1608 dsl_dataset_t *newnext_ds = NULL; 1609 int err; 1610 char *name = NULL; 1611 uint64_t itor = 0; 1612 blkptr_t bp; 1613 1614 bzero(pa, sizeof (*pa)); 1615 1616 /* Check that it is a clone */ 1617 if (dd->dd_phys->dd_clone_parent_obj == 0) 1618 return (EINVAL); 1619 1620 /* Since this is so expensive, don't do the preliminary check */ 1621 if (!dmu_tx_is_syncing(tx)) 1622 return (0); 1623 1624 if (err = dsl_dataset_open_obj(dp, 1625 dd->dd_phys->dd_clone_parent_obj, 1626 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)) 1627 goto out; 1628 pdd = pivot_ds->ds_dir; 1629 1630 { 1631 dsl_dataset_t *phds; 1632 if (err = dsl_dataset_open_obj(dd->dd_pool, 1633 pdd->dd_phys->dd_head_dataset_obj, 1634 NULL, DS_MODE_NONE, FTAG, &phds)) 1635 goto out; 1636 pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj; 1637 dsl_dataset_close(phds, DS_MODE_NONE, FTAG); 1638 } 1639 1640 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1641 err = EXDEV; 1642 goto out; 1643 } 1644 1645 /* find pivot point's new next ds */ 1646 VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object, 1647 NULL, DS_MODE_NONE, FTAG, &newnext_ds)); 1648 while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) { 1649 dsl_dataset_t *prev; 1650 1651 if (err = dsl_dataset_open_obj(dd->dd_pool, 1652 newnext_ds->ds_phys->ds_prev_snap_obj, 1653 NULL, DS_MODE_NONE, FTAG, &prev)) 1654 goto out; 1655 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1656 newnext_ds = prev; 1657 } 1658 pa->newnext_obj = newnext_ds->ds_object; 1659 1660 /* compute pivot point's new unique space */ 1661 while ((err = bplist_iterate(&newnext_ds->ds_deadlist, 1662 &itor, &bp)) == 0) { 1663 if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg) 1664 pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp); 1665 } 1666 if (err != ENOENT) 1667 goto out; 1668 1669 /* Walk the snapshots that we are moving */ 1670 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1671 ds = pivot_ds; 1672 /* CONSTCOND */ 1673 while (TRUE) { 1674 uint64_t val, dlused, dlcomp, dluncomp; 1675 dsl_dataset_t *prev; 1676 1677 /* Check that the snapshot name does not conflict */ 1678 dsl_dataset_name(ds, name); 1679 err = zap_lookup(dd->dd_pool->dp_meta_objset, 1680 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1681 8, 1, &val); 1682 if (err != ENOENT) { 1683 if (err == 0) 1684 err = EEXIST; 1685 goto out; 1686 } 1687 1688 /* 1689 * compute space to transfer. Each snapshot gave birth to: 1690 * (my used) - (prev's used) + (deadlist's used) 1691 */ 1692 pa->used += ds->ds_phys->ds_used_bytes; 1693 pa->comp += ds->ds_phys->ds_compressed_bytes; 1694 pa->uncomp += ds->ds_phys->ds_uncompressed_bytes; 1695 1696 /* If we reach the first snapshot, we're done. */ 1697 if (ds->ds_phys->ds_prev_snap_obj == 0) 1698 break; 1699 1700 if (err = bplist_space(&ds->ds_deadlist, 1701 &dlused, &dlcomp, &dluncomp)) 1702 goto out; 1703 if (err = dsl_dataset_open_obj(dd->dd_pool, 1704 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1705 FTAG, &prev)) 1706 goto out; 1707 pa->used += dlused - prev->ds_phys->ds_used_bytes; 1708 pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes; 1709 pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes; 1710 1711 /* 1712 * We could be a clone of a clone. If we reach our 1713 * parent's branch point, we're done. 1714 */ 1715 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1716 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1717 break; 1718 } 1719 if (ds != pivot_ds) 1720 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1721 ds = prev; 1722 } 1723 1724 /* Check that there is enough space here */ 1725 err = dsl_dir_transfer_possible(pdd, dd, pa->used); 1726 1727 out: 1728 if (ds && ds != pivot_ds) 1729 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1730 if (pivot_ds) 1731 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1732 if (newnext_ds) 1733 dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG); 1734 if (name) 1735 kmem_free(name, MAXPATHLEN); 1736 return (err); 1737 } 1738 1739 static void 1740 dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx) 1741 { 1742 dsl_dataset_t *hds = arg1; 1743 struct promotearg *pa = arg2; 1744 dsl_dir_t *dd = hds->ds_dir; 1745 dsl_pool_t *dp = hds->ds_dir->dd_pool; 1746 dsl_dir_t *pdd = NULL; 1747 dsl_dataset_t *ds, *pivot_ds; 1748 char *name; 1749 1750 ASSERT(dd->dd_phys->dd_clone_parent_obj != 0); 1751 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 1752 1753 VERIFY(0 == dsl_dataset_open_obj(dp, 1754 dd->dd_phys->dd_clone_parent_obj, 1755 NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds)); 1756 /* 1757 * We need to explicitly open pdd, since pivot_ds's pdd will be 1758 * changing. 1759 */ 1760 VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object, 1761 NULL, FTAG, &pdd)); 1762 1763 /* move snapshots to this dir */ 1764 name = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1765 ds = pivot_ds; 1766 /* CONSTCOND */ 1767 while (TRUE) { 1768 dsl_dataset_t *prev; 1769 1770 /* move snap name entry */ 1771 dsl_dataset_name(ds, name); 1772 VERIFY(0 == zap_remove(dp->dp_meta_objset, 1773 pa->snapnames_obj, ds->ds_snapname, tx)); 1774 VERIFY(0 == zap_add(dp->dp_meta_objset, 1775 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 1776 8, 1, &ds->ds_object, tx)); 1777 1778 /* change containing dsl_dir */ 1779 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1780 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object); 1781 ds->ds_phys->ds_dir_obj = dd->dd_object; 1782 ASSERT3P(ds->ds_dir, ==, pdd); 1783 dsl_dir_close(ds->ds_dir, ds); 1784 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 1785 NULL, ds, &ds->ds_dir)); 1786 1787 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 1788 1789 if (ds->ds_phys->ds_prev_snap_obj == 0) 1790 break; 1791 1792 VERIFY(0 == dsl_dataset_open_obj(dp, 1793 ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE, 1794 FTAG, &prev)); 1795 1796 if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) { 1797 dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG); 1798 break; 1799 } 1800 if (ds != pivot_ds) 1801 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1802 ds = prev; 1803 } 1804 if (ds != pivot_ds) 1805 dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG); 1806 1807 /* change pivot point's next snap */ 1808 dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx); 1809 pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj; 1810 1811 /* change clone_parent-age */ 1812 dmu_buf_will_dirty(dd->dd_dbuf, tx); 1813 ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object); 1814 dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj; 1815 dmu_buf_will_dirty(pdd->dd_dbuf, tx); 1816 pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object; 1817 1818 /* change space accounting */ 1819 dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx); 1820 dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx); 1821 pivot_ds->ds_phys->ds_unique_bytes = pa->unique; 1822 1823 dsl_dir_close(pdd, FTAG); 1824 dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG); 1825 kmem_free(name, MAXPATHLEN); 1826 } 1827 1828 int 1829 dsl_dataset_promote(const char *name) 1830 { 1831 dsl_dataset_t *ds; 1832 int err; 1833 dmu_object_info_t doi; 1834 struct promotearg pa; 1835 1836 err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds); 1837 if (err) 1838 return (err); 1839 1840 err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset, 1841 ds->ds_phys->ds_snapnames_zapobj, &doi); 1842 if (err) { 1843 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1844 return (err); 1845 } 1846 1847 /* 1848 * Add in 128x the snapnames zapobj size, since we will be moving 1849 * a bunch of snapnames to the promoted ds, and dirtying their 1850 * bonus buffers. 1851 */ 1852 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1853 dsl_dataset_promote_check, 1854 dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks); 1855 dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 1856 return (err); 1857 } 1858