1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013 by Delphix. All rights reserved. 24 * Copyright (c) 2012, Joyent, Inc. All rights reserved. 25 */ 26 27 #include <sys/dmu_objset.h> 28 #include <sys/dsl_dataset.h> 29 #include <sys/dsl_dir.h> 30 #include <sys/dsl_prop.h> 31 #include <sys/dsl_synctask.h> 32 #include <sys/dmu_traverse.h> 33 #include <sys/dmu_impl.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/arc.h> 36 #include <sys/zio.h> 37 #include <sys/zap.h> 38 #include <sys/zfeature.h> 39 #include <sys/unique.h> 40 #include <sys/zfs_context.h> 41 #include <sys/zfs_ioctl.h> 42 #include <sys/spa.h> 43 #include <sys/zfs_znode.h> 44 #include <sys/zfs_onexit.h> 45 #include <sys/zvol.h> 46 #include <sys/dsl_scan.h> 47 #include <sys/dsl_deadlist.h> 48 #include <sys/dsl_destroy.h> 49 #include <sys/dsl_userhold.h> 50 51 #define SWITCH64(x, y) \ 52 { \ 53 uint64_t __tmp = (x); \ 54 (x) = (y); \ 55 (y) = __tmp; \ 56 } 57 58 #define DS_REF_MAX (1ULL << 62) 59 60 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 61 62 /* 63 * Figure out how much of this delta should be propogated to the dsl_dir 64 * layer. If there's a refreservation, that space has already been 65 * partially accounted for in our ancestors. 66 */ 67 static int64_t 68 parent_delta(dsl_dataset_t *ds, int64_t delta) 69 { 70 uint64_t old_bytes, new_bytes; 71 72 if (ds->ds_reserved == 0) 73 return (delta); 74 75 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 76 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 77 78 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 79 return (new_bytes - old_bytes); 80 } 81 82 void 83 dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 84 { 85 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 86 int compressed = BP_GET_PSIZE(bp); 87 int uncompressed = BP_GET_UCSIZE(bp); 88 int64_t delta; 89 90 dprintf_bp(bp, "ds=%p", ds); 91 92 ASSERT(dmu_tx_is_syncing(tx)); 93 /* It could have been compressed away to nothing */ 94 if (BP_IS_HOLE(bp)) 95 return; 96 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 97 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); 98 if (ds == NULL) { 99 dsl_pool_mos_diduse_space(tx->tx_pool, 100 used, compressed, uncompressed); 101 return; 102 } 103 104 dmu_buf_will_dirty(ds->ds_dbuf, tx); 105 mutex_enter(&ds->ds_lock); 106 delta = parent_delta(ds, used); 107 ds->ds_phys->ds_referenced_bytes += used; 108 ds->ds_phys->ds_compressed_bytes += compressed; 109 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 110 ds->ds_phys->ds_unique_bytes += used; 111 mutex_exit(&ds->ds_lock); 112 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 113 compressed, uncompressed, tx); 114 dsl_dir_transfer_space(ds->ds_dir, used - delta, 115 DD_USED_REFRSRV, DD_USED_HEAD, tx); 116 } 117 118 int 119 dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 120 boolean_t async) 121 { 122 if (BP_IS_HOLE(bp)) 123 return (0); 124 125 ASSERT(dmu_tx_is_syncing(tx)); 126 ASSERT(bp->blk_birth <= tx->tx_txg); 127 128 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 129 int compressed = BP_GET_PSIZE(bp); 130 int uncompressed = BP_GET_UCSIZE(bp); 131 132 ASSERT(used > 0); 133 if (ds == NULL) { 134 dsl_free(tx->tx_pool, tx->tx_txg, bp); 135 dsl_pool_mos_diduse_space(tx->tx_pool, 136 -used, -compressed, -uncompressed); 137 return (used); 138 } 139 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 140 141 ASSERT(!dsl_dataset_is_snapshot(ds)); 142 dmu_buf_will_dirty(ds->ds_dbuf, tx); 143 144 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 145 int64_t delta; 146 147 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 148 dsl_free(tx->tx_pool, tx->tx_txg, bp); 149 150 mutex_enter(&ds->ds_lock); 151 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 152 !DS_UNIQUE_IS_ACCURATE(ds)); 153 delta = parent_delta(ds, -used); 154 ds->ds_phys->ds_unique_bytes -= used; 155 mutex_exit(&ds->ds_lock); 156 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 157 delta, -compressed, -uncompressed, tx); 158 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 159 DD_USED_REFRSRV, DD_USED_HEAD, tx); 160 } else { 161 dprintf_bp(bp, "putting on dead list: %s", ""); 162 if (async) { 163 /* 164 * We are here as part of zio's write done callback, 165 * which means we're a zio interrupt thread. We can't 166 * call dsl_deadlist_insert() now because it may block 167 * waiting for I/O. Instead, put bp on the deferred 168 * queue and let dsl_pool_sync() finish the job. 169 */ 170 bplist_append(&ds->ds_pending_deadlist, bp); 171 } else { 172 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 173 } 174 ASSERT3U(ds->ds_prev->ds_object, ==, 175 ds->ds_phys->ds_prev_snap_obj); 176 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 177 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 178 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 179 ds->ds_object && bp->blk_birth > 180 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 181 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 182 mutex_enter(&ds->ds_prev->ds_lock); 183 ds->ds_prev->ds_phys->ds_unique_bytes += used; 184 mutex_exit(&ds->ds_prev->ds_lock); 185 } 186 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 187 dsl_dir_transfer_space(ds->ds_dir, used, 188 DD_USED_HEAD, DD_USED_SNAP, tx); 189 } 190 } 191 mutex_enter(&ds->ds_lock); 192 ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used); 193 ds->ds_phys->ds_referenced_bytes -= used; 194 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 195 ds->ds_phys->ds_compressed_bytes -= compressed; 196 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 197 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 198 mutex_exit(&ds->ds_lock); 199 200 return (used); 201 } 202 203 uint64_t 204 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 205 { 206 uint64_t trysnap = 0; 207 208 if (ds == NULL) 209 return (0); 210 /* 211 * The snapshot creation could fail, but that would cause an 212 * incorrect FALSE return, which would only result in an 213 * overestimation of the amount of space that an operation would 214 * consume, which is OK. 215 * 216 * There's also a small window where we could miss a pending 217 * snapshot, because we could set the sync task in the quiescing 218 * phase. So this should only be used as a guess. 219 */ 220 if (ds->ds_trysnap_txg > 221 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 222 trysnap = ds->ds_trysnap_txg; 223 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 224 } 225 226 boolean_t 227 dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 228 uint64_t blk_birth) 229 { 230 if (blk_birth <= dsl_dataset_prev_snap_txg(ds)) 231 return (B_FALSE); 232 233 ddt_prefetch(dsl_dataset_get_spa(ds), bp); 234 235 return (B_TRUE); 236 } 237 238 /* ARGSUSED */ 239 static void 240 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 241 { 242 dsl_dataset_t *ds = dsv; 243 244 ASSERT(ds->ds_owner == NULL); 245 246 unique_remove(ds->ds_fsid_guid); 247 248 if (ds->ds_objset != NULL) 249 dmu_objset_evict(ds->ds_objset); 250 251 if (ds->ds_prev) { 252 dsl_dataset_rele(ds->ds_prev, ds); 253 ds->ds_prev = NULL; 254 } 255 256 bplist_destroy(&ds->ds_pending_deadlist); 257 if (ds->ds_phys->ds_deadlist_obj != 0) 258 dsl_deadlist_close(&ds->ds_deadlist); 259 if (ds->ds_dir) 260 dsl_dir_rele(ds->ds_dir, ds); 261 262 ASSERT(!list_link_active(&ds->ds_synced_link)); 263 264 mutex_destroy(&ds->ds_lock); 265 mutex_destroy(&ds->ds_opening_lock); 266 refcount_destroy(&ds->ds_longholds); 267 268 kmem_free(ds, sizeof (dsl_dataset_t)); 269 } 270 271 int 272 dsl_dataset_get_snapname(dsl_dataset_t *ds) 273 { 274 dsl_dataset_phys_t *headphys; 275 int err; 276 dmu_buf_t *headdbuf; 277 dsl_pool_t *dp = ds->ds_dir->dd_pool; 278 objset_t *mos = dp->dp_meta_objset; 279 280 if (ds->ds_snapname[0]) 281 return (0); 282 if (ds->ds_phys->ds_next_snap_obj == 0) 283 return (0); 284 285 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 286 FTAG, &headdbuf); 287 if (err != 0) 288 return (err); 289 headphys = headdbuf->db_data; 290 err = zap_value_search(dp->dp_meta_objset, 291 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 292 dmu_buf_rele(headdbuf, FTAG); 293 return (err); 294 } 295 296 int 297 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 298 { 299 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 300 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 301 matchtype_t mt; 302 int err; 303 304 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 305 mt = MT_FIRST; 306 else 307 mt = MT_EXACT; 308 309 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 310 value, mt, NULL, 0, NULL); 311 if (err == ENOTSUP && mt == MT_FIRST) 312 err = zap_lookup(mos, snapobj, name, 8, 1, value); 313 return (err); 314 } 315 316 int 317 dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx) 318 { 319 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 320 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 321 matchtype_t mt; 322 int err; 323 324 dsl_dir_snap_cmtime_update(ds->ds_dir); 325 326 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 327 mt = MT_FIRST; 328 else 329 mt = MT_EXACT; 330 331 err = zap_remove_norm(mos, snapobj, name, mt, tx); 332 if (err == ENOTSUP && mt == MT_FIRST) 333 err = zap_remove(mos, snapobj, name, tx); 334 return (err); 335 } 336 337 int 338 dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 339 dsl_dataset_t **dsp) 340 { 341 objset_t *mos = dp->dp_meta_objset; 342 dmu_buf_t *dbuf; 343 dsl_dataset_t *ds; 344 int err; 345 dmu_object_info_t doi; 346 347 ASSERT(dsl_pool_config_held(dp)); 348 349 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 350 if (err != 0) 351 return (err); 352 353 /* Make sure dsobj has the correct object type. */ 354 dmu_object_info_from_db(dbuf, &doi); 355 if (doi.doi_type != DMU_OT_DSL_DATASET) { 356 dmu_buf_rele(dbuf, tag); 357 return (SET_ERROR(EINVAL)); 358 } 359 360 ds = dmu_buf_get_user(dbuf); 361 if (ds == NULL) { 362 dsl_dataset_t *winner = NULL; 363 364 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 365 ds->ds_dbuf = dbuf; 366 ds->ds_object = dsobj; 367 ds->ds_phys = dbuf->db_data; 368 369 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 370 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 371 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 372 refcount_create(&ds->ds_longholds); 373 374 bplist_create(&ds->ds_pending_deadlist); 375 dsl_deadlist_open(&ds->ds_deadlist, 376 mos, ds->ds_phys->ds_deadlist_obj); 377 378 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 379 offsetof(dmu_sendarg_t, dsa_link)); 380 381 if (err == 0) { 382 err = dsl_dir_hold_obj(dp, 383 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 384 } 385 if (err != 0) { 386 mutex_destroy(&ds->ds_lock); 387 mutex_destroy(&ds->ds_opening_lock); 388 refcount_destroy(&ds->ds_longholds); 389 bplist_destroy(&ds->ds_pending_deadlist); 390 dsl_deadlist_close(&ds->ds_deadlist); 391 kmem_free(ds, sizeof (dsl_dataset_t)); 392 dmu_buf_rele(dbuf, tag); 393 return (err); 394 } 395 396 if (!dsl_dataset_is_snapshot(ds)) { 397 ds->ds_snapname[0] = '\0'; 398 if (ds->ds_phys->ds_prev_snap_obj != 0) { 399 err = dsl_dataset_hold_obj(dp, 400 ds->ds_phys->ds_prev_snap_obj, 401 ds, &ds->ds_prev); 402 } 403 } else { 404 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 405 err = dsl_dataset_get_snapname(ds); 406 if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 407 err = zap_count( 408 ds->ds_dir->dd_pool->dp_meta_objset, 409 ds->ds_phys->ds_userrefs_obj, 410 &ds->ds_userrefs); 411 } 412 } 413 414 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 415 err = dsl_prop_get_int_ds(ds, 416 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 417 &ds->ds_reserved); 418 if (err == 0) { 419 err = dsl_prop_get_int_ds(ds, 420 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 421 &ds->ds_quota); 422 } 423 } else { 424 ds->ds_reserved = ds->ds_quota = 0; 425 } 426 427 if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds, 428 &ds->ds_phys, dsl_dataset_evict)) != NULL) { 429 bplist_destroy(&ds->ds_pending_deadlist); 430 dsl_deadlist_close(&ds->ds_deadlist); 431 if (ds->ds_prev) 432 dsl_dataset_rele(ds->ds_prev, ds); 433 dsl_dir_rele(ds->ds_dir, ds); 434 mutex_destroy(&ds->ds_lock); 435 mutex_destroy(&ds->ds_opening_lock); 436 refcount_destroy(&ds->ds_longholds); 437 kmem_free(ds, sizeof (dsl_dataset_t)); 438 if (err != 0) { 439 dmu_buf_rele(dbuf, tag); 440 return (err); 441 } 442 ds = winner; 443 } else { 444 ds->ds_fsid_guid = 445 unique_insert(ds->ds_phys->ds_fsid_guid); 446 } 447 } 448 ASSERT3P(ds->ds_dbuf, ==, dbuf); 449 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 450 ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 451 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 452 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 453 *dsp = ds; 454 return (0); 455 } 456 457 int 458 dsl_dataset_hold(dsl_pool_t *dp, const char *name, 459 void *tag, dsl_dataset_t **dsp) 460 { 461 dsl_dir_t *dd; 462 const char *snapname; 463 uint64_t obj; 464 int err = 0; 465 466 err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 467 if (err != 0) 468 return (err); 469 470 ASSERT(dsl_pool_config_held(dp)); 471 obj = dd->dd_phys->dd_head_dataset_obj; 472 if (obj != 0) 473 err = dsl_dataset_hold_obj(dp, obj, tag, dsp); 474 else 475 err = SET_ERROR(ENOENT); 476 477 /* we may be looking for a snapshot */ 478 if (err == 0 && snapname != NULL) { 479 dsl_dataset_t *ds; 480 481 if (*snapname++ != '@') { 482 dsl_dataset_rele(*dsp, tag); 483 dsl_dir_rele(dd, FTAG); 484 return (SET_ERROR(ENOENT)); 485 } 486 487 dprintf("looking for snapshot '%s'\n", snapname); 488 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 489 if (err == 0) 490 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 491 dsl_dataset_rele(*dsp, tag); 492 493 if (err == 0) { 494 mutex_enter(&ds->ds_lock); 495 if (ds->ds_snapname[0] == 0) 496 (void) strlcpy(ds->ds_snapname, snapname, 497 sizeof (ds->ds_snapname)); 498 mutex_exit(&ds->ds_lock); 499 *dsp = ds; 500 } 501 } 502 503 dsl_dir_rele(dd, FTAG); 504 return (err); 505 } 506 507 int 508 dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 509 void *tag, dsl_dataset_t **dsp) 510 { 511 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 512 if (err != 0) 513 return (err); 514 if (!dsl_dataset_tryown(*dsp, tag)) { 515 dsl_dataset_rele(*dsp, tag); 516 *dsp = NULL; 517 return (SET_ERROR(EBUSY)); 518 } 519 return (0); 520 } 521 522 int 523 dsl_dataset_own(dsl_pool_t *dp, const char *name, 524 void *tag, dsl_dataset_t **dsp) 525 { 526 int err = dsl_dataset_hold(dp, name, tag, dsp); 527 if (err != 0) 528 return (err); 529 if (!dsl_dataset_tryown(*dsp, tag)) { 530 dsl_dataset_rele(*dsp, tag); 531 return (SET_ERROR(EBUSY)); 532 } 533 return (0); 534 } 535 536 /* 537 * See the comment above dsl_pool_hold() for details. In summary, a long 538 * hold is used to prevent destruction of a dataset while the pool hold 539 * is dropped, allowing other concurrent operations (e.g. spa_sync()). 540 * 541 * The dataset and pool must be held when this function is called. After it 542 * is called, the pool hold may be released while the dataset is still held 543 * and accessed. 544 */ 545 void 546 dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 547 { 548 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 549 (void) refcount_add(&ds->ds_longholds, tag); 550 } 551 552 void 553 dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 554 { 555 (void) refcount_remove(&ds->ds_longholds, tag); 556 } 557 558 /* Return B_TRUE if there are any long holds on this dataset. */ 559 boolean_t 560 dsl_dataset_long_held(dsl_dataset_t *ds) 561 { 562 return (!refcount_is_zero(&ds->ds_longholds)); 563 } 564 565 void 566 dsl_dataset_name(dsl_dataset_t *ds, char *name) 567 { 568 if (ds == NULL) { 569 (void) strcpy(name, "mos"); 570 } else { 571 dsl_dir_name(ds->ds_dir, name); 572 VERIFY0(dsl_dataset_get_snapname(ds)); 573 if (ds->ds_snapname[0]) { 574 (void) strcat(name, "@"); 575 /* 576 * We use a "recursive" mutex so that we 577 * can call dprintf_ds() with ds_lock held. 578 */ 579 if (!MUTEX_HELD(&ds->ds_lock)) { 580 mutex_enter(&ds->ds_lock); 581 (void) strcat(name, ds->ds_snapname); 582 mutex_exit(&ds->ds_lock); 583 } else { 584 (void) strcat(name, ds->ds_snapname); 585 } 586 } 587 } 588 } 589 590 void 591 dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 592 { 593 dmu_buf_rele(ds->ds_dbuf, tag); 594 } 595 596 void 597 dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 598 { 599 ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL); 600 601 mutex_enter(&ds->ds_lock); 602 ds->ds_owner = NULL; 603 mutex_exit(&ds->ds_lock); 604 dsl_dataset_long_rele(ds, tag); 605 if (ds->ds_dbuf != NULL) 606 dsl_dataset_rele(ds, tag); 607 else 608 dsl_dataset_evict(NULL, ds); 609 } 610 611 boolean_t 612 dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 613 { 614 boolean_t gotit = FALSE; 615 616 mutex_enter(&ds->ds_lock); 617 if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 618 ds->ds_owner = tag; 619 dsl_dataset_long_hold(ds, tag); 620 gotit = TRUE; 621 } 622 mutex_exit(&ds->ds_lock); 623 return (gotit); 624 } 625 626 uint64_t 627 dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 628 uint64_t flags, dmu_tx_t *tx) 629 { 630 dsl_pool_t *dp = dd->dd_pool; 631 dmu_buf_t *dbuf; 632 dsl_dataset_phys_t *dsphys; 633 uint64_t dsobj; 634 objset_t *mos = dp->dp_meta_objset; 635 636 if (origin == NULL) 637 origin = dp->dp_origin_snap; 638 639 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 640 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 641 ASSERT(dmu_tx_is_syncing(tx)); 642 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 643 644 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 645 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 646 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 647 dmu_buf_will_dirty(dbuf, tx); 648 dsphys = dbuf->db_data; 649 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 650 dsphys->ds_dir_obj = dd->dd_object; 651 dsphys->ds_flags = flags; 652 dsphys->ds_fsid_guid = unique_create(); 653 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 654 sizeof (dsphys->ds_guid)); 655 dsphys->ds_snapnames_zapobj = 656 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 657 DMU_OT_NONE, 0, tx); 658 dsphys->ds_creation_time = gethrestime_sec(); 659 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 660 661 if (origin == NULL) { 662 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 663 } else { 664 dsl_dataset_t *ohds; /* head of the origin snapshot */ 665 666 dsphys->ds_prev_snap_obj = origin->ds_object; 667 dsphys->ds_prev_snap_txg = 668 origin->ds_phys->ds_creation_txg; 669 dsphys->ds_referenced_bytes = 670 origin->ds_phys->ds_referenced_bytes; 671 dsphys->ds_compressed_bytes = 672 origin->ds_phys->ds_compressed_bytes; 673 dsphys->ds_uncompressed_bytes = 674 origin->ds_phys->ds_uncompressed_bytes; 675 dsphys->ds_bp = origin->ds_phys->ds_bp; 676 dsphys->ds_flags |= origin->ds_phys->ds_flags; 677 678 dmu_buf_will_dirty(origin->ds_dbuf, tx); 679 origin->ds_phys->ds_num_children++; 680 681 VERIFY0(dsl_dataset_hold_obj(dp, 682 origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds)); 683 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 684 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 685 dsl_dataset_rele(ohds, FTAG); 686 687 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 688 if (origin->ds_phys->ds_next_clones_obj == 0) { 689 origin->ds_phys->ds_next_clones_obj = 690 zap_create(mos, 691 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 692 } 693 VERIFY0(zap_add_int(mos, 694 origin->ds_phys->ds_next_clones_obj, dsobj, tx)); 695 } 696 697 dmu_buf_will_dirty(dd->dd_dbuf, tx); 698 dd->dd_phys->dd_origin_obj = origin->ds_object; 699 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 700 if (origin->ds_dir->dd_phys->dd_clones == 0) { 701 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 702 origin->ds_dir->dd_phys->dd_clones = 703 zap_create(mos, 704 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 705 } 706 VERIFY0(zap_add_int(mos, 707 origin->ds_dir->dd_phys->dd_clones, dsobj, tx)); 708 } 709 } 710 711 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 712 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 713 714 dmu_buf_rele(dbuf, FTAG); 715 716 dmu_buf_will_dirty(dd->dd_dbuf, tx); 717 dd->dd_phys->dd_head_dataset_obj = dsobj; 718 719 return (dsobj); 720 } 721 722 static void 723 dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) 724 { 725 objset_t *os; 726 727 VERIFY0(dmu_objset_from_ds(ds, &os)); 728 bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 729 dsl_dataset_dirty(ds, tx); 730 } 731 732 uint64_t 733 dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 734 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 735 { 736 dsl_pool_t *dp = pdd->dd_pool; 737 uint64_t dsobj, ddobj; 738 dsl_dir_t *dd; 739 740 ASSERT(dmu_tx_is_syncing(tx)); 741 ASSERT(lastname[0] != '@'); 742 743 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 744 VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); 745 746 dsobj = dsl_dataset_create_sync_dd(dd, origin, 747 flags & ~DS_CREATE_FLAG_NODIRTY, tx); 748 749 dsl_deleg_set_create_perms(dd, tx, cr); 750 751 dsl_dir_rele(dd, FTAG); 752 753 /* 754 * If we are creating a clone, make sure we zero out any stale 755 * data from the origin snapshots zil header. 756 */ 757 if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { 758 dsl_dataset_t *ds; 759 760 VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 761 dsl_dataset_zero_zil(ds, tx); 762 dsl_dataset_rele(ds, FTAG); 763 } 764 765 return (dsobj); 766 } 767 768 /* 769 * The unique space in the head dataset can be calculated by subtracting 770 * the space used in the most recent snapshot, that is still being used 771 * in this file system, from the space currently in use. To figure out 772 * the space in the most recent snapshot still in use, we need to take 773 * the total space used in the snapshot and subtract out the space that 774 * has been freed up since the snapshot was taken. 775 */ 776 void 777 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 778 { 779 uint64_t mrs_used; 780 uint64_t dlused, dlcomp, dluncomp; 781 782 ASSERT(!dsl_dataset_is_snapshot(ds)); 783 784 if (ds->ds_phys->ds_prev_snap_obj != 0) 785 mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes; 786 else 787 mrs_used = 0; 788 789 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 790 791 ASSERT3U(dlused, <=, mrs_used); 792 ds->ds_phys->ds_unique_bytes = 793 ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused); 794 795 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 796 SPA_VERSION_UNIQUE_ACCURATE) 797 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 798 } 799 800 void 801 dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, 802 dmu_tx_t *tx) 803 { 804 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 805 uint64_t count; 806 int err; 807 808 ASSERT(ds->ds_phys->ds_num_children >= 2); 809 err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx); 810 /* 811 * The err should not be ENOENT, but a bug in a previous version 812 * of the code could cause upgrade_clones_cb() to not set 813 * ds_next_snap_obj when it should, leading to a missing entry. 814 * If we knew that the pool was created after 815 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 816 * ENOENT. However, at least we can check that we don't have 817 * too many entries in the next_clones_obj even after failing to 818 * remove this one. 819 */ 820 if (err != ENOENT) 821 VERIFY0(err); 822 ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, 823 &count)); 824 ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2); 825 } 826 827 828 blkptr_t * 829 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 830 { 831 return (&ds->ds_phys->ds_bp); 832 } 833 834 void 835 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 836 { 837 ASSERT(dmu_tx_is_syncing(tx)); 838 /* If it's the meta-objset, set dp_meta_rootbp */ 839 if (ds == NULL) { 840 tx->tx_pool->dp_meta_rootbp = *bp; 841 } else { 842 dmu_buf_will_dirty(ds->ds_dbuf, tx); 843 ds->ds_phys->ds_bp = *bp; 844 } 845 } 846 847 spa_t * 848 dsl_dataset_get_spa(dsl_dataset_t *ds) 849 { 850 return (ds->ds_dir->dd_pool->dp_spa); 851 } 852 853 void 854 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 855 { 856 dsl_pool_t *dp; 857 858 if (ds == NULL) /* this is the meta-objset */ 859 return; 860 861 ASSERT(ds->ds_objset != NULL); 862 863 if (ds->ds_phys->ds_next_snap_obj != 0) 864 panic("dirtying snapshot!"); 865 866 dp = ds->ds_dir->dd_pool; 867 868 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { 869 /* up the hold count until we can be written out */ 870 dmu_buf_add_ref(ds->ds_dbuf, ds); 871 } 872 } 873 874 boolean_t 875 dsl_dataset_is_dirty(dsl_dataset_t *ds) 876 { 877 for (int t = 0; t < TXG_SIZE; t++) { 878 if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 879 ds, t)) 880 return (B_TRUE); 881 } 882 return (B_FALSE); 883 } 884 885 static int 886 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 887 { 888 uint64_t asize; 889 890 if (!dmu_tx_is_syncing(tx)) 891 return (0); 892 893 /* 894 * If there's an fs-only reservation, any blocks that might become 895 * owned by the snapshot dataset must be accommodated by space 896 * outside of the reservation. 897 */ 898 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 899 asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 900 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 901 return (SET_ERROR(ENOSPC)); 902 903 /* 904 * Propagate any reserved space for this snapshot to other 905 * snapshot checks in this sync group. 906 */ 907 if (asize > 0) 908 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 909 910 return (0); 911 } 912 913 typedef struct dsl_dataset_snapshot_arg { 914 nvlist_t *ddsa_snaps; 915 nvlist_t *ddsa_props; 916 nvlist_t *ddsa_errors; 917 } dsl_dataset_snapshot_arg_t; 918 919 int 920 dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, 921 dmu_tx_t *tx, boolean_t recv) 922 { 923 int error; 924 uint64_t value; 925 926 ds->ds_trysnap_txg = tx->tx_txg; 927 928 if (!dmu_tx_is_syncing(tx)) 929 return (0); 930 931 /* 932 * We don't allow multiple snapshots of the same txg. If there 933 * is already one, try again. 934 */ 935 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 936 return (SET_ERROR(EAGAIN)); 937 938 /* 939 * Check for conflicting snapshot name. 940 */ 941 error = dsl_dataset_snap_lookup(ds, snapname, &value); 942 if (error == 0) 943 return (SET_ERROR(EEXIST)); 944 if (error != ENOENT) 945 return (error); 946 947 /* 948 * We don't allow taking snapshots of inconsistent datasets, such as 949 * those into which we are currently receiving. However, if we are 950 * creating this snapshot as part of a receive, this check will be 951 * executed atomically with respect to the completion of the receive 952 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this 953 * case we ignore this, knowing it will be fixed up for us shortly in 954 * dmu_recv_end_sync(). 955 */ 956 if (!recv && DS_IS_INCONSISTENT(ds)) 957 return (SET_ERROR(EBUSY)); 958 959 error = dsl_dataset_snapshot_reserve_space(ds, tx); 960 if (error != 0) 961 return (error); 962 963 return (0); 964 } 965 966 static int 967 dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) 968 { 969 dsl_dataset_snapshot_arg_t *ddsa = arg; 970 dsl_pool_t *dp = dmu_tx_pool(tx); 971 nvpair_t *pair; 972 int rv = 0; 973 974 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 975 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 976 int error = 0; 977 dsl_dataset_t *ds; 978 char *name, *atp; 979 char dsname[MAXNAMELEN]; 980 981 name = nvpair_name(pair); 982 if (strlen(name) >= MAXNAMELEN) 983 error = SET_ERROR(ENAMETOOLONG); 984 if (error == 0) { 985 atp = strchr(name, '@'); 986 if (atp == NULL) 987 error = SET_ERROR(EINVAL); 988 if (error == 0) 989 (void) strlcpy(dsname, name, atp - name + 1); 990 } 991 if (error == 0) 992 error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 993 if (error == 0) { 994 error = dsl_dataset_snapshot_check_impl(ds, 995 atp + 1, tx, B_FALSE); 996 dsl_dataset_rele(ds, FTAG); 997 } 998 999 if (error != 0) { 1000 if (ddsa->ddsa_errors != NULL) { 1001 fnvlist_add_int32(ddsa->ddsa_errors, 1002 name, error); 1003 } 1004 rv = error; 1005 } 1006 } 1007 return (rv); 1008 } 1009 1010 void 1011 dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, 1012 dmu_tx_t *tx) 1013 { 1014 static zil_header_t zero_zil; 1015 1016 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1017 dmu_buf_t *dbuf; 1018 dsl_dataset_phys_t *dsphys; 1019 uint64_t dsobj, crtxg; 1020 objset_t *mos = dp->dp_meta_objset; 1021 objset_t *os; 1022 1023 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 1024 1025 /* 1026 * If we are on an old pool, the zil must not be active, in which 1027 * case it will be zeroed. Usually zil_suspend() accomplishes this. 1028 */ 1029 ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || 1030 dmu_objset_from_ds(ds, &os) != 0 || 1031 bcmp(&os->os_phys->os_zil_header, &zero_zil, 1032 sizeof (zero_zil)) == 0); 1033 1034 1035 /* 1036 * The origin's ds_creation_txg has to be < TXG_INITIAL 1037 */ 1038 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1039 crtxg = 1; 1040 else 1041 crtxg = tx->tx_txg; 1042 1043 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1044 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1045 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1046 dmu_buf_will_dirty(dbuf, tx); 1047 dsphys = dbuf->db_data; 1048 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1049 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1050 dsphys->ds_fsid_guid = unique_create(); 1051 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1052 sizeof (dsphys->ds_guid)); 1053 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1054 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1055 dsphys->ds_next_snap_obj = ds->ds_object; 1056 dsphys->ds_num_children = 1; 1057 dsphys->ds_creation_time = gethrestime_sec(); 1058 dsphys->ds_creation_txg = crtxg; 1059 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1060 dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes; 1061 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1062 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1063 dsphys->ds_flags = ds->ds_phys->ds_flags; 1064 dsphys->ds_bp = ds->ds_phys->ds_bp; 1065 dmu_buf_rele(dbuf, FTAG); 1066 1067 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1068 if (ds->ds_prev) { 1069 uint64_t next_clones_obj = 1070 ds->ds_prev->ds_phys->ds_next_clones_obj; 1071 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1072 ds->ds_object || 1073 ds->ds_prev->ds_phys->ds_num_children > 1); 1074 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1075 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1076 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1077 ds->ds_prev->ds_phys->ds_creation_txg); 1078 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1079 } else if (next_clones_obj != 0) { 1080 dsl_dataset_remove_from_next_clones(ds->ds_prev, 1081 dsphys->ds_next_snap_obj, tx); 1082 VERIFY0(zap_add_int(mos, 1083 next_clones_obj, dsobj, tx)); 1084 } 1085 } 1086 1087 /* 1088 * If we have a reference-reservation on this dataset, we will 1089 * need to increase the amount of refreservation being charged 1090 * since our unique space is going to zero. 1091 */ 1092 if (ds->ds_reserved) { 1093 int64_t delta; 1094 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 1095 delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 1096 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1097 delta, 0, 0, tx); 1098 } 1099 1100 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1101 ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist, 1102 UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx); 1103 dsl_deadlist_close(&ds->ds_deadlist); 1104 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 1105 dsl_deadlist_add_key(&ds->ds_deadlist, 1106 ds->ds_phys->ds_prev_snap_txg, tx); 1107 1108 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 1109 ds->ds_phys->ds_prev_snap_obj = dsobj; 1110 ds->ds_phys->ds_prev_snap_txg = crtxg; 1111 ds->ds_phys->ds_unique_bytes = 0; 1112 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1113 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1114 1115 VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1116 snapname, 8, 1, &dsobj, tx)); 1117 1118 if (ds->ds_prev) 1119 dsl_dataset_rele(ds->ds_prev, ds); 1120 VERIFY0(dsl_dataset_hold_obj(dp, 1121 ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 1122 1123 dsl_scan_ds_snapshotted(ds, tx); 1124 1125 dsl_dir_snap_cmtime_update(ds->ds_dir); 1126 1127 spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); 1128 } 1129 1130 static void 1131 dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) 1132 { 1133 dsl_dataset_snapshot_arg_t *ddsa = arg; 1134 dsl_pool_t *dp = dmu_tx_pool(tx); 1135 nvpair_t *pair; 1136 1137 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1138 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1139 dsl_dataset_t *ds; 1140 char *name, *atp; 1141 char dsname[MAXNAMELEN]; 1142 1143 name = nvpair_name(pair); 1144 atp = strchr(name, '@'); 1145 (void) strlcpy(dsname, name, atp - name + 1); 1146 VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); 1147 1148 dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); 1149 if (ddsa->ddsa_props != NULL) { 1150 dsl_props_set_sync_impl(ds->ds_prev, 1151 ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); 1152 } 1153 dsl_dataset_rele(ds, FTAG); 1154 } 1155 } 1156 1157 /* 1158 * The snapshots must all be in the same pool. 1159 * All-or-nothing: if there are any failures, nothing will be modified. 1160 */ 1161 int 1162 dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) 1163 { 1164 dsl_dataset_snapshot_arg_t ddsa; 1165 nvpair_t *pair; 1166 boolean_t needsuspend; 1167 int error; 1168 spa_t *spa; 1169 char *firstname; 1170 nvlist_t *suspended = NULL; 1171 1172 pair = nvlist_next_nvpair(snaps, NULL); 1173 if (pair == NULL) 1174 return (0); 1175 firstname = nvpair_name(pair); 1176 1177 error = spa_open(firstname, &spa, FTAG); 1178 if (error != 0) 1179 return (error); 1180 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1181 spa_close(spa, FTAG); 1182 1183 if (needsuspend) { 1184 suspended = fnvlist_alloc(); 1185 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1186 pair = nvlist_next_nvpair(snaps, pair)) { 1187 char fsname[MAXNAMELEN]; 1188 char *snapname = nvpair_name(pair); 1189 char *atp; 1190 void *cookie; 1191 1192 atp = strchr(snapname, '@'); 1193 if (atp == NULL) { 1194 error = SET_ERROR(EINVAL); 1195 break; 1196 } 1197 (void) strlcpy(fsname, snapname, atp - snapname + 1); 1198 1199 error = zil_suspend(fsname, &cookie); 1200 if (error != 0) 1201 break; 1202 fnvlist_add_uint64(suspended, fsname, 1203 (uintptr_t)cookie); 1204 } 1205 } 1206 1207 ddsa.ddsa_snaps = snaps; 1208 ddsa.ddsa_props = props; 1209 ddsa.ddsa_errors = errors; 1210 1211 if (error == 0) { 1212 error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, 1213 dsl_dataset_snapshot_sync, &ddsa, 1214 fnvlist_num_pairs(snaps) * 3); 1215 } 1216 1217 if (suspended != NULL) { 1218 for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; 1219 pair = nvlist_next_nvpair(suspended, pair)) { 1220 zil_resume((void *)(uintptr_t) 1221 fnvpair_value_uint64(pair)); 1222 } 1223 fnvlist_free(suspended); 1224 } 1225 1226 return (error); 1227 } 1228 1229 typedef struct dsl_dataset_snapshot_tmp_arg { 1230 const char *ddsta_fsname; 1231 const char *ddsta_snapname; 1232 minor_t ddsta_cleanup_minor; 1233 const char *ddsta_htag; 1234 } dsl_dataset_snapshot_tmp_arg_t; 1235 1236 static int 1237 dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) 1238 { 1239 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1240 dsl_pool_t *dp = dmu_tx_pool(tx); 1241 dsl_dataset_t *ds; 1242 int error; 1243 1244 error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); 1245 if (error != 0) 1246 return (error); 1247 1248 error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, 1249 tx, B_FALSE); 1250 if (error != 0) { 1251 dsl_dataset_rele(ds, FTAG); 1252 return (error); 1253 } 1254 1255 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { 1256 dsl_dataset_rele(ds, FTAG); 1257 return (SET_ERROR(ENOTSUP)); 1258 } 1259 error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1260 B_TRUE, tx); 1261 if (error != 0) { 1262 dsl_dataset_rele(ds, FTAG); 1263 return (error); 1264 } 1265 1266 dsl_dataset_rele(ds, FTAG); 1267 return (0); 1268 } 1269 1270 static void 1271 dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1272 { 1273 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1274 dsl_pool_t *dp = dmu_tx_pool(tx); 1275 dsl_dataset_t *ds; 1276 1277 VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1278 1279 dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1280 dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1281 ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1282 dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1283 1284 dsl_dataset_rele(ds, FTAG); 1285 } 1286 1287 int 1288 dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1289 minor_t cleanup_minor, const char *htag) 1290 { 1291 dsl_dataset_snapshot_tmp_arg_t ddsta; 1292 int error; 1293 spa_t *spa; 1294 boolean_t needsuspend; 1295 void *cookie; 1296 1297 ddsta.ddsta_fsname = fsname; 1298 ddsta.ddsta_snapname = snapname; 1299 ddsta.ddsta_cleanup_minor = cleanup_minor; 1300 ddsta.ddsta_htag = htag; 1301 1302 error = spa_open(fsname, &spa, FTAG); 1303 if (error != 0) 1304 return (error); 1305 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1306 spa_close(spa, FTAG); 1307 1308 if (needsuspend) { 1309 error = zil_suspend(fsname, &cookie); 1310 if (error != 0) 1311 return (error); 1312 } 1313 1314 error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1315 dsl_dataset_snapshot_tmp_sync, &ddsta, 3); 1316 1317 if (needsuspend) 1318 zil_resume(cookie); 1319 return (error); 1320 } 1321 1322 1323 void 1324 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1325 { 1326 ASSERT(dmu_tx_is_syncing(tx)); 1327 ASSERT(ds->ds_objset != NULL); 1328 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1329 1330 /* 1331 * in case we had to change ds_fsid_guid when we opened it, 1332 * sync it out now. 1333 */ 1334 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1335 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1336 1337 dmu_objset_sync(ds->ds_objset, zio, tx); 1338 } 1339 1340 static void 1341 get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1342 { 1343 uint64_t count = 0; 1344 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1345 zap_cursor_t zc; 1346 zap_attribute_t za; 1347 nvlist_t *propval = fnvlist_alloc(); 1348 nvlist_t *val = fnvlist_alloc(); 1349 1350 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1351 1352 /* 1353 * There may be missing entries in ds_next_clones_obj 1354 * due to a bug in a previous version of the code. 1355 * Only trust it if it has the right number of entries. 1356 */ 1357 if (ds->ds_phys->ds_next_clones_obj != 0) { 1358 ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, 1359 &count)); 1360 } 1361 if (count != ds->ds_phys->ds_num_children - 1) 1362 goto fail; 1363 for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj); 1364 zap_cursor_retrieve(&zc, &za) == 0; 1365 zap_cursor_advance(&zc)) { 1366 dsl_dataset_t *clone; 1367 char buf[ZFS_MAXNAMELEN]; 1368 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1369 za.za_first_integer, FTAG, &clone)); 1370 dsl_dir_name(clone->ds_dir, buf); 1371 fnvlist_add_boolean(val, buf); 1372 dsl_dataset_rele(clone, FTAG); 1373 } 1374 zap_cursor_fini(&zc); 1375 fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1376 fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); 1377 fail: 1378 nvlist_free(val); 1379 nvlist_free(propval); 1380 } 1381 1382 void 1383 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1384 { 1385 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1386 uint64_t refd, avail, uobjs, aobjs, ratio; 1387 1388 ASSERT(dsl_pool_config_held(dp)); 1389 1390 ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 1391 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1392 ds->ds_phys->ds_compressed_bytes); 1393 1394 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 1395 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 1396 ds->ds_phys->ds_uncompressed_bytes); 1397 1398 if (dsl_dataset_is_snapshot(ds)) { 1399 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 1400 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1401 ds->ds_phys->ds_unique_bytes); 1402 get_clones_stat(ds, nv); 1403 } else { 1404 dsl_dir_stats(ds->ds_dir, nv); 1405 } 1406 1407 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1408 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1409 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1410 1411 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1412 ds->ds_phys->ds_creation_time); 1413 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1414 ds->ds_phys->ds_creation_txg); 1415 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1416 ds->ds_quota); 1417 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1418 ds->ds_reserved); 1419 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1420 ds->ds_phys->ds_guid); 1421 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 1422 ds->ds_phys->ds_unique_bytes); 1423 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 1424 ds->ds_object); 1425 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 1426 ds->ds_userrefs); 1427 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 1428 DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 1429 1430 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1431 uint64_t written, comp, uncomp; 1432 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1433 dsl_dataset_t *prev; 1434 1435 int err = dsl_dataset_hold_obj(dp, 1436 ds->ds_phys->ds_prev_snap_obj, FTAG, &prev); 1437 if (err == 0) { 1438 err = dsl_dataset_space_written(prev, ds, &written, 1439 &comp, &uncomp); 1440 dsl_dataset_rele(prev, FTAG); 1441 if (err == 0) { 1442 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 1443 written); 1444 } 1445 } 1446 } 1447 } 1448 1449 void 1450 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1451 { 1452 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1453 ASSERT(dsl_pool_config_held(dp)); 1454 1455 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1456 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1457 stat->dds_guid = ds->ds_phys->ds_guid; 1458 stat->dds_origin[0] = '\0'; 1459 if (dsl_dataset_is_snapshot(ds)) { 1460 stat->dds_is_snapshot = B_TRUE; 1461 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1462 } else { 1463 stat->dds_is_snapshot = B_FALSE; 1464 stat->dds_num_clones = 0; 1465 1466 if (dsl_dir_is_clone(ds->ds_dir)) { 1467 dsl_dataset_t *ods; 1468 1469 VERIFY0(dsl_dataset_hold_obj(dp, 1470 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 1471 dsl_dataset_name(ods, stat->dds_origin); 1472 dsl_dataset_rele(ods, FTAG); 1473 } 1474 } 1475 } 1476 1477 uint64_t 1478 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1479 { 1480 return (ds->ds_fsid_guid); 1481 } 1482 1483 void 1484 dsl_dataset_space(dsl_dataset_t *ds, 1485 uint64_t *refdbytesp, uint64_t *availbytesp, 1486 uint64_t *usedobjsp, uint64_t *availobjsp) 1487 { 1488 *refdbytesp = ds->ds_phys->ds_referenced_bytes; 1489 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1490 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 1491 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 1492 if (ds->ds_quota != 0) { 1493 /* 1494 * Adjust available bytes according to refquota 1495 */ 1496 if (*refdbytesp < ds->ds_quota) 1497 *availbytesp = MIN(*availbytesp, 1498 ds->ds_quota - *refdbytesp); 1499 else 1500 *availbytesp = 0; 1501 } 1502 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 1503 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1504 } 1505 1506 boolean_t 1507 dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) 1508 { 1509 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1510 1511 ASSERT(dsl_pool_config_held(dp)); 1512 if (snap == NULL) 1513 return (B_FALSE); 1514 if (ds->ds_phys->ds_bp.blk_birth > 1515 snap->ds_phys->ds_creation_txg) { 1516 objset_t *os, *os_snap; 1517 /* 1518 * It may be that only the ZIL differs, because it was 1519 * reset in the head. Don't count that as being 1520 * modified. 1521 */ 1522 if (dmu_objset_from_ds(ds, &os) != 0) 1523 return (B_TRUE); 1524 if (dmu_objset_from_ds(snap, &os_snap) != 0) 1525 return (B_TRUE); 1526 return (bcmp(&os->os_phys->os_meta_dnode, 1527 &os_snap->os_phys->os_meta_dnode, 1528 sizeof (os->os_phys->os_meta_dnode)) != 0); 1529 } 1530 return (B_FALSE); 1531 } 1532 1533 typedef struct dsl_dataset_rename_snapshot_arg { 1534 const char *ddrsa_fsname; 1535 const char *ddrsa_oldsnapname; 1536 const char *ddrsa_newsnapname; 1537 boolean_t ddrsa_recursive; 1538 dmu_tx_t *ddrsa_tx; 1539 } dsl_dataset_rename_snapshot_arg_t; 1540 1541 /* ARGSUSED */ 1542 static int 1543 dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 1544 dsl_dataset_t *hds, void *arg) 1545 { 1546 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1547 int error; 1548 uint64_t val; 1549 1550 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1551 if (error != 0) { 1552 /* ignore nonexistent snapshots */ 1553 return (error == ENOENT ? 0 : error); 1554 } 1555 1556 /* new name should not exist */ 1557 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 1558 if (error == 0) 1559 error = SET_ERROR(EEXIST); 1560 else if (error == ENOENT) 1561 error = 0; 1562 1563 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1564 if (dsl_dir_namelen(hds->ds_dir) + 1 + 1565 strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) 1566 error = SET_ERROR(ENAMETOOLONG); 1567 1568 return (error); 1569 } 1570 1571 static int 1572 dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 1573 { 1574 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1575 dsl_pool_t *dp = dmu_tx_pool(tx); 1576 dsl_dataset_t *hds; 1577 int error; 1578 1579 error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 1580 if (error != 0) 1581 return (error); 1582 1583 if (ddrsa->ddrsa_recursive) { 1584 error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1585 dsl_dataset_rename_snapshot_check_impl, ddrsa, 1586 DS_FIND_CHILDREN); 1587 } else { 1588 error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 1589 } 1590 dsl_dataset_rele(hds, FTAG); 1591 return (error); 1592 } 1593 1594 static int 1595 dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 1596 dsl_dataset_t *hds, void *arg) 1597 { 1598 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1599 dsl_dataset_t *ds; 1600 uint64_t val; 1601 dmu_tx_t *tx = ddrsa->ddrsa_tx; 1602 int error; 1603 1604 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1605 ASSERT(error == 0 || error == ENOENT); 1606 if (error == ENOENT) { 1607 /* ignore nonexistent snapshots */ 1608 return (0); 1609 } 1610 1611 VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 1612 1613 /* log before we change the name */ 1614 spa_history_log_internal_ds(ds, "rename", tx, 1615 "-> @%s", ddrsa->ddrsa_newsnapname); 1616 1617 VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx)); 1618 mutex_enter(&ds->ds_lock); 1619 (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 1620 mutex_exit(&ds->ds_lock); 1621 VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, 1622 ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 1623 1624 dsl_dataset_rele(ds, FTAG); 1625 return (0); 1626 } 1627 1628 static void 1629 dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 1630 { 1631 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1632 dsl_pool_t *dp = dmu_tx_pool(tx); 1633 dsl_dataset_t *hds; 1634 1635 VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 1636 ddrsa->ddrsa_tx = tx; 1637 if (ddrsa->ddrsa_recursive) { 1638 VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1639 dsl_dataset_rename_snapshot_sync_impl, ddrsa, 1640 DS_FIND_CHILDREN)); 1641 } else { 1642 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 1643 } 1644 dsl_dataset_rele(hds, FTAG); 1645 } 1646 1647 int 1648 dsl_dataset_rename_snapshot(const char *fsname, 1649 const char *oldsnapname, const char *newsnapname, boolean_t recursive) 1650 { 1651 dsl_dataset_rename_snapshot_arg_t ddrsa; 1652 1653 ddrsa.ddrsa_fsname = fsname; 1654 ddrsa.ddrsa_oldsnapname = oldsnapname; 1655 ddrsa.ddrsa_newsnapname = newsnapname; 1656 ddrsa.ddrsa_recursive = recursive; 1657 1658 return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 1659 dsl_dataset_rename_snapshot_sync, &ddrsa, 1)); 1660 } 1661 1662 /* 1663 * If we're doing an ownership handoff, we need to make sure that there is 1664 * only one long hold on the dataset. We're not allowed to change anything here 1665 * so we don't permanently release the long hold or regular hold here. We want 1666 * to do this only when syncing to avoid the dataset unexpectedly going away 1667 * when we release the long hold. 1668 */ 1669 static int 1670 dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 1671 { 1672 boolean_t held; 1673 1674 if (!dmu_tx_is_syncing(tx)) 1675 return (0); 1676 1677 if (owner != NULL) { 1678 VERIFY3P(ds->ds_owner, ==, owner); 1679 dsl_dataset_long_rele(ds, owner); 1680 } 1681 1682 held = dsl_dataset_long_held(ds); 1683 1684 if (owner != NULL) 1685 dsl_dataset_long_hold(ds, owner); 1686 1687 if (held) 1688 return (SET_ERROR(EBUSY)); 1689 1690 return (0); 1691 } 1692 1693 typedef struct dsl_dataset_rollback_arg { 1694 const char *ddra_fsname; 1695 void *ddra_owner; 1696 nvlist_t *ddra_result; 1697 } dsl_dataset_rollback_arg_t; 1698 1699 static int 1700 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 1701 { 1702 dsl_dataset_rollback_arg_t *ddra = arg; 1703 dsl_pool_t *dp = dmu_tx_pool(tx); 1704 dsl_dataset_t *ds; 1705 int64_t unused_refres_delta; 1706 int error; 1707 1708 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 1709 if (error != 0) 1710 return (error); 1711 1712 /* must not be a snapshot */ 1713 if (dsl_dataset_is_snapshot(ds)) { 1714 dsl_dataset_rele(ds, FTAG); 1715 return (SET_ERROR(EINVAL)); 1716 } 1717 1718 /* must have a most recent snapshot */ 1719 if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { 1720 dsl_dataset_rele(ds, FTAG); 1721 return (SET_ERROR(EINVAL)); 1722 } 1723 1724 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 1725 if (error != 0) { 1726 dsl_dataset_rele(ds, FTAG); 1727 return (error); 1728 } 1729 1730 /* 1731 * Check if the snap we are rolling back to uses more than 1732 * the refquota. 1733 */ 1734 if (ds->ds_quota != 0 && 1735 ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { 1736 dsl_dataset_rele(ds, FTAG); 1737 return (SET_ERROR(EDQUOT)); 1738 } 1739 1740 /* 1741 * When we do the clone swap, we will temporarily use more space 1742 * due to the refreservation (the head will no longer have any 1743 * unique space, so the entire amount of the refreservation will need 1744 * to be free). We will immediately destroy the clone, freeing 1745 * this space, but the freeing happens over many txg's. 1746 */ 1747 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 1748 ds->ds_phys->ds_unique_bytes); 1749 1750 if (unused_refres_delta > 0 && 1751 unused_refres_delta > 1752 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 1753 dsl_dataset_rele(ds, FTAG); 1754 return (SET_ERROR(ENOSPC)); 1755 } 1756 1757 dsl_dataset_rele(ds, FTAG); 1758 return (0); 1759 } 1760 1761 static void 1762 dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 1763 { 1764 dsl_dataset_rollback_arg_t *ddra = arg; 1765 dsl_pool_t *dp = dmu_tx_pool(tx); 1766 dsl_dataset_t *ds, *clone; 1767 uint64_t cloneobj; 1768 char namebuf[ZFS_MAXNAMELEN]; 1769 1770 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 1771 1772 dsl_dataset_name(ds->ds_prev, namebuf); 1773 fnvlist_add_string(ddra->ddra_result, "target", namebuf); 1774 1775 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 1776 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 1777 1778 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 1779 1780 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 1781 dsl_dataset_zero_zil(ds, tx); 1782 1783 dsl_destroy_head_sync_impl(clone, tx); 1784 1785 dsl_dataset_rele(clone, FTAG); 1786 dsl_dataset_rele(ds, FTAG); 1787 } 1788 1789 /* 1790 * Rolls back the given filesystem or volume to the most recent snapshot. 1791 * The name of the most recent snapshot will be returned under key "target" 1792 * in the result nvlist. 1793 * 1794 * If owner != NULL: 1795 * - The existing dataset MUST be owned by the specified owner at entry 1796 * - Upon return, dataset will still be held by the same owner, whether we 1797 * succeed or not. 1798 * 1799 * This mode is required any time the existing filesystem is mounted. See 1800 * notes above zfs_suspend_fs() for further details. 1801 */ 1802 int 1803 dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result) 1804 { 1805 dsl_dataset_rollback_arg_t ddra; 1806 1807 ddra.ddra_fsname = fsname; 1808 ddra.ddra_owner = owner; 1809 ddra.ddra_result = result; 1810 1811 return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 1812 dsl_dataset_rollback_sync, &ddra, 1)); 1813 } 1814 1815 struct promotenode { 1816 list_node_t link; 1817 dsl_dataset_t *ds; 1818 }; 1819 1820 typedef struct dsl_dataset_promote_arg { 1821 const char *ddpa_clonename; 1822 dsl_dataset_t *ddpa_clone; 1823 list_t shared_snaps, origin_snaps, clone_snaps; 1824 dsl_dataset_t *origin_origin; /* origin of the origin */ 1825 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 1826 char *err_ds; 1827 } dsl_dataset_promote_arg_t; 1828 1829 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 1830 static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 1831 void *tag); 1832 static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 1833 1834 static int 1835 dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 1836 { 1837 dsl_dataset_promote_arg_t *ddpa = arg; 1838 dsl_pool_t *dp = dmu_tx_pool(tx); 1839 dsl_dataset_t *hds; 1840 struct promotenode *snap; 1841 dsl_dataset_t *origin_ds; 1842 int err; 1843 uint64_t unused; 1844 1845 err = promote_hold(ddpa, dp, FTAG); 1846 if (err != 0) 1847 return (err); 1848 1849 hds = ddpa->ddpa_clone; 1850 1851 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 1852 promote_rele(ddpa, FTAG); 1853 return (SET_ERROR(EXDEV)); 1854 } 1855 1856 /* 1857 * Compute and check the amount of space to transfer. Since this is 1858 * so expensive, don't do the preliminary check. 1859 */ 1860 if (!dmu_tx_is_syncing(tx)) { 1861 promote_rele(ddpa, FTAG); 1862 return (0); 1863 } 1864 1865 snap = list_head(&ddpa->shared_snaps); 1866 origin_ds = snap->ds; 1867 1868 /* compute origin's new unique space */ 1869 snap = list_tail(&ddpa->clone_snaps); 1870 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 1871 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 1872 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 1873 &ddpa->unique, &unused, &unused); 1874 1875 /* 1876 * Walk the snapshots that we are moving 1877 * 1878 * Compute space to transfer. Consider the incremental changes 1879 * to used by each snapshot: 1880 * (my used) = (prev's used) + (blocks born) - (blocks killed) 1881 * So each snapshot gave birth to: 1882 * (blocks born) = (my used) - (prev's used) + (blocks killed) 1883 * So a sequence would look like: 1884 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 1885 * Which simplifies to: 1886 * uN + kN + kN-1 + ... + k1 + k0 1887 * Note however, if we stop before we reach the ORIGIN we get: 1888 * uN + kN + kN-1 + ... + kM - uM-1 1889 */ 1890 ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; 1891 ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; 1892 ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 1893 for (snap = list_head(&ddpa->shared_snaps); snap; 1894 snap = list_next(&ddpa->shared_snaps, snap)) { 1895 uint64_t val, dlused, dlcomp, dluncomp; 1896 dsl_dataset_t *ds = snap->ds; 1897 1898 /* 1899 * If there are long holds, we won't be able to evict 1900 * the objset. 1901 */ 1902 if (dsl_dataset_long_held(ds)) { 1903 err = SET_ERROR(EBUSY); 1904 goto out; 1905 } 1906 1907 /* Check that the snapshot name does not conflict */ 1908 VERIFY0(dsl_dataset_get_snapname(ds)); 1909 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 1910 if (err == 0) { 1911 (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 1912 err = SET_ERROR(EEXIST); 1913 goto out; 1914 } 1915 if (err != ENOENT) 1916 goto out; 1917 1918 /* The very first snapshot does not have a deadlist */ 1919 if (ds->ds_phys->ds_prev_snap_obj == 0) 1920 continue; 1921 1922 dsl_deadlist_space(&ds->ds_deadlist, 1923 &dlused, &dlcomp, &dluncomp); 1924 ddpa->used += dlused; 1925 ddpa->comp += dlcomp; 1926 ddpa->uncomp += dluncomp; 1927 } 1928 1929 /* 1930 * If we are a clone of a clone then we never reached ORIGIN, 1931 * so we need to subtract out the clone origin's used space. 1932 */ 1933 if (ddpa->origin_origin) { 1934 ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; 1935 ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; 1936 ddpa->uncomp -= 1937 ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; 1938 } 1939 1940 /* Check that there is enough space here */ 1941 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 1942 ddpa->used); 1943 if (err != 0) 1944 goto out; 1945 1946 /* 1947 * Compute the amounts of space that will be used by snapshots 1948 * after the promotion (for both origin and clone). For each, 1949 * it is the amount of space that will be on all of their 1950 * deadlists (that was not born before their new origin). 1951 */ 1952 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 1953 uint64_t space; 1954 1955 /* 1956 * Note, typically this will not be a clone of a clone, 1957 * so dd_origin_txg will be < TXG_INITIAL, so 1958 * these snaplist_space() -> dsl_deadlist_space_range() 1959 * calls will be fast because they do not have to 1960 * iterate over all bps. 1961 */ 1962 snap = list_head(&ddpa->origin_snaps); 1963 err = snaplist_space(&ddpa->shared_snaps, 1964 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 1965 if (err != 0) 1966 goto out; 1967 1968 err = snaplist_space(&ddpa->clone_snaps, 1969 snap->ds->ds_dir->dd_origin_txg, &space); 1970 if (err != 0) 1971 goto out; 1972 ddpa->cloneusedsnap += space; 1973 } 1974 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 1975 err = snaplist_space(&ddpa->origin_snaps, 1976 origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); 1977 if (err != 0) 1978 goto out; 1979 } 1980 1981 out: 1982 promote_rele(ddpa, FTAG); 1983 return (err); 1984 } 1985 1986 static void 1987 dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 1988 { 1989 dsl_dataset_promote_arg_t *ddpa = arg; 1990 dsl_pool_t *dp = dmu_tx_pool(tx); 1991 dsl_dataset_t *hds; 1992 struct promotenode *snap; 1993 dsl_dataset_t *origin_ds; 1994 dsl_dataset_t *origin_head; 1995 dsl_dir_t *dd; 1996 dsl_dir_t *odd = NULL; 1997 uint64_t oldnext_obj; 1998 int64_t delta; 1999 2000 VERIFY0(promote_hold(ddpa, dp, FTAG)); 2001 hds = ddpa->ddpa_clone; 2002 2003 ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); 2004 2005 snap = list_head(&ddpa->shared_snaps); 2006 origin_ds = snap->ds; 2007 dd = hds->ds_dir; 2008 2009 snap = list_head(&ddpa->origin_snaps); 2010 origin_head = snap->ds; 2011 2012 /* 2013 * We need to explicitly open odd, since origin_ds's dd will be 2014 * changing. 2015 */ 2016 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2017 NULL, FTAG, &odd)); 2018 2019 /* change origin's next snap */ 2020 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2021 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2022 snap = list_tail(&ddpa->clone_snaps); 2023 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2024 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2025 2026 /* change the origin's next clone */ 2027 if (origin_ds->ds_phys->ds_next_clones_obj) { 2028 dsl_dataset_remove_from_next_clones(origin_ds, 2029 snap->ds->ds_object, tx); 2030 VERIFY0(zap_add_int(dp->dp_meta_objset, 2031 origin_ds->ds_phys->ds_next_clones_obj, 2032 oldnext_obj, tx)); 2033 } 2034 2035 /* change origin */ 2036 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2037 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2038 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2039 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2040 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2041 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2042 origin_head->ds_dir->dd_origin_txg = 2043 origin_ds->ds_phys->ds_creation_txg; 2044 2045 /* change dd_clone entries */ 2046 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2047 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2048 odd->dd_phys->dd_clones, hds->ds_object, tx)); 2049 VERIFY0(zap_add_int(dp->dp_meta_objset, 2050 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2051 hds->ds_object, tx)); 2052 2053 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2054 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2055 origin_head->ds_object, tx)); 2056 if (dd->dd_phys->dd_clones == 0) { 2057 dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 2058 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 2059 } 2060 VERIFY0(zap_add_int(dp->dp_meta_objset, 2061 dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 2062 } 2063 2064 /* move snapshots to this dir */ 2065 for (snap = list_head(&ddpa->shared_snaps); snap; 2066 snap = list_next(&ddpa->shared_snaps, snap)) { 2067 dsl_dataset_t *ds = snap->ds; 2068 2069 /* 2070 * Property callbacks are registered to a particular 2071 * dsl_dir. Since ours is changing, evict the objset 2072 * so that they will be unregistered from the old dsl_dir. 2073 */ 2074 if (ds->ds_objset) { 2075 dmu_objset_evict(ds->ds_objset); 2076 ds->ds_objset = NULL; 2077 } 2078 2079 /* move snap name entry */ 2080 VERIFY0(dsl_dataset_get_snapname(ds)); 2081 VERIFY0(dsl_dataset_snap_remove(origin_head, 2082 ds->ds_snapname, tx)); 2083 VERIFY0(zap_add(dp->dp_meta_objset, 2084 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2085 8, 1, &ds->ds_object, tx)); 2086 2087 /* change containing dsl_dir */ 2088 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2089 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2090 ds->ds_phys->ds_dir_obj = dd->dd_object; 2091 ASSERT3P(ds->ds_dir, ==, odd); 2092 dsl_dir_rele(ds->ds_dir, ds); 2093 VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2094 NULL, ds, &ds->ds_dir)); 2095 2096 /* move any clone references */ 2097 if (ds->ds_phys->ds_next_clones_obj && 2098 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2099 zap_cursor_t zc; 2100 zap_attribute_t za; 2101 2102 for (zap_cursor_init(&zc, dp->dp_meta_objset, 2103 ds->ds_phys->ds_next_clones_obj); 2104 zap_cursor_retrieve(&zc, &za) == 0; 2105 zap_cursor_advance(&zc)) { 2106 dsl_dataset_t *cnds; 2107 uint64_t o; 2108 2109 if (za.za_first_integer == oldnext_obj) { 2110 /* 2111 * We've already moved the 2112 * origin's reference. 2113 */ 2114 continue; 2115 } 2116 2117 VERIFY0(dsl_dataset_hold_obj(dp, 2118 za.za_first_integer, FTAG, &cnds)); 2119 o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; 2120 2121 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2122 odd->dd_phys->dd_clones, o, tx)); 2123 VERIFY0(zap_add_int(dp->dp_meta_objset, 2124 dd->dd_phys->dd_clones, o, tx)); 2125 dsl_dataset_rele(cnds, FTAG); 2126 } 2127 zap_cursor_fini(&zc); 2128 } 2129 2130 ASSERT(!dsl_prop_hascb(ds)); 2131 } 2132 2133 /* 2134 * Change space accounting. 2135 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2136 * both be valid, or both be 0 (resulting in delta == 0). This 2137 * is true for each of {clone,origin} independently. 2138 */ 2139 2140 delta = ddpa->cloneusedsnap - 2141 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2142 ASSERT3S(delta, >=, 0); 2143 ASSERT3U(ddpa->used, >=, delta); 2144 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2145 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2146 ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2147 2148 delta = ddpa->originusedsnap - 2149 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2150 ASSERT3S(delta, <=, 0); 2151 ASSERT3U(ddpa->used, >=, -delta); 2152 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2153 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2154 -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2155 2156 origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; 2157 2158 /* log history record */ 2159 spa_history_log_internal_ds(hds, "promote", tx, ""); 2160 2161 dsl_dir_rele(odd, FTAG); 2162 promote_rele(ddpa, FTAG); 2163 } 2164 2165 /* 2166 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2167 * (exclusive) and last_obj (inclusive). The list will be in reverse 2168 * order (last_obj will be the list_head()). If first_obj == 0, do all 2169 * snapshots back to this dataset's origin. 2170 */ 2171 static int 2172 snaplist_make(dsl_pool_t *dp, 2173 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2174 { 2175 uint64_t obj = last_obj; 2176 2177 list_create(l, sizeof (struct promotenode), 2178 offsetof(struct promotenode, link)); 2179 2180 while (obj != first_obj) { 2181 dsl_dataset_t *ds; 2182 struct promotenode *snap; 2183 int err; 2184 2185 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2186 ASSERT(err != ENOENT); 2187 if (err != 0) 2188 return (err); 2189 2190 if (first_obj == 0) 2191 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2192 2193 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2194 snap->ds = ds; 2195 list_insert_tail(l, snap); 2196 obj = ds->ds_phys->ds_prev_snap_obj; 2197 } 2198 2199 return (0); 2200 } 2201 2202 static int 2203 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2204 { 2205 struct promotenode *snap; 2206 2207 *spacep = 0; 2208 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2209 uint64_t used, comp, uncomp; 2210 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2211 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2212 *spacep += used; 2213 } 2214 return (0); 2215 } 2216 2217 static void 2218 snaplist_destroy(list_t *l, void *tag) 2219 { 2220 struct promotenode *snap; 2221 2222 if (l == NULL || !list_link_active(&l->list_head)) 2223 return; 2224 2225 while ((snap = list_tail(l)) != NULL) { 2226 list_remove(l, snap); 2227 dsl_dataset_rele(snap->ds, tag); 2228 kmem_free(snap, sizeof (*snap)); 2229 } 2230 list_destroy(l); 2231 } 2232 2233 static int 2234 promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2235 { 2236 int error; 2237 dsl_dir_t *dd; 2238 struct promotenode *snap; 2239 2240 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2241 &ddpa->ddpa_clone); 2242 if (error != 0) 2243 return (error); 2244 dd = ddpa->ddpa_clone->ds_dir; 2245 2246 if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || 2247 !dsl_dir_is_clone(dd)) { 2248 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2249 return (SET_ERROR(EINVAL)); 2250 } 2251 2252 error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, 2253 &ddpa->shared_snaps, tag); 2254 if (error != 0) 2255 goto out; 2256 2257 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2258 &ddpa->clone_snaps, tag); 2259 if (error != 0) 2260 goto out; 2261 2262 snap = list_head(&ddpa->shared_snaps); 2263 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2264 error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, 2265 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, 2266 &ddpa->origin_snaps, tag); 2267 if (error != 0) 2268 goto out; 2269 2270 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 2271 error = dsl_dataset_hold_obj(dp, 2272 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2273 tag, &ddpa->origin_origin); 2274 if (error != 0) 2275 goto out; 2276 } 2277 out: 2278 if (error != 0) 2279 promote_rele(ddpa, tag); 2280 return (error); 2281 } 2282 2283 static void 2284 promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2285 { 2286 snaplist_destroy(&ddpa->shared_snaps, tag); 2287 snaplist_destroy(&ddpa->clone_snaps, tag); 2288 snaplist_destroy(&ddpa->origin_snaps, tag); 2289 if (ddpa->origin_origin != NULL) 2290 dsl_dataset_rele(ddpa->origin_origin, tag); 2291 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2292 } 2293 2294 /* 2295 * Promote a clone. 2296 * 2297 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2298 * in with the name. (It must be at least MAXNAMELEN bytes long.) 2299 */ 2300 int 2301 dsl_dataset_promote(const char *name, char *conflsnap) 2302 { 2303 dsl_dataset_promote_arg_t ddpa = { 0 }; 2304 uint64_t numsnaps; 2305 int error; 2306 objset_t *os; 2307 2308 /* 2309 * We will modify space proportional to the number of 2310 * snapshots. Compute numsnaps. 2311 */ 2312 error = dmu_objset_hold(name, FTAG, &os); 2313 if (error != 0) 2314 return (error); 2315 error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2316 dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); 2317 dmu_objset_rele(os, FTAG); 2318 if (error != 0) 2319 return (error); 2320 2321 ddpa.ddpa_clonename = name; 2322 ddpa.err_ds = conflsnap; 2323 2324 return (dsl_sync_task(name, dsl_dataset_promote_check, 2325 dsl_dataset_promote_sync, &ddpa, 2 + numsnaps)); 2326 } 2327 2328 int 2329 dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2330 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2331 { 2332 int64_t unused_refres_delta; 2333 2334 /* they should both be heads */ 2335 if (dsl_dataset_is_snapshot(clone) || 2336 dsl_dataset_is_snapshot(origin_head)) 2337 return (SET_ERROR(EINVAL)); 2338 2339 /* if we are not forcing, the branch point should be just before them */ 2340 if (!force && clone->ds_prev != origin_head->ds_prev) 2341 return (SET_ERROR(EINVAL)); 2342 2343 /* clone should be the clone (unless they are unrelated) */ 2344 if (clone->ds_prev != NULL && 2345 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 2346 origin_head->ds_dir != clone->ds_prev->ds_dir) 2347 return (SET_ERROR(EINVAL)); 2348 2349 /* the clone should be a child of the origin */ 2350 if (clone->ds_dir->dd_parent != origin_head->ds_dir) 2351 return (SET_ERROR(EINVAL)); 2352 2353 /* origin_head shouldn't be modified unless 'force' */ 2354 if (!force && 2355 dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) 2356 return (SET_ERROR(ETXTBSY)); 2357 2358 /* origin_head should have no long holds (e.g. is not mounted) */ 2359 if (dsl_dataset_handoff_check(origin_head, owner, tx)) 2360 return (SET_ERROR(EBUSY)); 2361 2362 /* check amount of any unconsumed refreservation */ 2363 unused_refres_delta = 2364 (int64_t)MIN(origin_head->ds_reserved, 2365 origin_head->ds_phys->ds_unique_bytes) - 2366 (int64_t)MIN(origin_head->ds_reserved, 2367 clone->ds_phys->ds_unique_bytes); 2368 2369 if (unused_refres_delta > 0 && 2370 unused_refres_delta > 2371 dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 2372 return (SET_ERROR(ENOSPC)); 2373 2374 /* clone can't be over the head's refquota */ 2375 if (origin_head->ds_quota != 0 && 2376 clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) 2377 return (SET_ERROR(EDQUOT)); 2378 2379 return (0); 2380 } 2381 2382 void 2383 dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 2384 dsl_dataset_t *origin_head, dmu_tx_t *tx) 2385 { 2386 dsl_pool_t *dp = dmu_tx_pool(tx); 2387 int64_t unused_refres_delta; 2388 2389 ASSERT(clone->ds_reserved == 0); 2390 ASSERT(origin_head->ds_quota == 0 || 2391 clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); 2392 ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); 2393 2394 dmu_buf_will_dirty(clone->ds_dbuf, tx); 2395 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2396 2397 if (clone->ds_objset != NULL) { 2398 dmu_objset_evict(clone->ds_objset); 2399 clone->ds_objset = NULL; 2400 } 2401 2402 if (origin_head->ds_objset != NULL) { 2403 dmu_objset_evict(origin_head->ds_objset); 2404 origin_head->ds_objset = NULL; 2405 } 2406 2407 unused_refres_delta = 2408 (int64_t)MIN(origin_head->ds_reserved, 2409 origin_head->ds_phys->ds_unique_bytes) - 2410 (int64_t)MIN(origin_head->ds_reserved, 2411 clone->ds_phys->ds_unique_bytes); 2412 2413 /* 2414 * Reset origin's unique bytes, if it exists. 2415 */ 2416 if (clone->ds_prev) { 2417 dsl_dataset_t *origin = clone->ds_prev; 2418 uint64_t comp, uncomp; 2419 2420 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2421 dsl_deadlist_space_range(&clone->ds_deadlist, 2422 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2423 &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); 2424 } 2425 2426 /* swap blkptrs */ 2427 { 2428 blkptr_t tmp; 2429 tmp = origin_head->ds_phys->ds_bp; 2430 origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; 2431 clone->ds_phys->ds_bp = tmp; 2432 } 2433 2434 /* set dd_*_bytes */ 2435 { 2436 int64_t dused, dcomp, duncomp; 2437 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2438 uint64_t odl_used, odl_comp, odl_uncomp; 2439 2440 ASSERT3U(clone->ds_dir->dd_phys-> 2441 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2442 2443 dsl_deadlist_space(&clone->ds_deadlist, 2444 &cdl_used, &cdl_comp, &cdl_uncomp); 2445 dsl_deadlist_space(&origin_head->ds_deadlist, 2446 &odl_used, &odl_comp, &odl_uncomp); 2447 2448 dused = clone->ds_phys->ds_referenced_bytes + cdl_used - 2449 (origin_head->ds_phys->ds_referenced_bytes + odl_used); 2450 dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - 2451 (origin_head->ds_phys->ds_compressed_bytes + odl_comp); 2452 duncomp = clone->ds_phys->ds_uncompressed_bytes + 2453 cdl_uncomp - 2454 (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2455 2456 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 2457 dused, dcomp, duncomp, tx); 2458 dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 2459 -dused, -dcomp, -duncomp, tx); 2460 2461 /* 2462 * The difference in the space used by snapshots is the 2463 * difference in snapshot space due to the head's 2464 * deadlist (since that's the only thing that's 2465 * changing that affects the snapused). 2466 */ 2467 dsl_deadlist_space_range(&clone->ds_deadlist, 2468 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2469 &cdl_used, &cdl_comp, &cdl_uncomp); 2470 dsl_deadlist_space_range(&origin_head->ds_deadlist, 2471 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2472 &odl_used, &odl_comp, &odl_uncomp); 2473 dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 2474 DD_USED_HEAD, DD_USED_SNAP, tx); 2475 } 2476 2477 /* swap ds_*_bytes */ 2478 SWITCH64(origin_head->ds_phys->ds_referenced_bytes, 2479 clone->ds_phys->ds_referenced_bytes); 2480 SWITCH64(origin_head->ds_phys->ds_compressed_bytes, 2481 clone->ds_phys->ds_compressed_bytes); 2482 SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, 2483 clone->ds_phys->ds_uncompressed_bytes); 2484 SWITCH64(origin_head->ds_phys->ds_unique_bytes, 2485 clone->ds_phys->ds_unique_bytes); 2486 2487 /* apply any parent delta for change in unconsumed refreservation */ 2488 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 2489 unused_refres_delta, 0, 0, tx); 2490 2491 /* 2492 * Swap deadlists. 2493 */ 2494 dsl_deadlist_close(&clone->ds_deadlist); 2495 dsl_deadlist_close(&origin_head->ds_deadlist); 2496 SWITCH64(origin_head->ds_phys->ds_deadlist_obj, 2497 clone->ds_phys->ds_deadlist_obj); 2498 dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 2499 clone->ds_phys->ds_deadlist_obj); 2500 dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 2501 origin_head->ds_phys->ds_deadlist_obj); 2502 2503 dsl_scan_ds_clone_swapped(origin_head, clone, tx); 2504 2505 spa_history_log_internal_ds(clone, "clone swap", tx, 2506 "parent=%s", origin_head->ds_dir->dd_myname); 2507 } 2508 2509 /* 2510 * Given a pool name and a dataset object number in that pool, 2511 * return the name of that dataset. 2512 */ 2513 int 2514 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2515 { 2516 dsl_pool_t *dp; 2517 dsl_dataset_t *ds; 2518 int error; 2519 2520 error = dsl_pool_hold(pname, FTAG, &dp); 2521 if (error != 0) 2522 return (error); 2523 2524 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2525 if (error == 0) { 2526 dsl_dataset_name(ds, buf); 2527 dsl_dataset_rele(ds, FTAG); 2528 } 2529 dsl_pool_rele(dp, FTAG); 2530 2531 return (error); 2532 } 2533 2534 int 2535 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2536 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2537 { 2538 int error = 0; 2539 2540 ASSERT3S(asize, >, 0); 2541 2542 /* 2543 * *ref_rsrv is the portion of asize that will come from any 2544 * unconsumed refreservation space. 2545 */ 2546 *ref_rsrv = 0; 2547 2548 mutex_enter(&ds->ds_lock); 2549 /* 2550 * Make a space adjustment for reserved bytes. 2551 */ 2552 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2553 ASSERT3U(*used, >=, 2554 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2555 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2556 *ref_rsrv = 2557 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2558 } 2559 2560 if (!check_quota || ds->ds_quota == 0) { 2561 mutex_exit(&ds->ds_lock); 2562 return (0); 2563 } 2564 /* 2565 * If they are requesting more space, and our current estimate 2566 * is over quota, they get to try again unless the actual 2567 * on-disk is over quota and there are no pending changes (which 2568 * may free up space for us). 2569 */ 2570 if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { 2571 if (inflight > 0 || 2572 ds->ds_phys->ds_referenced_bytes < ds->ds_quota) 2573 error = SET_ERROR(ERESTART); 2574 else 2575 error = SET_ERROR(EDQUOT); 2576 } 2577 mutex_exit(&ds->ds_lock); 2578 2579 return (error); 2580 } 2581 2582 typedef struct dsl_dataset_set_qr_arg { 2583 const char *ddsqra_name; 2584 zprop_source_t ddsqra_source; 2585 uint64_t ddsqra_value; 2586 } dsl_dataset_set_qr_arg_t; 2587 2588 2589 /* ARGSUSED */ 2590 static int 2591 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2592 { 2593 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2594 dsl_pool_t *dp = dmu_tx_pool(tx); 2595 dsl_dataset_t *ds; 2596 int error; 2597 uint64_t newval; 2598 2599 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2600 return (SET_ERROR(ENOTSUP)); 2601 2602 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2603 if (error != 0) 2604 return (error); 2605 2606 if (dsl_dataset_is_snapshot(ds)) { 2607 dsl_dataset_rele(ds, FTAG); 2608 return (SET_ERROR(EINVAL)); 2609 } 2610 2611 error = dsl_prop_predict(ds->ds_dir, 2612 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2613 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2614 if (error != 0) { 2615 dsl_dataset_rele(ds, FTAG); 2616 return (error); 2617 } 2618 2619 if (newval == 0) { 2620 dsl_dataset_rele(ds, FTAG); 2621 return (0); 2622 } 2623 2624 if (newval < ds->ds_phys->ds_referenced_bytes || 2625 newval < ds->ds_reserved) { 2626 dsl_dataset_rele(ds, FTAG); 2627 return (SET_ERROR(ENOSPC)); 2628 } 2629 2630 dsl_dataset_rele(ds, FTAG); 2631 return (0); 2632 } 2633 2634 static void 2635 dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 2636 { 2637 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2638 dsl_pool_t *dp = dmu_tx_pool(tx); 2639 dsl_dataset_t *ds; 2640 uint64_t newval; 2641 2642 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2643 2644 dsl_prop_set_sync_impl(ds, 2645 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2646 ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 2647 &ddsqra->ddsqra_value, tx); 2648 2649 VERIFY0(dsl_prop_get_int_ds(ds, 2650 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 2651 2652 if (ds->ds_quota != newval) { 2653 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2654 ds->ds_quota = newval; 2655 } 2656 dsl_dataset_rele(ds, FTAG); 2657 } 2658 2659 int 2660 dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 2661 uint64_t refquota) 2662 { 2663 dsl_dataset_set_qr_arg_t ddsqra; 2664 2665 ddsqra.ddsqra_name = dsname; 2666 ddsqra.ddsqra_source = source; 2667 ddsqra.ddsqra_value = refquota; 2668 2669 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 2670 dsl_dataset_set_refquota_sync, &ddsqra, 0)); 2671 } 2672 2673 static int 2674 dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 2675 { 2676 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2677 dsl_pool_t *dp = dmu_tx_pool(tx); 2678 dsl_dataset_t *ds; 2679 int error; 2680 uint64_t newval, unique; 2681 2682 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 2683 return (SET_ERROR(ENOTSUP)); 2684 2685 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2686 if (error != 0) 2687 return (error); 2688 2689 if (dsl_dataset_is_snapshot(ds)) { 2690 dsl_dataset_rele(ds, FTAG); 2691 return (SET_ERROR(EINVAL)); 2692 } 2693 2694 error = dsl_prop_predict(ds->ds_dir, 2695 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2696 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2697 if (error != 0) { 2698 dsl_dataset_rele(ds, FTAG); 2699 return (error); 2700 } 2701 2702 /* 2703 * If we are doing the preliminary check in open context, the 2704 * space estimates may be inaccurate. 2705 */ 2706 if (!dmu_tx_is_syncing(tx)) { 2707 dsl_dataset_rele(ds, FTAG); 2708 return (0); 2709 } 2710 2711 mutex_enter(&ds->ds_lock); 2712 if (!DS_UNIQUE_IS_ACCURATE(ds)) 2713 dsl_dataset_recalc_head_uniq(ds); 2714 unique = ds->ds_phys->ds_unique_bytes; 2715 mutex_exit(&ds->ds_lock); 2716 2717 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 2718 uint64_t delta = MAX(unique, newval) - 2719 MAX(unique, ds->ds_reserved); 2720 2721 if (delta > 2722 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 2723 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 2724 dsl_dataset_rele(ds, FTAG); 2725 return (SET_ERROR(ENOSPC)); 2726 } 2727 } 2728 2729 dsl_dataset_rele(ds, FTAG); 2730 return (0); 2731 } 2732 2733 void 2734 dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 2735 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 2736 { 2737 uint64_t newval; 2738 uint64_t unique; 2739 int64_t delta; 2740 2741 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2742 source, sizeof (value), 1, &value, tx); 2743 2744 VERIFY0(dsl_prop_get_int_ds(ds, 2745 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 2746 2747 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2748 mutex_enter(&ds->ds_dir->dd_lock); 2749 mutex_enter(&ds->ds_lock); 2750 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 2751 unique = ds->ds_phys->ds_unique_bytes; 2752 delta = MAX(0, (int64_t)(newval - unique)) - 2753 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2754 ds->ds_reserved = newval; 2755 mutex_exit(&ds->ds_lock); 2756 2757 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 2758 mutex_exit(&ds->ds_dir->dd_lock); 2759 } 2760 2761 static void 2762 dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 2763 { 2764 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2765 dsl_pool_t *dp = dmu_tx_pool(tx); 2766 dsl_dataset_t *ds; 2767 2768 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2769 dsl_dataset_set_refreservation_sync_impl(ds, 2770 ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); 2771 dsl_dataset_rele(ds, FTAG); 2772 } 2773 2774 int 2775 dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 2776 uint64_t refreservation) 2777 { 2778 dsl_dataset_set_qr_arg_t ddsqra; 2779 2780 ddsqra.ddsqra_name = dsname; 2781 ddsqra.ddsqra_source = source; 2782 ddsqra.ddsqra_value = refreservation; 2783 2784 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 2785 dsl_dataset_set_refreservation_sync, &ddsqra, 0)); 2786 } 2787 2788 /* 2789 * Return (in *usedp) the amount of space written in new that is not 2790 * present in oldsnap. New may be a snapshot or the head. Old must be 2791 * a snapshot before new, in new's filesystem (or its origin). If not then 2792 * fail and return EINVAL. 2793 * 2794 * The written space is calculated by considering two components: First, we 2795 * ignore any freed space, and calculate the written as new's used space 2796 * minus old's used space. Next, we add in the amount of space that was freed 2797 * between the two snapshots, thus reducing new's used space relative to old's. 2798 * Specifically, this is the space that was born before old->ds_creation_txg, 2799 * and freed before new (ie. on new's deadlist or a previous deadlist). 2800 * 2801 * space freed [---------------------] 2802 * snapshots ---O-------O--------O-------O------ 2803 * oldsnap new 2804 */ 2805 int 2806 dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 2807 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 2808 { 2809 int err = 0; 2810 uint64_t snapobj; 2811 dsl_pool_t *dp = new->ds_dir->dd_pool; 2812 2813 ASSERT(dsl_pool_config_held(dp)); 2814 2815 *usedp = 0; 2816 *usedp += new->ds_phys->ds_referenced_bytes; 2817 *usedp -= oldsnap->ds_phys->ds_referenced_bytes; 2818 2819 *compp = 0; 2820 *compp += new->ds_phys->ds_compressed_bytes; 2821 *compp -= oldsnap->ds_phys->ds_compressed_bytes; 2822 2823 *uncompp = 0; 2824 *uncompp += new->ds_phys->ds_uncompressed_bytes; 2825 *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 2826 2827 snapobj = new->ds_object; 2828 while (snapobj != oldsnap->ds_object) { 2829 dsl_dataset_t *snap; 2830 uint64_t used, comp, uncomp; 2831 2832 if (snapobj == new->ds_object) { 2833 snap = new; 2834 } else { 2835 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 2836 if (err != 0) 2837 break; 2838 } 2839 2840 if (snap->ds_phys->ds_prev_snap_txg == 2841 oldsnap->ds_phys->ds_creation_txg) { 2842 /* 2843 * The blocks in the deadlist can not be born after 2844 * ds_prev_snap_txg, so get the whole deadlist space, 2845 * which is more efficient (especially for old-format 2846 * deadlists). Unfortunately the deadlist code 2847 * doesn't have enough information to make this 2848 * optimization itself. 2849 */ 2850 dsl_deadlist_space(&snap->ds_deadlist, 2851 &used, &comp, &uncomp); 2852 } else { 2853 dsl_deadlist_space_range(&snap->ds_deadlist, 2854 0, oldsnap->ds_phys->ds_creation_txg, 2855 &used, &comp, &uncomp); 2856 } 2857 *usedp += used; 2858 *compp += comp; 2859 *uncompp += uncomp; 2860 2861 /* 2862 * If we get to the beginning of the chain of snapshots 2863 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 2864 * was not a snapshot of/before new. 2865 */ 2866 snapobj = snap->ds_phys->ds_prev_snap_obj; 2867 if (snap != new) 2868 dsl_dataset_rele(snap, FTAG); 2869 if (snapobj == 0) { 2870 err = SET_ERROR(EINVAL); 2871 break; 2872 } 2873 2874 } 2875 return (err); 2876 } 2877 2878 /* 2879 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 2880 * lastsnap, and all snapshots in between are deleted. 2881 * 2882 * blocks that would be freed [---------------------------] 2883 * snapshots ---O-------O--------O-------O--------O 2884 * firstsnap lastsnap 2885 * 2886 * This is the set of blocks that were born after the snap before firstsnap, 2887 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 2888 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 2889 * We calculate this by iterating over the relevant deadlists (from the snap 2890 * after lastsnap, backward to the snap after firstsnap), summing up the 2891 * space on the deadlist that was born after the snap before firstsnap. 2892 */ 2893 int 2894 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 2895 dsl_dataset_t *lastsnap, 2896 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 2897 { 2898 int err = 0; 2899 uint64_t snapobj; 2900 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 2901 2902 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 2903 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 2904 2905 /* 2906 * Check that the snapshots are in the same dsl_dir, and firstsnap 2907 * is before lastsnap. 2908 */ 2909 if (firstsnap->ds_dir != lastsnap->ds_dir || 2910 firstsnap->ds_phys->ds_creation_txg > 2911 lastsnap->ds_phys->ds_creation_txg) 2912 return (SET_ERROR(EINVAL)); 2913 2914 *usedp = *compp = *uncompp = 0; 2915 2916 snapobj = lastsnap->ds_phys->ds_next_snap_obj; 2917 while (snapobj != firstsnap->ds_object) { 2918 dsl_dataset_t *ds; 2919 uint64_t used, comp, uncomp; 2920 2921 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 2922 if (err != 0) 2923 break; 2924 2925 dsl_deadlist_space_range(&ds->ds_deadlist, 2926 firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2927 &used, &comp, &uncomp); 2928 *usedp += used; 2929 *compp += comp; 2930 *uncompp += uncomp; 2931 2932 snapobj = ds->ds_phys->ds_prev_snap_obj; 2933 ASSERT3U(snapobj, !=, 0); 2934 dsl_dataset_rele(ds, FTAG); 2935 } 2936 return (err); 2937 } 2938 2939 /* 2940 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 2941 * For example, they could both be snapshots of the same filesystem, and 2942 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 2943 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 2944 * filesystem. Or 'earlier' could be the origin's origin. 2945 */ 2946 boolean_t 2947 dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier) 2948 { 2949 dsl_pool_t *dp = later->ds_dir->dd_pool; 2950 int error; 2951 boolean_t ret; 2952 2953 ASSERT(dsl_pool_config_held(dp)); 2954 2955 if (earlier->ds_phys->ds_creation_txg >= 2956 later->ds_phys->ds_creation_txg) 2957 return (B_FALSE); 2958 2959 if (later->ds_dir == earlier->ds_dir) 2960 return (B_TRUE); 2961 if (!dsl_dir_is_clone(later->ds_dir)) 2962 return (B_FALSE); 2963 2964 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) 2965 return (B_TRUE); 2966 dsl_dataset_t *origin; 2967 error = dsl_dataset_hold_obj(dp, 2968 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); 2969 if (error != 0) 2970 return (B_FALSE); 2971 ret = dsl_dataset_is_before(origin, earlier); 2972 dsl_dataset_rele(origin, FTAG); 2973 return (ret); 2974 } 2975