/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 RackTop Systems.
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_deadlist.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_bookmark.h>

#define	SWITCH64(x, y) \
	{ \
		uint64_t __tmp = (x); \
		(x) = (y); \
		(y) = __tmp; \
	}
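
/*
 * SWITCH64 expands to a bare brace block (no do { ... } while (0)
 * wrapper), so "SWITCH64(a, b);" as the sole body of an unbraced
 * if/else leaves a stray empty statement behind and can break an else
 * pairing.  Treat it as a statement and brace surrounding conditionals.
 */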

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * Figure out how much of this delta should be propagated to the
 * dsl_dir layer.  If there's a refreservation, that space has already
 * been partially accounted for in our ancestors.
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}
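
/*
 * Worked example (illustrative numbers): with ds_reserved = 10M and
 * ds_unique_bytes = 8M, a delta of +1M stays under the refreservation
 * (MAX(9M, 10M) - MAX(8M, 10M) = 0), so nothing is propagated to the
 * dsl_dir; a delta of +3M crosses it and propagates only the 1M by
 * which unique now exceeds the reservation.
 */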

void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "ds=%p", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
	if (ds == NULL) {
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    used, compressed, uncompressed);
		return;
	}

	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_referenced_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
}
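
/*
 * Free or defer a block that is no longer referenced by this dataset.
 * Two cases (summarizing the logic below): if the block was born after
 * the most recent snapshot it can be freed immediately, since no
 * snapshot references it; otherwise it is still visible in a snapshot
 * and is recorded on the dataset's deadlist instead.
 */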
int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    boolean_t async)
{
	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(bp->blk_birth <= tx->tx_txg);

	if (ds == NULL) {
		dsl_free(tx->tx_pool, tx->tx_txg, bp);
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    -used, -compressed, -uncompressed);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int64_t delta;

		dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
		dsl_free(tx->tx_pool, tx->tx_txg, bp);

		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		if (async) {
			/*
			 * We are here as part of zio's write done callback,
			 * which means we're a zio interrupt thread.  We can't
			 * call dsl_deadlist_insert() now because it may block
			 * waiting for I/O.  Instead, put bp on the deferred
			 * queue and let dsl_pool_sync() finish the job.
			 */
			bplist_append(&ds->ds_pending_deadlist, bp);
		} else {
			dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
		}
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
	ds->ds_phys->ds_referenced_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
    uint64_t blk_birth)
{
	if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
	    (bp != NULL && BP_IS_HOLE(bp)))
		return (B_FALSE);

	ddt_prefetch(dsl_dataset_get_spa(ds), bp);

	return (B_TRUE);
}

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;

	ASSERT(ds->ds_owner == NULL);

	unique_remove(ds->ds_fsid_guid);

	if (ds->ds_objset != NULL)
		dmu_objset_evict(ds->ds_objset);

	if (ds->ds_prev) {
		dsl_dataset_rele(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	bplist_destroy(&ds->ds_pending_deadlist);
	if (ds->ds_phys->ds_deadlist_obj != 0)
		dsl_deadlist_close(&ds->ds_deadlist);
	if (ds->ds_dir)
		dsl_dir_rele(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_sendstream_lock);
	refcount_destroy(&ds->ds_longholds);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err != 0)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}
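
/*
 * Case-insensitive (DS_FLAG_CI_DATASET) datasets match snapshot names
 * with MT_FIRST, which accepts any name that normalizes to the same
 * form.  If the snapnames ZAP does not support normalized operations,
 * the *_norm call fails with ENOTSUP, and both the lookup above and
 * the removal below fall back to an exact-match operation.
 */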

int
dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
    boolean_t adj_cnt)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	dsl_dir_snap_cmtime_update(ds->ds_dir);

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_remove(mos, snapobj, name, tx);

	if (err == 0 && adj_cnt)
		dsl_fs_ss_count_adjust(ds->ds_dir, -1,
		    DD_FIELD_SNAPSHOT_COUNT, tx);

	return (err);
}

int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;
	dmu_object_info_t doi;

	ASSERT(dsl_pool_config_held(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err != 0)
		return (err);

	/* Make sure dsobj has the correct object type. */
	dmu_object_info_from_db(dbuf, &doi);
	if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
		dmu_buf_rele(dbuf, tag);
		return (SET_ERROR(EINVAL));
	}

	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner = NULL;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
		refcount_create(&ds->ds_longholds);

		bplist_create(&ds->ds_pending_deadlist);
		dsl_deadlist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);

		list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
		    offsetof(dmu_sendarg_t, dsa_link));

		if (err == 0) {
			err = dsl_dir_hold_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err != 0) {
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_sendstream_lock);
			refcount_destroy(&ds->ds_longholds);
			bplist_destroy(&ds->ds_pending_deadlist);
			dsl_deadlist_close(&ds->ds_deadlist);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj != 0) {
				err = dsl_dataset_hold_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}
			if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
				int zaperr = zap_lookup(mos, ds->ds_object,
				    DS_FIELD_BOOKMARK_NAMES,
				    sizeof (ds->ds_bookmarks), 1,
				    &ds->ds_bookmarks);
				if (zaperr != ENOENT)
					VERIFY0(zaperr);
			}
		} else {
			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
				err = dsl_dataset_get_snapname(ds);
			if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
				err = zap_count(
				    ds->ds_dir->dd_pool->dp_meta_objset,
				    ds->ds_phys->ds_userrefs_obj,
				    &ds->ds_userrefs);
			}
		}

		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
			err = dsl_prop_get_int_ds(ds,
			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
			    &ds->ds_reserved);
			if (err == 0) {
				err = dsl_prop_get_int_ds(ds,
				    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
				    &ds->ds_quota);
			}
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
		    &ds->ds_phys, dsl_dataset_evict)) != NULL) {
			bplist_destroy(&ds->ds_pending_deadlist);
			dsl_deadlist_close(&ds->ds_deadlist);
			if (ds->ds_prev)
				dsl_dataset_rele(ds->ds_prev, ds);
			dsl_dir_rele(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_sendstream_lock);
			refcount_destroy(&ds->ds_longholds);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err != 0) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	*dsp = ds;
	return (0);
}
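
/*
 * Concurrency note on the function above: two threads can race to
 * instantiate the in-core dsl_dataset_t for the same object.  Each
 * builds a candidate and calls dmu_buf_set_user_ie(); the loser gets
 * the winner's pointer back, tears down its own copy, and proceeds
 * with the winner, so at most one dsl_dataset_t exists per dbuf.
 */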

int
dsl_dataset_hold(dsl_pool_t *dp, const char *name,
    void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	const char *snapname;
	uint64_t obj;
	int err = 0;

	err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
	if (err != 0)
		return (err);

	ASSERT(dsl_pool_config_held(dp));
	obj = dd->dd_phys->dd_head_dataset_obj;
	if (obj != 0)
		err = dsl_dataset_hold_obj(dp, obj, tag, dsp);
	else
		err = SET_ERROR(ENOENT);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *ds;

		if (*snapname++ != '@') {
			dsl_dataset_rele(*dsp, tag);
			dsl_dir_rele(dd, FTAG);
			return (SET_ERROR(ENOENT));
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
		if (err == 0)
			err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
		dsl_dataset_rele(*dsp, tag);

		if (err == 0) {
			mutex_enter(&ds->ds_lock);
			if (ds->ds_snapname[0] == 0)
				(void) strlcpy(ds->ds_snapname, snapname,
				    sizeof (ds->ds_snapname));
			mutex_exit(&ds->ds_lock);
			*dsp = ds;
		}
	}

	dsl_dir_rele(dd, FTAG);
	return (err);
}

int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
    void *tag, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
	if (err != 0)
		return (err);
	if (!dsl_dataset_tryown(*dsp, tag)) {
		dsl_dataset_rele(*dsp, tag);
		*dsp = NULL;
		return (SET_ERROR(EBUSY));
	}
	return (0);
}

int
dsl_dataset_own(dsl_pool_t *dp, const char *name,
    void *tag, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold(dp, name, tag, dsp);
	if (err != 0)
		return (err);
	if (!dsl_dataset_tryown(*dsp, tag)) {
		dsl_dataset_rele(*dsp, tag);
		return (SET_ERROR(EBUSY));
	}
	return (0);
}

/*
 * See the comment above dsl_pool_hold() for details.  In summary, a long
 * hold is used to prevent destruction of a dataset while the pool hold
 * is dropped, allowing other concurrent operations (e.g. spa_sync()).
 *
 * The dataset and pool must be held when this function is called.  After it
 * is called, the pool hold may be released while the dataset is still held
 * and accessed.
 */
void
dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag)
{
	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
	(void) refcount_add(&ds->ds_longholds, tag);
}

void
dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag)
{
	(void) refcount_remove(&ds->ds_longholds, tag);
}

/* Return B_TRUE if there are any long holds on this dataset. */
boolean_t
dsl_dataset_long_held(dsl_dataset_t *ds)
{
	return (!refcount_is_zero(&ds->ds_longholds));
}
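
/*
 * Illustrative hold lifecycles (a sketch, not code from this file):
 *
 *	Short-lived access:
 *		dsl_pool_hold(name, FTAG, &dp);
 *		dsl_dataset_hold(dp, name, FTAG, &ds);
 *		... read ds ...
 *		dsl_dataset_rele(ds, FTAG);
 *		dsl_pool_rele(dp, FTAG);
 *
 *	A long-running operation (e.g. a send stream) additionally takes
 *	dsl_dataset_long_hold(ds, FTAG) so the dataset cannot be
 *	destroyed once the pool hold is dropped, and pairs it with
 *	dsl_dataset_long_rele(ds, FTAG) when done.
 */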

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY0(dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
{
	ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL);

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	mutex_exit(&ds->ds_lock);
	dsl_dataset_long_rele(ds, tag);
	if (ds->ds_dbuf != NULL)
		dsl_dataset_rele(ds, tag);
	else
		dsl_dataset_evict(NULL, ds);
}

boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
{
	boolean_t gotit = FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
		ds->ds_owner = tag;
		dsl_dataset_long_hold(ds, tag);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}

uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;

	if (origin == NULL) {
		dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
	} else {
		dsl_dataset_t *ohds; /* head of the origin snapshot */

		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_referenced_bytes =
		    origin->ds_phys->ds_referenced_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		VERIFY0(dsl_dataset_hold_obj(dp,
		    origin->ds_dir->dd_phys->dd_head_dataset_obj,
		    FTAG, &ohds));
		dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
		    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
		dsl_dataset_rele(ohds, FTAG);

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY0(zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj, dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			if (origin->ds_dir->dd_phys->dd_clones == 0) {
				dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
				origin->ds_dir->dd_phys->dd_clones =
				    zap_create(mos,
				    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY0(zap_add_int(mos,
			    origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
		}
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}
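
/*
 * Summarizing the clone path above: a new clone starts life sharing its
 * origin snapshot's block tree (ds_bp) and space accounting
 * (referenced/compressed/uncompressed bytes), its deadlist is cloned
 * from the origin's head, and on new-enough pools the origin records
 * the new dataset in ds_next_clones_obj and dd_clones.
 */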

static void
dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	objset_t *os;

	VERIFY0(dmu_objset_from_ds(ds, &os));
	bzero(&os->os_zil_header, sizeof (os->os_zil_header));
	dsl_dataset_dirty(ds, tx);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(lastname[0] != '@');

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin,
	    flags & ~DS_CREATE_FLAG_NODIRTY, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	/*
	 * Since we're creating a new node we know it's a leaf, so we can
	 * initialize the counts if the limit feature is active.
	 */
	if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
		uint64_t cnt = 0;
		objset_t *os = dd->dd_pool->dp_meta_objset;

		dsl_dir_zapify(dd, tx);
		VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
		    sizeof (cnt), 1, &cnt, tx));
		VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
		    sizeof (cnt), 1, &cnt, tx));
	}

	dsl_dir_rele(dd, FTAG);

	/*
	 * If we are creating a clone, make sure we zero out any stale
	 * data from the origin snapshot's ZIL header.
	 */
	if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) {
		dsl_dataset_t *ds;

		VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
		dsl_dataset_zero_zil(ds, tx);
		dsl_dataset_rele(ds, FTAG);
	}

	return (dsobj);
}

/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot that is still being used
 * in this file system from the space currently in use.  To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
 */
void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(!dsl_dataset_is_snapshot(ds));

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
	else
		mrs_used = 0;

	dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}
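
/*
 * Worked example for the calculation above (illustrative numbers): if
 * the head references 10G, the most recent snapshot references 6G, and
 * 2G of the snapshot's blocks have since been freed (they are on the
 * head's deadlist), the snapshot still shares 6G - 2G = 4G with the
 * head, so unique = 10G - 4G = 6G.
 */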

void
dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
    dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	ASSERT(ds->ds_phys->ds_num_children >= 2);
	err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
	/*
	 * The err should not be ENOENT, but a bug in a previous version
	 * of the code could cause upgrade_clones_cb() to not set
	 * ds_next_snap_obj when it should, leading to a missing entry.
	 * If we knew that the pool was created after
	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
	 * ENOENT.  However, at least we can check that we don't have
	 * too many entries in the next_clones_obj even after failing to
	 * remove this one.
	 */
	if (err != ENOENT)
		VERIFY0(err);
	ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
	    &count));
	ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
}


blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_objset != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

boolean_t
dsl_dataset_is_dirty(dsl_dataset_t *ds)
{
	for (int t = 0; t < TXG_SIZE; t++) {
		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
		    ds, t))
			return (B_TRUE);
	}
	return (B_FALSE);
}

static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
	asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
		return (SET_ERROR(ENOSPC));

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}
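
/*
 * Example of the check above (illustrative numbers): with
 * refreservation = 10G and 4G of unique data, a snapshot would move
 * MIN(4G, 10G) = 4G out from under the reservation, so 4G of
 * additional space must be available or the snapshot fails with
 * ENOSPC.
 */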

typedef struct dsl_dataset_snapshot_arg {
	nvlist_t *ddsa_snaps;
	nvlist_t *ddsa_props;
	nvlist_t *ddsa_errors;
	cred_t *ddsa_cr;
} dsl_dataset_snapshot_arg_t;

int
dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)
{
	int error;
	uint64_t value;

	ds->ds_trysnap_txg = tx->tx_txg;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (SET_ERROR(EAGAIN));

	/*
	 * Check for conflicting snapshot name.
	 */
	error = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (error == 0)
		return (SET_ERROR(EEXIST));
	if (error != ENOENT)
		return (error);

	/*
	 * We don't allow taking snapshots of inconsistent datasets, such as
	 * those into which we are currently receiving.  However, if we are
	 * creating this snapshot as part of a receive, this check will be
	 * executed atomically with respect to the completion of the receive
	 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this
	 * case we ignore this, knowing it will be fixed up for us shortly in
	 * dmu_recv_end_sync().
	 */
	if (!recv && DS_IS_INCONSISTENT(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Skip the check for temporary snapshots or if we have already checked
	 * the counts in dsl_dataset_snapshot_check.  This means we really only
	 * check the count here when we're receiving a stream.
	 */
	if (cnt != 0 && cr != NULL) {
		error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);
		if (error != 0)
			return (error);
	}

	error = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (error != 0)
		return (error);

	return (0);
}

static int
dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	/*
	 * Pre-compute how many total new snapshots will be created for each
	 * level in the tree and below.  This is needed for validating the
	 * snapshot limit when either taking a recursive snapshot or when
	 * taking multiple snapshots.
	 *
	 * The problem is that the counts are not actually adjusted when
	 * we are checking, only when we finally sync.  For a single snapshot,
	 * this is easy, the count will increase by 1 at each node up the tree,
	 * but it's more complicated for the recursive/multiple snapshot case.
	 *
	 * The dsl_fs_ss_limit_check function does recursively check the count
	 * at each level up the tree but since it is validating each snapshot
	 * independently we need to be sure that we are validating the complete
	 * count for the entire set of snapshots.  We do this by rolling up the
	 * counts for each component of the name into an nvlist and then
	 * checking each of those cases with the aggregated count.
	 *
	 * This approach properly handles not only the recursive snapshot
	 * case (where we get all of those on the ddsa_snaps list) but also
	 * the sibling case (e.g. snapshot a/b and a/c so that we will also
	 * validate the limit on 'a' using a count of 2).
	 *
	 * We validate the snapshot names in the third loop and only report
	 * name errors once.
	 */
	if (dmu_tx_is_syncing(tx)) {
		nvlist_t *cnt_track = NULL;
		cnt_track = fnvlist_alloc();

		/* Rollup aggregated counts into the cnt_track list */
		for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
		    pair != NULL;
		    pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
			char *pdelim;
			uint64_t val;
			char nm[MAXPATHLEN];

			(void) strlcpy(nm, nvpair_name(pair), sizeof (nm));
			pdelim = strchr(nm, '@');
			if (pdelim == NULL)
				continue;
			*pdelim = '\0';

			do {
				if (nvlist_lookup_uint64(cnt_track, nm,
				    &val) == 0) {
					/* update existing entry */
					fnvlist_add_uint64(cnt_track, nm,
					    val + 1);
				} else {
					/* add to list */
					fnvlist_add_uint64(cnt_track, nm, 1);
				}

				pdelim = strrchr(nm, '/');
				if (pdelim != NULL)
					*pdelim = '\0';
			} while (pdelim != NULL);
		}

		/* Check aggregated counts at each level */
		for (pair = nvlist_next_nvpair(cnt_track, NULL);
		    pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) {
			int error = 0;
			char *name;
			uint64_t cnt = 0;
			dsl_dataset_t *ds;

			name = nvpair_name(pair);
			cnt = fnvpair_value_uint64(pair);
			ASSERT(cnt > 0);

			error = dsl_dataset_hold(dp, name, FTAG, &ds);
			if (error == 0) {
				error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
				    ZFS_PROP_SNAPSHOT_LIMIT, NULL,
				    ddsa->ddsa_cr);
				dsl_dataset_rele(ds, FTAG);
			}

			if (error != 0) {
				if (ddsa->ddsa_errors != NULL)
					fnvlist_add_int32(ddsa->ddsa_errors,
					    name, error);
				rv = error;
				/* only report one error for this check */
				break;
			}
		}
		nvlist_free(cnt_track);
	}

	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		int error = 0;
		dsl_dataset_t *ds;
		char *name, *atp;
		char dsname[MAXNAMELEN];

		name = nvpair_name(pair);
		if (strlen(name) >= MAXNAMELEN)
			error = SET_ERROR(ENAMETOOLONG);
		if (error == 0) {
			atp = strchr(name, '@');
			if (atp == NULL)
				error = SET_ERROR(EINVAL);
			if (error == 0)
				(void) strlcpy(dsname, name, atp - name + 1);
		}
		if (error == 0)
			error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
		if (error == 0) {
			/* passing 0/NULL skips dsl_fs_ss_limit_check */
			error = dsl_dataset_snapshot_check_impl(ds,
			    atp + 1, tx, B_FALSE, 0, NULL);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error != 0) {
			if (ddsa->ddsa_errors != NULL) {
				fnvlist_add_int32(ddsa->ddsa_errors,
				    name, error);
			}
			rv = error;
		}
	}

	return (rv);
}
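
/*
 * Rollup example for the check above (hypothetical names): snapshotting
 * {"a/b@s", "a/c@s"} yields cnt_track = { "a/b" = 1, "a/c" = 1,
 * "a" = 2 }, so the snapshot limit on 'a' is validated against both
 * new snapshots at once rather than one at a time.
 */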

void
dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx)
{
	static zil_header_t zero_zil;

	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	objset_t *os;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * If we are on an old pool, the zil must not be active, in which
	 * case it will be zeroed.  Usually zil_suspend() accomplishes this.
	 */
	ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP ||
	    dmu_objset_from_ds(ds, &os) != 0 ||
	    bcmp(&os->os_phys->os_zil_header, &zero_zil,
	    sizeof (zero_zil)) == 0);

	dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    dsphys->ds_next_snap_obj, tx);
			VERIFY0(zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t delta;
		ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
		delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    delta, 0, 0, tx);
	}

	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist,
	    UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx);
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_add_key(&ds->ds_deadlist,
	    ds->ds_phys->ds_prev_snap_txg, tx);

	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = crtxg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx));

	if (ds->ds_prev)
		dsl_dataset_rele(ds->ds_prev, ds);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_scan_ds_snapshotted(ds, tx);

	dsl_dir_snap_cmtime_update(ds->ds_dir);

	spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
}

static void
dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;

	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		dsl_dataset_t *ds;
		char *name, *atp;
		char dsname[MAXNAMELEN];

		name = nvpair_name(pair);
		atp = strchr(name, '@');
		(void) strlcpy(dsname, name, atp - name + 1);
		VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds));

		dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx);
		if (ddsa->ddsa_props != NULL) {
			dsl_props_set_sync_impl(ds->ds_prev,
			    ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
		}
		dsl_dataset_rele(ds, FTAG);
	}
}
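
/*
 * Illustrative caller sketch for dsl_dataset_snapshot() below (user
 * context; error handling omitted; names hypothetical):
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "pool/fs@today");
 *	fnvlist_add_boolean(snaps, "pool/vol@today");
 *	error = dsl_dataset_snapshot(snaps, NULL, NULL);
 *	fnvlist_free(snaps);
 *
 * Both snapshots are taken in one sync task, so either both exist
 * afterward or neither does.
 */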

/*
 * The snapshots must all be in the same pool.
 * All-or-nothing: if there are any failures, nothing will be modified.
 */
int
dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
{
	dsl_dataset_snapshot_arg_t ddsa;
	nvpair_t *pair;
	boolean_t needsuspend;
	int error;
	spa_t *spa;
	char *firstname;
	nvlist_t *suspended = NULL;

	pair = nvlist_next_nvpair(snaps, NULL);
	if (pair == NULL)
		return (0);
	firstname = nvpair_name(pair);

	error = spa_open(firstname, &spa, FTAG);
	if (error != 0)
		return (error);
	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
	spa_close(spa, FTAG);

	if (needsuspend) {
		suspended = fnvlist_alloc();
		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(snaps, pair)) {
			char fsname[MAXNAMELEN];
			char *snapname = nvpair_name(pair);
			char *atp;
			void *cookie;

			atp = strchr(snapname, '@');
			if (atp == NULL) {
				error = SET_ERROR(EINVAL);
				break;
			}
			(void) strlcpy(fsname, snapname, atp - snapname + 1);

			error = zil_suspend(fsname, &cookie);
			if (error != 0)
				break;
			fnvlist_add_uint64(suspended, fsname,
			    (uintptr_t)cookie);
		}
	}

	ddsa.ddsa_snaps = snaps;
	ddsa.ddsa_props = props;
	ddsa.ddsa_errors = errors;
	ddsa.ddsa_cr = CRED();

	if (error == 0) {
		error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
		    dsl_dataset_snapshot_sync, &ddsa,
		    fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL);
	}

	if (suspended != NULL) {
		for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(suspended, pair)) {
			zil_resume((void *)(uintptr_t)
			    fnvpair_value_uint64(pair));
		}
		fnvlist_free(suspended);
	}

	return (error);
}

typedef struct dsl_dataset_snapshot_tmp_arg {
	const char *ddsta_fsname;
	const char *ddsta_snapname;
	minor_t ddsta_cleanup_minor;
	const char *ddsta_htag;
} dsl_dataset_snapshot_tmp_arg_t;

static int
dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds);
	if (error != 0)
		return (error);

	/* NULL cred means no limit check for tmp snapshot */
	error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname,
	    tx, B_FALSE, 0, NULL);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag,
	    B_TRUE, tx);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

static void
dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));

	dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx);
	dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag,
	    ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx);
	dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx);
	dsl_dataset_rele(ds, FTAG);
}

int
dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
    minor_t cleanup_minor, const char *htag)
{
	dsl_dataset_snapshot_tmp_arg_t ddsta;
	int error;
	spa_t *spa;
	boolean_t needsuspend;
	void *cookie;

	ddsta.ddsta_fsname = fsname;
	ddsta.ddsta_snapname = snapname;
	ddsta.ddsta_cleanup_minor = cleanup_minor;
	ddsta.ddsta_htag = htag;

	error = spa_open(fsname, &spa, FTAG);
	if (error != 0)
		return (error);
	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
	spa_close(spa, FTAG);

	if (needsuspend) {
		error = zil_suspend(fsname, &cookie);
		if (error != 0)
			return (error);
	}

	error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check,
	    dsl_dataset_snapshot_tmp_sync, &ddsta, 3,
	    ZFS_SPACE_CHECK_RESERVED);

	if (needsuspend)
		zil_resume(cookie);
	return (error);
}


void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_objset != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

	dmu_objset_sync(ds->ds_objset, zio, tx);
}

static void
get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
{
	uint64_t count = 0;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;
	nvlist_t *propval = fnvlist_alloc();
	nvlist_t *val = fnvlist_alloc();

	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));

	/*
	 * There may be missing entries in ds_next_clones_obj
	 * due to a bug in a previous version of the code.
	 * Only trust it if it has the right number of entries.
	 */
	if (ds->ds_phys->ds_next_clones_obj != 0) {
		VERIFY0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
		    &count));
	}
	if (count != ds->ds_phys->ds_num_children - 1)
		goto fail;
	for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;
		char buf[ZFS_MAXNAMELEN];
		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za.za_first_integer, FTAG, &clone));
		dsl_dir_name(clone->ds_dir, buf);
		fnvlist_add_boolean(val, buf);
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
	fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
	fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval);
fail:
	nvlist_free(val);
	nvlist_free(propval);
}
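
/*
 * Note on the ratio computed below: refratio is reported as a
 * percentage, uncompressed * 100 / compressed, so e.g. 2.5x
 * compression shows up as 250 (and 100 means incompressible or empty).
 */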
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	uint64_t refd, avail, uobjs, aobjs, ratio;

	ASSERT(dsl_pool_config_held(dp));

	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
	    (ds->ds_phys->ds_uncompressed_bytes * 100 /
	    ds->ds_phys->ds_compressed_bytes);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
	    ds->ds_phys->ds_uncompressed_bytes);

	if (dsl_dataset_is_snapshot(ds)) {
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
		    ds->ds_phys->ds_unique_bytes);
		get_clones_stat(ds, nv);
	} else {
		if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
			char buf[MAXNAMELEN];
			dsl_dataset_name(ds->ds_prev, buf);
			dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
		}

		dsl_dir_stats(ds->ds_dir, nv);
	}

	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    ds->ds_phys->ds_creation_time);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    ds->ds_phys->ds_creation_txg);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
	    ds->ds_quota);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
	    ds->ds_reserved);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
	    ds->ds_phys->ds_guid);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
	    ds->ds_phys->ds_unique_bytes);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
	    ds->ds_object);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
	    ds->ds_userrefs);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
	    DS_IS_DEFER_DESTROY(ds) ? 1 : 0);

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		uint64_t written, comp, uncomp;
		dsl_pool_t *dp = ds->ds_dir->dd_pool;
		dsl_dataset_t *prev;

		int err = dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
		if (err == 0) {
			err = dsl_dataset_space_written(prev, ds, &written,
			    &comp, &uncomp);
			dsl_dataset_rele(prev, FTAG);
			if (err == 0) {
				dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
				    written);
			}
		}
	}
}

void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	ASSERT(dsl_pool_config_held(dp));

	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	stat->dds_guid = ds->ds_phys->ds_guid;
	stat->dds_origin[0] = '\0';
	if (dsl_dataset_is_snapshot(ds)) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	} else {
		stat->dds_is_snapshot = B_FALSE;
		stat->dds_num_clones = 0;

		if (dsl_dir_is_clone(ds->ds_dir)) {
			dsl_dataset_t *ods;

			VERIFY0(dsl_dataset_hold_obj(dp,
			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
			dsl_dataset_name(ods, stat->dds_origin);
			dsl_dataset_rele(ods, FTAG);
		}
	}
}

uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_referenced_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = BP_GET_FILL(&ds->ds_phys->ds_bp);
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}
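
/*
 * Example for the adjustments above (illustrative numbers): with
 * refreservation = 10G, 4G unique, 12G referenced, refquota = 15G and
 * 20G of dir space available, avail = 20G + (10G - 4G) = 26G, then
 * clamped by the quota headroom to MIN(26G, 15G - 12G) = 3G.
 */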

boolean_t
dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	ASSERT(dsl_pool_config_held(dp));
	if (snap == NULL)
		return (B_FALSE);
	if (ds->ds_phys->ds_bp.blk_birth >
	    snap->ds_phys->ds_creation_txg) {
		objset_t *os, *os_snap;
		/*
		 * It may be that only the ZIL differs, because it was
		 * reset in the head.  Don't count that as being
		 * modified.
		 */
		if (dmu_objset_from_ds(ds, &os) != 0)
			return (B_TRUE);
		if (dmu_objset_from_ds(snap, &os_snap) != 0)
			return (B_TRUE);
		return (bcmp(&os->os_phys->os_meta_dnode,
		    &os_snap->os_phys->os_meta_dnode,
		    sizeof (os->os_phys->os_meta_dnode)) != 0);
	}
	return (B_FALSE);
}

typedef struct dsl_dataset_rename_snapshot_arg {
	const char *ddrsa_fsname;
	const char *ddrsa_oldsnapname;
	const char *ddrsa_newsnapname;
	boolean_t ddrsa_recursive;
	dmu_tx_t *ddrsa_tx;
} dsl_dataset_rename_snapshot_arg_t;

/* ARGSUSED */
static int
dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
    dsl_dataset_t *hds, void *arg)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	int error;
	uint64_t val;

	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
	if (error != 0) {
		/* ignore nonexistent snapshots */
		return (error == ENOENT ? 0 : error);
	}

	/* new name should not exist */
	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val);
	if (error == 0)
		error = SET_ERROR(EEXIST);
	else if (error == ENOENT)
		error = 0;

	/* dataset name + 1 for the "@" + the new snapshot name must fit */
	if (dsl_dir_namelen(hds->ds_dir) + 1 +
	    strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN)
		error = SET_ERROR(ENAMETOOLONG);

	return (error);
}

static int
dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;
	int error;

	error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds);
	if (error != 0)
		return (error);

	if (ddrsa->ddrsa_recursive) {
		error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
		    dsl_dataset_rename_snapshot_check_impl, ddrsa,
		    DS_FIND_CHILDREN);
	} else {
		error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa);
	}
	dsl_dataset_rele(hds, FTAG);
	return (error);
}

static int
dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
    dsl_dataset_t *hds, void *arg)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_dataset_t *ds;
	uint64_t val;
	dmu_tx_t *tx = ddrsa->ddrsa_tx;
	int error;

	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
	ASSERT(error == 0 || error == ENOENT);
	if (error == ENOENT) {
		/* ignore nonexistent snapshots */
		return (0);
	}

	VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds));

	/* log before we change the name */
	spa_history_log_internal_ds(ds, "rename", tx,
	    "-> @%s", ddrsa->ddrsa_newsnapname);

	VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx,
	    B_FALSE));
	mutex_enter(&ds->ds_lock);
	(void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname);
	mutex_exit(&ds->ds_lock);
	VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx));

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

static void
dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;

	VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));
	ddrsa->ddrsa_tx = tx;
	if (ddrsa->ddrsa_recursive) {
		VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
		    dsl_dataset_rename_snapshot_sync_impl, ddrsa,
		    DS_FIND_CHILDREN));
	} else {
		VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa));
	}
	dsl_dataset_rele(hds, FTAG);
}

int
dsl_dataset_rename_snapshot(const char *fsname,
    const char *oldsnapname, const char *newsnapname, boolean_t recursive)
{
	dsl_dataset_rename_snapshot_arg_t ddrsa;

	ddrsa.ddrsa_fsname = fsname;
	ddrsa.ddrsa_oldsnapname = oldsnapname;
	ddrsa.ddrsa_newsnapname = newsnapname;
	ddrsa.ddrsa_recursive = recursive;

	return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check,
	    dsl_dataset_rename_snapshot_sync, &ddrsa,
	    1, ZFS_SPACE_CHECK_RESERVED));
}

/*
 * If we're doing an ownership handoff, we need to make sure that there is
 * only one long hold on the dataset.
 * We're not allowed to change anything here so we don't permanently
 * release the long hold or regular hold here.  We want to do this only
 * when syncing to avoid the dataset unexpectedly going away when we
 * release the long hold.
 */
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{
	boolean_t held;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (owner != NULL) {
		VERIFY3P(ds->ds_owner, ==, owner);
		dsl_dataset_long_rele(ds, owner);
	}

	held = dsl_dataset_long_held(ds);

	if (owner != NULL)
		dsl_dataset_long_hold(ds, owner);

	if (held)
		return (SET_ERROR(EBUSY));

	return (0);
}

typedef struct dsl_dataset_rollback_arg {
	const char *ddra_fsname;
	void *ddra_owner;
	nvlist_t *ddra_result;
} dsl_dataset_rollback_arg_t;

static int
dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rollback_arg_t *ddra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int64_t unused_refres_delta;
	int error;

	error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
	if (error != 0)
		return (error);

	/* must not be a snapshot */
	if (dsl_dataset_is_snapshot(ds)) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EINVAL));
	}

	/* must have a most recent snapshot */
	if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EINVAL));
	}

	/* must not have any bookmarks after the most recent snapshot */
	nvlist_t *proprequest = fnvlist_alloc();
	fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
	nvlist_t *bookmarks = fnvlist_alloc();
	error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
	fnvlist_free(proprequest);
	if (error != 0)
		return (error);
	for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
		nvlist_t *valuenv =
		    fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair),
		    zfs_prop_to_name(ZFS_PROP_CREATETXG));
		uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value");
		if (createtxg > ds->ds_phys->ds_prev_snap_txg) {
			fnvlist_free(bookmarks);
			dsl_dataset_rele(ds, FTAG);
			return (SET_ERROR(EEXIST));
		}
	}
	fnvlist_free(bookmarks);

	error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	/*
	 * Check if the snap we are rolling back to uses more than
	 * the refquota.
	 */
	if (ds->ds_quota != 0 &&
	    ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EDQUOT));
	}

	/*
	 * When we do the clone swap, we will temporarily use more space
	 * due to the refreservation (the head will no longer have any
	 * unique space, so the entire amount of the refreservation will need
	 * to be free).  We will immediately destroy the clone, freeing
	 * this space, but the freeing happens over many txg's.
1921 */ 1922 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 1923 ds->ds_phys->ds_unique_bytes); 1924 1925 if (unused_refres_delta > 0 && 1926 unused_refres_delta > 1927 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 1928 dsl_dataset_rele(ds, FTAG); 1929 return (SET_ERROR(ENOSPC)); 1930 } 1931 1932 dsl_dataset_rele(ds, FTAG); 1933 return (0); 1934 } 1935 1936 static void 1937 dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 1938 { 1939 dsl_dataset_rollback_arg_t *ddra = arg; 1940 dsl_pool_t *dp = dmu_tx_pool(tx); 1941 dsl_dataset_t *ds, *clone; 1942 uint64_t cloneobj; 1943 char namebuf[ZFS_MAXNAMELEN]; 1944 1945 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 1946 1947 dsl_dataset_name(ds->ds_prev, namebuf); 1948 fnvlist_add_string(ddra->ddra_result, "target", namebuf); 1949 1950 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 1951 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 1952 1953 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 1954 1955 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 1956 dsl_dataset_zero_zil(ds, tx); 1957 1958 dsl_destroy_head_sync_impl(clone, tx); 1959 1960 dsl_dataset_rele(clone, FTAG); 1961 dsl_dataset_rele(ds, FTAG); 1962 } 1963 1964 /* 1965 * Rolls back the given filesystem or volume to the most recent snapshot. 1966 * The name of the most recent snapshot will be returned under key "target" 1967 * in the result nvlist. 1968 * 1969 * If owner != NULL: 1970 * - The existing dataset MUST be owned by the specified owner at entry 1971 * - Upon return, dataset will still be held by the same owner, whether we 1972 * succeed or not. 1973 * 1974 * This mode is required any time the existing filesystem is mounted. See 1975 * notes above zfs_suspend_fs() for further details. 1976 */ 1977 int 1978 dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result) 1979 { 1980 dsl_dataset_rollback_arg_t ddra; 1981 1982 ddra.ddra_fsname = fsname; 1983 ddra.ddra_owner = owner; 1984 ddra.ddra_result = result; 1985 1986 return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 1987 dsl_dataset_rollback_sync, &ddra, 1988 1, ZFS_SPACE_CHECK_RESERVED)); 1989 } 1990 1991 struct promotenode { 1992 list_node_t link; 1993 dsl_dataset_t *ds; 1994 }; 1995 1996 typedef struct dsl_dataset_promote_arg { 1997 const char *ddpa_clonename; 1998 dsl_dataset_t *ddpa_clone; 1999 list_t shared_snaps, origin_snaps, clone_snaps; 2000 dsl_dataset_t *origin_origin; /* origin of the origin */ 2001 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2002 char *err_ds; 2003 cred_t *cr; 2004 } dsl_dataset_promote_arg_t; 2005 2006 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2007 static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 2008 void *tag); 2009 static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 2010 2011 static int 2012 dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 2013 { 2014 dsl_dataset_promote_arg_t *ddpa = arg; 2015 dsl_pool_t *dp = dmu_tx_pool(tx); 2016 dsl_dataset_t *hds; 2017 struct promotenode *snap; 2018 dsl_dataset_t *origin_ds; 2019 int err; 2020 uint64_t unused; 2021 uint64_t ss_mv_cnt; 2022 2023 err = promote_hold(ddpa, dp, FTAG); 2024 if (err != 0) 2025 return (err); 2026 2027 hds = ddpa->ddpa_clone; 2028 2029 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 2030 promote_rele(ddpa, FTAG); 2031 return (SET_ERROR(EXDEV)); 2032 } 2033 2034 /* 2035 * Compute and check the amount of space to transfer. 
Since this is 2036 * so expensive, don't do the preliminary check. 2037 */ 2038 if (!dmu_tx_is_syncing(tx)) { 2039 promote_rele(ddpa, FTAG); 2040 return (0); 2041 } 2042 2043 snap = list_head(&ddpa->shared_snaps); 2044 origin_ds = snap->ds; 2045 2046 /* compute origin's new unique space */ 2047 snap = list_tail(&ddpa->clone_snaps); 2048 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2049 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2050 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2051 &ddpa->unique, &unused, &unused); 2052 2053 /* 2054 * Walk the snapshots that we are moving 2055 * 2056 * Compute space to transfer. Consider the incremental changes 2057 * to used by each snapshot: 2058 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2059 * So each snapshot gave birth to: 2060 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2061 * So a sequence would look like: 2062 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2063 * Which simplifies to: 2064 * uN + kN + kN-1 + ... + k1 + k0 2065 * Note however, if we stop before we reach the ORIGIN we get: 2066 * uN + kN + kN-1 + ... + kM - uM-1 2067 */ 2068 ss_mv_cnt = 0; 2069 ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; 2070 ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2071 ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2072 for (snap = list_head(&ddpa->shared_snaps); snap; 2073 snap = list_next(&ddpa->shared_snaps, snap)) { 2074 uint64_t val, dlused, dlcomp, dluncomp; 2075 dsl_dataset_t *ds = snap->ds; 2076 2077 ss_mv_cnt++; 2078 2079 /* 2080 * If there are long holds, we won't be able to evict 2081 * the objset. 2082 */ 2083 if (dsl_dataset_long_held(ds)) { 2084 err = SET_ERROR(EBUSY); 2085 goto out; 2086 } 2087 2088 /* Check that the snapshot name does not conflict */ 2089 VERIFY0(dsl_dataset_get_snapname(ds)); 2090 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2091 if (err == 0) { 2092 (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 2093 err = SET_ERROR(EEXIST); 2094 goto out; 2095 } 2096 if (err != ENOENT) 2097 goto out; 2098 2099 /* The very first snapshot does not have a deadlist */ 2100 if (ds->ds_phys->ds_prev_snap_obj == 0) 2101 continue; 2102 2103 dsl_deadlist_space(&ds->ds_deadlist, 2104 &dlused, &dlcomp, &dluncomp); 2105 ddpa->used += dlused; 2106 ddpa->comp += dlcomp; 2107 ddpa->uncomp += dluncomp; 2108 } 2109 2110 /* 2111 * If we are a clone of a clone then we never reached ORIGIN, 2112 * so we need to subtract out the clone origin's used space. 2113 */ 2114 if (ddpa->origin_origin) { 2115 ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; 2116 ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; 2117 ddpa->uncomp -= 2118 ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; 2119 } 2120 2121 /* Check that there is enough space and limit headroom here */ 2122 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2123 0, ss_mv_cnt, ddpa->used, ddpa->cr); 2124 if (err != 0) 2125 goto out; 2126 2127 /* 2128 * Compute the amounts of space that will be used by snapshots 2129 * after the promotion (for both origin and clone). For each, 2130 * it is the amount of space that will be on all of their 2131 * deadlists (that was not born before their new origin). 
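	 * For example (illustrative numbers): if those deadlists hold 1G,
	 * 2G and 3G of space born after the new origin's txg, cloneusedsnap
	 * comes to 6G.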
2132 */ 2133 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2134 uint64_t space; 2135 2136 /* 2137 * Note, typically this will not be a clone of a clone, 2138 * so dd_origin_txg will be < TXG_INITIAL, so 2139 * these snaplist_space() -> dsl_deadlist_space_range() 2140 * calls will be fast because they do not have to 2141 * iterate over all bps. 2142 */ 2143 snap = list_head(&ddpa->origin_snaps); 2144 err = snaplist_space(&ddpa->shared_snaps, 2145 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2146 if (err != 0) 2147 goto out; 2148 2149 err = snaplist_space(&ddpa->clone_snaps, 2150 snap->ds->ds_dir->dd_origin_txg, &space); 2151 if (err != 0) 2152 goto out; 2153 ddpa->cloneusedsnap += space; 2154 } 2155 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2156 err = snaplist_space(&ddpa->origin_snaps, 2157 origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); 2158 if (err != 0) 2159 goto out; 2160 } 2161 2162 out: 2163 promote_rele(ddpa, FTAG); 2164 return (err); 2165 } 2166 2167 static void 2168 dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2169 { 2170 dsl_dataset_promote_arg_t *ddpa = arg; 2171 dsl_pool_t *dp = dmu_tx_pool(tx); 2172 dsl_dataset_t *hds; 2173 struct promotenode *snap; 2174 dsl_dataset_t *origin_ds; 2175 dsl_dataset_t *origin_head; 2176 dsl_dir_t *dd; 2177 dsl_dir_t *odd = NULL; 2178 uint64_t oldnext_obj; 2179 int64_t delta; 2180 2181 VERIFY0(promote_hold(ddpa, dp, FTAG)); 2182 hds = ddpa->ddpa_clone; 2183 2184 ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); 2185 2186 snap = list_head(&ddpa->shared_snaps); 2187 origin_ds = snap->ds; 2188 dd = hds->ds_dir; 2189 2190 snap = list_head(&ddpa->origin_snaps); 2191 origin_head = snap->ds; 2192 2193 /* 2194 * We need to explicitly open odd, since origin_ds's dd will be 2195 * changing. 
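	 * (dsl_dir_hold_obj() takes an independent hold by object number,
	 * so odd stays valid while the snapshots are re-parented to dd
	 * below.)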
2196 */ 2197 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2198 NULL, FTAG, &odd)); 2199 2200 /* change origin's next snap */ 2201 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2202 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2203 snap = list_tail(&ddpa->clone_snaps); 2204 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2205 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2206 2207 /* change the origin's next clone */ 2208 if (origin_ds->ds_phys->ds_next_clones_obj) { 2209 dsl_dataset_remove_from_next_clones(origin_ds, 2210 snap->ds->ds_object, tx); 2211 VERIFY0(zap_add_int(dp->dp_meta_objset, 2212 origin_ds->ds_phys->ds_next_clones_obj, 2213 oldnext_obj, tx)); 2214 } 2215 2216 /* change origin */ 2217 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2218 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2219 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2220 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2221 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2222 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2223 origin_head->ds_dir->dd_origin_txg = 2224 origin_ds->ds_phys->ds_creation_txg; 2225 2226 /* change dd_clone entries */ 2227 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2228 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2229 odd->dd_phys->dd_clones, hds->ds_object, tx)); 2230 VERIFY0(zap_add_int(dp->dp_meta_objset, 2231 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2232 hds->ds_object, tx)); 2233 2234 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2235 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2236 origin_head->ds_object, tx)); 2237 if (dd->dd_phys->dd_clones == 0) { 2238 dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 2239 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 2240 } 2241 VERIFY0(zap_add_int(dp->dp_meta_objset, 2242 dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 2243 } 2244 2245 /* move snapshots to this dir */ 2246 for (snap = list_head(&ddpa->shared_snaps); snap; 2247 snap = list_next(&ddpa->shared_snaps, snap)) { 2248 dsl_dataset_t *ds = snap->ds; 2249 2250 /* 2251 * Property callbacks are registered to a particular 2252 * dsl_dir. Since ours is changing, evict the objset 2253 * so that they will be unregistered from the old dsl_dir. 
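		 * (The objset will be reopened on the next access, at which
		 * point any callbacks are registered against the new
		 * dsl_dir.)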
		 */
		if (ds->ds_objset) {
			dmu_objset_evict(ds->ds_objset);
			ds->ds_objset = NULL;
		}

		/* move snap name entry */
		VERIFY0(dsl_dataset_get_snapname(ds));
		VERIFY0(dsl_dataset_snap_remove(origin_head,
		    ds->ds_snapname, tx, B_TRUE));
		VERIFY0(zap_add(dp->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));
		dsl_fs_ss_count_adjust(hds->ds_dir, 1,
		    DD_FIELD_SNAPSHOT_COUNT, tx);

		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
		ds->ds_phys->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, odd);
		dsl_dir_rele(ds->ds_dir, ds);
		VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		/* move any clone references */
		if (ds->ds_phys->ds_next_clones_obj &&
		    spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			zap_cursor_t zc;
			zap_attribute_t za;

			for (zap_cursor_init(&zc, dp->dp_meta_objset,
			    ds->ds_phys->ds_next_clones_obj);
			    zap_cursor_retrieve(&zc, &za) == 0;
			    zap_cursor_advance(&zc)) {
				dsl_dataset_t *cnds;
				uint64_t o;

				if (za.za_first_integer == oldnext_obj) {
					/*
					 * We've already moved the
					 * origin's reference.
					 */
					continue;
				}

				VERIFY0(dsl_dataset_hold_obj(dp,
				    za.za_first_integer, FTAG, &cnds));
				o = cnds->ds_dir->dd_phys->dd_head_dataset_obj;

				VERIFY0(zap_remove_int(dp->dp_meta_objset,
				    odd->dd_phys->dd_clones, o, tx));
				VERIFY0(zap_add_int(dp->dp_meta_objset,
				    dd->dd_phys->dd_clones, o, tx));
				dsl_dataset_rele(cnds, FTAG);
			}
			zap_cursor_fini(&zc);
		}

		ASSERT(!dsl_prop_hascb(ds));
	}

	/*
	 * Change space accounting.
	 * Note, ddpa->*usedsnap and dd_used_breakdown[SNAP] will either
	 * both be valid, or both be 0 (resulting in delta == 0).  This
	 * is true for each of {clone,origin} independently.
	 */

	delta = ddpa->cloneusedsnap -
	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, >=, 0);
	ASSERT3U(ddpa->used, >=, delta);
	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(dd, DD_USED_HEAD,
	    ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx);

	delta = ddpa->originusedsnap -
	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, <=, 0);
	ASSERT3U(ddpa->used, >=, -delta);
	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(odd, DD_USED_HEAD,
	    -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx);

	origin_ds->ds_phys->ds_unique_bytes = ddpa->unique;

	/* log history record */
	spa_history_log_internal_ds(hds, "promote", tx, "");

	dsl_dir_rele(odd, FTAG);
	promote_rele(ddpa, FTAG);
}

/*
 * Make a list of dsl_dataset_t's for the snapshots between first_obj
 * (exclusive) and last_obj (inclusive).  The list will be in reverse
 * order (last_obj will be the list_head()).  If first_obj == 0, do all
 * snapshots back to this dataset's origin.
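 * For example (illustrative): given snapshots a -> b -> c -> d from
 * oldest to newest, snaplist_make(dp, <a's object>, <d's object>, l, tag)
 * builds the list d, c, b from list_head() to list_tail().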
2353 */ 2354 static int 2355 snaplist_make(dsl_pool_t *dp, 2356 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2357 { 2358 uint64_t obj = last_obj; 2359 2360 list_create(l, sizeof (struct promotenode), 2361 offsetof(struct promotenode, link)); 2362 2363 while (obj != first_obj) { 2364 dsl_dataset_t *ds; 2365 struct promotenode *snap; 2366 int err; 2367 2368 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2369 ASSERT(err != ENOENT); 2370 if (err != 0) 2371 return (err); 2372 2373 if (first_obj == 0) 2374 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2375 2376 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2377 snap->ds = ds; 2378 list_insert_tail(l, snap); 2379 obj = ds->ds_phys->ds_prev_snap_obj; 2380 } 2381 2382 return (0); 2383 } 2384 2385 static int 2386 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2387 { 2388 struct promotenode *snap; 2389 2390 *spacep = 0; 2391 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2392 uint64_t used, comp, uncomp; 2393 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2394 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2395 *spacep += used; 2396 } 2397 return (0); 2398 } 2399 2400 static void 2401 snaplist_destroy(list_t *l, void *tag) 2402 { 2403 struct promotenode *snap; 2404 2405 if (l == NULL || !list_link_active(&l->list_head)) 2406 return; 2407 2408 while ((snap = list_tail(l)) != NULL) { 2409 list_remove(l, snap); 2410 dsl_dataset_rele(snap->ds, tag); 2411 kmem_free(snap, sizeof (*snap)); 2412 } 2413 list_destroy(l); 2414 } 2415 2416 static int 2417 promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2418 { 2419 int error; 2420 dsl_dir_t *dd; 2421 struct promotenode *snap; 2422 2423 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2424 &ddpa->ddpa_clone); 2425 if (error != 0) 2426 return (error); 2427 dd = ddpa->ddpa_clone->ds_dir; 2428 2429 if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || 2430 !dsl_dir_is_clone(dd)) { 2431 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2432 return (SET_ERROR(EINVAL)); 2433 } 2434 2435 error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, 2436 &ddpa->shared_snaps, tag); 2437 if (error != 0) 2438 goto out; 2439 2440 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2441 &ddpa->clone_snaps, tag); 2442 if (error != 0) 2443 goto out; 2444 2445 snap = list_head(&ddpa->shared_snaps); 2446 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2447 error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, 2448 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, 2449 &ddpa->origin_snaps, tag); 2450 if (error != 0) 2451 goto out; 2452 2453 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 2454 error = dsl_dataset_hold_obj(dp, 2455 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2456 tag, &ddpa->origin_origin); 2457 if (error != 0) 2458 goto out; 2459 } 2460 out: 2461 if (error != 0) 2462 promote_rele(ddpa, tag); 2463 return (error); 2464 } 2465 2466 static void 2467 promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2468 { 2469 snaplist_destroy(&ddpa->shared_snaps, tag); 2470 snaplist_destroy(&ddpa->clone_snaps, tag); 2471 snaplist_destroy(&ddpa->origin_snaps, tag); 2472 if (ddpa->origin_origin != NULL) 2473 dsl_dataset_rele(ddpa->origin_origin, tag); 2474 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2475 } 2476 2477 /* 2478 * Promote a clone. 2479 * 2480 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2481 * in with the name. (It must be at least MAXNAMELEN bytes long.) 
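 * Example (illustrative):
 *
 *	char conflsnap[MAXNAMELEN];
 *	int error = dsl_dataset_promote("tank/fs/clone", conflsnap);
 *
 * On EEXIST, conflsnap holds the name of the conflicting snapshot.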
 */
int
dsl_dataset_promote(const char *name, char *conflsnap)
{
	dsl_dataset_promote_arg_t ddpa = { 0 };
	uint64_t numsnaps;
	int error;
	objset_t *os;

	/*
	 * We will modify space proportional to the number of
	 * snapshots.  Compute numsnaps.
	 */
	error = dmu_objset_hold(name, FTAG, &os);
	if (error != 0)
		return (error);
	error = zap_count(dmu_objset_pool(os)->dp_meta_objset,
	    dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps);
	dmu_objset_rele(os, FTAG);
	if (error != 0)
		return (error);

	ddpa.ddpa_clonename = name;
	ddpa.err_ds = conflsnap;
	ddpa.cr = CRED();

	return (dsl_sync_task(name, dsl_dataset_promote_check,
	    dsl_dataset_promote_sync, &ddpa,
	    2 + numsnaps, ZFS_SPACE_CHECK_RESERVED));
}

int
dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
    dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
{
	int64_t unused_refres_delta;

	/* they should both be heads */
	if (dsl_dataset_is_snapshot(clone) ||
	    dsl_dataset_is_snapshot(origin_head))
		return (SET_ERROR(EINVAL));

	/* if we are not forcing, the branch point should be just before them */
	if (!force && clone->ds_prev != origin_head->ds_prev)
		return (SET_ERROR(EINVAL));

	/*
	 * The clone's most recent snapshot should belong to origin_head
	 * (unless they are unrelated).
	 */
	if (clone->ds_prev != NULL &&
	    clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap &&
	    origin_head->ds_dir != clone->ds_prev->ds_dir)
		return (SET_ERROR(EINVAL));

	/* the clone should be a child of the origin */
	if (clone->ds_dir->dd_parent != origin_head->ds_dir)
		return (SET_ERROR(EINVAL));

	/* origin_head shouldn't be modified unless 'force' */
	if (!force &&
	    dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev))
		return (SET_ERROR(ETXTBSY));

	/* origin_head should have no long holds (e.g.
is not mounted) */ 2544 if (dsl_dataset_handoff_check(origin_head, owner, tx)) 2545 return (SET_ERROR(EBUSY)); 2546 2547 /* check amount of any unconsumed refreservation */ 2548 unused_refres_delta = 2549 (int64_t)MIN(origin_head->ds_reserved, 2550 origin_head->ds_phys->ds_unique_bytes) - 2551 (int64_t)MIN(origin_head->ds_reserved, 2552 clone->ds_phys->ds_unique_bytes); 2553 2554 if (unused_refres_delta > 0 && 2555 unused_refres_delta > 2556 dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 2557 return (SET_ERROR(ENOSPC)); 2558 2559 /* clone can't be over the head's refquota */ 2560 if (origin_head->ds_quota != 0 && 2561 clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) 2562 return (SET_ERROR(EDQUOT)); 2563 2564 return (0); 2565 } 2566 2567 void 2568 dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 2569 dsl_dataset_t *origin_head, dmu_tx_t *tx) 2570 { 2571 dsl_pool_t *dp = dmu_tx_pool(tx); 2572 int64_t unused_refres_delta; 2573 2574 ASSERT(clone->ds_reserved == 0); 2575 ASSERT(origin_head->ds_quota == 0 || 2576 clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); 2577 ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); 2578 2579 dmu_buf_will_dirty(clone->ds_dbuf, tx); 2580 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2581 2582 if (clone->ds_objset != NULL) { 2583 dmu_objset_evict(clone->ds_objset); 2584 clone->ds_objset = NULL; 2585 } 2586 2587 if (origin_head->ds_objset != NULL) { 2588 dmu_objset_evict(origin_head->ds_objset); 2589 origin_head->ds_objset = NULL; 2590 } 2591 2592 unused_refres_delta = 2593 (int64_t)MIN(origin_head->ds_reserved, 2594 origin_head->ds_phys->ds_unique_bytes) - 2595 (int64_t)MIN(origin_head->ds_reserved, 2596 clone->ds_phys->ds_unique_bytes); 2597 2598 /* 2599 * Reset origin's unique bytes, if it exists. 
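	 * (After the swap the head's deadlist will be the clone's current
	 * one, so the origin's unique space is recomputed here from the
	 * clone's deadlist: the space born after the origin's previous
	 * snapshot.)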
2600 */ 2601 if (clone->ds_prev) { 2602 dsl_dataset_t *origin = clone->ds_prev; 2603 uint64_t comp, uncomp; 2604 2605 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2606 dsl_deadlist_space_range(&clone->ds_deadlist, 2607 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2608 &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); 2609 } 2610 2611 /* swap blkptrs */ 2612 { 2613 blkptr_t tmp; 2614 tmp = origin_head->ds_phys->ds_bp; 2615 origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; 2616 clone->ds_phys->ds_bp = tmp; 2617 } 2618 2619 /* set dd_*_bytes */ 2620 { 2621 int64_t dused, dcomp, duncomp; 2622 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2623 uint64_t odl_used, odl_comp, odl_uncomp; 2624 2625 ASSERT3U(clone->ds_dir->dd_phys-> 2626 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2627 2628 dsl_deadlist_space(&clone->ds_deadlist, 2629 &cdl_used, &cdl_comp, &cdl_uncomp); 2630 dsl_deadlist_space(&origin_head->ds_deadlist, 2631 &odl_used, &odl_comp, &odl_uncomp); 2632 2633 dused = clone->ds_phys->ds_referenced_bytes + cdl_used - 2634 (origin_head->ds_phys->ds_referenced_bytes + odl_used); 2635 dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - 2636 (origin_head->ds_phys->ds_compressed_bytes + odl_comp); 2637 duncomp = clone->ds_phys->ds_uncompressed_bytes + 2638 cdl_uncomp - 2639 (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2640 2641 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 2642 dused, dcomp, duncomp, tx); 2643 dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 2644 -dused, -dcomp, -duncomp, tx); 2645 2646 /* 2647 * The difference in the space used by snapshots is the 2648 * difference in snapshot space due to the head's 2649 * deadlist (since that's the only thing that's 2650 * changing that affects the snapused). 2651 */ 2652 dsl_deadlist_space_range(&clone->ds_deadlist, 2653 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2654 &cdl_used, &cdl_comp, &cdl_uncomp); 2655 dsl_deadlist_space_range(&origin_head->ds_deadlist, 2656 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2657 &odl_used, &odl_comp, &odl_uncomp); 2658 dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 2659 DD_USED_HEAD, DD_USED_SNAP, tx); 2660 } 2661 2662 /* swap ds_*_bytes */ 2663 SWITCH64(origin_head->ds_phys->ds_referenced_bytes, 2664 clone->ds_phys->ds_referenced_bytes); 2665 SWITCH64(origin_head->ds_phys->ds_compressed_bytes, 2666 clone->ds_phys->ds_compressed_bytes); 2667 SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, 2668 clone->ds_phys->ds_uncompressed_bytes); 2669 SWITCH64(origin_head->ds_phys->ds_unique_bytes, 2670 clone->ds_phys->ds_unique_bytes); 2671 2672 /* apply any parent delta for change in unconsumed refreservation */ 2673 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 2674 unused_refres_delta, 0, 0, tx); 2675 2676 /* 2677 * Swap deadlists. 2678 */ 2679 dsl_deadlist_close(&clone->ds_deadlist); 2680 dsl_deadlist_close(&origin_head->ds_deadlist); 2681 SWITCH64(origin_head->ds_phys->ds_deadlist_obj, 2682 clone->ds_phys->ds_deadlist_obj); 2683 dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 2684 clone->ds_phys->ds_deadlist_obj); 2685 dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 2686 origin_head->ds_phys->ds_deadlist_obj); 2687 2688 dsl_scan_ds_clone_swapped(origin_head, clone, tx); 2689 2690 spa_history_log_internal_ds(clone, "clone swap", tx, 2691 "parent=%s", origin_head->ds_dir->dd_myname); 2692 } 2693 2694 /* 2695 * Given a pool name and a dataset object number in that pool, 2696 * return the name of that dataset. 
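 * Example (illustrative):
 *
 *	char buf[MAXNAMELEN];
 *	error = dsl_dsobj_to_dsname("tank", 21, buf);
 *
 * On success, buf holds a full name such as "tank/home@yesterday"; it is
 * assumed to be at least MAXNAMELEN bytes.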
2697 */ 2698 int 2699 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2700 { 2701 dsl_pool_t *dp; 2702 dsl_dataset_t *ds; 2703 int error; 2704 2705 error = dsl_pool_hold(pname, FTAG, &dp); 2706 if (error != 0) 2707 return (error); 2708 2709 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2710 if (error == 0) { 2711 dsl_dataset_name(ds, buf); 2712 dsl_dataset_rele(ds, FTAG); 2713 } 2714 dsl_pool_rele(dp, FTAG); 2715 2716 return (error); 2717 } 2718 2719 int 2720 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2721 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2722 { 2723 int error = 0; 2724 2725 ASSERT3S(asize, >, 0); 2726 2727 /* 2728 * *ref_rsrv is the portion of asize that will come from any 2729 * unconsumed refreservation space. 2730 */ 2731 *ref_rsrv = 0; 2732 2733 mutex_enter(&ds->ds_lock); 2734 /* 2735 * Make a space adjustment for reserved bytes. 2736 */ 2737 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2738 ASSERT3U(*used, >=, 2739 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2740 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2741 *ref_rsrv = 2742 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2743 } 2744 2745 if (!check_quota || ds->ds_quota == 0) { 2746 mutex_exit(&ds->ds_lock); 2747 return (0); 2748 } 2749 /* 2750 * If they are requesting more space, and our current estimate 2751 * is over quota, they get to try again unless the actual 2752 * on-disk is over quota and there are no pending changes (which 2753 * may free up space for us). 2754 */ 2755 if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { 2756 if (inflight > 0 || 2757 ds->ds_phys->ds_referenced_bytes < ds->ds_quota) 2758 error = SET_ERROR(ERESTART); 2759 else 2760 error = SET_ERROR(EDQUOT); 2761 } 2762 mutex_exit(&ds->ds_lock); 2763 2764 return (error); 2765 } 2766 2767 typedef struct dsl_dataset_set_qr_arg { 2768 const char *ddsqra_name; 2769 zprop_source_t ddsqra_source; 2770 uint64_t ddsqra_value; 2771 } dsl_dataset_set_qr_arg_t; 2772 2773 2774 /* ARGSUSED */ 2775 static int 2776 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2777 { 2778 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2779 dsl_pool_t *dp = dmu_tx_pool(tx); 2780 dsl_dataset_t *ds; 2781 int error; 2782 uint64_t newval; 2783 2784 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2785 return (SET_ERROR(ENOTSUP)); 2786 2787 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2788 if (error != 0) 2789 return (error); 2790 2791 if (dsl_dataset_is_snapshot(ds)) { 2792 dsl_dataset_rele(ds, FTAG); 2793 return (SET_ERROR(EINVAL)); 2794 } 2795 2796 error = dsl_prop_predict(ds->ds_dir, 2797 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2798 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2799 if (error != 0) { 2800 dsl_dataset_rele(ds, FTAG); 2801 return (error); 2802 } 2803 2804 if (newval == 0) { 2805 dsl_dataset_rele(ds, FTAG); 2806 return (0); 2807 } 2808 2809 if (newval < ds->ds_phys->ds_referenced_bytes || 2810 newval < ds->ds_reserved) { 2811 dsl_dataset_rele(ds, FTAG); 2812 return (SET_ERROR(ENOSPC)); 2813 } 2814 2815 dsl_dataset_rele(ds, FTAG); 2816 return (0); 2817 } 2818 2819 static void 2820 dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 2821 { 2822 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2823 dsl_pool_t *dp = dmu_tx_pool(tx); 2824 dsl_dataset_t *ds; 2825 uint64_t newval; 2826 2827 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2828 2829 dsl_prop_set_sync_impl(ds, 2830 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2831 
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 2832 &ddsqra->ddsqra_value, tx); 2833 2834 VERIFY0(dsl_prop_get_int_ds(ds, 2835 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 2836 2837 if (ds->ds_quota != newval) { 2838 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2839 ds->ds_quota = newval; 2840 } 2841 dsl_dataset_rele(ds, FTAG); 2842 } 2843 2844 int 2845 dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 2846 uint64_t refquota) 2847 { 2848 dsl_dataset_set_qr_arg_t ddsqra; 2849 2850 ddsqra.ddsqra_name = dsname; 2851 ddsqra.ddsqra_source = source; 2852 ddsqra.ddsqra_value = refquota; 2853 2854 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 2855 dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE)); 2856 } 2857 2858 static int 2859 dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 2860 { 2861 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2862 dsl_pool_t *dp = dmu_tx_pool(tx); 2863 dsl_dataset_t *ds; 2864 int error; 2865 uint64_t newval, unique; 2866 2867 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 2868 return (SET_ERROR(ENOTSUP)); 2869 2870 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2871 if (error != 0) 2872 return (error); 2873 2874 if (dsl_dataset_is_snapshot(ds)) { 2875 dsl_dataset_rele(ds, FTAG); 2876 return (SET_ERROR(EINVAL)); 2877 } 2878 2879 error = dsl_prop_predict(ds->ds_dir, 2880 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2881 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2882 if (error != 0) { 2883 dsl_dataset_rele(ds, FTAG); 2884 return (error); 2885 } 2886 2887 /* 2888 * If we are doing the preliminary check in open context, the 2889 * space estimates may be inaccurate. 2890 */ 2891 if (!dmu_tx_is_syncing(tx)) { 2892 dsl_dataset_rele(ds, FTAG); 2893 return (0); 2894 } 2895 2896 mutex_enter(&ds->ds_lock); 2897 if (!DS_UNIQUE_IS_ACCURATE(ds)) 2898 dsl_dataset_recalc_head_uniq(ds); 2899 unique = ds->ds_phys->ds_unique_bytes; 2900 mutex_exit(&ds->ds_lock); 2901 2902 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 2903 uint64_t delta = MAX(unique, newval) - 2904 MAX(unique, ds->ds_reserved); 2905 2906 if (delta > 2907 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 2908 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 2909 dsl_dataset_rele(ds, FTAG); 2910 return (SET_ERROR(ENOSPC)); 2911 } 2912 } 2913 2914 dsl_dataset_rele(ds, FTAG); 2915 return (0); 2916 } 2917 2918 void 2919 dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 2920 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 2921 { 2922 uint64_t newval; 2923 uint64_t unique; 2924 int64_t delta; 2925 2926 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2927 source, sizeof (value), 1, &value, tx); 2928 2929 VERIFY0(dsl_prop_get_int_ds(ds, 2930 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 2931 2932 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2933 mutex_enter(&ds->ds_dir->dd_lock); 2934 mutex_enter(&ds->ds_lock); 2935 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 2936 unique = ds->ds_phys->ds_unique_bytes; 2937 delta = MAX(0, (int64_t)(newval - unique)) - 2938 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2939 ds->ds_reserved = newval; 2940 mutex_exit(&ds->ds_lock); 2941 2942 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 2943 mutex_exit(&ds->ds_dir->dd_lock); 2944 } 2945 2946 static void 2947 dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 2948 { 2949 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2950 dsl_pool_t *dp = dmu_tx_pool(tx); 2951 dsl_dataset_t *ds; 2952 2953 
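	/*
	 * The hold is VERIFY'd: the check function validated this dataset,
	 * and it cannot have gone away while the sync task runs, so a
	 * failure here would indicate a bug.
	 */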
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2954 dsl_dataset_set_refreservation_sync_impl(ds, 2955 ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); 2956 dsl_dataset_rele(ds, FTAG); 2957 } 2958 2959 int 2960 dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 2961 uint64_t refreservation) 2962 { 2963 dsl_dataset_set_qr_arg_t ddsqra; 2964 2965 ddsqra.ddsqra_name = dsname; 2966 ddsqra.ddsqra_source = source; 2967 ddsqra.ddsqra_value = refreservation; 2968 2969 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 2970 dsl_dataset_set_refreservation_sync, &ddsqra, 2971 0, ZFS_SPACE_CHECK_NONE)); 2972 } 2973 2974 /* 2975 * Return (in *usedp) the amount of space written in new that is not 2976 * present in oldsnap. New may be a snapshot or the head. Old must be 2977 * a snapshot before new, in new's filesystem (or its origin). If not then 2978 * fail and return EINVAL. 2979 * 2980 * The written space is calculated by considering two components: First, we 2981 * ignore any freed space, and calculate the written as new's used space 2982 * minus old's used space. Next, we add in the amount of space that was freed 2983 * between the two snapshots, thus reducing new's used space relative to old's. 2984 * Specifically, this is the space that was born before old->ds_creation_txg, 2985 * and freed before new (ie. on new's deadlist or a previous deadlist). 2986 * 2987 * space freed [---------------------] 2988 * snapshots ---O-------O--------O-------O------ 2989 * oldsnap new 2990 */ 2991 int 2992 dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 2993 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 2994 { 2995 int err = 0; 2996 uint64_t snapobj; 2997 dsl_pool_t *dp = new->ds_dir->dd_pool; 2998 2999 ASSERT(dsl_pool_config_held(dp)); 3000 3001 *usedp = 0; 3002 *usedp += new->ds_phys->ds_referenced_bytes; 3003 *usedp -= oldsnap->ds_phys->ds_referenced_bytes; 3004 3005 *compp = 0; 3006 *compp += new->ds_phys->ds_compressed_bytes; 3007 *compp -= oldsnap->ds_phys->ds_compressed_bytes; 3008 3009 *uncompp = 0; 3010 *uncompp += new->ds_phys->ds_uncompressed_bytes; 3011 *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 3012 3013 snapobj = new->ds_object; 3014 while (snapobj != oldsnap->ds_object) { 3015 dsl_dataset_t *snap; 3016 uint64_t used, comp, uncomp; 3017 3018 if (snapobj == new->ds_object) { 3019 snap = new; 3020 } else { 3021 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 3022 if (err != 0) 3023 break; 3024 } 3025 3026 if (snap->ds_phys->ds_prev_snap_txg == 3027 oldsnap->ds_phys->ds_creation_txg) { 3028 /* 3029 * The blocks in the deadlist can not be born after 3030 * ds_prev_snap_txg, so get the whole deadlist space, 3031 * which is more efficient (especially for old-format 3032 * deadlists). Unfortunately the deadlist code 3033 * doesn't have enough information to make this 3034 * optimization itself. 3035 */ 3036 dsl_deadlist_space(&snap->ds_deadlist, 3037 &used, &comp, &uncomp); 3038 } else { 3039 dsl_deadlist_space_range(&snap->ds_deadlist, 3040 0, oldsnap->ds_phys->ds_creation_txg, 3041 &used, &comp, &uncomp); 3042 } 3043 *usedp += used; 3044 *compp += comp; 3045 *uncompp += uncomp; 3046 3047 /* 3048 * If we get to the beginning of the chain of snapshots 3049 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 3050 * was not a snapshot of/before new. 
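		 * (For example, if oldsnap belongs to an unrelated
		 * filesystem, the walk runs off the start of new's snapshot
		 * chain and we return EINVAL below.)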
3051 */ 3052 snapobj = snap->ds_phys->ds_prev_snap_obj; 3053 if (snap != new) 3054 dsl_dataset_rele(snap, FTAG); 3055 if (snapobj == 0) { 3056 err = SET_ERROR(EINVAL); 3057 break; 3058 } 3059 3060 } 3061 return (err); 3062 } 3063 3064 /* 3065 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 3066 * lastsnap, and all snapshots in between are deleted. 3067 * 3068 * blocks that would be freed [---------------------------] 3069 * snapshots ---O-------O--------O-------O--------O 3070 * firstsnap lastsnap 3071 * 3072 * This is the set of blocks that were born after the snap before firstsnap, 3073 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 3074 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 3075 * We calculate this by iterating over the relevant deadlists (from the snap 3076 * after lastsnap, backward to the snap after firstsnap), summing up the 3077 * space on the deadlist that was born after the snap before firstsnap. 3078 */ 3079 int 3080 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 3081 dsl_dataset_t *lastsnap, 3082 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3083 { 3084 int err = 0; 3085 uint64_t snapobj; 3086 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 3087 3088 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 3089 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 3090 3091 /* 3092 * Check that the snapshots are in the same dsl_dir, and firstsnap 3093 * is before lastsnap. 3094 */ 3095 if (firstsnap->ds_dir != lastsnap->ds_dir || 3096 firstsnap->ds_phys->ds_creation_txg > 3097 lastsnap->ds_phys->ds_creation_txg) 3098 return (SET_ERROR(EINVAL)); 3099 3100 *usedp = *compp = *uncompp = 0; 3101 3102 snapobj = lastsnap->ds_phys->ds_next_snap_obj; 3103 while (snapobj != firstsnap->ds_object) { 3104 dsl_dataset_t *ds; 3105 uint64_t used, comp, uncomp; 3106 3107 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3108 if (err != 0) 3109 break; 3110 3111 dsl_deadlist_space_range(&ds->ds_deadlist, 3112 firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 3113 &used, &comp, &uncomp); 3114 *usedp += used; 3115 *compp += comp; 3116 *uncompp += uncomp; 3117 3118 snapobj = ds->ds_phys->ds_prev_snap_obj; 3119 ASSERT3U(snapobj, !=, 0); 3120 dsl_dataset_rele(ds, FTAG); 3121 } 3122 return (err); 3123 } 3124 3125 /* 3126 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3127 * For example, they could both be snapshots of the same filesystem, and 3128 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3129 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3130 * filesystem. Or 'earlier' could be the origin's origin. 3131 * 3132 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. 
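 * For example (illustrative): if fs@a predates fs@b in the same
 * filesystem, dsl_dataset_is_before(fs@b, fs@a, 0) returns B_TRUE; for a
 * clone whose origin is fs@a, dsl_dataset_is_before(clone, fs@a, 0) also
 * returns B_TRUE.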
3133 */ 3134 boolean_t 3135 dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, 3136 uint64_t earlier_txg) 3137 { 3138 dsl_pool_t *dp = later->ds_dir->dd_pool; 3139 int error; 3140 boolean_t ret; 3141 3142 ASSERT(dsl_pool_config_held(dp)); 3143 ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); 3144 3145 if (earlier_txg == 0) 3146 earlier_txg = earlier->ds_phys->ds_creation_txg; 3147 3148 if (dsl_dataset_is_snapshot(later) && 3149 earlier_txg >= later->ds_phys->ds_creation_txg) 3150 return (B_FALSE); 3151 3152 if (later->ds_dir == earlier->ds_dir) 3153 return (B_TRUE); 3154 if (!dsl_dir_is_clone(later->ds_dir)) 3155 return (B_FALSE); 3156 3157 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) 3158 return (B_TRUE); 3159 dsl_dataset_t *origin; 3160 error = dsl_dataset_hold_obj(dp, 3161 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); 3162 if (error != 0) 3163 return (B_FALSE); 3164 ret = dsl_dataset_is_before(origin, earlier, earlier_txg); 3165 dsl_dataset_rele(origin, FTAG); 3166 return (ret); 3167 } 3168 3169 3170 void 3171 dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) 3172 { 3173 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3174 dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); 3175 } 3176