/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 RackTop Systems.
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_onexit.h>
#include <sys/zvol.h>
#include <sys/dsl_scan.h>
#include <sys/dsl_deadlist.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_bookmark.h>

#define	SWITCH64(x, y) \
{ \
	uint64_t __tmp = (x); \
	(x) = (y); \
	(y) = __tmp; \
}

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * Figure out how much of this delta should be propagated to the dsl_dir
 * layer.  If there's a refreservation, that space has already been
 * partially accounted for in our ancestors.
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}
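
/*
 * Worked example (hypothetical sizes): with ds_reserved == 100 and
 * ds_unique_bytes == 80, a delta of +30 gives old_bytes = MAX(80, 100)
 * = 100 and new_bytes = MAX(110, 100) = 110, so only 10 of the 30 new
 * bytes are charged to the parent; the first 20 were already covered
 * by the refreservation.
 */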

void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "ds=%p", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
	if (ds == NULL) {
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    used, compressed, uncompressed);
		return;
	}

	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_referenced_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
}
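
/*
 * Account for and free (or deadlist) a block that is being removed from
 * this dataset.  A block born after the most recent snapshot is freed
 * right away; one still shared with a snapshot is instead placed on the
 * dataset's deadlist (or, from interrupt context, on the pending
 * deadlist).  Returns the on-disk size of bp, or 0 if bp is a hole.
 */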
int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    boolean_t async)
{
	int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(bp->blk_birth <= tx->tx_txg);

	if (ds == NULL) {
		dsl_free(tx->tx_pool, tx->tx_txg, bp);
		dsl_pool_mos_diduse_space(tx->tx_pool,
		    -used, -compressed, -uncompressed);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int64_t delta;

		dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
		dsl_free(tx->tx_pool, tx->tx_txg, bp);

		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		if (async) {
			/*
			 * We are here as part of zio's write done callback,
			 * which means we're a zio interrupt thread.  We can't
			 * call dsl_deadlist_insert() now because it may block
			 * waiting for I/O.  Instead, put bp on the deferred
			 * queue and let dsl_pool_sync() finish the job.
			 */
			bplist_append(&ds->ds_pending_deadlist, bp);
		} else {
			dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
		}
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
	ds->ds_phys->ds_referenced_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
    uint64_t blk_birth)
{
	if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
	    (bp != NULL && BP_IS_HOLE(bp)))
		return (B_FALSE);

	ddt_prefetch(dsl_dataset_get_spa(ds), bp);

	return (B_TRUE);
}
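
/*
 * Eviction callback for the dataset's bonus buffer user: tears down the
 * in-core dsl_dataset_t (releasing ds_prev, the deadlist, and the
 * dsl_dir hold) once the backing dmu buffer is evicted.
 */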
/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;

	ASSERT(ds->ds_owner == NULL);

	unique_remove(ds->ds_fsid_guid);

	if (ds->ds_objset != NULL)
		dmu_objset_evict(ds->ds_objset);

	if (ds->ds_prev) {
		dsl_dataset_rele(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	bplist_destroy(&ds->ds_pending_deadlist);
	if (ds->ds_phys->ds_deadlist_obj != 0)
		dsl_deadlist_close(&ds->ds_deadlist);
	if (ds->ds_dir)
		dsl_dir_rele(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_sendstream_lock);
	refcount_destroy(&ds->ds_longholds);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err != 0)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}

int
dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
    boolean_t adj_cnt)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	dsl_dir_snap_cmtime_update(ds->ds_dir);

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_remove(mos, snapobj, name, tx);

	if (err == 0 && adj_cnt)
		dsl_fs_ss_count_adjust(ds->ds_dir, -1,
		    DD_FIELD_SNAPSHOT_COUNT, tx);

	return (err);
}
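
/*
 * Hold the dataset with the given object number.  If no in-core
 * dsl_dataset_t exists yet, construct one and attach it to the bonus
 * buffer as its user; if a concurrent thread wins that race (see
 * dmu_buf_set_user_ie() below), discard our copy and use the winner's.
 */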
int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;
	dmu_object_info_t doi;

	ASSERT(dsl_pool_config_held(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err != 0)
		return (err);

	/* Make sure dsobj has the correct object type. */
	dmu_object_info_from_db(dbuf, &doi);
	if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) {
		dmu_buf_rele(dbuf, tag);
		return (SET_ERROR(EINVAL));
	}

	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner = NULL;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
		refcount_create(&ds->ds_longholds);

		bplist_create(&ds->ds_pending_deadlist);
		dsl_deadlist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);

		list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
		    offsetof(dmu_sendarg_t, dsa_link));

		if (err == 0) {
			err = dsl_dir_hold_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err != 0) {
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_sendstream_lock);
			refcount_destroy(&ds->ds_longholds);
			bplist_destroy(&ds->ds_pending_deadlist);
			dsl_deadlist_close(&ds->ds_deadlist);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj != 0) {
				err = dsl_dataset_hold_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}
			if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
				int zaperr = zap_lookup(mos, ds->ds_object,
				    DS_FIELD_BOOKMARK_NAMES,
				    sizeof (ds->ds_bookmarks), 1,
				    &ds->ds_bookmarks);
				if (zaperr != ENOENT)
					VERIFY0(zaperr);
			}
		} else {
			if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
				err = dsl_dataset_get_snapname(ds);
			if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
				err = zap_count(
				    ds->ds_dir->dd_pool->dp_meta_objset,
				    ds->ds_phys->ds_userrefs_obj,
				    &ds->ds_userrefs);
			}
		}

		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
			err = dsl_prop_get_int_ds(ds,
			    zfs_prop_to_name(ZFS_PROP_REFRESERVATION),
			    &ds->ds_reserved);
			if (err == 0) {
				err = dsl_prop_get_int_ds(ds,
				    zfs_prop_to_name(ZFS_PROP_REFQUOTA),
				    &ds->ds_quota);
			}
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err != 0 || (winner = dmu_buf_set_user_ie(dbuf, ds,
		    &ds->ds_phys, dsl_dataset_evict)) != NULL) {
			bplist_destroy(&ds->ds_pending_deadlist);
			dsl_deadlist_close(&ds->ds_deadlist);
			if (ds->ds_prev)
				dsl_dataset_rele(ds->ds_prev, ds);
			dsl_dir_rele(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_sendstream_lock);
			refcount_destroy(&ds->ds_longholds);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err != 0) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	*dsp = ds;
	return (0);
}
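
/*
 * Hold the dataset with the given name.  A name containing an '@'
 * refers to a snapshot; in that case the hold is transferred from the
 * head dataset to the snapshot itself.
 */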
int
dsl_dataset_hold(dsl_pool_t *dp, const char *name,
    void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	const char *snapname;
	uint64_t obj;
	int err = 0;

	err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
	if (err != 0)
		return (err);

	ASSERT(dsl_pool_config_held(dp));
	obj = dd->dd_phys->dd_head_dataset_obj;
	if (obj != 0)
		err = dsl_dataset_hold_obj(dp, obj, tag, dsp);
	else
		err = SET_ERROR(ENOENT);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *ds;

		if (*snapname++ != '@') {
			dsl_dataset_rele(*dsp, tag);
			dsl_dir_rele(dd, FTAG);
			return (SET_ERROR(ENOENT));
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
		if (err == 0)
			err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
		dsl_dataset_rele(*dsp, tag);

		if (err == 0) {
			mutex_enter(&ds->ds_lock);
			if (ds->ds_snapname[0] == 0)
				(void) strlcpy(ds->ds_snapname, snapname,
				    sizeof (ds->ds_snapname));
			mutex_exit(&ds->ds_lock);
			*dsp = ds;
		}
	}

	dsl_dir_rele(dd, FTAG);
	return (err);
}

int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj,
    void *tag, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
	if (err != 0)
		return (err);
	if (!dsl_dataset_tryown(*dsp, tag)) {
		dsl_dataset_rele(*dsp, tag);
		*dsp = NULL;
		return (SET_ERROR(EBUSY));
	}
	return (0);
}

int
dsl_dataset_own(dsl_pool_t *dp, const char *name,
    void *tag, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold(dp, name, tag, dsp);
	if (err != 0)
		return (err);
	if (!dsl_dataset_tryown(*dsp, tag)) {
		dsl_dataset_rele(*dsp, tag);
		return (SET_ERROR(EBUSY));
	}
	return (0);
}

/*
 * See the comment above dsl_pool_hold() for details.  In summary, a long
 * hold is used to prevent destruction of a dataset while the pool hold
 * is dropped, allowing other concurrent operations (e.g. spa_sync()).
 *
 * The dataset and pool must be held when this function is called.  After it
 * is called, the pool hold may be released while the dataset is still held
 * and accessed.
 */
void
dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag)
{
	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
	(void) refcount_add(&ds->ds_longholds, tag);
}

void
dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag)
{
	(void) refcount_remove(&ds->ds_longholds, tag);
}

/* Return B_TRUE if there are any long holds on this dataset. */
boolean_t
dsl_dataset_long_held(dsl_dataset_t *ds)
{
	return (!refcount_is_zero(&ds->ds_longholds));
}
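
/*
 * Typical use of the long-hold interfaces above (sketch, error handling
 * omitted), e.g. as done by the send code:
 *
 *	dsl_pool_hold(name, FTAG, &dp);
 *	dsl_dataset_hold(dp, name, FTAG, &ds);
 *	dsl_dataset_long_hold(ds, FTAG);
 *	dsl_pool_rele(dp, FTAG);
 *	...	(the dataset cannot be destroyed here)
 *	dsl_dataset_long_rele(ds, FTAG);
 *	dsl_dataset_rele(ds, FTAG);
 */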

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY0(dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
{
	ASSERT(ds->ds_owner == tag && ds->ds_dbuf != NULL);

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	mutex_exit(&ds->ds_lock);
	dsl_dataset_long_rele(ds, tag);
	if (ds->ds_dbuf != NULL)
		dsl_dataset_rele(ds, tag);
	else
		dsl_dataset_evict(NULL, ds);
}
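
/*
 * Attempt to take exclusive ownership of the dataset; fails if it
 * already has an owner or is DS_FLAG_INCONSISTENT (e.g. in the middle
 * of being received).  On success the owner also gets a long hold.
 */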
boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
{
	boolean_t gotit = FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
		ds->ds_owner = tag;
		dsl_dataset_long_hold(ds, tag);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}

uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;

	if (origin == NULL) {
		dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
	} else {
		dsl_dataset_t *ohds; /* head of the origin snapshot */

		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_referenced_bytes =
		    origin->ds_phys->ds_referenced_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;

		/*
		 * Inherit flags that describe the dataset's contents
		 * (INCONSISTENT) or properties (Case Insensitive).
		 */
		dsphys->ds_flags |= origin->ds_phys->ds_flags &
		    (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET);

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		VERIFY0(dsl_dataset_hold_obj(dp,
		    origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
		dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
		    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
		dsl_dataset_rele(ohds, FTAG);

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY0(zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj, dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			if (origin->ds_dir->dd_phys->dd_clones == 0) {
				dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
				origin->ds_dir->dd_phys->dd_clones =
				    zap_create(mos,
				    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY0(zap_add_int(mos,
			    origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
		}
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}

static void
dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	objset_t *os;

	VERIFY0(dmu_objset_from_ds(ds, &os));
	bzero(&os->os_zil_header, sizeof (os->os_zil_header));
	dsl_dataset_dirty(ds, tx);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(lastname[0] != '@');

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin,
	    flags & ~DS_CREATE_FLAG_NODIRTY, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	/*
	 * Since we're creating a new node we know it's a leaf, so we can
	 * initialize the counts if the limit feature is active.
	 */
	if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
		uint64_t cnt = 0;
		objset_t *os = dd->dd_pool->dp_meta_objset;

		dsl_dir_zapify(dd, tx);
		VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
		    sizeof (cnt), 1, &cnt, tx));
		VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
		    sizeof (cnt), 1, &cnt, tx));
	}

	dsl_dir_rele(dd, FTAG);

	/*
	 * If we are creating a clone, make sure we zero out any stale
	 * data from the origin snapshot's zil header.
	 */
	if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) {
		dsl_dataset_t *ds;

		VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
		dsl_dataset_zero_zil(ds, tx);
		dsl_dataset_rele(ds, FTAG);
	}

	return (dsobj);
}

/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space in the most recent snapshot that is still being used in this
 * file system from the space currently in use.  To figure out the space
 * in the most recent snapshot still in use, we need to take the total
 * space used in the snapshot and subtract out the space that has been
 * freed up since the snapshot was taken.
 */
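/*
 * For example (hypothetical sizes): if the head references 10G, the
 * most recent snapshot references 8G, and 3G of the snapshot's blocks
 * have since been freed from the head (and so sit on the head's
 * deadlist), the snapshot still shares 8G - 3G = 5G with the head, so
 * the head's unique space is 10G - 5G = 5G.
 */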
void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(!dsl_dataset_is_snapshot(ds));

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
	else
		mrs_used = 0;

	dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

void
dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
    dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	ASSERT(ds->ds_phys->ds_num_children >= 2);
	err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
	/*
	 * The err should not be ENOENT, but a bug in a previous version
	 * of the code could cause upgrade_clones_cb() to not set
	 * ds_next_snap_obj when it should, leading to a missing entry.
	 * If we knew that the pool was created after
	 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
	 * ENOENT.  However, at least we can check that we don't have
	 * too many entries in the next_clones_obj even after failing to
	 * remove this one.
	 */
	if (err != ENOENT)
		VERIFY0(err);
	ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
	    &count));
	ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
}


blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_objset != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

boolean_t
dsl_dataset_is_dirty(dsl_dataset_t *ds)
{
	for (int t = 0; t < TXG_SIZE; t++) {
		if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
		    ds, t))
			return (B_TRUE);
	}
	return (B_FALSE);
}

static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
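	/*
	 * E.g. (hypothetical sizes): with refreservation = 10G and 4G of
	 * unique data, a snapshot would take ownership of those 4G, and
	 * the reservation must then be re-covered in full, so 4G of
	 * space beyond the reservation must be available.
	 */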
	ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
	asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
		return (SET_ERROR(ENOSPC));

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

typedef struct dsl_dataset_snapshot_arg {
	nvlist_t *ddsa_snaps;
	nvlist_t *ddsa_props;
	nvlist_t *ddsa_errors;
	cred_t *ddsa_cr;
} dsl_dataset_snapshot_arg_t;

int
dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr)
{
	int error;
	uint64_t value;

	ds->ds_trysnap_txg = tx->tx_txg;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (SET_ERROR(EAGAIN));

	/*
	 * Check for conflicting snapshot name.
	 */
	error = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (error == 0)
		return (SET_ERROR(EEXIST));
	if (error != ENOENT)
		return (error);

	/*
	 * We don't allow taking snapshots of inconsistent datasets, such as
	 * those into which we are currently receiving.  However, if we are
	 * creating this snapshot as part of a receive, this check will be
	 * executed atomically with respect to the completion of the receive
	 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this
	 * case we ignore this, knowing it will be fixed up for us shortly in
	 * dmu_recv_end_sync().
	 */
	if (!recv && DS_IS_INCONSISTENT(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Skip the check for temporary snapshots or if we have already checked
	 * the counts in dsl_dataset_snapshot_check.  This means we really only
	 * check the count here when we're receiving a stream.
	 */
	if (cnt != 0 && cr != NULL) {
		error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
		    ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr);
		if (error != 0)
			return (error);
	}

	error = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (error != 0)
		return (error);

	return (0);
}

static int
dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	/*
	 * Pre-compute how many total new snapshots will be created for each
	 * level in the tree and below.  This is needed for validating the
	 * snapshot limit when either taking a recursive snapshot or when
	 * taking multiple snapshots.
	 *
	 * The problem is that the counts are not actually adjusted when
	 * we are checking, only when we finally sync.  For a single snapshot,
	 * this is easy, the count will increase by 1 at each node up the tree,
	 * but it's more complicated for the recursive/multiple snapshot case.
	 *
	 * The dsl_fs_ss_limit_check function does recursively check the count
	 * at each level up the tree, but since it is validating each snapshot
	 * independently, we need to be sure that we are validating the
	 * complete count for the entire set of snapshots.  We do this by
	 * rolling up the counts for each component of the name into an nvlist
	 * and then checking each of those cases with the aggregated count.
	 *
	 * This approach properly handles not only the recursive snapshot
	 * case (where we get all of those on the ddsa_snaps list) but also
	 * the sibling case (e.g. snapshot a/b and a/c so that we will also
	 * validate the limit on 'a' using a count of 2).
	 *
	 * We validate the snapshot names in the third loop and only report
	 * name errors once.
	 */
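	/*
	 * E.g. snapshotting pool/a/b@x and pool/a/c@x rolls up to
	 * cnt_track = { "pool/a/b" = 1, "pool/a/c" = 1, "pool/a" = 2,
	 * "pool" = 2 }, so each ancestor's snapshot limit is checked
	 * against the total number of new snapshots beneath it.
	 */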
	if (dmu_tx_is_syncing(tx)) {
		nvlist_t *cnt_track = NULL;
		cnt_track = fnvlist_alloc();

		/* Rollup aggregated counts into the cnt_track list */
		for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
		    pair != NULL;
		    pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
			char *pdelim;
			uint64_t val;
			char nm[MAXPATHLEN];

			(void) strlcpy(nm, nvpair_name(pair), sizeof (nm));
			pdelim = strchr(nm, '@');
			if (pdelim == NULL)
				continue;
			*pdelim = '\0';

			do {
				if (nvlist_lookup_uint64(cnt_track, nm,
				    &val) == 0) {
					/* update existing entry */
					fnvlist_add_uint64(cnt_track, nm,
					    val + 1);
				} else {
					/* add to list */
					fnvlist_add_uint64(cnt_track, nm, 1);
				}

				pdelim = strrchr(nm, '/');
				if (pdelim != NULL)
					*pdelim = '\0';
			} while (pdelim != NULL);
		}

		/* Check aggregated counts at each level */
		for (pair = nvlist_next_nvpair(cnt_track, NULL);
		    pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) {
			int error = 0;
			char *name;
			uint64_t cnt = 0;
			dsl_dataset_t *ds;

			name = nvpair_name(pair);
			cnt = fnvpair_value_uint64(pair);
			ASSERT(cnt > 0);

			error = dsl_dataset_hold(dp, name, FTAG, &ds);
			if (error == 0) {
				error = dsl_fs_ss_limit_check(ds->ds_dir, cnt,
				    ZFS_PROP_SNAPSHOT_LIMIT, NULL,
				    ddsa->ddsa_cr);
				dsl_dataset_rele(ds, FTAG);
			}

			if (error != 0) {
				if (ddsa->ddsa_errors != NULL)
					fnvlist_add_int32(ddsa->ddsa_errors,
					    name, error);
				rv = error;
				/* only report one error for this check */
				break;
			}
		}
		nvlist_free(cnt_track);
	}

	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		int error = 0;
		dsl_dataset_t *ds;
		char *name, *atp;
		char dsname[MAXNAMELEN];

		name = nvpair_name(pair);
		if (strlen(name) >= MAXNAMELEN)
			error = SET_ERROR(ENAMETOOLONG);
		if (error == 0) {
			atp = strchr(name, '@');
			if (atp == NULL)
				error = SET_ERROR(EINVAL);
			if (error == 0)
				(void) strlcpy(dsname, name, atp - name + 1);
		}
		if (error == 0)
			error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
		if (error == 0) {
			/* passing 0/NULL skips dsl_fs_ss_limit_check */
			error = dsl_dataset_snapshot_check_impl(ds,
			    atp + 1, tx, B_FALSE, 0, NULL);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error != 0) {
			if (ddsa->ddsa_errors != NULL) {
				fnvlist_add_int32(ddsa->ddsa_errors,
				    name, error);
			}
			rv = error;
		}
	}

	return (rv);
}
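
/*
 * Create snapshot "snapname" of ds in syncing context: the new snapshot
 * object inherits the head's current block pointer and takes over its
 * deadlist; the head continues with a clone of that deadlist, and its
 * unique space resets to zero.
 */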
void
dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
    dmu_tx_t *tx)
{
	static zil_header_t zero_zil;

	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	objset_t *os;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * If we are on an old pool, the zil must not be active, in which
	 * case it will be zeroed.  Usually zil_suspend() accomplishes this.
	 */
	ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP ||
	    dmu_objset_from_ds(ds, &os) != 0 ||
	    bcmp(&os->os_phys->os_zil_header, &zero_zil,
	    sizeof (zero_zil)) == 0);

	dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    dsphys->ds_next_snap_obj, tx);
			VERIFY0(zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t delta;
		ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
		delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    delta, 0, 0, tx);
	}

	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist,
	    UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx);
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_add_key(&ds->ds_deadlist,
	    ds->ds_phys->ds_prev_snap_txg, tx);

	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = crtxg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	VERIFY0(zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx));

	if (ds->ds_prev)
		dsl_dataset_rele(ds->ds_prev, ds);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_scan_ds_snapshotted(ds, tx);

	dsl_dir_snap_cmtime_update(ds->ds_dir);

	spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
}

static void
dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_arg_t *ddsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;

	for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
		dsl_dataset_t *ds;
		char *name, *atp;
		char dsname[MAXNAMELEN];

		name = nvpair_name(pair);
		atp = strchr(name, '@');
		(void) strlcpy(dsname, name, atp - name + 1);
		VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds));

		dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx);
		if (ddsa->ddsa_props != NULL) {
			dsl_props_set_sync_impl(ds->ds_prev,
			    ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx);
		}
		dsl_dataset_rele(ds, FTAG);
	}
}

/*
 * The snapshots must all be in the same pool.
 * All-or-nothing: if there are any failures, nothing will be modified.
 */
int
dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
{
	dsl_dataset_snapshot_arg_t ddsa;
	nvpair_t *pair;
	boolean_t needsuspend;
	int error;
	spa_t *spa;
	char *firstname;
	nvlist_t *suspended = NULL;

	pair = nvlist_next_nvpair(snaps, NULL);
	if (pair == NULL)
		return (0);
	firstname = nvpair_name(pair);

	error = spa_open(firstname, &spa, FTAG);
	if (error != 0)
		return (error);
	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
	spa_close(spa, FTAG);

	if (needsuspend) {
		suspended = fnvlist_alloc();
		for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(snaps, pair)) {
			char fsname[MAXNAMELEN];
			char *snapname = nvpair_name(pair);
			char *atp;
			void *cookie;

			atp = strchr(snapname, '@');
			if (atp == NULL) {
				error = SET_ERROR(EINVAL);
				break;
			}
			(void) strlcpy(fsname, snapname, atp - snapname + 1);

			error = zil_suspend(fsname, &cookie);
			if (error != 0)
				break;
			fnvlist_add_uint64(suspended, fsname,
			    (uintptr_t)cookie);
		}
	}

	ddsa.ddsa_snaps = snaps;
	ddsa.ddsa_props = props;
	ddsa.ddsa_errors = errors;
	ddsa.ddsa_cr = CRED();

	if (error == 0) {
		error = dsl_sync_task(firstname, dsl_dataset_snapshot_check,
		    dsl_dataset_snapshot_sync, &ddsa,
		    fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL);
	}

	if (suspended != NULL) {
		for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL;
		    pair = nvlist_next_nvpair(suspended, pair)) {
			zil_resume((void *)(uintptr_t)
			    fnvpair_value_uint64(pair));
		}
		fnvlist_free(suspended);
	}

	return (error);
}
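
/*
 * Example caller (sketch, error handling omitted): snapshot two
 * filesystems in one atomic sync task.  The nvlist keys are the full
 * snapshot names; the values are ignored.
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "pool/fs1@today");
 *	fnvlist_add_boolean(snaps, "pool/fs2@today");
 *	error = dsl_dataset_snapshot(snaps, NULL, NULL);
 *	fnvlist_free(snaps);
 */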

typedef struct dsl_dataset_snapshot_tmp_arg {
	const char *ddsta_fsname;
	const char *ddsta_snapname;
	minor_t ddsta_cleanup_minor;
	const char *ddsta_htag;
} dsl_dataset_snapshot_tmp_arg_t;

static int
dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds);
	if (error != 0)
		return (error);

	/* NULL cred means no limit check for tmp snapshot */
	error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname,
	    tx, B_FALSE, 0, NULL);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(ENOTSUP));
	}
	error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag,
	    B_TRUE, tx);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

static void
dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));

	dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx);
	dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag,
	    ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx);
	dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx);

	dsl_dataset_rele(ds, FTAG);
}
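
/*
 * Create a temporary snapshot of fsname, immediately marked for
 * deferred destroy and kept alive only by a user hold ("htag") tied to
 * the given cleanup minor, so the snapshot goes away when the hold is
 * released (or the minor is closed).
 */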
int
dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
    minor_t cleanup_minor, const char *htag)
{
	dsl_dataset_snapshot_tmp_arg_t ddsta;
	int error;
	spa_t *spa;
	boolean_t needsuspend;
	void *cookie;

	ddsta.ddsta_fsname = fsname;
	ddsta.ddsta_snapname = snapname;
	ddsta.ddsta_cleanup_minor = cleanup_minor;
	ddsta.ddsta_htag = htag;

	error = spa_open(fsname, &spa, FTAG);
	if (error != 0)
		return (error);
	needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP);
	spa_close(spa, FTAG);

	if (needsuspend) {
		error = zil_suspend(fsname, &cookie);
		if (error != 0)
			return (error);
	}

	error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check,
	    dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED);

	if (needsuspend)
		zil_resume(cookie);
	return (error);
}


void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_objset != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

	dmu_objset_sync(ds->ds_objset, zio, tx);
}

static void
get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
{
	uint64_t count = 0;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;
	nvlist_t *propval = fnvlist_alloc();
	nvlist_t *val = fnvlist_alloc();

	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));

	/*
	 * There may be missing entries in ds_next_clones_obj
	 * due to a bug in a previous version of the code.
	 * Only trust it if it has the right number of entries.
	 */
	if (ds->ds_phys->ds_next_clones_obj != 0) {
		VERIFY0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
		    &count));
	}
	if (count != ds->ds_phys->ds_num_children - 1)
		goto fail;
	for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
	    zap_cursor_retrieve(&zc, &za) == 0;
	    zap_cursor_advance(&zc)) {
		dsl_dataset_t *clone;
		char buf[ZFS_MAXNAMELEN];
		VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
		    za.za_first_integer, FTAG, &clone));
		dsl_dir_name(clone->ds_dir, buf);
		fnvlist_add_boolean(val, buf);
		dsl_dataset_rele(clone, FTAG);
	}
	zap_cursor_fini(&zc);
	fnvlist_add_nvlist(propval, ZPROP_VALUE, val);
	fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval);
fail:
	nvlist_free(val);
	nvlist_free(propval);
}

void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	uint64_t refd, avail, uobjs, aobjs, ratio;

	ASSERT(dsl_pool_config_held(dp));

	ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
	    (ds->ds_phys->ds_uncompressed_bytes * 100 /
	    ds->ds_phys->ds_compressed_bytes);
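
	/*
	 * E.g. (hypothetical sizes): 300G of logically referenced data
	 * compressed down to 100G yields ratio == 300, reported as a
	 * 3.00x refratio.
	 */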

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED,
	    ds->ds_phys->ds_uncompressed_bytes);

	if (dsl_dataset_is_snapshot(ds)) {
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
		    ds->ds_phys->ds_unique_bytes);
		get_clones_stat(ds, nv);
	} else {
		if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
			char buf[MAXNAMELEN];
			dsl_dataset_name(ds->ds_prev, buf);
			dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
		}

		dsl_dir_stats(ds->ds_dir, nv);
	}

	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    ds->ds_phys->ds_creation_time);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    ds->ds_phys->ds_creation_txg);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
	    ds->ds_quota);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
	    ds->ds_reserved);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
	    ds->ds_phys->ds_guid);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
	    ds->ds_phys->ds_unique_bytes);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
	    ds->ds_object);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
	    ds->ds_userrefs);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
	    DS_IS_DEFER_DESTROY(ds) ? 1 : 0);

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		uint64_t written, comp, uncomp;
		dsl_pool_t *dp = ds->ds_dir->dd_pool;
		dsl_dataset_t *prev;

		int err = dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
		if (err == 0) {
			err = dsl_dataset_space_written(prev, ds, &written,
			    &comp, &uncomp);
			dsl_dataset_rele(prev, FTAG);
			if (err == 0) {
				dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
				    written);
			}
		}
	}
}

void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	ASSERT(dsl_pool_config_held(dp));

	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	stat->dds_guid = ds->ds_phys->ds_guid;
	stat->dds_origin[0] = '\0';
	if (dsl_dataset_is_snapshot(ds)) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	} else {
		stat->dds_is_snapshot = B_FALSE;
		stat->dds_num_clones = 0;

		if (dsl_dir_is_clone(ds->ds_dir)) {
			dsl_dataset_t *ods;

			VERIFY0(dsl_dataset_hold_obj(dp,
			    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
			dsl_dataset_name(ods, stat->dds_origin);
			dsl_dataset_rele(ods, FTAG);
		}
	}
}

uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_referenced_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = BP_GET_FILL(&ds->ds_phys->ds_bp);
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}
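
/*
 * E.g. (hypothetical sizes): with refquota = 100G and 70G referenced,
 * available space is capped at 30G no matter how much the pool could
 * supply; with refreservation = 10G but only 4G unique, the unused 6G
 * of the reservation is added back to the available space.
 */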

boolean_t
dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	ASSERT(dsl_pool_config_held(dp));
	if (snap == NULL)
		return (B_FALSE);
	if (ds->ds_phys->ds_bp.blk_birth >
	    snap->ds_phys->ds_creation_txg) {
		objset_t *os, *os_snap;
		/*
		 * It may be that only the ZIL differs, because it was
		 * reset in the head.  Don't count that as being
		 * modified.
		 */
		if (dmu_objset_from_ds(ds, &os) != 0)
			return (B_TRUE);
		if (dmu_objset_from_ds(snap, &os_snap) != 0)
			return (B_TRUE);
		return (bcmp(&os->os_phys->os_meta_dnode,
		    &os_snap->os_phys->os_meta_dnode,
		    sizeof (os->os_phys->os_meta_dnode)) != 0);
	}
	return (B_FALSE);
}

typedef struct dsl_dataset_rename_snapshot_arg {
	const char *ddrsa_fsname;
	const char *ddrsa_oldsnapname;
	const char *ddrsa_newsnapname;
	boolean_t ddrsa_recursive;
	dmu_tx_t *ddrsa_tx;
} dsl_dataset_rename_snapshot_arg_t;

/* ARGSUSED */
static int
dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
    dsl_dataset_t *hds, void *arg)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	int error;
	uint64_t val;

	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
	if (error != 0) {
		/* ignore nonexistent snapshots */
		return (error == ENOENT ? 0 : error);
	}

	/* new name should not exist */
	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val);
	if (error == 0)
		error = SET_ERROR(EEXIST);
	else if (error == ENOENT)
		error = 0;

	/* dataset name + 1 for the "@" + the new snapshot name must fit */
	if (dsl_dir_namelen(hds->ds_dir) + 1 +
	    strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN)
		error = SET_ERROR(ENAMETOOLONG);

	return (error);
}

static int
dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;
	int error;

	error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds);
	if (error != 0)
		return (error);

	if (ddrsa->ddrsa_recursive) {
		error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
		    dsl_dataset_rename_snapshot_check_impl, ddrsa,
		    DS_FIND_CHILDREN);
	} else {
		error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa);
	}
	dsl_dataset_rele(hds, FTAG);
	return (error);
}

static int
dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
    dsl_dataset_t *hds, void *arg)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_dataset_t *ds;
	uint64_t val;
	dmu_tx_t *tx = ddrsa->ddrsa_tx;
	int error;

	error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val);
	ASSERT(error == 0 || error == ENOENT);
	if (error == ENOENT) {
		/* ignore nonexistent snapshots */
		return (0);
	}

	VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds));

	/* log before we change the name */
	spa_history_log_internal_ds(ds, "rename", tx,
	    "-> @%s", ddrsa->ddrsa_newsnapname);

	VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx,
	    B_FALSE));
	mutex_enter(&ds->ds_lock);
	(void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname);
	mutex_exit(&ds->ds_lock);
	VERIFY0(zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx));

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

static void
dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *hds;

	VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));
	ddrsa->ddrsa_tx = tx;
	if (ddrsa->ddrsa_recursive) {
		VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object,
		    dsl_dataset_rename_snapshot_sync_impl, ddrsa,
		    DS_FIND_CHILDREN));
	} else {
		VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa));
	}
	dsl_dataset_rele(hds, FTAG);
}
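
/*
 * Rename snapshot "oldsnapname" of fsname (and, if recursive, of every
 * descendant filesystem that has a snapshot by that name) to
 * "newsnapname".
 */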
We're not allowed to change anything here 1824 * so we don't permanently release the long hold or regular hold here. We want 1825 * to do this only when syncing to avoid the dataset unexpectedly going away 1826 * when we release the long hold. 1827 */ 1828 static int 1829 dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 1830 { 1831 boolean_t held; 1832 1833 if (!dmu_tx_is_syncing(tx)) 1834 return (0); 1835 1836 if (owner != NULL) { 1837 VERIFY3P(ds->ds_owner, ==, owner); 1838 dsl_dataset_long_rele(ds, owner); 1839 } 1840 1841 held = dsl_dataset_long_held(ds); 1842 1843 if (owner != NULL) 1844 dsl_dataset_long_hold(ds, owner); 1845 1846 if (held) 1847 return (SET_ERROR(EBUSY)); 1848 1849 return (0); 1850 } 1851 1852 typedef struct dsl_dataset_rollback_arg { 1853 const char *ddra_fsname; 1854 void *ddra_owner; 1855 nvlist_t *ddra_result; 1856 } dsl_dataset_rollback_arg_t; 1857 1858 static int 1859 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 1860 { 1861 dsl_dataset_rollback_arg_t *ddra = arg; 1862 dsl_pool_t *dp = dmu_tx_pool(tx); 1863 dsl_dataset_t *ds; 1864 int64_t unused_refres_delta; 1865 int error; 1866 1867 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 1868 if (error != 0) 1869 return (error); 1870 1871 /* must not be a snapshot */ 1872 if (dsl_dataset_is_snapshot(ds)) { 1873 dsl_dataset_rele(ds, FTAG); 1874 return (SET_ERROR(EINVAL)); 1875 } 1876 1877 /* must have a most recent snapshot */ 1878 if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) { 1879 dsl_dataset_rele(ds, FTAG); 1880 return (SET_ERROR(EINVAL)); 1881 } 1882 1883 /* must not have any bookmarks after the most recent snapshot */ 1884 nvlist_t *proprequest = fnvlist_alloc(); 1885 fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); 1886 nvlist_t *bookmarks = fnvlist_alloc(); 1887 error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks); 1888 fnvlist_free(proprequest); 1889 if (error != 0) 1890 return (error); 1891 for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL); 1892 pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) { 1893 nvlist_t *valuenv = 1894 fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), 1895 zfs_prop_to_name(ZFS_PROP_CREATETXG)); 1896 uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); 1897 if (createtxg > ds->ds_phys->ds_prev_snap_txg) { 1898 fnvlist_free(bookmarks); 1899 dsl_dataset_rele(ds, FTAG); 1900 return (SET_ERROR(EEXIST)); 1901 } 1902 } 1903 fnvlist_free(bookmarks); 1904 1905 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 1906 if (error != 0) { 1907 dsl_dataset_rele(ds, FTAG); 1908 return (error); 1909 } 1910 1911 /* 1912 * Check if the snap we are rolling back to uses more than 1913 * the refquota. 1914 */ 1915 if (ds->ds_quota != 0 && 1916 ds->ds_prev->ds_phys->ds_referenced_bytes > ds->ds_quota) { 1917 dsl_dataset_rele(ds, FTAG); 1918 return (SET_ERROR(EDQUOT)); 1919 } 1920 1921 /* 1922 * When we do the clone swap, we will temporarily use more space 1923 * due to the refreservation (the head will no longer have any 1924 * unique space, so the entire amount of the refreservation will need 1925 * to be free). We will immediately destroy the clone, freeing 1926 * this space, but the freeing happens over many txg's. 
1927 */ 1928 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 1929 ds->ds_phys->ds_unique_bytes); 1930 1931 if (unused_refres_delta > 0 && 1932 unused_refres_delta > 1933 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 1934 dsl_dataset_rele(ds, FTAG); 1935 return (SET_ERROR(ENOSPC)); 1936 } 1937 1938 dsl_dataset_rele(ds, FTAG); 1939 return (0); 1940 } 1941 1942 static void 1943 dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 1944 { 1945 dsl_dataset_rollback_arg_t *ddra = arg; 1946 dsl_pool_t *dp = dmu_tx_pool(tx); 1947 dsl_dataset_t *ds, *clone; 1948 uint64_t cloneobj; 1949 char namebuf[ZFS_MAXNAMELEN]; 1950 1951 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 1952 1953 dsl_dataset_name(ds->ds_prev, namebuf); 1954 fnvlist_add_string(ddra->ddra_result, "target", namebuf); 1955 1956 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 1957 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 1958 1959 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 1960 1961 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 1962 dsl_dataset_zero_zil(ds, tx); 1963 1964 dsl_destroy_head_sync_impl(clone, tx); 1965 1966 dsl_dataset_rele(clone, FTAG); 1967 dsl_dataset_rele(ds, FTAG); 1968 } 1969 1970 /* 1971 * Rolls back the given filesystem or volume to the most recent snapshot. 1972 * The name of the most recent snapshot will be returned under key "target" 1973 * in the result nvlist. 1974 * 1975 * If owner != NULL: 1976 * - The existing dataset MUST be owned by the specified owner at entry 1977 * - Upon return, dataset will still be held by the same owner, whether we 1978 * succeed or not. 1979 * 1980 * This mode is required any time the existing filesystem is mounted. See 1981 * notes above zfs_suspend_fs() for further details. 1982 */ 1983 int 1984 dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result) 1985 { 1986 dsl_dataset_rollback_arg_t ddra; 1987 1988 ddra.ddra_fsname = fsname; 1989 ddra.ddra_owner = owner; 1990 ddra.ddra_result = result; 1991 1992 return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 1993 dsl_dataset_rollback_sync, &ddra, 1994 1, ZFS_SPACE_CHECK_RESERVED)); 1995 } 1996 1997 struct promotenode { 1998 list_node_t link; 1999 dsl_dataset_t *ds; 2000 }; 2001 2002 typedef struct dsl_dataset_promote_arg { 2003 const char *ddpa_clonename; 2004 dsl_dataset_t *ddpa_clone; 2005 list_t shared_snaps, origin_snaps, clone_snaps; 2006 dsl_dataset_t *origin_origin; /* origin of the origin */ 2007 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2008 char *err_ds; 2009 cred_t *cr; 2010 } dsl_dataset_promote_arg_t; 2011 2012 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2013 static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 2014 void *tag); 2015 static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 2016 2017 static int 2018 dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 2019 { 2020 dsl_dataset_promote_arg_t *ddpa = arg; 2021 dsl_pool_t *dp = dmu_tx_pool(tx); 2022 dsl_dataset_t *hds; 2023 struct promotenode *snap; 2024 dsl_dataset_t *origin_ds; 2025 int err; 2026 uint64_t unused; 2027 uint64_t ss_mv_cnt; 2028 2029 err = promote_hold(ddpa, dp, FTAG); 2030 if (err != 0) 2031 return (err); 2032 2033 hds = ddpa->ddpa_clone; 2034 2035 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) { 2036 promote_rele(ddpa, FTAG); 2037 return (SET_ERROR(EXDEV)); 2038 } 2039 2040 /* 2041 * Compute and check the amount of space to transfer. 
Since this is 2042 * so expensive, don't do the preliminary check. 2043 */ 2044 if (!dmu_tx_is_syncing(tx)) { 2045 promote_rele(ddpa, FTAG); 2046 return (0); 2047 } 2048 2049 snap = list_head(&ddpa->shared_snaps); 2050 origin_ds = snap->ds; 2051 2052 /* compute origin's new unique space */ 2053 snap = list_tail(&ddpa->clone_snaps); 2054 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2055 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2056 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2057 &ddpa->unique, &unused, &unused); 2058 2059 /* 2060 * Walk the snapshots that we are moving 2061 * 2062 * Compute space to transfer. Consider the incremental changes 2063 * to used by each snapshot: 2064 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2065 * So each snapshot gave birth to: 2066 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2067 * So a sequence would look like: 2068 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2069 * Which simplifies to: 2070 * uN + kN + kN-1 + ... + k1 + k0 2071 * Note however, if we stop before we reach the ORIGIN we get: 2072 * uN + kN + kN-1 + ... + kM - uM-1 2073 */ 2074 ss_mv_cnt = 0; 2075 ddpa->used = origin_ds->ds_phys->ds_referenced_bytes; 2076 ddpa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2077 ddpa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2078 for (snap = list_head(&ddpa->shared_snaps); snap; 2079 snap = list_next(&ddpa->shared_snaps, snap)) { 2080 uint64_t val, dlused, dlcomp, dluncomp; 2081 dsl_dataset_t *ds = snap->ds; 2082 2083 ss_mv_cnt++; 2084 2085 /* 2086 * If there are long holds, we won't be able to evict 2087 * the objset. 2088 */ 2089 if (dsl_dataset_long_held(ds)) { 2090 err = SET_ERROR(EBUSY); 2091 goto out; 2092 } 2093 2094 /* Check that the snapshot name does not conflict */ 2095 VERIFY0(dsl_dataset_get_snapname(ds)); 2096 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2097 if (err == 0) { 2098 (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 2099 err = SET_ERROR(EEXIST); 2100 goto out; 2101 } 2102 if (err != ENOENT) 2103 goto out; 2104 2105 /* The very first snapshot does not have a deadlist */ 2106 if (ds->ds_phys->ds_prev_snap_obj == 0) 2107 continue; 2108 2109 dsl_deadlist_space(&ds->ds_deadlist, 2110 &dlused, &dlcomp, &dluncomp); 2111 ddpa->used += dlused; 2112 ddpa->comp += dlcomp; 2113 ddpa->uncomp += dluncomp; 2114 } 2115 2116 /* 2117 * If we are a clone of a clone then we never reached ORIGIN, 2118 * so we need to subtract out the clone origin's used space. 2119 */ 2120 if (ddpa->origin_origin) { 2121 ddpa->used -= ddpa->origin_origin->ds_phys->ds_referenced_bytes; 2122 ddpa->comp -= ddpa->origin_origin->ds_phys->ds_compressed_bytes; 2123 ddpa->uncomp -= 2124 ddpa->origin_origin->ds_phys->ds_uncompressed_bytes; 2125 } 2126 2127 /* Check that there is enough space and limit headroom here */ 2128 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2129 0, ss_mv_cnt, ddpa->used, ddpa->cr); 2130 if (err != 0) 2131 goto out; 2132 2133 /* 2134 * Compute the amounts of space that will be used by snapshots 2135 * after the promotion (for both origin and clone). For each, 2136 * it is the amount of space that will be on all of their 2137 * deadlists (that was not born before their new origin). 
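 *
 * snaplist_space() implements this by summing, for each snapshot on the
 * list, the deadlist space born after the given mintxg (the new origin's
 * txg), via dsl_deadlist_space_range().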
2138 */ 2139 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2140 uint64_t space; 2141 2142 /* 2143 * Note, typically this will not be a clone of a clone, 2144 * so dd_origin_txg will be < TXG_INITIAL, so 2145 * these snaplist_space() -> dsl_deadlist_space_range() 2146 * calls will be fast because they do not have to 2147 * iterate over all bps. 2148 */ 2149 snap = list_head(&ddpa->origin_snaps); 2150 err = snaplist_space(&ddpa->shared_snaps, 2151 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2152 if (err != 0) 2153 goto out; 2154 2155 err = snaplist_space(&ddpa->clone_snaps, 2156 snap->ds->ds_dir->dd_origin_txg, &space); 2157 if (err != 0) 2158 goto out; 2159 ddpa->cloneusedsnap += space; 2160 } 2161 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2162 err = snaplist_space(&ddpa->origin_snaps, 2163 origin_ds->ds_phys->ds_creation_txg, &ddpa->originusedsnap); 2164 if (err != 0) 2165 goto out; 2166 } 2167 2168 out: 2169 promote_rele(ddpa, FTAG); 2170 return (err); 2171 } 2172 2173 static void 2174 dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2175 { 2176 dsl_dataset_promote_arg_t *ddpa = arg; 2177 dsl_pool_t *dp = dmu_tx_pool(tx); 2178 dsl_dataset_t *hds; 2179 struct promotenode *snap; 2180 dsl_dataset_t *origin_ds; 2181 dsl_dataset_t *origin_head; 2182 dsl_dir_t *dd; 2183 dsl_dir_t *odd = NULL; 2184 uint64_t oldnext_obj; 2185 int64_t delta; 2186 2187 VERIFY0(promote_hold(ddpa, dp, FTAG)); 2188 hds = ddpa->ddpa_clone; 2189 2190 ASSERT0(hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE); 2191 2192 snap = list_head(&ddpa->shared_snaps); 2193 origin_ds = snap->ds; 2194 dd = hds->ds_dir; 2195 2196 snap = list_head(&ddpa->origin_snaps); 2197 origin_head = snap->ds; 2198 2199 /* 2200 * We need to explicitly open odd, since origin_ds's dd will be 2201 * changing. 
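 * (origin_ds is one of the snapshots being moved, so its ds_dir is
 * re-pointed at dd in the loop below; an independent hold keeps the old
 * directory around until we are done with it.)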
2202 */ 2203 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2204 NULL, FTAG, &odd)); 2205 2206 /* change origin's next snap */ 2207 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2208 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2209 snap = list_tail(&ddpa->clone_snaps); 2210 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2211 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2212 2213 /* change the origin's next clone */ 2214 if (origin_ds->ds_phys->ds_next_clones_obj) { 2215 dsl_dataset_remove_from_next_clones(origin_ds, 2216 snap->ds->ds_object, tx); 2217 VERIFY0(zap_add_int(dp->dp_meta_objset, 2218 origin_ds->ds_phys->ds_next_clones_obj, 2219 oldnext_obj, tx)); 2220 } 2221 2222 /* change origin */ 2223 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2224 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2225 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2226 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2227 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2228 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2229 origin_head->ds_dir->dd_origin_txg = 2230 origin_ds->ds_phys->ds_creation_txg; 2231 2232 /* change dd_clone entries */ 2233 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2234 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2235 odd->dd_phys->dd_clones, hds->ds_object, tx)); 2236 VERIFY0(zap_add_int(dp->dp_meta_objset, 2237 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2238 hds->ds_object, tx)); 2239 2240 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2241 ddpa->origin_origin->ds_dir->dd_phys->dd_clones, 2242 origin_head->ds_object, tx)); 2243 if (dd->dd_phys->dd_clones == 0) { 2244 dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset, 2245 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 2246 } 2247 VERIFY0(zap_add_int(dp->dp_meta_objset, 2248 dd->dd_phys->dd_clones, origin_head->ds_object, tx)); 2249 } 2250 2251 /* move snapshots to this dir */ 2252 for (snap = list_head(&ddpa->shared_snaps); snap; 2253 snap = list_next(&ddpa->shared_snaps, snap)) { 2254 dsl_dataset_t *ds = snap->ds; 2255 2256 /* 2257 * Property callbacks are registered to a particular 2258 * dsl_dir. Since ours is changing, evict the objset 2259 * so that they will be unregistered from the old dsl_dir. 
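 * (dmu_objset_evict() tears the objset down along with its registered
 * property callbacks; the objset is re-opened on next use, and the
 * callbacks are then registered against the new dsl_dir.)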
2260 */ 2261 if (ds->ds_objset) { 2262 dmu_objset_evict(ds->ds_objset); 2263 ds->ds_objset = NULL; 2264 } 2265 2266 /* move snap name entry */ 2267 VERIFY0(dsl_dataset_get_snapname(ds)); 2268 VERIFY0(dsl_dataset_snap_remove(origin_head, 2269 ds->ds_snapname, tx, B_TRUE)); 2270 VERIFY0(zap_add(dp->dp_meta_objset, 2271 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2272 8, 1, &ds->ds_object, tx)); 2273 dsl_fs_ss_count_adjust(hds->ds_dir, 1, 2274 DD_FIELD_SNAPSHOT_COUNT, tx); 2275 2276 /* change containing dsl_dir */ 2277 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2278 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2279 ds->ds_phys->ds_dir_obj = dd->dd_object; 2280 ASSERT3P(ds->ds_dir, ==, odd); 2281 dsl_dir_rele(ds->ds_dir, ds); 2282 VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2283 NULL, ds, &ds->ds_dir)); 2284 2285 /* move any clone references */ 2286 if (ds->ds_phys->ds_next_clones_obj && 2287 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2288 zap_cursor_t zc; 2289 zap_attribute_t za; 2290 2291 for (zap_cursor_init(&zc, dp->dp_meta_objset, 2292 ds->ds_phys->ds_next_clones_obj); 2293 zap_cursor_retrieve(&zc, &za) == 0; 2294 zap_cursor_advance(&zc)) { 2295 dsl_dataset_t *cnds; 2296 uint64_t o; 2297 2298 if (za.za_first_integer == oldnext_obj) { 2299 /* 2300 * We've already moved the 2301 * origin's reference. 2302 */ 2303 continue; 2304 } 2305 2306 VERIFY0(dsl_dataset_hold_obj(dp, 2307 za.za_first_integer, FTAG, &cnds)); 2308 o = cnds->ds_dir->dd_phys->dd_head_dataset_obj; 2309 2310 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2311 odd->dd_phys->dd_clones, o, tx)); 2312 VERIFY0(zap_add_int(dp->dp_meta_objset, 2313 dd->dd_phys->dd_clones, o, tx)); 2314 dsl_dataset_rele(cnds, FTAG); 2315 } 2316 zap_cursor_fini(&zc); 2317 } 2318 2319 ASSERT(!dsl_prop_hascb(ds)); 2320 } 2321 2322 /* 2323 * Change space accounting. 2324 * Note, ddpa->*usedsnap and dd_used_breakdown[SNAP] will either 2325 * both be valid, or both be 0 (resulting in delta == 0). This 2326 * is true for each of {clone,origin} independently. 2327 */ 2328 2329 delta = ddpa->cloneusedsnap - 2330 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2331 ASSERT3S(delta, >=, 0); 2332 ASSERT3U(ddpa->used, >=, delta); 2333 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2334 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2335 ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2336 2337 delta = ddpa->originusedsnap - 2338 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2339 ASSERT3S(delta, <=, 0); 2340 ASSERT3U(ddpa->used, >=, -delta); 2341 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2342 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2343 -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2344 2345 origin_ds->ds_phys->ds_unique_bytes = ddpa->unique; 2346 2347 /* log history record */ 2348 spa_history_log_internal_ds(hds, "promote", tx, ""); 2349 2350 dsl_dir_rele(odd, FTAG); 2351 promote_rele(ddpa, FTAG); 2352 } 2353 2354 /* 2355 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2356 * (exclusive) and last_obj (inclusive). The list will be in reverse 2357 * order (last_obj will be the list_head()). If first_obj == 0, do all 2358 * snapshots back to this dataset's origin.
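 *
 * For example (hypothetical chain): given snapshots A -> B -> C -> D
 * (A oldest), first_obj = A and last_obj = D produce the list D, C, B,
 * with D at the list head; A itself is excluded.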
2359 */ 2360 static int 2361 snaplist_make(dsl_pool_t *dp, 2362 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2363 { 2364 uint64_t obj = last_obj; 2365 2366 list_create(l, sizeof (struct promotenode), 2367 offsetof(struct promotenode, link)); 2368 2369 while (obj != first_obj) { 2370 dsl_dataset_t *ds; 2371 struct promotenode *snap; 2372 int err; 2373 2374 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2375 ASSERT(err != ENOENT); 2376 if (err != 0) 2377 return (err); 2378 2379 if (first_obj == 0) 2380 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2381 2382 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2383 snap->ds = ds; 2384 list_insert_tail(l, snap); 2385 obj = ds->ds_phys->ds_prev_snap_obj; 2386 } 2387 2388 return (0); 2389 } 2390 2391 static int 2392 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2393 { 2394 struct promotenode *snap; 2395 2396 *spacep = 0; 2397 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2398 uint64_t used, comp, uncomp; 2399 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2400 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2401 *spacep += used; 2402 } 2403 return (0); 2404 } 2405 2406 static void 2407 snaplist_destroy(list_t *l, void *tag) 2408 { 2409 struct promotenode *snap; 2410 2411 if (l == NULL || !list_link_active(&l->list_head)) 2412 return; 2413 2414 while ((snap = list_tail(l)) != NULL) { 2415 list_remove(l, snap); 2416 dsl_dataset_rele(snap->ds, tag); 2417 kmem_free(snap, sizeof (*snap)); 2418 } 2419 list_destroy(l); 2420 } 2421 2422 static int 2423 promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2424 { 2425 int error; 2426 dsl_dir_t *dd; 2427 struct promotenode *snap; 2428 2429 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2430 &ddpa->ddpa_clone); 2431 if (error != 0) 2432 return (error); 2433 dd = ddpa->ddpa_clone->ds_dir; 2434 2435 if (dsl_dataset_is_snapshot(ddpa->ddpa_clone) || 2436 !dsl_dir_is_clone(dd)) { 2437 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2438 return (SET_ERROR(EINVAL)); 2439 } 2440 2441 error = snaplist_make(dp, 0, dd->dd_phys->dd_origin_obj, 2442 &ddpa->shared_snaps, tag); 2443 if (error != 0) 2444 goto out; 2445 2446 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2447 &ddpa->clone_snaps, tag); 2448 if (error != 0) 2449 goto out; 2450 2451 snap = list_head(&ddpa->shared_snaps); 2452 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2453 error = snaplist_make(dp, dd->dd_phys->dd_origin_obj, 2454 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, 2455 &ddpa->origin_snaps, tag); 2456 if (error != 0) 2457 goto out; 2458 2459 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) { 2460 error = dsl_dataset_hold_obj(dp, 2461 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2462 tag, &ddpa->origin_origin); 2463 if (error != 0) 2464 goto out; 2465 } 2466 out: 2467 if (error != 0) 2468 promote_rele(ddpa, tag); 2469 return (error); 2470 } 2471 2472 static void 2473 promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2474 { 2475 snaplist_destroy(&ddpa->shared_snaps, tag); 2476 snaplist_destroy(&ddpa->clone_snaps, tag); 2477 snaplist_destroy(&ddpa->origin_snaps, tag); 2478 if (ddpa->origin_origin != NULL) 2479 dsl_dataset_rele(ddpa->origin_origin, tag); 2480 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2481 } 2482 2483 /* 2484 * Promote a clone. 2485 * 2486 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2487 * in with the name. (It must be at least MAXNAMELEN bytes long.) 
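 *
 * Illustrative call (hypothetical dataset name; a sketch, not taken from
 * a real caller):
 *
 *	char conflsnap[MAXNAMELEN];
 *	int err = dsl_dataset_promote("tank/clone", conflsnap);
 *
 * If err == EEXIST, conflsnap holds the conflicting snapshot name.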
2488 */ 2489 int 2490 dsl_dataset_promote(const char *name, char *conflsnap) 2491 { 2492 dsl_dataset_promote_arg_t ddpa = { 0 }; 2493 uint64_t numsnaps; 2494 int error; 2495 objset_t *os; 2496 2497 /* 2498 * We will modify space proportional to the number of 2499 * snapshots. Compute numsnaps. 2500 */ 2501 error = dmu_objset_hold(name, FTAG, &os); 2502 if (error != 0) 2503 return (error); 2504 error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2505 dmu_objset_ds(os)->ds_phys->ds_snapnames_zapobj, &numsnaps); 2506 dmu_objset_rele(os, FTAG); 2507 if (error != 0) 2508 return (error); 2509 2510 ddpa.ddpa_clonename = name; 2511 ddpa.err_ds = conflsnap; 2512 ddpa.cr = CRED(); 2513 2514 return (dsl_sync_task(name, dsl_dataset_promote_check, 2515 dsl_dataset_promote_sync, &ddpa, 2516 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED)); 2517 } 2518 2519 int 2520 dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2521 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2522 { 2523 int64_t unused_refres_delta; 2524 2525 /* they should both be heads */ 2526 if (dsl_dataset_is_snapshot(clone) || 2527 dsl_dataset_is_snapshot(origin_head)) 2528 return (SET_ERROR(EINVAL)); 2529 2530 /* if we are not forcing, the branch point should be just before them */ 2531 if (!force && clone->ds_prev != origin_head->ds_prev) 2532 return (SET_ERROR(EINVAL)); 2533 2534 /* clone should be the clone (unless they are unrelated) */ 2535 if (clone->ds_prev != NULL && 2536 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 2537 origin_head->ds_dir != clone->ds_prev->ds_dir) 2538 return (SET_ERROR(EINVAL)); 2539 2540 /* the clone should be a child of the origin */ 2541 if (clone->ds_dir->dd_parent != origin_head->ds_dir) 2542 return (SET_ERROR(EINVAL)); 2543 2544 /* origin_head shouldn't be modified unless 'force' */ 2545 if (!force && 2546 dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) 2547 return (SET_ERROR(ETXTBSY)); 2548 2549 /* origin_head should have no long holds (e.g. 
is not mounted) */ 2550 if (dsl_dataset_handoff_check(origin_head, owner, tx)) 2551 return (SET_ERROR(EBUSY)); 2552 2553 /* check amount of any unconsumed refreservation */ 2554 unused_refres_delta = 2555 (int64_t)MIN(origin_head->ds_reserved, 2556 origin_head->ds_phys->ds_unique_bytes) - 2557 (int64_t)MIN(origin_head->ds_reserved, 2558 clone->ds_phys->ds_unique_bytes); 2559 2560 if (unused_refres_delta > 0 && 2561 unused_refres_delta > 2562 dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 2563 return (SET_ERROR(ENOSPC)); 2564 2565 /* clone can't be over the head's refquota */ 2566 if (origin_head->ds_quota != 0 && 2567 clone->ds_phys->ds_referenced_bytes > origin_head->ds_quota) 2568 return (SET_ERROR(EDQUOT)); 2569 2570 return (0); 2571 } 2572 2573 void 2574 dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 2575 dsl_dataset_t *origin_head, dmu_tx_t *tx) 2576 { 2577 dsl_pool_t *dp = dmu_tx_pool(tx); 2578 int64_t unused_refres_delta; 2579 2580 ASSERT(clone->ds_reserved == 0); 2581 ASSERT(origin_head->ds_quota == 0 || 2582 clone->ds_phys->ds_unique_bytes <= origin_head->ds_quota); 2583 ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); 2584 2585 dmu_buf_will_dirty(clone->ds_dbuf, tx); 2586 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2587 2588 if (clone->ds_objset != NULL) { 2589 dmu_objset_evict(clone->ds_objset); 2590 clone->ds_objset = NULL; 2591 } 2592 2593 if (origin_head->ds_objset != NULL) { 2594 dmu_objset_evict(origin_head->ds_objset); 2595 origin_head->ds_objset = NULL; 2596 } 2597 2598 unused_refres_delta = 2599 (int64_t)MIN(origin_head->ds_reserved, 2600 origin_head->ds_phys->ds_unique_bytes) - 2601 (int64_t)MIN(origin_head->ds_reserved, 2602 clone->ds_phys->ds_unique_bytes); 2603 2604 /* 2605 * Reset origin's unique bytes, if it exists. 
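 * (After the swap, the origin's unique space is exactly the space on the
 * clone's deadlist that was born after the origin's previous snapshot,
 * which is what dsl_deadlist_space_range() recomputes here.)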
2606 */ 2607 if (clone->ds_prev) { 2608 dsl_dataset_t *origin = clone->ds_prev; 2609 uint64_t comp, uncomp; 2610 2611 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2612 dsl_deadlist_space_range(&clone->ds_deadlist, 2613 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2614 &origin->ds_phys->ds_unique_bytes, &comp, &uncomp); 2615 } 2616 2617 /* swap blkptrs */ 2618 { 2619 blkptr_t tmp; 2620 tmp = origin_head->ds_phys->ds_bp; 2621 origin_head->ds_phys->ds_bp = clone->ds_phys->ds_bp; 2622 clone->ds_phys->ds_bp = tmp; 2623 } 2624 2625 /* set dd_*_bytes */ 2626 { 2627 int64_t dused, dcomp, duncomp; 2628 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2629 uint64_t odl_used, odl_comp, odl_uncomp; 2630 2631 ASSERT3U(clone->ds_dir->dd_phys-> 2632 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2633 2634 dsl_deadlist_space(&clone->ds_deadlist, 2635 &cdl_used, &cdl_comp, &cdl_uncomp); 2636 dsl_deadlist_space(&origin_head->ds_deadlist, 2637 &odl_used, &odl_comp, &odl_uncomp); 2638 2639 dused = clone->ds_phys->ds_referenced_bytes + cdl_used - 2640 (origin_head->ds_phys->ds_referenced_bytes + odl_used); 2641 dcomp = clone->ds_phys->ds_compressed_bytes + cdl_comp - 2642 (origin_head->ds_phys->ds_compressed_bytes + odl_comp); 2643 duncomp = clone->ds_phys->ds_uncompressed_bytes + 2644 cdl_uncomp - 2645 (origin_head->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2646 2647 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 2648 dused, dcomp, duncomp, tx); 2649 dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 2650 -dused, -dcomp, -duncomp, tx); 2651 2652 /* 2653 * The difference in the space used by snapshots is the 2654 * difference in snapshot space due to the head's 2655 * deadlist (since that's the only thing that's 2656 * changing that affects the snapused). 2657 */ 2658 dsl_deadlist_space_range(&clone->ds_deadlist, 2659 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2660 &cdl_used, &cdl_comp, &cdl_uncomp); 2661 dsl_deadlist_space_range(&origin_head->ds_deadlist, 2662 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2663 &odl_used, &odl_comp, &odl_uncomp); 2664 dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 2665 DD_USED_HEAD, DD_USED_SNAP, tx); 2666 } 2667 2668 /* swap ds_*_bytes */ 2669 SWITCH64(origin_head->ds_phys->ds_referenced_bytes, 2670 clone->ds_phys->ds_referenced_bytes); 2671 SWITCH64(origin_head->ds_phys->ds_compressed_bytes, 2672 clone->ds_phys->ds_compressed_bytes); 2673 SWITCH64(origin_head->ds_phys->ds_uncompressed_bytes, 2674 clone->ds_phys->ds_uncompressed_bytes); 2675 SWITCH64(origin_head->ds_phys->ds_unique_bytes, 2676 clone->ds_phys->ds_unique_bytes); 2677 2678 /* apply any parent delta for change in unconsumed refreservation */ 2679 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 2680 unused_refres_delta, 0, 0, tx); 2681 2682 /* 2683 * Swap deadlists. 2684 */ 2685 dsl_deadlist_close(&clone->ds_deadlist); 2686 dsl_deadlist_close(&origin_head->ds_deadlist); 2687 SWITCH64(origin_head->ds_phys->ds_deadlist_obj, 2688 clone->ds_phys->ds_deadlist_obj); 2689 dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 2690 clone->ds_phys->ds_deadlist_obj); 2691 dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 2692 origin_head->ds_phys->ds_deadlist_obj); 2693 2694 dsl_scan_ds_clone_swapped(origin_head, clone, tx); 2695 2696 spa_history_log_internal_ds(clone, "clone swap", tx, 2697 "parent=%s", origin_head->ds_dir->dd_myname); 2698 } 2699 2700 /* 2701 * Given a pool name and a dataset object number in that pool, 2702 * return the name of that dataset. 
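 *
 * Illustrative call (hypothetical pool name and object number; a sketch
 * only):
 *
 *	char buf[ZFS_MAXNAMELEN];
 *	int err = dsl_dsobj_to_dsname("tank", 1234, buf);
 *
 * On success (err == 0), buf holds the full dataset name.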
2703 */ 2704 int 2705 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2706 { 2707 dsl_pool_t *dp; 2708 dsl_dataset_t *ds; 2709 int error; 2710 2711 error = dsl_pool_hold(pname, FTAG, &dp); 2712 if (error != 0) 2713 return (error); 2714 2715 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2716 if (error == 0) { 2717 dsl_dataset_name(ds, buf); 2718 dsl_dataset_rele(ds, FTAG); 2719 } 2720 dsl_pool_rele(dp, FTAG); 2721 2722 return (error); 2723 } 2724 2725 int 2726 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2727 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2728 { 2729 int error = 0; 2730 2731 ASSERT3S(asize, >, 0); 2732 2733 /* 2734 * *ref_rsrv is the portion of asize that will come from any 2735 * unconsumed refreservation space. 2736 */ 2737 *ref_rsrv = 0; 2738 2739 mutex_enter(&ds->ds_lock); 2740 /* 2741 * Make a space adjustment for reserved bytes. 2742 */ 2743 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2744 ASSERT3U(*used, >=, 2745 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2746 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2747 *ref_rsrv = 2748 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2749 } 2750 2751 if (!check_quota || ds->ds_quota == 0) { 2752 mutex_exit(&ds->ds_lock); 2753 return (0); 2754 } 2755 /* 2756 * If they are requesting more space, and our current estimate 2757 * is over quota, they get to try again unless the actual 2758 * on-disk is over quota and there are no pending changes (which 2759 * may free up space for us). 2760 */ 2761 if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) { 2762 if (inflight > 0 || 2763 ds->ds_phys->ds_referenced_bytes < ds->ds_quota) 2764 error = SET_ERROR(ERESTART); 2765 else 2766 error = SET_ERROR(EDQUOT); 2767 } 2768 mutex_exit(&ds->ds_lock); 2769 2770 return (error); 2771 } 2772 2773 typedef struct dsl_dataset_set_qr_arg { 2774 const char *ddsqra_name; 2775 zprop_source_t ddsqra_source; 2776 uint64_t ddsqra_value; 2777 } dsl_dataset_set_qr_arg_t; 2778 2779 2780 /* ARGSUSED */ 2781 static int 2782 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2783 { 2784 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2785 dsl_pool_t *dp = dmu_tx_pool(tx); 2786 dsl_dataset_t *ds; 2787 int error; 2788 uint64_t newval; 2789 2790 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2791 return (SET_ERROR(ENOTSUP)); 2792 2793 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2794 if (error != 0) 2795 return (error); 2796 2797 if (dsl_dataset_is_snapshot(ds)) { 2798 dsl_dataset_rele(ds, FTAG); 2799 return (SET_ERROR(EINVAL)); 2800 } 2801 2802 error = dsl_prop_predict(ds->ds_dir, 2803 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2804 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2805 if (error != 0) { 2806 dsl_dataset_rele(ds, FTAG); 2807 return (error); 2808 } 2809 2810 if (newval == 0) { 2811 dsl_dataset_rele(ds, FTAG); 2812 return (0); 2813 } 2814 2815 if (newval < ds->ds_phys->ds_referenced_bytes || 2816 newval < ds->ds_reserved) { 2817 dsl_dataset_rele(ds, FTAG); 2818 return (SET_ERROR(ENOSPC)); 2819 } 2820 2821 dsl_dataset_rele(ds, FTAG); 2822 return (0); 2823 } 2824 2825 static void 2826 dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 2827 { 2828 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2829 dsl_pool_t *dp = dmu_tx_pool(tx); 2830 dsl_dataset_t *ds; 2831 uint64_t newval; 2832 2833 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2834 2835 dsl_prop_set_sync_impl(ds, 2836 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2837 
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 2838 &ddsqra->ddsqra_value, tx); 2839 2840 VERIFY0(dsl_prop_get_int_ds(ds, 2841 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 2842 2843 if (ds->ds_quota != newval) { 2844 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2845 ds->ds_quota = newval; 2846 } 2847 dsl_dataset_rele(ds, FTAG); 2848 } 2849 2850 int 2851 dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 2852 uint64_t refquota) 2853 { 2854 dsl_dataset_set_qr_arg_t ddsqra; 2855 2856 ddsqra.ddsqra_name = dsname; 2857 ddsqra.ddsqra_source = source; 2858 ddsqra.ddsqra_value = refquota; 2859 2860 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 2861 dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE)); 2862 } 2863 2864 static int 2865 dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 2866 { 2867 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2868 dsl_pool_t *dp = dmu_tx_pool(tx); 2869 dsl_dataset_t *ds; 2870 int error; 2871 uint64_t newval, unique; 2872 2873 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 2874 return (SET_ERROR(ENOTSUP)); 2875 2876 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2877 if (error != 0) 2878 return (error); 2879 2880 if (dsl_dataset_is_snapshot(ds)) { 2881 dsl_dataset_rele(ds, FTAG); 2882 return (SET_ERROR(EINVAL)); 2883 } 2884 2885 error = dsl_prop_predict(ds->ds_dir, 2886 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2887 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2888 if (error != 0) { 2889 dsl_dataset_rele(ds, FTAG); 2890 return (error); 2891 } 2892 2893 /* 2894 * If we are doing the preliminary check in open context, the 2895 * space estimates may be inaccurate. 2896 */ 2897 if (!dmu_tx_is_syncing(tx)) { 2898 dsl_dataset_rele(ds, FTAG); 2899 return (0); 2900 } 2901 2902 mutex_enter(&ds->ds_lock); 2903 if (!DS_UNIQUE_IS_ACCURATE(ds)) 2904 dsl_dataset_recalc_head_uniq(ds); 2905 unique = ds->ds_phys->ds_unique_bytes; 2906 mutex_exit(&ds->ds_lock); 2907 2908 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 2909 uint64_t delta = MAX(unique, newval) - 2910 MAX(unique, ds->ds_reserved); 2911 2912 if (delta > 2913 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 2914 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 2915 dsl_dataset_rele(ds, FTAG); 2916 return (SET_ERROR(ENOSPC)); 2917 } 2918 } 2919 2920 dsl_dataset_rele(ds, FTAG); 2921 return (0); 2922 } 2923 2924 void 2925 dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 2926 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 2927 { 2928 uint64_t newval; 2929 uint64_t unique; 2930 int64_t delta; 2931 2932 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 2933 source, sizeof (value), 1, &value, tx); 2934 2935 VERIFY0(dsl_prop_get_int_ds(ds, 2936 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 2937 2938 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2939 mutex_enter(&ds->ds_dir->dd_lock); 2940 mutex_enter(&ds->ds_lock); 2941 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 2942 unique = ds->ds_phys->ds_unique_bytes; 2943 delta = MAX(0, (int64_t)(newval - unique)) - 2944 MAX(0, (int64_t)(ds->ds_reserved - unique)); 2945 ds->ds_reserved = newval; 2946 mutex_exit(&ds->ds_lock); 2947 2948 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 2949 mutex_exit(&ds->ds_dir->dd_lock); 2950 } 2951 2952 static void 2953 dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 2954 { 2955 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2956 dsl_pool_t *dp = dmu_tx_pool(tx); 2957 dsl_dataset_t *ds; 2958 2959 
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 2960 dsl_dataset_set_refreservation_sync_impl(ds, 2961 ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); 2962 dsl_dataset_rele(ds, FTAG); 2963 } 2964 2965 int 2966 dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 2967 uint64_t refreservation) 2968 { 2969 dsl_dataset_set_qr_arg_t ddsqra; 2970 2971 ddsqra.ddsqra_name = dsname; 2972 ddsqra.ddsqra_source = source; 2973 ddsqra.ddsqra_value = refreservation; 2974 2975 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 2976 dsl_dataset_set_refreservation_sync, &ddsqra, 2977 0, ZFS_SPACE_CHECK_NONE)); 2978 } 2979 2980 /* 2981 * Return (in *usedp) the amount of space written in new that is not 2982 * present in oldsnap. New may be a snapshot or the head. Old must be 2983 * a snapshot before new, in new's filesystem (or its origin). If not then 2984 * fail and return EINVAL. 2985 * 2986 * The written space is calculated by considering two components: First, we 2987 * ignore any freed space, and calculate the written as new's used space 2988 * minus old's used space. Next, we add in the amount of space that was freed 2989 * between the two snapshots, thus reducing new's used space relative to old's. 2990 * Specifically, this is the space that was born before old->ds_creation_txg, 2991 * and freed before new (ie. on new's deadlist or a previous deadlist). 2992 * 2993 * space freed [---------------------] 2994 * snapshots ---O-------O--------O-------O------ 2995 * oldsnap new 2996 */ 2997 int 2998 dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 2999 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3000 { 3001 int err = 0; 3002 uint64_t snapobj; 3003 dsl_pool_t *dp = new->ds_dir->dd_pool; 3004 3005 ASSERT(dsl_pool_config_held(dp)); 3006 3007 *usedp = 0; 3008 *usedp += new->ds_phys->ds_referenced_bytes; 3009 *usedp -= oldsnap->ds_phys->ds_referenced_bytes; 3010 3011 *compp = 0; 3012 *compp += new->ds_phys->ds_compressed_bytes; 3013 *compp -= oldsnap->ds_phys->ds_compressed_bytes; 3014 3015 *uncompp = 0; 3016 *uncompp += new->ds_phys->ds_uncompressed_bytes; 3017 *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes; 3018 3019 snapobj = new->ds_object; 3020 while (snapobj != oldsnap->ds_object) { 3021 dsl_dataset_t *snap; 3022 uint64_t used, comp, uncomp; 3023 3024 if (snapobj == new->ds_object) { 3025 snap = new; 3026 } else { 3027 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 3028 if (err != 0) 3029 break; 3030 } 3031 3032 if (snap->ds_phys->ds_prev_snap_txg == 3033 oldsnap->ds_phys->ds_creation_txg) { 3034 /* 3035 * The blocks in the deadlist can not be born after 3036 * ds_prev_snap_txg, so get the whole deadlist space, 3037 * which is more efficient (especially for old-format 3038 * deadlists). Unfortunately the deadlist code 3039 * doesn't have enough information to make this 3040 * optimization itself. 3041 */ 3042 dsl_deadlist_space(&snap->ds_deadlist, 3043 &used, &comp, &uncomp); 3044 } else { 3045 dsl_deadlist_space_range(&snap->ds_deadlist, 3046 0, oldsnap->ds_phys->ds_creation_txg, 3047 &used, &comp, &uncomp); 3048 } 3049 *usedp += used; 3050 *compp += comp; 3051 *uncompp += uncomp; 3052 3053 /* 3054 * If we get to the beginning of the chain of snapshots 3055 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 3056 * was not a snapshot of/before new. 
3057 */ 3058 snapobj = snap->ds_phys->ds_prev_snap_obj; 3059 if (snap != new) 3060 dsl_dataset_rele(snap, FTAG); 3061 if (snapobj == 0) { 3062 err = SET_ERROR(EINVAL); 3063 break; 3064 } 3065 3066 } 3067 return (err); 3068 } 3069 3070 /* 3071 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 3072 * lastsnap, and all snapshots in between are deleted. 3073 * 3074 * blocks that would be freed [---------------------------] 3075 * snapshots ---O-------O--------O-------O--------O 3076 * firstsnap lastsnap 3077 * 3078 * This is the set of blocks that were born after the snap before firstsnap, 3079 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 3080 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 3081 * We calculate this by iterating over the relevant deadlists (from the snap 3082 * after lastsnap, backward to the snap after firstsnap), summing up the 3083 * space on the deadlist that was born after the snap before firstsnap. 3084 */ 3085 int 3086 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 3087 dsl_dataset_t *lastsnap, 3088 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3089 { 3090 int err = 0; 3091 uint64_t snapobj; 3092 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 3093 3094 ASSERT(dsl_dataset_is_snapshot(firstsnap)); 3095 ASSERT(dsl_dataset_is_snapshot(lastsnap)); 3096 3097 /* 3098 * Check that the snapshots are in the same dsl_dir, and firstsnap 3099 * is before lastsnap. 3100 */ 3101 if (firstsnap->ds_dir != lastsnap->ds_dir || 3102 firstsnap->ds_phys->ds_creation_txg > 3103 lastsnap->ds_phys->ds_creation_txg) 3104 return (SET_ERROR(EINVAL)); 3105 3106 *usedp = *compp = *uncompp = 0; 3107 3108 snapobj = lastsnap->ds_phys->ds_next_snap_obj; 3109 while (snapobj != firstsnap->ds_object) { 3110 dsl_dataset_t *ds; 3111 uint64_t used, comp, uncomp; 3112 3113 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3114 if (err != 0) 3115 break; 3116 3117 dsl_deadlist_space_range(&ds->ds_deadlist, 3118 firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX, 3119 &used, &comp, &uncomp); 3120 *usedp += used; 3121 *compp += comp; 3122 *uncompp += uncomp; 3123 3124 snapobj = ds->ds_phys->ds_prev_snap_obj; 3125 ASSERT3U(snapobj, !=, 0); 3126 dsl_dataset_rele(ds, FTAG); 3127 } 3128 return (err); 3129 } 3130 3131 /* 3132 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3133 * For example, they could both be snapshots of the same filesystem, and 3134 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3135 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3136 * filesystem. Or 'earlier' could be the origin's origin. 3137 * 3138 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. 
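 *
 * For example (hypothetical datasets): if tank/clone was cloned from
 * tank/fs@snap, then passing tank/clone's head as 'later' and tank/fs@snap
 * as 'earlier' (with earlier_txg == 0) returns B_TRUE, because the clone's
 * dd_origin_obj is exactly that snapshot.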
3139 */ 3140 boolean_t 3141 dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, 3142 uint64_t earlier_txg) 3143 { 3144 dsl_pool_t *dp = later->ds_dir->dd_pool; 3145 int error; 3146 boolean_t ret; 3147 3148 ASSERT(dsl_pool_config_held(dp)); 3149 ASSERT(dsl_dataset_is_snapshot(earlier) || earlier_txg != 0); 3150 3151 if (earlier_txg == 0) 3152 earlier_txg = earlier->ds_phys->ds_creation_txg; 3153 3154 if (dsl_dataset_is_snapshot(later) && 3155 earlier_txg >= later->ds_phys->ds_creation_txg) 3156 return (B_FALSE); 3157 3158 if (later->ds_dir == earlier->ds_dir) 3159 return (B_TRUE); 3160 if (!dsl_dir_is_clone(later->ds_dir)) 3161 return (B_FALSE); 3162 3163 if (later->ds_dir->dd_phys->dd_origin_obj == earlier->ds_object) 3164 return (B_TRUE); 3165 dsl_dataset_t *origin; 3166 error = dsl_dataset_hold_obj(dp, 3167 later->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin); 3168 if (error != 0) 3169 return (B_FALSE); 3170 ret = dsl_dataset_is_before(origin, earlier, earlier_txg); 3171 dsl_dataset_rele(origin, FTAG); 3172 return (ret); 3173 } 3174 3175 3176 void 3177 dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) 3178 { 3179 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3180 dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); 3181 } 3182
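/*
 * A minimal usage sketch for dsl_dataset_space_written() (illustrative
 * only): "tank/fs@old" and "tank/fs@new" are hypothetical snapshot names,
 * and the caller is assumed to already hold the pool configuration lock,
 * as the ASSERT in that function requires.
 */
#if 0	/* usage sketch; intentionally not compiled */
static int
example_space_written(dsl_pool_t *dp)
{
	dsl_dataset_t *oldsnap, *new;
	uint64_t written, comp, uncomp;
	int err;

	ASSERT(dsl_pool_config_held(dp));

	/* hold both snapshots; oldsnap must be before new in the timeline */
	err = dsl_dataset_hold(dp, "tank/fs@old", FTAG, &oldsnap);
	if (err != 0)
		return (err);
	err = dsl_dataset_hold(dp, "tank/fs@new", FTAG, &new);
	if (err != 0) {
		dsl_dataset_rele(oldsnap, FTAG);
		return (err);
	}

	/* written = space in new that is not present in oldsnap */
	err = dsl_dataset_space_written(oldsnap, new, &written,
	    &comp, &uncomp);

	dsl_dataset_rele(new, FTAG);
	dsl_dataset_rele(oldsnap, FTAG);
	return (err);
}
#endif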