1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 24 * Copyright (c) 2014, Joyent, Inc. All rights reserved. 25 * Copyright (c) 2014 RackTop Systems. 26 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 27 */ 28 29 #include <sys/dmu_objset.h> 30 #include <sys/dsl_dataset.h> 31 #include <sys/dsl_dir.h> 32 #include <sys/dsl_prop.h> 33 #include <sys/dsl_synctask.h> 34 #include <sys/dmu_traverse.h> 35 #include <sys/dmu_impl.h> 36 #include <sys/dmu_tx.h> 37 #include <sys/arc.h> 38 #include <sys/zio.h> 39 #include <sys/zap.h> 40 #include <sys/zfeature.h> 41 #include <sys/unique.h> 42 #include <sys/zfs_context.h> 43 #include <sys/zfs_ioctl.h> 44 #include <sys/spa.h> 45 #include <sys/zfs_znode.h> 46 #include <sys/zfs_onexit.h> 47 #include <sys/zvol.h> 48 #include <sys/dsl_scan.h> 49 #include <sys/dsl_deadlist.h> 50 #include <sys/dsl_destroy.h> 51 #include <sys/dsl_userhold.h> 52 #include <sys/dsl_bookmark.h> 53 54 /* 55 * The SPA supports block sizes up to 16MB. However, very large blocks 56 * can have an impact on i/o latency (e.g. tying up a spinning disk for 57 * ~300ms), and also potentially on the memory allocator. Therefore, 58 * we do not allow the recordsize to be set larger than zfs_max_recordsize 59 * (default 1MB). Larger blocks can be created by changing this tunable, 60 * and pools with larger blocks can always be imported and used, regardless 61 * of this setting. 62 */ 63 int zfs_max_recordsize = 1 * 1024 * 1024; 64 65 #define SWITCH64(x, y) \ 66 { \ 67 uint64_t __tmp = (x); \ 68 (x) = (y); \ 69 (y) = __tmp; \ 70 } 71 72 #define DS_REF_MAX (1ULL << 62) 73 74 extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds); 75 76 /* 77 * Figure out how much of this delta should be propogated to the dsl_dir 78 * layer. If there's a refreservation, that space has already been 79 * partially accounted for in our ancestors. 80 */ 81 static int64_t 82 parent_delta(dsl_dataset_t *ds, int64_t delta) 83 { 84 dsl_dataset_phys_t *ds_phys; 85 uint64_t old_bytes, new_bytes; 86 87 if (ds->ds_reserved == 0) 88 return (delta); 89 90 ds_phys = dsl_dataset_phys(ds); 91 old_bytes = MAX(ds_phys->ds_unique_bytes, ds->ds_reserved); 92 new_bytes = MAX(ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 93 94 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 95 return (new_bytes - old_bytes); 96 } 97 98 void 99 dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx) 100 { 101 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 102 int compressed = BP_GET_PSIZE(bp); 103 int uncompressed = BP_GET_UCSIZE(bp); 104 int64_t delta; 105 106 dprintf_bp(bp, "ds=%p", ds); 107 108 ASSERT(dmu_tx_is_syncing(tx)); 109 /* It could have been compressed away to nothing */ 110 if (BP_IS_HOLE(bp)) 111 return; 112 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 113 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp))); 114 if (ds == NULL) { 115 dsl_pool_mos_diduse_space(tx->tx_pool, 116 used, compressed, uncompressed); 117 return; 118 } 119 120 dmu_buf_will_dirty(ds->ds_dbuf, tx); 121 mutex_enter(&ds->ds_lock); 122 delta = parent_delta(ds, used); 123 dsl_dataset_phys(ds)->ds_referenced_bytes += used; 124 dsl_dataset_phys(ds)->ds_compressed_bytes += compressed; 125 dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed; 126 dsl_dataset_phys(ds)->ds_unique_bytes += used; 127 if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) { 128 ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] = 129 B_TRUE; 130 } 131 mutex_exit(&ds->ds_lock); 132 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 133 compressed, uncompressed, tx); 134 dsl_dir_transfer_space(ds->ds_dir, used - delta, 135 DD_USED_REFRSRV, DD_USED_HEAD, tx); 136 } 137 138 int 139 dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx, 140 boolean_t async) 141 { 142 int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp); 143 int compressed = BP_GET_PSIZE(bp); 144 int uncompressed = BP_GET_UCSIZE(bp); 145 146 if (BP_IS_HOLE(bp)) 147 return (0); 148 149 ASSERT(dmu_tx_is_syncing(tx)); 150 ASSERT(bp->blk_birth <= tx->tx_txg); 151 152 if (ds == NULL) { 153 dsl_free(tx->tx_pool, tx->tx_txg, bp); 154 dsl_pool_mos_diduse_space(tx->tx_pool, 155 -used, -compressed, -uncompressed); 156 return (used); 157 } 158 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 159 160 ASSERT(!ds->ds_is_snapshot); 161 dmu_buf_will_dirty(ds->ds_dbuf, tx); 162 163 if (bp->blk_birth > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 164 int64_t delta; 165 166 dprintf_bp(bp, "freeing ds=%llu", ds->ds_object); 167 dsl_free(tx->tx_pool, tx->tx_txg, bp); 168 169 mutex_enter(&ds->ds_lock); 170 ASSERT(dsl_dataset_phys(ds)->ds_unique_bytes >= used || 171 !DS_UNIQUE_IS_ACCURATE(ds)); 172 delta = parent_delta(ds, -used); 173 dsl_dataset_phys(ds)->ds_unique_bytes -= used; 174 mutex_exit(&ds->ds_lock); 175 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 176 delta, -compressed, -uncompressed, tx); 177 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 178 DD_USED_REFRSRV, DD_USED_HEAD, tx); 179 } else { 180 dprintf_bp(bp, "putting on dead list: %s", ""); 181 if (async) { 182 /* 183 * We are here as part of zio's write done callback, 184 * which means we're a zio interrupt thread. We can't 185 * call dsl_deadlist_insert() now because it may block 186 * waiting for I/O. Instead, put bp on the deferred 187 * queue and let dsl_pool_sync() finish the job. 188 */ 189 bplist_append(&ds->ds_pending_deadlist, bp); 190 } else { 191 dsl_deadlist_insert(&ds->ds_deadlist, bp, tx); 192 } 193 ASSERT3U(ds->ds_prev->ds_object, ==, 194 dsl_dataset_phys(ds)->ds_prev_snap_obj); 195 ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_num_children > 0); 196 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 197 if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 198 ds->ds_object && bp->blk_birth > 199 dsl_dataset_phys(ds->ds_prev)->ds_prev_snap_txg) { 200 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 201 mutex_enter(&ds->ds_prev->ds_lock); 202 dsl_dataset_phys(ds->ds_prev)->ds_unique_bytes += used; 203 mutex_exit(&ds->ds_prev->ds_lock); 204 } 205 if (bp->blk_birth > ds->ds_dir->dd_origin_txg) { 206 dsl_dir_transfer_space(ds->ds_dir, used, 207 DD_USED_HEAD, DD_USED_SNAP, tx); 208 } 209 } 210 mutex_enter(&ds->ds_lock); 211 ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used); 212 dsl_dataset_phys(ds)->ds_referenced_bytes -= used; 213 ASSERT3U(dsl_dataset_phys(ds)->ds_compressed_bytes, >=, compressed); 214 dsl_dataset_phys(ds)->ds_compressed_bytes -= compressed; 215 ASSERT3U(dsl_dataset_phys(ds)->ds_uncompressed_bytes, >=, uncompressed); 216 dsl_dataset_phys(ds)->ds_uncompressed_bytes -= uncompressed; 217 mutex_exit(&ds->ds_lock); 218 219 return (used); 220 } 221 222 uint64_t 223 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 224 { 225 uint64_t trysnap = 0; 226 227 if (ds == NULL) 228 return (0); 229 /* 230 * The snapshot creation could fail, but that would cause an 231 * incorrect FALSE return, which would only result in an 232 * overestimation of the amount of space that an operation would 233 * consume, which is OK. 234 * 235 * There's also a small window where we could miss a pending 236 * snapshot, because we could set the sync task in the quiescing 237 * phase. So this should only be used as a guess. 238 */ 239 if (ds->ds_trysnap_txg > 240 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 241 trysnap = ds->ds_trysnap_txg; 242 return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap)); 243 } 244 245 boolean_t 246 dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp, 247 uint64_t blk_birth) 248 { 249 if (blk_birth <= dsl_dataset_prev_snap_txg(ds) || 250 (bp != NULL && BP_IS_HOLE(bp))) 251 return (B_FALSE); 252 253 ddt_prefetch(dsl_dataset_get_spa(ds), bp); 254 255 return (B_TRUE); 256 } 257 258 static void 259 dsl_dataset_evict(void *dbu) 260 { 261 dsl_dataset_t *ds = dbu; 262 263 ASSERT(ds->ds_owner == NULL); 264 265 ds->ds_dbuf = NULL; 266 267 unique_remove(ds->ds_fsid_guid); 268 269 if (ds->ds_objset != NULL) 270 dmu_objset_evict(ds->ds_objset); 271 272 if (ds->ds_prev) { 273 dsl_dataset_rele(ds->ds_prev, ds); 274 ds->ds_prev = NULL; 275 } 276 277 bplist_destroy(&ds->ds_pending_deadlist); 278 if (ds->ds_deadlist.dl_os != NULL) 279 dsl_deadlist_close(&ds->ds_deadlist); 280 if (ds->ds_dir) 281 dsl_dir_async_rele(ds->ds_dir, ds); 282 283 ASSERT(!list_link_active(&ds->ds_synced_link)); 284 285 mutex_destroy(&ds->ds_lock); 286 mutex_destroy(&ds->ds_opening_lock); 287 mutex_destroy(&ds->ds_sendstream_lock); 288 refcount_destroy(&ds->ds_longholds); 289 290 kmem_free(ds, sizeof (dsl_dataset_t)); 291 } 292 293 int 294 dsl_dataset_get_snapname(dsl_dataset_t *ds) 295 { 296 dsl_dataset_phys_t *headphys; 297 int err; 298 dmu_buf_t *headdbuf; 299 dsl_pool_t *dp = ds->ds_dir->dd_pool; 300 objset_t *mos = dp->dp_meta_objset; 301 302 if (ds->ds_snapname[0]) 303 return (0); 304 if (dsl_dataset_phys(ds)->ds_next_snap_obj == 0) 305 return (0); 306 307 err = dmu_bonus_hold(mos, dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, 308 FTAG, &headdbuf); 309 if (err != 0) 310 return (err); 311 headphys = headdbuf->db_data; 312 err = zap_value_search(dp->dp_meta_objset, 313 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 314 dmu_buf_rele(headdbuf, FTAG); 315 return (err); 316 } 317 318 int 319 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 320 { 321 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 322 uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 323 matchtype_t mt; 324 int err; 325 326 if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 327 mt = MT_FIRST; 328 else 329 mt = MT_EXACT; 330 331 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 332 value, mt, NULL, 0, NULL); 333 if (err == ENOTSUP && mt == MT_FIRST) 334 err = zap_lookup(mos, snapobj, name, 8, 1, value); 335 return (err); 336 } 337 338 int 339 dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, 340 boolean_t adj_cnt) 341 { 342 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 343 uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj; 344 matchtype_t mt; 345 int err; 346 347 dsl_dir_snap_cmtime_update(ds->ds_dir); 348 349 if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET) 350 mt = MT_FIRST; 351 else 352 mt = MT_EXACT; 353 354 err = zap_remove_norm(mos, snapobj, name, mt, tx); 355 if (err == ENOTSUP && mt == MT_FIRST) 356 err = zap_remove(mos, snapobj, name, tx); 357 358 if (err == 0 && adj_cnt) 359 dsl_fs_ss_count_adjust(ds->ds_dir, -1, 360 DD_FIELD_SNAPSHOT_COUNT, tx); 361 362 return (err); 363 } 364 365 boolean_t 366 dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag) 367 { 368 dmu_buf_t *dbuf = ds->ds_dbuf; 369 boolean_t result = B_FALSE; 370 371 if (dbuf != NULL && dmu_buf_try_add_ref(dbuf, dp->dp_meta_objset, 372 ds->ds_object, DMU_BONUS_BLKID, tag)) { 373 374 if (ds == dmu_buf_get_user(dbuf)) 375 result = B_TRUE; 376 else 377 dmu_buf_rele(dbuf, tag); 378 } 379 380 return (result); 381 } 382 383 int 384 dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 385 dsl_dataset_t **dsp) 386 { 387 objset_t *mos = dp->dp_meta_objset; 388 dmu_buf_t *dbuf; 389 dsl_dataset_t *ds; 390 int err; 391 dmu_object_info_t doi; 392 393 ASSERT(dsl_pool_config_held(dp)); 394 395 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 396 if (err != 0) 397 return (err); 398 399 /* Make sure dsobj has the correct object type. */ 400 dmu_object_info_from_db(dbuf, &doi); 401 if (doi.doi_bonus_type != DMU_OT_DSL_DATASET) { 402 dmu_buf_rele(dbuf, tag); 403 return (SET_ERROR(EINVAL)); 404 } 405 406 ds = dmu_buf_get_user(dbuf); 407 if (ds == NULL) { 408 dsl_dataset_t *winner = NULL; 409 410 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 411 ds->ds_dbuf = dbuf; 412 ds->ds_object = dsobj; 413 ds->ds_is_snapshot = dsl_dataset_phys(ds)->ds_num_children != 0; 414 415 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 416 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 417 mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL); 418 refcount_create(&ds->ds_longholds); 419 420 bplist_create(&ds->ds_pending_deadlist); 421 dsl_deadlist_open(&ds->ds_deadlist, 422 mos, dsl_dataset_phys(ds)->ds_deadlist_obj); 423 424 list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t), 425 offsetof(dmu_sendarg_t, dsa_link)); 426 427 if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 428 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 429 if (!(spa_feature_table[f].fi_flags & 430 ZFEATURE_FLAG_PER_DATASET)) 431 continue; 432 err = zap_contains(mos, dsobj, 433 spa_feature_table[f].fi_guid); 434 if (err == 0) { 435 ds->ds_feature_inuse[f] = B_TRUE; 436 } else { 437 ASSERT3U(err, ==, ENOENT); 438 err = 0; 439 } 440 } 441 } 442 443 err = dsl_dir_hold_obj(dp, 444 dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, &ds->ds_dir); 445 if (err != 0) { 446 mutex_destroy(&ds->ds_lock); 447 mutex_destroy(&ds->ds_opening_lock); 448 mutex_destroy(&ds->ds_sendstream_lock); 449 refcount_destroy(&ds->ds_longholds); 450 bplist_destroy(&ds->ds_pending_deadlist); 451 dsl_deadlist_close(&ds->ds_deadlist); 452 kmem_free(ds, sizeof (dsl_dataset_t)); 453 dmu_buf_rele(dbuf, tag); 454 return (err); 455 } 456 457 if (!ds->ds_is_snapshot) { 458 ds->ds_snapname[0] = '\0'; 459 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 460 err = dsl_dataset_hold_obj(dp, 461 dsl_dataset_phys(ds)->ds_prev_snap_obj, 462 ds, &ds->ds_prev); 463 } 464 if (doi.doi_type == DMU_OTN_ZAP_METADATA) { 465 int zaperr = zap_lookup(mos, ds->ds_object, 466 DS_FIELD_BOOKMARK_NAMES, 467 sizeof (ds->ds_bookmarks), 1, 468 &ds->ds_bookmarks); 469 if (zaperr != ENOENT) 470 VERIFY0(zaperr); 471 } 472 } else { 473 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 474 err = dsl_dataset_get_snapname(ds); 475 if (err == 0 && 476 dsl_dataset_phys(ds)->ds_userrefs_obj != 0) { 477 err = zap_count( 478 ds->ds_dir->dd_pool->dp_meta_objset, 479 dsl_dataset_phys(ds)->ds_userrefs_obj, 480 &ds->ds_userrefs); 481 } 482 } 483 484 if (err == 0 && !ds->ds_is_snapshot) { 485 err = dsl_prop_get_int_ds(ds, 486 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 487 &ds->ds_reserved); 488 if (err == 0) { 489 err = dsl_prop_get_int_ds(ds, 490 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 491 &ds->ds_quota); 492 } 493 } else { 494 ds->ds_reserved = ds->ds_quota = 0; 495 } 496 497 dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf); 498 if (err == 0) 499 winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu); 500 501 if (err != 0 || winner != NULL) { 502 bplist_destroy(&ds->ds_pending_deadlist); 503 dsl_deadlist_close(&ds->ds_deadlist); 504 if (ds->ds_prev) 505 dsl_dataset_rele(ds->ds_prev, ds); 506 dsl_dir_rele(ds->ds_dir, ds); 507 mutex_destroy(&ds->ds_lock); 508 mutex_destroy(&ds->ds_opening_lock); 509 mutex_destroy(&ds->ds_sendstream_lock); 510 refcount_destroy(&ds->ds_longholds); 511 kmem_free(ds, sizeof (dsl_dataset_t)); 512 if (err != 0) { 513 dmu_buf_rele(dbuf, tag); 514 return (err); 515 } 516 ds = winner; 517 } else { 518 ds->ds_fsid_guid = 519 unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid); 520 } 521 } 522 ASSERT3P(ds->ds_dbuf, ==, dbuf); 523 ASSERT3P(dsl_dataset_phys(ds), ==, dbuf->db_data); 524 ASSERT(dsl_dataset_phys(ds)->ds_prev_snap_obj != 0 || 525 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 526 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 527 *dsp = ds; 528 return (0); 529 } 530 531 int 532 dsl_dataset_hold(dsl_pool_t *dp, const char *name, 533 void *tag, dsl_dataset_t **dsp) 534 { 535 dsl_dir_t *dd; 536 const char *snapname; 537 uint64_t obj; 538 int err = 0; 539 dsl_dataset_t *ds; 540 541 err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname); 542 if (err != 0) 543 return (err); 544 545 ASSERT(dsl_pool_config_held(dp)); 546 obj = dsl_dir_phys(dd)->dd_head_dataset_obj; 547 if (obj != 0) 548 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 549 else 550 err = SET_ERROR(ENOENT); 551 552 /* we may be looking for a snapshot */ 553 if (err == 0 && snapname != NULL) { 554 dsl_dataset_t *snap_ds; 555 556 if (*snapname++ != '@') { 557 dsl_dataset_rele(ds, tag); 558 dsl_dir_rele(dd, FTAG); 559 return (SET_ERROR(ENOENT)); 560 } 561 562 dprintf("looking for snapshot '%s'\n", snapname); 563 err = dsl_dataset_snap_lookup(ds, snapname, &obj); 564 if (err == 0) 565 err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds); 566 dsl_dataset_rele(ds, tag); 567 568 if (err == 0) { 569 mutex_enter(&snap_ds->ds_lock); 570 if (snap_ds->ds_snapname[0] == 0) 571 (void) strlcpy(snap_ds->ds_snapname, snapname, 572 sizeof (snap_ds->ds_snapname)); 573 mutex_exit(&snap_ds->ds_lock); 574 ds = snap_ds; 575 } 576 } 577 if (err == 0) 578 *dsp = ds; 579 dsl_dir_rele(dd, FTAG); 580 return (err); 581 } 582 583 int 584 dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, 585 void *tag, dsl_dataset_t **dsp) 586 { 587 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 588 if (err != 0) 589 return (err); 590 if (!dsl_dataset_tryown(*dsp, tag)) { 591 dsl_dataset_rele(*dsp, tag); 592 *dsp = NULL; 593 return (SET_ERROR(EBUSY)); 594 } 595 return (0); 596 } 597 598 int 599 dsl_dataset_own(dsl_pool_t *dp, const char *name, 600 void *tag, dsl_dataset_t **dsp) 601 { 602 int err = dsl_dataset_hold(dp, name, tag, dsp); 603 if (err != 0) 604 return (err); 605 if (!dsl_dataset_tryown(*dsp, tag)) { 606 dsl_dataset_rele(*dsp, tag); 607 return (SET_ERROR(EBUSY)); 608 } 609 return (0); 610 } 611 612 /* 613 * See the comment above dsl_pool_hold() for details. In summary, a long 614 * hold is used to prevent destruction of a dataset while the pool hold 615 * is dropped, allowing other concurrent operations (e.g. spa_sync()). 616 * 617 * The dataset and pool must be held when this function is called. After it 618 * is called, the pool hold may be released while the dataset is still held 619 * and accessed. 620 */ 621 void 622 dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag) 623 { 624 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 625 (void) refcount_add(&ds->ds_longholds, tag); 626 } 627 628 void 629 dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag) 630 { 631 (void) refcount_remove(&ds->ds_longholds, tag); 632 } 633 634 /* Return B_TRUE if there are any long holds on this dataset. */ 635 boolean_t 636 dsl_dataset_long_held(dsl_dataset_t *ds) 637 { 638 return (!refcount_is_zero(&ds->ds_longholds)); 639 } 640 641 void 642 dsl_dataset_name(dsl_dataset_t *ds, char *name) 643 { 644 if (ds == NULL) { 645 (void) strcpy(name, "mos"); 646 } else { 647 dsl_dir_name(ds->ds_dir, name); 648 VERIFY0(dsl_dataset_get_snapname(ds)); 649 if (ds->ds_snapname[0]) { 650 (void) strcat(name, "@"); 651 /* 652 * We use a "recursive" mutex so that we 653 * can call dprintf_ds() with ds_lock held. 654 */ 655 if (!MUTEX_HELD(&ds->ds_lock)) { 656 mutex_enter(&ds->ds_lock); 657 (void) strcat(name, ds->ds_snapname); 658 mutex_exit(&ds->ds_lock); 659 } else { 660 (void) strcat(name, ds->ds_snapname); 661 } 662 } 663 } 664 } 665 666 void 667 dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 668 { 669 dmu_buf_rele(ds->ds_dbuf, tag); 670 } 671 672 void 673 dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 674 { 675 ASSERT3P(ds->ds_owner, ==, tag); 676 ASSERT(ds->ds_dbuf != NULL); 677 678 mutex_enter(&ds->ds_lock); 679 ds->ds_owner = NULL; 680 mutex_exit(&ds->ds_lock); 681 dsl_dataset_long_rele(ds, tag); 682 dsl_dataset_rele(ds, tag); 683 } 684 685 boolean_t 686 dsl_dataset_tryown(dsl_dataset_t *ds, void *tag) 687 { 688 boolean_t gotit = FALSE; 689 690 mutex_enter(&ds->ds_lock); 691 if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) { 692 ds->ds_owner = tag; 693 dsl_dataset_long_hold(ds, tag); 694 gotit = TRUE; 695 } 696 mutex_exit(&ds->ds_lock); 697 return (gotit); 698 } 699 700 static void 701 dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) 702 { 703 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 704 objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; 705 uint64_t zero = 0; 706 707 VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET); 708 709 spa_feature_incr(spa, f, tx); 710 dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx); 711 712 VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid, 713 sizeof (zero), 1, &zero, tx)); 714 } 715 716 void 717 dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx) 718 { 719 spa_t *spa = dmu_tx_pool(tx)->dp_spa; 720 objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset; 721 722 VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET); 723 724 VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx)); 725 spa_feature_decr(spa, f, tx); 726 } 727 728 uint64_t 729 dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 730 uint64_t flags, dmu_tx_t *tx) 731 { 732 dsl_pool_t *dp = dd->dd_pool; 733 dmu_buf_t *dbuf; 734 dsl_dataset_phys_t *dsphys; 735 uint64_t dsobj; 736 objset_t *mos = dp->dp_meta_objset; 737 738 if (origin == NULL) 739 origin = dp->dp_origin_snap; 740 741 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 742 ASSERT(origin == NULL || dsl_dataset_phys(origin)->ds_num_children > 0); 743 ASSERT(dmu_tx_is_syncing(tx)); 744 ASSERT(dsl_dir_phys(dd)->dd_head_dataset_obj == 0); 745 746 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 747 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 748 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 749 dmu_buf_will_dirty(dbuf, tx); 750 dsphys = dbuf->db_data; 751 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 752 dsphys->ds_dir_obj = dd->dd_object; 753 dsphys->ds_flags = flags; 754 dsphys->ds_fsid_guid = unique_create(); 755 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 756 sizeof (dsphys->ds_guid)); 757 dsphys->ds_snapnames_zapobj = 758 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 759 DMU_OT_NONE, 0, tx); 760 dsphys->ds_creation_time = gethrestime_sec(); 761 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 762 763 if (origin == NULL) { 764 dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx); 765 } else { 766 dsl_dataset_t *ohds; /* head of the origin snapshot */ 767 768 dsphys->ds_prev_snap_obj = origin->ds_object; 769 dsphys->ds_prev_snap_txg = 770 dsl_dataset_phys(origin)->ds_creation_txg; 771 dsphys->ds_referenced_bytes = 772 dsl_dataset_phys(origin)->ds_referenced_bytes; 773 dsphys->ds_compressed_bytes = 774 dsl_dataset_phys(origin)->ds_compressed_bytes; 775 dsphys->ds_uncompressed_bytes = 776 dsl_dataset_phys(origin)->ds_uncompressed_bytes; 777 dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp; 778 779 /* 780 * Inherit flags that describe the dataset's contents 781 * (INCONSISTENT) or properties (Case Insensitive). 782 */ 783 dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags & 784 (DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET); 785 786 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 787 if (origin->ds_feature_inuse[f]) 788 dsl_dataset_activate_feature(dsobj, f, tx); 789 } 790 791 dmu_buf_will_dirty(origin->ds_dbuf, tx); 792 dsl_dataset_phys(origin)->ds_num_children++; 793 794 VERIFY0(dsl_dataset_hold_obj(dp, 795 dsl_dir_phys(origin->ds_dir)->dd_head_dataset_obj, 796 FTAG, &ohds)); 797 dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist, 798 dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx); 799 dsl_dataset_rele(ohds, FTAG); 800 801 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 802 if (dsl_dataset_phys(origin)->ds_next_clones_obj == 0) { 803 dsl_dataset_phys(origin)->ds_next_clones_obj = 804 zap_create(mos, 805 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 806 } 807 VERIFY0(zap_add_int(mos, 808 dsl_dataset_phys(origin)->ds_next_clones_obj, 809 dsobj, tx)); 810 } 811 812 dmu_buf_will_dirty(dd->dd_dbuf, tx); 813 dsl_dir_phys(dd)->dd_origin_obj = origin->ds_object; 814 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 815 if (dsl_dir_phys(origin->ds_dir)->dd_clones == 0) { 816 dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx); 817 dsl_dir_phys(origin->ds_dir)->dd_clones = 818 zap_create(mos, 819 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx); 820 } 821 VERIFY0(zap_add_int(mos, 822 dsl_dir_phys(origin->ds_dir)->dd_clones, 823 dsobj, tx)); 824 } 825 } 826 827 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 828 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 829 830 dmu_buf_rele(dbuf, FTAG); 831 832 dmu_buf_will_dirty(dd->dd_dbuf, tx); 833 dsl_dir_phys(dd)->dd_head_dataset_obj = dsobj; 834 835 return (dsobj); 836 } 837 838 static void 839 dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx) 840 { 841 objset_t *os; 842 843 VERIFY0(dmu_objset_from_ds(ds, &os)); 844 bzero(&os->os_zil_header, sizeof (os->os_zil_header)); 845 dsl_dataset_dirty(ds, tx); 846 } 847 848 uint64_t 849 dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 850 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 851 { 852 dsl_pool_t *dp = pdd->dd_pool; 853 uint64_t dsobj, ddobj; 854 dsl_dir_t *dd; 855 856 ASSERT(dmu_tx_is_syncing(tx)); 857 ASSERT(lastname[0] != '@'); 858 859 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 860 VERIFY0(dsl_dir_hold_obj(dp, ddobj, lastname, FTAG, &dd)); 861 862 dsobj = dsl_dataset_create_sync_dd(dd, origin, 863 flags & ~DS_CREATE_FLAG_NODIRTY, tx); 864 865 dsl_deleg_set_create_perms(dd, tx, cr); 866 867 /* 868 * Since we're creating a new node we know it's a leaf, so we can 869 * initialize the counts if the limit feature is active. 870 */ 871 if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) { 872 uint64_t cnt = 0; 873 objset_t *os = dd->dd_pool->dp_meta_objset; 874 875 dsl_dir_zapify(dd, tx); 876 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT, 877 sizeof (cnt), 1, &cnt, tx)); 878 VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT, 879 sizeof (cnt), 1, &cnt, tx)); 880 } 881 882 dsl_dir_rele(dd, FTAG); 883 884 /* 885 * If we are creating a clone, make sure we zero out any stale 886 * data from the origin snapshots zil header. 887 */ 888 if (origin != NULL && !(flags & DS_CREATE_FLAG_NODIRTY)) { 889 dsl_dataset_t *ds; 890 891 VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds)); 892 dsl_dataset_zero_zil(ds, tx); 893 dsl_dataset_rele(ds, FTAG); 894 } 895 896 return (dsobj); 897 } 898 899 /* 900 * The unique space in the head dataset can be calculated by subtracting 901 * the space used in the most recent snapshot, that is still being used 902 * in this file system, from the space currently in use. To figure out 903 * the space in the most recent snapshot still in use, we need to take 904 * the total space used in the snapshot and subtract out the space that 905 * has been freed up since the snapshot was taken. 906 */ 907 void 908 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 909 { 910 uint64_t mrs_used; 911 uint64_t dlused, dlcomp, dluncomp; 912 913 ASSERT(!ds->ds_is_snapshot); 914 915 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) 916 mrs_used = dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes; 917 else 918 mrs_used = 0; 919 920 dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp); 921 922 ASSERT3U(dlused, <=, mrs_used); 923 dsl_dataset_phys(ds)->ds_unique_bytes = 924 dsl_dataset_phys(ds)->ds_referenced_bytes - (mrs_used - dlused); 925 926 if (spa_version(ds->ds_dir->dd_pool->dp_spa) >= 927 SPA_VERSION_UNIQUE_ACCURATE) 928 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 929 } 930 931 void 932 dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, 933 dmu_tx_t *tx) 934 { 935 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 936 uint64_t count; 937 int err; 938 939 ASSERT(dsl_dataset_phys(ds)->ds_num_children >= 2); 940 err = zap_remove_int(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 941 obj, tx); 942 /* 943 * The err should not be ENOENT, but a bug in a previous version 944 * of the code could cause upgrade_clones_cb() to not set 945 * ds_next_snap_obj when it should, leading to a missing entry. 946 * If we knew that the pool was created after 947 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't 948 * ENOENT. However, at least we can check that we don't have 949 * too many entries in the next_clones_obj even after failing to 950 * remove this one. 951 */ 952 if (err != ENOENT) 953 VERIFY0(err); 954 ASSERT0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 955 &count)); 956 ASSERT3U(count, <=, dsl_dataset_phys(ds)->ds_num_children - 2); 957 } 958 959 960 blkptr_t * 961 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 962 { 963 return (&dsl_dataset_phys(ds)->ds_bp); 964 } 965 966 void 967 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 968 { 969 ASSERT(dmu_tx_is_syncing(tx)); 970 /* If it's the meta-objset, set dp_meta_rootbp */ 971 if (ds == NULL) { 972 tx->tx_pool->dp_meta_rootbp = *bp; 973 } else { 974 dmu_buf_will_dirty(ds->ds_dbuf, tx); 975 dsl_dataset_phys(ds)->ds_bp = *bp; 976 } 977 } 978 979 spa_t * 980 dsl_dataset_get_spa(dsl_dataset_t *ds) 981 { 982 return (ds->ds_dir->dd_pool->dp_spa); 983 } 984 985 void 986 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 987 { 988 dsl_pool_t *dp; 989 990 if (ds == NULL) /* this is the meta-objset */ 991 return; 992 993 ASSERT(ds->ds_objset != NULL); 994 995 if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) 996 panic("dirtying snapshot!"); 997 998 dp = ds->ds_dir->dd_pool; 999 1000 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) { 1001 /* up the hold count until we can be written out */ 1002 dmu_buf_add_ref(ds->ds_dbuf, ds); 1003 } 1004 } 1005 1006 boolean_t 1007 dsl_dataset_is_dirty(dsl_dataset_t *ds) 1008 { 1009 for (int t = 0; t < TXG_SIZE; t++) { 1010 if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets, 1011 ds, t)) 1012 return (B_TRUE); 1013 } 1014 return (B_FALSE); 1015 } 1016 1017 static int 1018 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1019 { 1020 uint64_t asize; 1021 1022 if (!dmu_tx_is_syncing(tx)) 1023 return (0); 1024 1025 /* 1026 * If there's an fs-only reservation, any blocks that might become 1027 * owned by the snapshot dataset must be accommodated by space 1028 * outside of the reservation. 1029 */ 1030 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds)); 1031 asize = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, ds->ds_reserved); 1032 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 1033 return (SET_ERROR(ENOSPC)); 1034 1035 /* 1036 * Propagate any reserved space for this snapshot to other 1037 * snapshot checks in this sync group. 1038 */ 1039 if (asize > 0) 1040 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1041 1042 return (0); 1043 } 1044 1045 typedef struct dsl_dataset_snapshot_arg { 1046 nvlist_t *ddsa_snaps; 1047 nvlist_t *ddsa_props; 1048 nvlist_t *ddsa_errors; 1049 cred_t *ddsa_cr; 1050 } dsl_dataset_snapshot_arg_t; 1051 1052 int 1053 dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname, 1054 dmu_tx_t *tx, boolean_t recv, uint64_t cnt, cred_t *cr) 1055 { 1056 int error; 1057 uint64_t value; 1058 1059 ds->ds_trysnap_txg = tx->tx_txg; 1060 1061 if (!dmu_tx_is_syncing(tx)) 1062 return (0); 1063 1064 /* 1065 * We don't allow multiple snapshots of the same txg. If there 1066 * is already one, try again. 1067 */ 1068 if (dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) 1069 return (SET_ERROR(EAGAIN)); 1070 1071 /* 1072 * Check for conflicting snapshot name. 1073 */ 1074 error = dsl_dataset_snap_lookup(ds, snapname, &value); 1075 if (error == 0) 1076 return (SET_ERROR(EEXIST)); 1077 if (error != ENOENT) 1078 return (error); 1079 1080 /* 1081 * We don't allow taking snapshots of inconsistent datasets, such as 1082 * those into which we are currently receiving. However, if we are 1083 * creating this snapshot as part of a receive, this check will be 1084 * executed atomically with respect to the completion of the receive 1085 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this 1086 * case we ignore this, knowing it will be fixed up for us shortly in 1087 * dmu_recv_end_sync(). 1088 */ 1089 if (!recv && DS_IS_INCONSISTENT(ds)) 1090 return (SET_ERROR(EBUSY)); 1091 1092 /* 1093 * Skip the check for temporary snapshots or if we have already checked 1094 * the counts in dsl_dataset_snapshot_check. This means we really only 1095 * check the count here when we're receiving a stream. 1096 */ 1097 if (cnt != 0 && cr != NULL) { 1098 error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1099 ZFS_PROP_SNAPSHOT_LIMIT, NULL, cr); 1100 if (error != 0) 1101 return (error); 1102 } 1103 1104 error = dsl_dataset_snapshot_reserve_space(ds, tx); 1105 if (error != 0) 1106 return (error); 1107 1108 return (0); 1109 } 1110 1111 static int 1112 dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx) 1113 { 1114 dsl_dataset_snapshot_arg_t *ddsa = arg; 1115 dsl_pool_t *dp = dmu_tx_pool(tx); 1116 nvpair_t *pair; 1117 int rv = 0; 1118 1119 /* 1120 * Pre-compute how many total new snapshots will be created for each 1121 * level in the tree and below. This is needed for validating the 1122 * snapshot limit when either taking a recursive snapshot or when 1123 * taking multiple snapshots. 1124 * 1125 * The problem is that the counts are not actually adjusted when 1126 * we are checking, only when we finally sync. For a single snapshot, 1127 * this is easy, the count will increase by 1 at each node up the tree, 1128 * but its more complicated for the recursive/multiple snapshot case. 1129 * 1130 * The dsl_fs_ss_limit_check function does recursively check the count 1131 * at each level up the tree but since it is validating each snapshot 1132 * independently we need to be sure that we are validating the complete 1133 * count for the entire set of snapshots. We do this by rolling up the 1134 * counts for each component of the name into an nvlist and then 1135 * checking each of those cases with the aggregated count. 1136 * 1137 * This approach properly handles not only the recursive snapshot 1138 * case (where we get all of those on the ddsa_snaps list) but also 1139 * the sibling case (e.g. snapshot a/b and a/c so that we will also 1140 * validate the limit on 'a' using a count of 2). 1141 * 1142 * We validate the snapshot names in the third loop and only report 1143 * name errors once. 1144 */ 1145 if (dmu_tx_is_syncing(tx)) { 1146 nvlist_t *cnt_track = NULL; 1147 cnt_track = fnvlist_alloc(); 1148 1149 /* Rollup aggregated counts into the cnt_track list */ 1150 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1151 pair != NULL; 1152 pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1153 char *pdelim; 1154 uint64_t val; 1155 char nm[MAXPATHLEN]; 1156 1157 (void) strlcpy(nm, nvpair_name(pair), sizeof (nm)); 1158 pdelim = strchr(nm, '@'); 1159 if (pdelim == NULL) 1160 continue; 1161 *pdelim = '\0'; 1162 1163 do { 1164 if (nvlist_lookup_uint64(cnt_track, nm, 1165 &val) == 0) { 1166 /* update existing entry */ 1167 fnvlist_add_uint64(cnt_track, nm, 1168 val + 1); 1169 } else { 1170 /* add to list */ 1171 fnvlist_add_uint64(cnt_track, nm, 1); 1172 } 1173 1174 pdelim = strrchr(nm, '/'); 1175 if (pdelim != NULL) 1176 *pdelim = '\0'; 1177 } while (pdelim != NULL); 1178 } 1179 1180 /* Check aggregated counts at each level */ 1181 for (pair = nvlist_next_nvpair(cnt_track, NULL); 1182 pair != NULL; pair = nvlist_next_nvpair(cnt_track, pair)) { 1183 int error = 0; 1184 char *name; 1185 uint64_t cnt = 0; 1186 dsl_dataset_t *ds; 1187 1188 name = nvpair_name(pair); 1189 cnt = fnvpair_value_uint64(pair); 1190 ASSERT(cnt > 0); 1191 1192 error = dsl_dataset_hold(dp, name, FTAG, &ds); 1193 if (error == 0) { 1194 error = dsl_fs_ss_limit_check(ds->ds_dir, cnt, 1195 ZFS_PROP_SNAPSHOT_LIMIT, NULL, 1196 ddsa->ddsa_cr); 1197 dsl_dataset_rele(ds, FTAG); 1198 } 1199 1200 if (error != 0) { 1201 if (ddsa->ddsa_errors != NULL) 1202 fnvlist_add_int32(ddsa->ddsa_errors, 1203 name, error); 1204 rv = error; 1205 /* only report one error for this check */ 1206 break; 1207 } 1208 } 1209 nvlist_free(cnt_track); 1210 } 1211 1212 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1213 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1214 int error = 0; 1215 dsl_dataset_t *ds; 1216 char *name, *atp; 1217 char dsname[MAXNAMELEN]; 1218 1219 name = nvpair_name(pair); 1220 if (strlen(name) >= MAXNAMELEN) 1221 error = SET_ERROR(ENAMETOOLONG); 1222 if (error == 0) { 1223 atp = strchr(name, '@'); 1224 if (atp == NULL) 1225 error = SET_ERROR(EINVAL); 1226 if (error == 0) 1227 (void) strlcpy(dsname, name, atp - name + 1); 1228 } 1229 if (error == 0) 1230 error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 1231 if (error == 0) { 1232 /* passing 0/NULL skips dsl_fs_ss_limit_check */ 1233 error = dsl_dataset_snapshot_check_impl(ds, 1234 atp + 1, tx, B_FALSE, 0, NULL); 1235 dsl_dataset_rele(ds, FTAG); 1236 } 1237 1238 if (error != 0) { 1239 if (ddsa->ddsa_errors != NULL) { 1240 fnvlist_add_int32(ddsa->ddsa_errors, 1241 name, error); 1242 } 1243 rv = error; 1244 } 1245 } 1246 1247 return (rv); 1248 } 1249 1250 void 1251 dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname, 1252 dmu_tx_t *tx) 1253 { 1254 static zil_header_t zero_zil; 1255 1256 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1257 dmu_buf_t *dbuf; 1258 dsl_dataset_phys_t *dsphys; 1259 uint64_t dsobj, crtxg; 1260 objset_t *mos = dp->dp_meta_objset; 1261 objset_t *os; 1262 1263 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 1264 1265 /* 1266 * If we are on an old pool, the zil must not be active, in which 1267 * case it will be zeroed. Usually zil_suspend() accomplishes this. 1268 */ 1269 ASSERT(spa_version(dmu_tx_pool(tx)->dp_spa) >= SPA_VERSION_FAST_SNAP || 1270 dmu_objset_from_ds(ds, &os) != 0 || 1271 bcmp(&os->os_phys->os_zil_header, &zero_zil, 1272 sizeof (zero_zil)) == 0); 1273 1274 dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx); 1275 1276 /* 1277 * The origin's ds_creation_txg has to be < TXG_INITIAL 1278 */ 1279 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1280 crtxg = 1; 1281 else 1282 crtxg = tx->tx_txg; 1283 1284 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1285 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1286 VERIFY0(dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1287 dmu_buf_will_dirty(dbuf, tx); 1288 dsphys = dbuf->db_data; 1289 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1290 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1291 dsphys->ds_fsid_guid = unique_create(); 1292 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1293 sizeof (dsphys->ds_guid)); 1294 dsphys->ds_prev_snap_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 1295 dsphys->ds_prev_snap_txg = dsl_dataset_phys(ds)->ds_prev_snap_txg; 1296 dsphys->ds_next_snap_obj = ds->ds_object; 1297 dsphys->ds_num_children = 1; 1298 dsphys->ds_creation_time = gethrestime_sec(); 1299 dsphys->ds_creation_txg = crtxg; 1300 dsphys->ds_deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; 1301 dsphys->ds_referenced_bytes = dsl_dataset_phys(ds)->ds_referenced_bytes; 1302 dsphys->ds_compressed_bytes = dsl_dataset_phys(ds)->ds_compressed_bytes; 1303 dsphys->ds_uncompressed_bytes = 1304 dsl_dataset_phys(ds)->ds_uncompressed_bytes; 1305 dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags; 1306 dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp; 1307 dmu_buf_rele(dbuf, FTAG); 1308 1309 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 1310 if (ds->ds_feature_inuse[f]) 1311 dsl_dataset_activate_feature(dsobj, f, tx); 1312 } 1313 1314 ASSERT3U(ds->ds_prev != 0, ==, 1315 dsl_dataset_phys(ds)->ds_prev_snap_obj != 0); 1316 if (ds->ds_prev) { 1317 uint64_t next_clones_obj = 1318 dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj; 1319 ASSERT(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 1320 ds->ds_object || 1321 dsl_dataset_phys(ds->ds_prev)->ds_num_children > 1); 1322 if (dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == 1323 ds->ds_object) { 1324 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1325 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, 1326 dsl_dataset_phys(ds->ds_prev)->ds_creation_txg); 1327 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj = dsobj; 1328 } else if (next_clones_obj != 0) { 1329 dsl_dataset_remove_from_next_clones(ds->ds_prev, 1330 dsphys->ds_next_snap_obj, tx); 1331 VERIFY0(zap_add_int(mos, 1332 next_clones_obj, dsobj, tx)); 1333 } 1334 } 1335 1336 /* 1337 * If we have a reference-reservation on this dataset, we will 1338 * need to increase the amount of refreservation being charged 1339 * since our unique space is going to zero. 1340 */ 1341 if (ds->ds_reserved) { 1342 int64_t delta; 1343 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 1344 delta = MIN(dsl_dataset_phys(ds)->ds_unique_bytes, 1345 ds->ds_reserved); 1346 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1347 delta, 0, 0, tx); 1348 } 1349 1350 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1351 dsl_dataset_phys(ds)->ds_deadlist_obj = 1352 dsl_deadlist_clone(&ds->ds_deadlist, UINT64_MAX, 1353 dsl_dataset_phys(ds)->ds_prev_snap_obj, tx); 1354 dsl_deadlist_close(&ds->ds_deadlist); 1355 dsl_deadlist_open(&ds->ds_deadlist, mos, 1356 dsl_dataset_phys(ds)->ds_deadlist_obj); 1357 dsl_deadlist_add_key(&ds->ds_deadlist, 1358 dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); 1359 1360 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, <, tx->tx_txg); 1361 dsl_dataset_phys(ds)->ds_prev_snap_obj = dsobj; 1362 dsl_dataset_phys(ds)->ds_prev_snap_txg = crtxg; 1363 dsl_dataset_phys(ds)->ds_unique_bytes = 0; 1364 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1365 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1366 1367 VERIFY0(zap_add(mos, dsl_dataset_phys(ds)->ds_snapnames_zapobj, 1368 snapname, 8, 1, &dsobj, tx)); 1369 1370 if (ds->ds_prev) 1371 dsl_dataset_rele(ds->ds_prev, ds); 1372 VERIFY0(dsl_dataset_hold_obj(dp, 1373 dsl_dataset_phys(ds)->ds_prev_snap_obj, ds, &ds->ds_prev)); 1374 1375 dsl_scan_ds_snapshotted(ds, tx); 1376 1377 dsl_dir_snap_cmtime_update(ds->ds_dir); 1378 1379 spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, ""); 1380 } 1381 1382 static void 1383 dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx) 1384 { 1385 dsl_dataset_snapshot_arg_t *ddsa = arg; 1386 dsl_pool_t *dp = dmu_tx_pool(tx); 1387 nvpair_t *pair; 1388 1389 for (pair = nvlist_next_nvpair(ddsa->ddsa_snaps, NULL); 1390 pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) { 1391 dsl_dataset_t *ds; 1392 char *name, *atp; 1393 char dsname[MAXNAMELEN]; 1394 1395 name = nvpair_name(pair); 1396 atp = strchr(name, '@'); 1397 (void) strlcpy(dsname, name, atp - name + 1); 1398 VERIFY0(dsl_dataset_hold(dp, dsname, FTAG, &ds)); 1399 1400 dsl_dataset_snapshot_sync_impl(ds, atp + 1, tx); 1401 if (ddsa->ddsa_props != NULL) { 1402 dsl_props_set_sync_impl(ds->ds_prev, 1403 ZPROP_SRC_LOCAL, ddsa->ddsa_props, tx); 1404 } 1405 dsl_dataset_rele(ds, FTAG); 1406 } 1407 } 1408 1409 /* 1410 * The snapshots must all be in the same pool. 1411 * All-or-nothing: if there are any failures, nothing will be modified. 1412 */ 1413 int 1414 dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors) 1415 { 1416 dsl_dataset_snapshot_arg_t ddsa; 1417 nvpair_t *pair; 1418 boolean_t needsuspend; 1419 int error; 1420 spa_t *spa; 1421 char *firstname; 1422 nvlist_t *suspended = NULL; 1423 1424 pair = nvlist_next_nvpair(snaps, NULL); 1425 if (pair == NULL) 1426 return (0); 1427 firstname = nvpair_name(pair); 1428 1429 error = spa_open(firstname, &spa, FTAG); 1430 if (error != 0) 1431 return (error); 1432 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1433 spa_close(spa, FTAG); 1434 1435 if (needsuspend) { 1436 suspended = fnvlist_alloc(); 1437 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL; 1438 pair = nvlist_next_nvpair(snaps, pair)) { 1439 char fsname[MAXNAMELEN]; 1440 char *snapname = nvpair_name(pair); 1441 char *atp; 1442 void *cookie; 1443 1444 atp = strchr(snapname, '@'); 1445 if (atp == NULL) { 1446 error = SET_ERROR(EINVAL); 1447 break; 1448 } 1449 (void) strlcpy(fsname, snapname, atp - snapname + 1); 1450 1451 error = zil_suspend(fsname, &cookie); 1452 if (error != 0) 1453 break; 1454 fnvlist_add_uint64(suspended, fsname, 1455 (uintptr_t)cookie); 1456 } 1457 } 1458 1459 ddsa.ddsa_snaps = snaps; 1460 ddsa.ddsa_props = props; 1461 ddsa.ddsa_errors = errors; 1462 ddsa.ddsa_cr = CRED(); 1463 1464 if (error == 0) { 1465 error = dsl_sync_task(firstname, dsl_dataset_snapshot_check, 1466 dsl_dataset_snapshot_sync, &ddsa, 1467 fnvlist_num_pairs(snaps) * 3, ZFS_SPACE_CHECK_NORMAL); 1468 } 1469 1470 if (suspended != NULL) { 1471 for (pair = nvlist_next_nvpair(suspended, NULL); pair != NULL; 1472 pair = nvlist_next_nvpair(suspended, pair)) { 1473 zil_resume((void *)(uintptr_t) 1474 fnvpair_value_uint64(pair)); 1475 } 1476 fnvlist_free(suspended); 1477 } 1478 1479 return (error); 1480 } 1481 1482 typedef struct dsl_dataset_snapshot_tmp_arg { 1483 const char *ddsta_fsname; 1484 const char *ddsta_snapname; 1485 minor_t ddsta_cleanup_minor; 1486 const char *ddsta_htag; 1487 } dsl_dataset_snapshot_tmp_arg_t; 1488 1489 static int 1490 dsl_dataset_snapshot_tmp_check(void *arg, dmu_tx_t *tx) 1491 { 1492 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1493 dsl_pool_t *dp = dmu_tx_pool(tx); 1494 dsl_dataset_t *ds; 1495 int error; 1496 1497 error = dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds); 1498 if (error != 0) 1499 return (error); 1500 1501 /* NULL cred means no limit check for tmp snapshot */ 1502 error = dsl_dataset_snapshot_check_impl(ds, ddsta->ddsta_snapname, 1503 tx, B_FALSE, 0, NULL); 1504 if (error != 0) { 1505 dsl_dataset_rele(ds, FTAG); 1506 return (error); 1507 } 1508 1509 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS) { 1510 dsl_dataset_rele(ds, FTAG); 1511 return (SET_ERROR(ENOTSUP)); 1512 } 1513 error = dsl_dataset_user_hold_check_one(NULL, ddsta->ddsta_htag, 1514 B_TRUE, tx); 1515 if (error != 0) { 1516 dsl_dataset_rele(ds, FTAG); 1517 return (error); 1518 } 1519 1520 dsl_dataset_rele(ds, FTAG); 1521 return (0); 1522 } 1523 1524 static void 1525 dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx) 1526 { 1527 dsl_dataset_snapshot_tmp_arg_t *ddsta = arg; 1528 dsl_pool_t *dp = dmu_tx_pool(tx); 1529 dsl_dataset_t *ds; 1530 1531 VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds)); 1532 1533 dsl_dataset_snapshot_sync_impl(ds, ddsta->ddsta_snapname, tx); 1534 dsl_dataset_user_hold_sync_one(ds->ds_prev, ddsta->ddsta_htag, 1535 ddsta->ddsta_cleanup_minor, gethrestime_sec(), tx); 1536 dsl_destroy_snapshot_sync_impl(ds->ds_prev, B_TRUE, tx); 1537 1538 dsl_dataset_rele(ds, FTAG); 1539 } 1540 1541 int 1542 dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, 1543 minor_t cleanup_minor, const char *htag) 1544 { 1545 dsl_dataset_snapshot_tmp_arg_t ddsta; 1546 int error; 1547 spa_t *spa; 1548 boolean_t needsuspend; 1549 void *cookie; 1550 1551 ddsta.ddsta_fsname = fsname; 1552 ddsta.ddsta_snapname = snapname; 1553 ddsta.ddsta_cleanup_minor = cleanup_minor; 1554 ddsta.ddsta_htag = htag; 1555 1556 error = spa_open(fsname, &spa, FTAG); 1557 if (error != 0) 1558 return (error); 1559 needsuspend = (spa_version(spa) < SPA_VERSION_FAST_SNAP); 1560 spa_close(spa, FTAG); 1561 1562 if (needsuspend) { 1563 error = zil_suspend(fsname, &cookie); 1564 if (error != 0) 1565 return (error); 1566 } 1567 1568 error = dsl_sync_task(fsname, dsl_dataset_snapshot_tmp_check, 1569 dsl_dataset_snapshot_tmp_sync, &ddsta, 3, ZFS_SPACE_CHECK_RESERVED); 1570 1571 if (needsuspend) 1572 zil_resume(cookie); 1573 return (error); 1574 } 1575 1576 1577 void 1578 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1579 { 1580 ASSERT(dmu_tx_is_syncing(tx)); 1581 ASSERT(ds->ds_objset != NULL); 1582 ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0); 1583 1584 /* 1585 * in case we had to change ds_fsid_guid when we opened it, 1586 * sync it out now. 1587 */ 1588 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1589 dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid; 1590 1591 dmu_objset_sync(ds->ds_objset, zio, tx); 1592 1593 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 1594 if (ds->ds_feature_activation_needed[f]) { 1595 if (ds->ds_feature_inuse[f]) 1596 continue; 1597 dsl_dataset_activate_feature(ds->ds_object, f, tx); 1598 ds->ds_feature_inuse[f] = B_TRUE; 1599 } 1600 } 1601 } 1602 1603 static void 1604 get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv) 1605 { 1606 uint64_t count = 0; 1607 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1608 zap_cursor_t zc; 1609 zap_attribute_t za; 1610 nvlist_t *propval = fnvlist_alloc(); 1611 nvlist_t *val = fnvlist_alloc(); 1612 1613 ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool)); 1614 1615 /* 1616 * There may be missing entries in ds_next_clones_obj 1617 * due to a bug in a previous version of the code. 1618 * Only trust it if it has the right number of entries. 1619 */ 1620 if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { 1621 VERIFY0(zap_count(mos, dsl_dataset_phys(ds)->ds_next_clones_obj, 1622 &count)); 1623 } 1624 if (count != dsl_dataset_phys(ds)->ds_num_children - 1) 1625 goto fail; 1626 for (zap_cursor_init(&zc, mos, 1627 dsl_dataset_phys(ds)->ds_next_clones_obj); 1628 zap_cursor_retrieve(&zc, &za) == 0; 1629 zap_cursor_advance(&zc)) { 1630 dsl_dataset_t *clone; 1631 char buf[ZFS_MAXNAMELEN]; 1632 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 1633 za.za_first_integer, FTAG, &clone)); 1634 dsl_dir_name(clone->ds_dir, buf); 1635 fnvlist_add_boolean(val, buf); 1636 dsl_dataset_rele(clone, FTAG); 1637 } 1638 zap_cursor_fini(&zc); 1639 fnvlist_add_nvlist(propval, ZPROP_VALUE, val); 1640 fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES), propval); 1641 fail: 1642 nvlist_free(val); 1643 nvlist_free(propval); 1644 } 1645 1646 void 1647 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1648 { 1649 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1650 uint64_t refd, avail, uobjs, aobjs, ratio; 1651 1652 ASSERT(dsl_pool_config_held(dp)); 1653 1654 ratio = dsl_dataset_phys(ds)->ds_compressed_bytes == 0 ? 100 : 1655 (dsl_dataset_phys(ds)->ds_uncompressed_bytes * 100 / 1656 dsl_dataset_phys(ds)->ds_compressed_bytes); 1657 1658 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio); 1659 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALREFERENCED, 1660 dsl_dataset_phys(ds)->ds_uncompressed_bytes); 1661 1662 if (ds->ds_is_snapshot) { 1663 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio); 1664 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 1665 dsl_dataset_phys(ds)->ds_unique_bytes); 1666 get_clones_stat(ds, nv); 1667 } else { 1668 if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) { 1669 char buf[MAXNAMELEN]; 1670 dsl_dataset_name(ds->ds_prev, buf); 1671 dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf); 1672 } 1673 1674 dsl_dir_stats(ds->ds_dir, nv); 1675 } 1676 1677 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 1678 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 1679 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 1680 1681 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 1682 dsl_dataset_phys(ds)->ds_creation_time); 1683 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 1684 dsl_dataset_phys(ds)->ds_creation_txg); 1685 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 1686 ds->ds_quota); 1687 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 1688 ds->ds_reserved); 1689 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 1690 dsl_dataset_phys(ds)->ds_guid); 1691 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE, 1692 dsl_dataset_phys(ds)->ds_unique_bytes); 1693 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID, 1694 ds->ds_object); 1695 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, 1696 ds->ds_userrefs); 1697 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 1698 DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 1699 1700 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 1701 uint64_t written, comp, uncomp; 1702 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1703 dsl_dataset_t *prev; 1704 1705 int err = dsl_dataset_hold_obj(dp, 1706 dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &prev); 1707 if (err == 0) { 1708 err = dsl_dataset_space_written(prev, ds, &written, 1709 &comp, &uncomp); 1710 dsl_dataset_rele(prev, FTAG); 1711 if (err == 0) { 1712 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN, 1713 written); 1714 } 1715 } 1716 } 1717 } 1718 1719 void 1720 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1721 { 1722 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1723 ASSERT(dsl_pool_config_held(dp)); 1724 1725 stat->dds_creation_txg = dsl_dataset_phys(ds)->ds_creation_txg; 1726 stat->dds_inconsistent = 1727 dsl_dataset_phys(ds)->ds_flags & DS_FLAG_INCONSISTENT; 1728 stat->dds_guid = dsl_dataset_phys(ds)->ds_guid; 1729 stat->dds_origin[0] = '\0'; 1730 if (ds->ds_is_snapshot) { 1731 stat->dds_is_snapshot = B_TRUE; 1732 stat->dds_num_clones = 1733 dsl_dataset_phys(ds)->ds_num_children - 1; 1734 } else { 1735 stat->dds_is_snapshot = B_FALSE; 1736 stat->dds_num_clones = 0; 1737 1738 if (dsl_dir_is_clone(ds->ds_dir)) { 1739 dsl_dataset_t *ods; 1740 1741 VERIFY0(dsl_dataset_hold_obj(dp, 1742 dsl_dir_phys(ds->ds_dir)->dd_origin_obj, 1743 FTAG, &ods)); 1744 dsl_dataset_name(ods, stat->dds_origin); 1745 dsl_dataset_rele(ods, FTAG); 1746 } 1747 } 1748 } 1749 1750 uint64_t 1751 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 1752 { 1753 return (ds->ds_fsid_guid); 1754 } 1755 1756 void 1757 dsl_dataset_space(dsl_dataset_t *ds, 1758 uint64_t *refdbytesp, uint64_t *availbytesp, 1759 uint64_t *usedobjsp, uint64_t *availobjsp) 1760 { 1761 *refdbytesp = dsl_dataset_phys(ds)->ds_referenced_bytes; 1762 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 1763 if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) 1764 *availbytesp += 1765 ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes; 1766 if (ds->ds_quota != 0) { 1767 /* 1768 * Adjust available bytes according to refquota 1769 */ 1770 if (*refdbytesp < ds->ds_quota) 1771 *availbytesp = MIN(*availbytesp, 1772 ds->ds_quota - *refdbytesp); 1773 else 1774 *availbytesp = 0; 1775 } 1776 *usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp); 1777 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 1778 } 1779 1780 boolean_t 1781 dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap) 1782 { 1783 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1784 1785 ASSERT(dsl_pool_config_held(dp)); 1786 if (snap == NULL) 1787 return (B_FALSE); 1788 if (dsl_dataset_phys(ds)->ds_bp.blk_birth > 1789 dsl_dataset_phys(snap)->ds_creation_txg) { 1790 objset_t *os, *os_snap; 1791 /* 1792 * It may be that only the ZIL differs, because it was 1793 * reset in the head. Don't count that as being 1794 * modified. 1795 */ 1796 if (dmu_objset_from_ds(ds, &os) != 0) 1797 return (B_TRUE); 1798 if (dmu_objset_from_ds(snap, &os_snap) != 0) 1799 return (B_TRUE); 1800 return (bcmp(&os->os_phys->os_meta_dnode, 1801 &os_snap->os_phys->os_meta_dnode, 1802 sizeof (os->os_phys->os_meta_dnode)) != 0); 1803 } 1804 return (B_FALSE); 1805 } 1806 1807 typedef struct dsl_dataset_rename_snapshot_arg { 1808 const char *ddrsa_fsname; 1809 const char *ddrsa_oldsnapname; 1810 const char *ddrsa_newsnapname; 1811 boolean_t ddrsa_recursive; 1812 dmu_tx_t *ddrsa_tx; 1813 } dsl_dataset_rename_snapshot_arg_t; 1814 1815 /* ARGSUSED */ 1816 static int 1817 dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp, 1818 dsl_dataset_t *hds, void *arg) 1819 { 1820 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1821 int error; 1822 uint64_t val; 1823 1824 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1825 if (error != 0) { 1826 /* ignore nonexistent snapshots */ 1827 return (error == ENOENT ? 0 : error); 1828 } 1829 1830 /* new name should not exist */ 1831 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_newsnapname, &val); 1832 if (error == 0) 1833 error = SET_ERROR(EEXIST); 1834 else if (error == ENOENT) 1835 error = 0; 1836 1837 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 1838 if (dsl_dir_namelen(hds->ds_dir) + 1 + 1839 strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN) 1840 error = SET_ERROR(ENAMETOOLONG); 1841 1842 return (error); 1843 } 1844 1845 static int 1846 dsl_dataset_rename_snapshot_check(void *arg, dmu_tx_t *tx) 1847 { 1848 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1849 dsl_pool_t *dp = dmu_tx_pool(tx); 1850 dsl_dataset_t *hds; 1851 int error; 1852 1853 error = dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds); 1854 if (error != 0) 1855 return (error); 1856 1857 if (ddrsa->ddrsa_recursive) { 1858 error = dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1859 dsl_dataset_rename_snapshot_check_impl, ddrsa, 1860 DS_FIND_CHILDREN); 1861 } else { 1862 error = dsl_dataset_rename_snapshot_check_impl(dp, hds, ddrsa); 1863 } 1864 dsl_dataset_rele(hds, FTAG); 1865 return (error); 1866 } 1867 1868 static int 1869 dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp, 1870 dsl_dataset_t *hds, void *arg) 1871 { 1872 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1873 dsl_dataset_t *ds; 1874 uint64_t val; 1875 dmu_tx_t *tx = ddrsa->ddrsa_tx; 1876 int error; 1877 1878 error = dsl_dataset_snap_lookup(hds, ddrsa->ddrsa_oldsnapname, &val); 1879 ASSERT(error == 0 || error == ENOENT); 1880 if (error == ENOENT) { 1881 /* ignore nonexistent snapshots */ 1882 return (0); 1883 } 1884 1885 VERIFY0(dsl_dataset_hold_obj(dp, val, FTAG, &ds)); 1886 1887 /* log before we change the name */ 1888 spa_history_log_internal_ds(ds, "rename", tx, 1889 "-> @%s", ddrsa->ddrsa_newsnapname); 1890 1891 VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx, 1892 B_FALSE)); 1893 mutex_enter(&ds->ds_lock); 1894 (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname); 1895 mutex_exit(&ds->ds_lock); 1896 VERIFY0(zap_add(dp->dp_meta_objset, 1897 dsl_dataset_phys(hds)->ds_snapnames_zapobj, 1898 ds->ds_snapname, 8, 1, &ds->ds_object, tx)); 1899 1900 dsl_dataset_rele(ds, FTAG); 1901 return (0); 1902 } 1903 1904 static void 1905 dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx) 1906 { 1907 dsl_dataset_rename_snapshot_arg_t *ddrsa = arg; 1908 dsl_pool_t *dp = dmu_tx_pool(tx); 1909 dsl_dataset_t *hds; 1910 1911 VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds)); 1912 ddrsa->ddrsa_tx = tx; 1913 if (ddrsa->ddrsa_recursive) { 1914 VERIFY0(dmu_objset_find_dp(dp, hds->ds_dir->dd_object, 1915 dsl_dataset_rename_snapshot_sync_impl, ddrsa, 1916 DS_FIND_CHILDREN)); 1917 } else { 1918 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp, hds, ddrsa)); 1919 } 1920 dsl_dataset_rele(hds, FTAG); 1921 } 1922 1923 int 1924 dsl_dataset_rename_snapshot(const char *fsname, 1925 const char *oldsnapname, const char *newsnapname, boolean_t recursive) 1926 { 1927 dsl_dataset_rename_snapshot_arg_t ddrsa; 1928 1929 ddrsa.ddrsa_fsname = fsname; 1930 ddrsa.ddrsa_oldsnapname = oldsnapname; 1931 ddrsa.ddrsa_newsnapname = newsnapname; 1932 ddrsa.ddrsa_recursive = recursive; 1933 1934 return (dsl_sync_task(fsname, dsl_dataset_rename_snapshot_check, 1935 dsl_dataset_rename_snapshot_sync, &ddrsa, 1936 1, ZFS_SPACE_CHECK_RESERVED)); 1937 } 1938 1939 /* 1940 * If we're doing an ownership handoff, we need to make sure that there is 1941 * only one long hold on the dataset. We're not allowed to change anything here 1942 * so we don't permanently release the long hold or regular hold here. We want 1943 * to do this only when syncing to avoid the dataset unexpectedly going away 1944 * when we release the long hold. 1945 */ 1946 static int 1947 dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx) 1948 { 1949 boolean_t held; 1950 1951 if (!dmu_tx_is_syncing(tx)) 1952 return (0); 1953 1954 if (owner != NULL) { 1955 VERIFY3P(ds->ds_owner, ==, owner); 1956 dsl_dataset_long_rele(ds, owner); 1957 } 1958 1959 held = dsl_dataset_long_held(ds); 1960 1961 if (owner != NULL) 1962 dsl_dataset_long_hold(ds, owner); 1963 1964 if (held) 1965 return (SET_ERROR(EBUSY)); 1966 1967 return (0); 1968 } 1969 1970 typedef struct dsl_dataset_rollback_arg { 1971 const char *ddra_fsname; 1972 void *ddra_owner; 1973 nvlist_t *ddra_result; 1974 } dsl_dataset_rollback_arg_t; 1975 1976 static int 1977 dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx) 1978 { 1979 dsl_dataset_rollback_arg_t *ddra = arg; 1980 dsl_pool_t *dp = dmu_tx_pool(tx); 1981 dsl_dataset_t *ds; 1982 int64_t unused_refres_delta; 1983 int error; 1984 1985 error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds); 1986 if (error != 0) 1987 return (error); 1988 1989 /* must not be a snapshot */ 1990 if (ds->ds_is_snapshot) { 1991 dsl_dataset_rele(ds, FTAG); 1992 return (SET_ERROR(EINVAL)); 1993 } 1994 1995 /* must have a most recent snapshot */ 1996 if (dsl_dataset_phys(ds)->ds_prev_snap_txg < TXG_INITIAL) { 1997 dsl_dataset_rele(ds, FTAG); 1998 return (SET_ERROR(EINVAL)); 1999 } 2000 2001 /* must not have any bookmarks after the most recent snapshot */ 2002 nvlist_t *proprequest = fnvlist_alloc(); 2003 fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG)); 2004 nvlist_t *bookmarks = fnvlist_alloc(); 2005 error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks); 2006 fnvlist_free(proprequest); 2007 if (error != 0) 2008 return (error); 2009 for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL); 2010 pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) { 2011 nvlist_t *valuenv = 2012 fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair), 2013 zfs_prop_to_name(ZFS_PROP_CREATETXG)); 2014 uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value"); 2015 if (createtxg > dsl_dataset_phys(ds)->ds_prev_snap_txg) { 2016 fnvlist_free(bookmarks); 2017 dsl_dataset_rele(ds, FTAG); 2018 return (SET_ERROR(EEXIST)); 2019 } 2020 } 2021 fnvlist_free(bookmarks); 2022 2023 error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx); 2024 if (error != 0) { 2025 dsl_dataset_rele(ds, FTAG); 2026 return (error); 2027 } 2028 2029 /* 2030 * Check if the snap we are rolling back to uses more than 2031 * the refquota. 2032 */ 2033 if (ds->ds_quota != 0 && 2034 dsl_dataset_phys(ds->ds_prev)->ds_referenced_bytes > ds->ds_quota) { 2035 dsl_dataset_rele(ds, FTAG); 2036 return (SET_ERROR(EDQUOT)); 2037 } 2038 2039 /* 2040 * When we do the clone swap, we will temporarily use more space 2041 * due to the refreservation (the head will no longer have any 2042 * unique space, so the entire amount of the refreservation will need 2043 * to be free). We will immediately destroy the clone, freeing 2044 * this space, but the freeing happens over many txg's. 2045 */ 2046 unused_refres_delta = (int64_t)MIN(ds->ds_reserved, 2047 dsl_dataset_phys(ds)->ds_unique_bytes); 2048 2049 if (unused_refres_delta > 0 && 2050 unused_refres_delta > 2051 dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) { 2052 dsl_dataset_rele(ds, FTAG); 2053 return (SET_ERROR(ENOSPC)); 2054 } 2055 2056 dsl_dataset_rele(ds, FTAG); 2057 return (0); 2058 } 2059 2060 static void 2061 dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx) 2062 { 2063 dsl_dataset_rollback_arg_t *ddra = arg; 2064 dsl_pool_t *dp = dmu_tx_pool(tx); 2065 dsl_dataset_t *ds, *clone; 2066 uint64_t cloneobj; 2067 char namebuf[ZFS_MAXNAMELEN]; 2068 2069 VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds)); 2070 2071 dsl_dataset_name(ds->ds_prev, namebuf); 2072 fnvlist_add_string(ddra->ddra_result, "target", namebuf); 2073 2074 cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback", 2075 ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx); 2076 2077 VERIFY0(dsl_dataset_hold_obj(dp, cloneobj, FTAG, &clone)); 2078 2079 dsl_dataset_clone_swap_sync_impl(clone, ds, tx); 2080 dsl_dataset_zero_zil(ds, tx); 2081 2082 dsl_destroy_head_sync_impl(clone, tx); 2083 2084 dsl_dataset_rele(clone, FTAG); 2085 dsl_dataset_rele(ds, FTAG); 2086 } 2087 2088 /* 2089 * Rolls back the given filesystem or volume to the most recent snapshot. 2090 * The name of the most recent snapshot will be returned under key "target" 2091 * in the result nvlist. 2092 * 2093 * If owner != NULL: 2094 * - The existing dataset MUST be owned by the specified owner at entry 2095 * - Upon return, dataset will still be held by the same owner, whether we 2096 * succeed or not. 2097 * 2098 * This mode is required any time the existing filesystem is mounted. See 2099 * notes above zfs_suspend_fs() for further details. 2100 */ 2101 int 2102 dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result) 2103 { 2104 dsl_dataset_rollback_arg_t ddra; 2105 2106 ddra.ddra_fsname = fsname; 2107 ddra.ddra_owner = owner; 2108 ddra.ddra_result = result; 2109 2110 return (dsl_sync_task(fsname, dsl_dataset_rollback_check, 2111 dsl_dataset_rollback_sync, &ddra, 2112 1, ZFS_SPACE_CHECK_RESERVED)); 2113 } 2114 2115 struct promotenode { 2116 list_node_t link; 2117 dsl_dataset_t *ds; 2118 }; 2119 2120 typedef struct dsl_dataset_promote_arg { 2121 const char *ddpa_clonename; 2122 dsl_dataset_t *ddpa_clone; 2123 list_t shared_snaps, origin_snaps, clone_snaps; 2124 dsl_dataset_t *origin_origin; /* origin of the origin */ 2125 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2126 char *err_ds; 2127 cred_t *cr; 2128 } dsl_dataset_promote_arg_t; 2129 2130 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2131 static int promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, 2132 void *tag); 2133 static void promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag); 2134 2135 static int 2136 dsl_dataset_promote_check(void *arg, dmu_tx_t *tx) 2137 { 2138 dsl_dataset_promote_arg_t *ddpa = arg; 2139 dsl_pool_t *dp = dmu_tx_pool(tx); 2140 dsl_dataset_t *hds; 2141 struct promotenode *snap; 2142 dsl_dataset_t *origin_ds; 2143 int err; 2144 uint64_t unused; 2145 uint64_t ss_mv_cnt; 2146 size_t max_snap_len; 2147 2148 err = promote_hold(ddpa, dp, FTAG); 2149 if (err != 0) 2150 return (err); 2151 2152 hds = ddpa->ddpa_clone; 2153 max_snap_len = MAXNAMELEN - strlen(ddpa->ddpa_clonename) - 1; 2154 2155 if (dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE) { 2156 promote_rele(ddpa, FTAG); 2157 return (SET_ERROR(EXDEV)); 2158 } 2159 2160 /* 2161 * Compute and check the amount of space to transfer. Since this is 2162 * so expensive, don't do the preliminary check. 2163 */ 2164 if (!dmu_tx_is_syncing(tx)) { 2165 promote_rele(ddpa, FTAG); 2166 return (0); 2167 } 2168 2169 snap = list_head(&ddpa->shared_snaps); 2170 origin_ds = snap->ds; 2171 2172 /* compute origin's new unique space */ 2173 snap = list_tail(&ddpa->clone_snaps); 2174 ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, 2175 origin_ds->ds_object); 2176 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2177 dsl_dataset_phys(origin_ds)->ds_prev_snap_txg, UINT64_MAX, 2178 &ddpa->unique, &unused, &unused); 2179 2180 /* 2181 * Walk the snapshots that we are moving 2182 * 2183 * Compute space to transfer. Consider the incremental changes 2184 * to used by each snapshot: 2185 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2186 * So each snapshot gave birth to: 2187 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2188 * So a sequence would look like: 2189 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2190 * Which simplifies to: 2191 * uN + kN + kN-1 + ... + k1 + k0 2192 * Note however, if we stop before we reach the ORIGIN we get: 2193 * uN + kN + kN-1 + ... + kM - uM-1 2194 */ 2195 ss_mv_cnt = 0; 2196 ddpa->used = dsl_dataset_phys(origin_ds)->ds_referenced_bytes; 2197 ddpa->comp = dsl_dataset_phys(origin_ds)->ds_compressed_bytes; 2198 ddpa->uncomp = dsl_dataset_phys(origin_ds)->ds_uncompressed_bytes; 2199 for (snap = list_head(&ddpa->shared_snaps); snap; 2200 snap = list_next(&ddpa->shared_snaps, snap)) { 2201 uint64_t val, dlused, dlcomp, dluncomp; 2202 dsl_dataset_t *ds = snap->ds; 2203 2204 ss_mv_cnt++; 2205 2206 /* 2207 * If there are long holds, we won't be able to evict 2208 * the objset. 2209 */ 2210 if (dsl_dataset_long_held(ds)) { 2211 err = SET_ERROR(EBUSY); 2212 goto out; 2213 } 2214 2215 /* Check that the snapshot name does not conflict */ 2216 VERIFY0(dsl_dataset_get_snapname(ds)); 2217 if (strlen(ds->ds_snapname) >= max_snap_len) { 2218 err = SET_ERROR(ENAMETOOLONG); 2219 goto out; 2220 } 2221 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2222 if (err == 0) { 2223 (void) strcpy(ddpa->err_ds, snap->ds->ds_snapname); 2224 err = SET_ERROR(EEXIST); 2225 goto out; 2226 } 2227 if (err != ENOENT) 2228 goto out; 2229 2230 /* The very first snapshot does not have a deadlist */ 2231 if (dsl_dataset_phys(ds)->ds_prev_snap_obj == 0) 2232 continue; 2233 2234 dsl_deadlist_space(&ds->ds_deadlist, 2235 &dlused, &dlcomp, &dluncomp); 2236 ddpa->used += dlused; 2237 ddpa->comp += dlcomp; 2238 ddpa->uncomp += dluncomp; 2239 } 2240 2241 /* 2242 * If we are a clone of a clone then we never reached ORIGIN, 2243 * so we need to subtract out the clone origin's used space. 2244 */ 2245 if (ddpa->origin_origin) { 2246 ddpa->used -= 2247 dsl_dataset_phys(ddpa->origin_origin)->ds_referenced_bytes; 2248 ddpa->comp -= 2249 dsl_dataset_phys(ddpa->origin_origin)->ds_compressed_bytes; 2250 ddpa->uncomp -= 2251 dsl_dataset_phys(ddpa->origin_origin)-> 2252 ds_uncompressed_bytes; 2253 } 2254 2255 /* Check that there is enough space and limit headroom here */ 2256 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2257 0, ss_mv_cnt, ddpa->used, ddpa->cr); 2258 if (err != 0) 2259 goto out; 2260 2261 /* 2262 * Compute the amounts of space that will be used by snapshots 2263 * after the promotion (for both origin and clone). For each, 2264 * it is the amount of space that will be on all of their 2265 * deadlists (that was not born before their new origin). 2266 */ 2267 if (dsl_dir_phys(hds->ds_dir)->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2268 uint64_t space; 2269 2270 /* 2271 * Note, typically this will not be a clone of a clone, 2272 * so dd_origin_txg will be < TXG_INITIAL, so 2273 * these snaplist_space() -> dsl_deadlist_space_range() 2274 * calls will be fast because they do not have to 2275 * iterate over all bps. 2276 */ 2277 snap = list_head(&ddpa->origin_snaps); 2278 err = snaplist_space(&ddpa->shared_snaps, 2279 snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap); 2280 if (err != 0) 2281 goto out; 2282 2283 err = snaplist_space(&ddpa->clone_snaps, 2284 snap->ds->ds_dir->dd_origin_txg, &space); 2285 if (err != 0) 2286 goto out; 2287 ddpa->cloneusedsnap += space; 2288 } 2289 if (dsl_dir_phys(origin_ds->ds_dir)->dd_flags & 2290 DD_FLAG_USED_BREAKDOWN) { 2291 err = snaplist_space(&ddpa->origin_snaps, 2292 dsl_dataset_phys(origin_ds)->ds_creation_txg, 2293 &ddpa->originusedsnap); 2294 if (err != 0) 2295 goto out; 2296 } 2297 2298 out: 2299 promote_rele(ddpa, FTAG); 2300 return (err); 2301 } 2302 2303 static void 2304 dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx) 2305 { 2306 dsl_dataset_promote_arg_t *ddpa = arg; 2307 dsl_pool_t *dp = dmu_tx_pool(tx); 2308 dsl_dataset_t *hds; 2309 struct promotenode *snap; 2310 dsl_dataset_t *origin_ds; 2311 dsl_dataset_t *origin_head; 2312 dsl_dir_t *dd; 2313 dsl_dir_t *odd = NULL; 2314 uint64_t oldnext_obj; 2315 int64_t delta; 2316 2317 VERIFY0(promote_hold(ddpa, dp, FTAG)); 2318 hds = ddpa->ddpa_clone; 2319 2320 ASSERT0(dsl_dataset_phys(hds)->ds_flags & DS_FLAG_NOPROMOTE); 2321 2322 snap = list_head(&ddpa->shared_snaps); 2323 origin_ds = snap->ds; 2324 dd = hds->ds_dir; 2325 2326 snap = list_head(&ddpa->origin_snaps); 2327 origin_head = snap->ds; 2328 2329 /* 2330 * We need to explicitly open odd, since origin_ds's dd will be 2331 * changing. 2332 */ 2333 VERIFY0(dsl_dir_hold_obj(dp, origin_ds->ds_dir->dd_object, 2334 NULL, FTAG, &odd)); 2335 2336 /* change origin's next snap */ 2337 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2338 oldnext_obj = dsl_dataset_phys(origin_ds)->ds_next_snap_obj; 2339 snap = list_tail(&ddpa->clone_snaps); 2340 ASSERT3U(dsl_dataset_phys(snap->ds)->ds_prev_snap_obj, ==, 2341 origin_ds->ds_object); 2342 dsl_dataset_phys(origin_ds)->ds_next_snap_obj = snap->ds->ds_object; 2343 2344 /* change the origin's next clone */ 2345 if (dsl_dataset_phys(origin_ds)->ds_next_clones_obj) { 2346 dsl_dataset_remove_from_next_clones(origin_ds, 2347 snap->ds->ds_object, tx); 2348 VERIFY0(zap_add_int(dp->dp_meta_objset, 2349 dsl_dataset_phys(origin_ds)->ds_next_clones_obj, 2350 oldnext_obj, tx)); 2351 } 2352 2353 /* change origin */ 2354 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2355 ASSERT3U(dsl_dir_phys(dd)->dd_origin_obj, ==, origin_ds->ds_object); 2356 dsl_dir_phys(dd)->dd_origin_obj = dsl_dir_phys(odd)->dd_origin_obj; 2357 dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg; 2358 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2359 dsl_dir_phys(odd)->dd_origin_obj = origin_ds->ds_object; 2360 origin_head->ds_dir->dd_origin_txg = 2361 dsl_dataset_phys(origin_ds)->ds_creation_txg; 2362 2363 /* change dd_clone entries */ 2364 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2365 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2366 dsl_dir_phys(odd)->dd_clones, hds->ds_object, tx)); 2367 VERIFY0(zap_add_int(dp->dp_meta_objset, 2368 dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, 2369 hds->ds_object, tx)); 2370 2371 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2372 dsl_dir_phys(ddpa->origin_origin->ds_dir)->dd_clones, 2373 origin_head->ds_object, tx)); 2374 if (dsl_dir_phys(dd)->dd_clones == 0) { 2375 dsl_dir_phys(dd)->dd_clones = 2376 zap_create(dp->dp_meta_objset, DMU_OT_DSL_CLONES, 2377 DMU_OT_NONE, 0, tx); 2378 } 2379 VERIFY0(zap_add_int(dp->dp_meta_objset, 2380 dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx)); 2381 } 2382 2383 /* move snapshots to this dir */ 2384 for (snap = list_head(&ddpa->shared_snaps); snap; 2385 snap = list_next(&ddpa->shared_snaps, snap)) { 2386 dsl_dataset_t *ds = snap->ds; 2387 2388 /* 2389 * Property callbacks are registered to a particular 2390 * dsl_dir. Since ours is changing, evict the objset 2391 * so that they will be unregistered from the old dsl_dir. 2392 */ 2393 if (ds->ds_objset) { 2394 dmu_objset_evict(ds->ds_objset); 2395 ds->ds_objset = NULL; 2396 } 2397 2398 /* move snap name entry */ 2399 VERIFY0(dsl_dataset_get_snapname(ds)); 2400 VERIFY0(dsl_dataset_snap_remove(origin_head, 2401 ds->ds_snapname, tx, B_TRUE)); 2402 VERIFY0(zap_add(dp->dp_meta_objset, 2403 dsl_dataset_phys(hds)->ds_snapnames_zapobj, ds->ds_snapname, 2404 8, 1, &ds->ds_object, tx)); 2405 dsl_fs_ss_count_adjust(hds->ds_dir, 1, 2406 DD_FIELD_SNAPSHOT_COUNT, tx); 2407 2408 /* change containing dsl_dir */ 2409 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2410 ASSERT3U(dsl_dataset_phys(ds)->ds_dir_obj, ==, odd->dd_object); 2411 dsl_dataset_phys(ds)->ds_dir_obj = dd->dd_object; 2412 ASSERT3P(ds->ds_dir, ==, odd); 2413 dsl_dir_rele(ds->ds_dir, ds); 2414 VERIFY0(dsl_dir_hold_obj(dp, dd->dd_object, 2415 NULL, ds, &ds->ds_dir)); 2416 2417 /* move any clone references */ 2418 if (dsl_dataset_phys(ds)->ds_next_clones_obj && 2419 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 2420 zap_cursor_t zc; 2421 zap_attribute_t za; 2422 2423 for (zap_cursor_init(&zc, dp->dp_meta_objset, 2424 dsl_dataset_phys(ds)->ds_next_clones_obj); 2425 zap_cursor_retrieve(&zc, &za) == 0; 2426 zap_cursor_advance(&zc)) { 2427 dsl_dataset_t *cnds; 2428 uint64_t o; 2429 2430 if (za.za_first_integer == oldnext_obj) { 2431 /* 2432 * We've already moved the 2433 * origin's reference. 2434 */ 2435 continue; 2436 } 2437 2438 VERIFY0(dsl_dataset_hold_obj(dp, 2439 za.za_first_integer, FTAG, &cnds)); 2440 o = dsl_dir_phys(cnds->ds_dir)-> 2441 dd_head_dataset_obj; 2442 2443 VERIFY0(zap_remove_int(dp->dp_meta_objset, 2444 dsl_dir_phys(odd)->dd_clones, o, tx)); 2445 VERIFY0(zap_add_int(dp->dp_meta_objset, 2446 dsl_dir_phys(dd)->dd_clones, o, tx)); 2447 dsl_dataset_rele(cnds, FTAG); 2448 } 2449 zap_cursor_fini(&zc); 2450 } 2451 2452 ASSERT(!dsl_prop_hascb(ds)); 2453 } 2454 2455 /* 2456 * Change space accounting. 2457 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2458 * both be valid, or both be 0 (resulting in delta == 0). This 2459 * is true for each of {clone,origin} independently. 2460 */ 2461 2462 delta = ddpa->cloneusedsnap - 2463 dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]; 2464 ASSERT3S(delta, >=, 0); 2465 ASSERT3U(ddpa->used, >=, delta); 2466 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2467 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2468 ddpa->used - delta, ddpa->comp, ddpa->uncomp, tx); 2469 2470 delta = ddpa->originusedsnap - 2471 dsl_dir_phys(odd)->dd_used_breakdown[DD_USED_SNAP]; 2472 ASSERT3S(delta, <=, 0); 2473 ASSERT3U(ddpa->used, >=, -delta); 2474 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2475 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2476 -ddpa->used - delta, -ddpa->comp, -ddpa->uncomp, tx); 2477 2478 dsl_dataset_phys(origin_ds)->ds_unique_bytes = ddpa->unique; 2479 2480 /* log history record */ 2481 spa_history_log_internal_ds(hds, "promote", tx, ""); 2482 2483 dsl_dir_rele(odd, FTAG); 2484 promote_rele(ddpa, FTAG); 2485 } 2486 2487 /* 2488 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2489 * (exclusive) and last_obj (inclusive). The list will be in reverse 2490 * order (last_obj will be the list_head()). If first_obj == 0, do all 2491 * snapshots back to this dataset's origin. 2492 */ 2493 static int 2494 snaplist_make(dsl_pool_t *dp, 2495 uint64_t first_obj, uint64_t last_obj, list_t *l, void *tag) 2496 { 2497 uint64_t obj = last_obj; 2498 2499 list_create(l, sizeof (struct promotenode), 2500 offsetof(struct promotenode, link)); 2501 2502 while (obj != first_obj) { 2503 dsl_dataset_t *ds; 2504 struct promotenode *snap; 2505 int err; 2506 2507 err = dsl_dataset_hold_obj(dp, obj, tag, &ds); 2508 ASSERT(err != ENOENT); 2509 if (err != 0) 2510 return (err); 2511 2512 if (first_obj == 0) 2513 first_obj = dsl_dir_phys(ds->ds_dir)->dd_origin_obj; 2514 2515 snap = kmem_alloc(sizeof (*snap), KM_SLEEP); 2516 snap->ds = ds; 2517 list_insert_tail(l, snap); 2518 obj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 2519 } 2520 2521 return (0); 2522 } 2523 2524 static int 2525 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2526 { 2527 struct promotenode *snap; 2528 2529 *spacep = 0; 2530 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2531 uint64_t used, comp, uncomp; 2532 dsl_deadlist_space_range(&snap->ds->ds_deadlist, 2533 mintxg, UINT64_MAX, &used, &comp, &uncomp); 2534 *spacep += used; 2535 } 2536 return (0); 2537 } 2538 2539 static void 2540 snaplist_destroy(list_t *l, void *tag) 2541 { 2542 struct promotenode *snap; 2543 2544 if (l == NULL || !list_link_active(&l->list_head)) 2545 return; 2546 2547 while ((snap = list_tail(l)) != NULL) { 2548 list_remove(l, snap); 2549 dsl_dataset_rele(snap->ds, tag); 2550 kmem_free(snap, sizeof (*snap)); 2551 } 2552 list_destroy(l); 2553 } 2554 2555 static int 2556 promote_hold(dsl_dataset_promote_arg_t *ddpa, dsl_pool_t *dp, void *tag) 2557 { 2558 int error; 2559 dsl_dir_t *dd; 2560 struct promotenode *snap; 2561 2562 error = dsl_dataset_hold(dp, ddpa->ddpa_clonename, tag, 2563 &ddpa->ddpa_clone); 2564 if (error != 0) 2565 return (error); 2566 dd = ddpa->ddpa_clone->ds_dir; 2567 2568 if (ddpa->ddpa_clone->ds_is_snapshot || 2569 !dsl_dir_is_clone(dd)) { 2570 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2571 return (SET_ERROR(EINVAL)); 2572 } 2573 2574 error = snaplist_make(dp, 0, dsl_dir_phys(dd)->dd_origin_obj, 2575 &ddpa->shared_snaps, tag); 2576 if (error != 0) 2577 goto out; 2578 2579 error = snaplist_make(dp, 0, ddpa->ddpa_clone->ds_object, 2580 &ddpa->clone_snaps, tag); 2581 if (error != 0) 2582 goto out; 2583 2584 snap = list_head(&ddpa->shared_snaps); 2585 ASSERT3U(snap->ds->ds_object, ==, dsl_dir_phys(dd)->dd_origin_obj); 2586 error = snaplist_make(dp, dsl_dir_phys(dd)->dd_origin_obj, 2587 dsl_dir_phys(snap->ds->ds_dir)->dd_head_dataset_obj, 2588 &ddpa->origin_snaps, tag); 2589 if (error != 0) 2590 goto out; 2591 2592 if (dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj != 0) { 2593 error = dsl_dataset_hold_obj(dp, 2594 dsl_dir_phys(snap->ds->ds_dir)->dd_origin_obj, 2595 tag, &ddpa->origin_origin); 2596 if (error != 0) 2597 goto out; 2598 } 2599 out: 2600 if (error != 0) 2601 promote_rele(ddpa, tag); 2602 return (error); 2603 } 2604 2605 static void 2606 promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag) 2607 { 2608 snaplist_destroy(&ddpa->shared_snaps, tag); 2609 snaplist_destroy(&ddpa->clone_snaps, tag); 2610 snaplist_destroy(&ddpa->origin_snaps, tag); 2611 if (ddpa->origin_origin != NULL) 2612 dsl_dataset_rele(ddpa->origin_origin, tag); 2613 dsl_dataset_rele(ddpa->ddpa_clone, tag); 2614 } 2615 2616 /* 2617 * Promote a clone. 2618 * 2619 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled 2620 * in with the name. (It must be at least MAXNAMELEN bytes long.) 2621 */ 2622 int 2623 dsl_dataset_promote(const char *name, char *conflsnap) 2624 { 2625 dsl_dataset_promote_arg_t ddpa = { 0 }; 2626 uint64_t numsnaps; 2627 int error; 2628 objset_t *os; 2629 2630 /* 2631 * We will modify space proportional to the number of 2632 * snapshots. Compute numsnaps. 2633 */ 2634 error = dmu_objset_hold(name, FTAG, &os); 2635 if (error != 0) 2636 return (error); 2637 error = zap_count(dmu_objset_pool(os)->dp_meta_objset, 2638 dsl_dataset_phys(dmu_objset_ds(os))->ds_snapnames_zapobj, 2639 &numsnaps); 2640 dmu_objset_rele(os, FTAG); 2641 if (error != 0) 2642 return (error); 2643 2644 ddpa.ddpa_clonename = name; 2645 ddpa.err_ds = conflsnap; 2646 ddpa.cr = CRED(); 2647 2648 return (dsl_sync_task(name, dsl_dataset_promote_check, 2649 dsl_dataset_promote_sync, &ddpa, 2650 2 + numsnaps, ZFS_SPACE_CHECK_RESERVED)); 2651 } 2652 2653 int 2654 dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone, 2655 dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx) 2656 { 2657 int64_t unused_refres_delta; 2658 2659 /* they should both be heads */ 2660 if (clone->ds_is_snapshot || 2661 origin_head->ds_is_snapshot) 2662 return (SET_ERROR(EINVAL)); 2663 2664 /* if we are not forcing, the branch point should be just before them */ 2665 if (!force && clone->ds_prev != origin_head->ds_prev) 2666 return (SET_ERROR(EINVAL)); 2667 2668 /* clone should be the clone (unless they are unrelated) */ 2669 if (clone->ds_prev != NULL && 2670 clone->ds_prev != clone->ds_dir->dd_pool->dp_origin_snap && 2671 origin_head->ds_dir != clone->ds_prev->ds_dir) 2672 return (SET_ERROR(EINVAL)); 2673 2674 /* the clone should be a child of the origin */ 2675 if (clone->ds_dir->dd_parent != origin_head->ds_dir) 2676 return (SET_ERROR(EINVAL)); 2677 2678 /* origin_head shouldn't be modified unless 'force' */ 2679 if (!force && 2680 dsl_dataset_modified_since_snap(origin_head, origin_head->ds_prev)) 2681 return (SET_ERROR(ETXTBSY)); 2682 2683 /* origin_head should have no long holds (e.g. is not mounted) */ 2684 if (dsl_dataset_handoff_check(origin_head, owner, tx)) 2685 return (SET_ERROR(EBUSY)); 2686 2687 /* check amount of any unconsumed refreservation */ 2688 unused_refres_delta = 2689 (int64_t)MIN(origin_head->ds_reserved, 2690 dsl_dataset_phys(origin_head)->ds_unique_bytes) - 2691 (int64_t)MIN(origin_head->ds_reserved, 2692 dsl_dataset_phys(clone)->ds_unique_bytes); 2693 2694 if (unused_refres_delta > 0 && 2695 unused_refres_delta > 2696 dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE)) 2697 return (SET_ERROR(ENOSPC)); 2698 2699 /* clone can't be over the head's refquota */ 2700 if (origin_head->ds_quota != 0 && 2701 dsl_dataset_phys(clone)->ds_referenced_bytes > 2702 origin_head->ds_quota) 2703 return (SET_ERROR(EDQUOT)); 2704 2705 return (0); 2706 } 2707 2708 void 2709 dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone, 2710 dsl_dataset_t *origin_head, dmu_tx_t *tx) 2711 { 2712 dsl_pool_t *dp = dmu_tx_pool(tx); 2713 int64_t unused_refres_delta; 2714 2715 ASSERT(clone->ds_reserved == 0); 2716 ASSERT(origin_head->ds_quota == 0 || 2717 dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota); 2718 ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev); 2719 2720 /* 2721 * Swap per-dataset feature flags. 2722 */ 2723 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 2724 if (!(spa_feature_table[f].fi_flags & 2725 ZFEATURE_FLAG_PER_DATASET)) { 2726 ASSERT(!clone->ds_feature_inuse[f]); 2727 ASSERT(!origin_head->ds_feature_inuse[f]); 2728 continue; 2729 } 2730 2731 boolean_t clone_inuse = clone->ds_feature_inuse[f]; 2732 boolean_t origin_head_inuse = origin_head->ds_feature_inuse[f]; 2733 2734 if (clone_inuse) { 2735 dsl_dataset_deactivate_feature(clone->ds_object, f, tx); 2736 clone->ds_feature_inuse[f] = B_FALSE; 2737 } 2738 if (origin_head_inuse) { 2739 dsl_dataset_deactivate_feature(origin_head->ds_object, 2740 f, tx); 2741 origin_head->ds_feature_inuse[f] = B_FALSE; 2742 } 2743 if (clone_inuse) { 2744 dsl_dataset_activate_feature(origin_head->ds_object, 2745 f, tx); 2746 origin_head->ds_feature_inuse[f] = B_TRUE; 2747 } 2748 if (origin_head_inuse) { 2749 dsl_dataset_activate_feature(clone->ds_object, f, tx); 2750 clone->ds_feature_inuse[f] = B_TRUE; 2751 } 2752 } 2753 2754 dmu_buf_will_dirty(clone->ds_dbuf, tx); 2755 dmu_buf_will_dirty(origin_head->ds_dbuf, tx); 2756 2757 if (clone->ds_objset != NULL) { 2758 dmu_objset_evict(clone->ds_objset); 2759 clone->ds_objset = NULL; 2760 } 2761 2762 if (origin_head->ds_objset != NULL) { 2763 dmu_objset_evict(origin_head->ds_objset); 2764 origin_head->ds_objset = NULL; 2765 } 2766 2767 unused_refres_delta = 2768 (int64_t)MIN(origin_head->ds_reserved, 2769 dsl_dataset_phys(origin_head)->ds_unique_bytes) - 2770 (int64_t)MIN(origin_head->ds_reserved, 2771 dsl_dataset_phys(clone)->ds_unique_bytes); 2772 2773 /* 2774 * Reset origin's unique bytes, if it exists. 2775 */ 2776 if (clone->ds_prev) { 2777 dsl_dataset_t *origin = clone->ds_prev; 2778 uint64_t comp, uncomp; 2779 2780 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2781 dsl_deadlist_space_range(&clone->ds_deadlist, 2782 dsl_dataset_phys(origin)->ds_prev_snap_txg, UINT64_MAX, 2783 &dsl_dataset_phys(origin)->ds_unique_bytes, &comp, &uncomp); 2784 } 2785 2786 /* swap blkptrs */ 2787 { 2788 blkptr_t tmp; 2789 tmp = dsl_dataset_phys(origin_head)->ds_bp; 2790 dsl_dataset_phys(origin_head)->ds_bp = 2791 dsl_dataset_phys(clone)->ds_bp; 2792 dsl_dataset_phys(clone)->ds_bp = tmp; 2793 } 2794 2795 /* set dd_*_bytes */ 2796 { 2797 int64_t dused, dcomp, duncomp; 2798 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2799 uint64_t odl_used, odl_comp, odl_uncomp; 2800 2801 ASSERT3U(dsl_dir_phys(clone->ds_dir)-> 2802 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2803 2804 dsl_deadlist_space(&clone->ds_deadlist, 2805 &cdl_used, &cdl_comp, &cdl_uncomp); 2806 dsl_deadlist_space(&origin_head->ds_deadlist, 2807 &odl_used, &odl_comp, &odl_uncomp); 2808 2809 dused = dsl_dataset_phys(clone)->ds_referenced_bytes + 2810 cdl_used - 2811 (dsl_dataset_phys(origin_head)->ds_referenced_bytes + 2812 odl_used); 2813 dcomp = dsl_dataset_phys(clone)->ds_compressed_bytes + 2814 cdl_comp - 2815 (dsl_dataset_phys(origin_head)->ds_compressed_bytes + 2816 odl_comp); 2817 duncomp = dsl_dataset_phys(clone)->ds_uncompressed_bytes + 2818 cdl_uncomp - 2819 (dsl_dataset_phys(origin_head)->ds_uncompressed_bytes + 2820 odl_uncomp); 2821 2822 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_HEAD, 2823 dused, dcomp, duncomp, tx); 2824 dsl_dir_diduse_space(clone->ds_dir, DD_USED_HEAD, 2825 -dused, -dcomp, -duncomp, tx); 2826 2827 /* 2828 * The difference in the space used by snapshots is the 2829 * difference in snapshot space due to the head's 2830 * deadlist (since that's the only thing that's 2831 * changing that affects the snapused). 2832 */ 2833 dsl_deadlist_space_range(&clone->ds_deadlist, 2834 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2835 &cdl_used, &cdl_comp, &cdl_uncomp); 2836 dsl_deadlist_space_range(&origin_head->ds_deadlist, 2837 origin_head->ds_dir->dd_origin_txg, UINT64_MAX, 2838 &odl_used, &odl_comp, &odl_uncomp); 2839 dsl_dir_transfer_space(origin_head->ds_dir, cdl_used - odl_used, 2840 DD_USED_HEAD, DD_USED_SNAP, tx); 2841 } 2842 2843 /* swap ds_*_bytes */ 2844 SWITCH64(dsl_dataset_phys(origin_head)->ds_referenced_bytes, 2845 dsl_dataset_phys(clone)->ds_referenced_bytes); 2846 SWITCH64(dsl_dataset_phys(origin_head)->ds_compressed_bytes, 2847 dsl_dataset_phys(clone)->ds_compressed_bytes); 2848 SWITCH64(dsl_dataset_phys(origin_head)->ds_uncompressed_bytes, 2849 dsl_dataset_phys(clone)->ds_uncompressed_bytes); 2850 SWITCH64(dsl_dataset_phys(origin_head)->ds_unique_bytes, 2851 dsl_dataset_phys(clone)->ds_unique_bytes); 2852 2853 /* apply any parent delta for change in unconsumed refreservation */ 2854 dsl_dir_diduse_space(origin_head->ds_dir, DD_USED_REFRSRV, 2855 unused_refres_delta, 0, 0, tx); 2856 2857 /* 2858 * Swap deadlists. 2859 */ 2860 dsl_deadlist_close(&clone->ds_deadlist); 2861 dsl_deadlist_close(&origin_head->ds_deadlist); 2862 SWITCH64(dsl_dataset_phys(origin_head)->ds_deadlist_obj, 2863 dsl_dataset_phys(clone)->ds_deadlist_obj); 2864 dsl_deadlist_open(&clone->ds_deadlist, dp->dp_meta_objset, 2865 dsl_dataset_phys(clone)->ds_deadlist_obj); 2866 dsl_deadlist_open(&origin_head->ds_deadlist, dp->dp_meta_objset, 2867 dsl_dataset_phys(origin_head)->ds_deadlist_obj); 2868 2869 dsl_scan_ds_clone_swapped(origin_head, clone, tx); 2870 2871 spa_history_log_internal_ds(clone, "clone swap", tx, 2872 "parent=%s", origin_head->ds_dir->dd_myname); 2873 } 2874 2875 /* 2876 * Given a pool name and a dataset object number in that pool, 2877 * return the name of that dataset. 2878 */ 2879 int 2880 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2881 { 2882 dsl_pool_t *dp; 2883 dsl_dataset_t *ds; 2884 int error; 2885 2886 error = dsl_pool_hold(pname, FTAG, &dp); 2887 if (error != 0) 2888 return (error); 2889 2890 error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds); 2891 if (error == 0) { 2892 dsl_dataset_name(ds, buf); 2893 dsl_dataset_rele(ds, FTAG); 2894 } 2895 dsl_pool_rele(dp, FTAG); 2896 2897 return (error); 2898 } 2899 2900 int 2901 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2902 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2903 { 2904 int error = 0; 2905 2906 ASSERT3S(asize, >, 0); 2907 2908 /* 2909 * *ref_rsrv is the portion of asize that will come from any 2910 * unconsumed refreservation space. 2911 */ 2912 *ref_rsrv = 0; 2913 2914 mutex_enter(&ds->ds_lock); 2915 /* 2916 * Make a space adjustment for reserved bytes. 2917 */ 2918 if (ds->ds_reserved > dsl_dataset_phys(ds)->ds_unique_bytes) { 2919 ASSERT3U(*used, >=, 2920 ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); 2921 *used -= 2922 (ds->ds_reserved - dsl_dataset_phys(ds)->ds_unique_bytes); 2923 *ref_rsrv = 2924 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2925 } 2926 2927 if (!check_quota || ds->ds_quota == 0) { 2928 mutex_exit(&ds->ds_lock); 2929 return (0); 2930 } 2931 /* 2932 * If they are requesting more space, and our current estimate 2933 * is over quota, they get to try again unless the actual 2934 * on-disk is over quota and there are no pending changes (which 2935 * may free up space for us). 2936 */ 2937 if (dsl_dataset_phys(ds)->ds_referenced_bytes + inflight >= 2938 ds->ds_quota) { 2939 if (inflight > 0 || 2940 dsl_dataset_phys(ds)->ds_referenced_bytes < ds->ds_quota) 2941 error = SET_ERROR(ERESTART); 2942 else 2943 error = SET_ERROR(EDQUOT); 2944 } 2945 mutex_exit(&ds->ds_lock); 2946 2947 return (error); 2948 } 2949 2950 typedef struct dsl_dataset_set_qr_arg { 2951 const char *ddsqra_name; 2952 zprop_source_t ddsqra_source; 2953 uint64_t ddsqra_value; 2954 } dsl_dataset_set_qr_arg_t; 2955 2956 2957 /* ARGSUSED */ 2958 static int 2959 dsl_dataset_set_refquota_check(void *arg, dmu_tx_t *tx) 2960 { 2961 dsl_dataset_set_qr_arg_t *ddsqra = arg; 2962 dsl_pool_t *dp = dmu_tx_pool(tx); 2963 dsl_dataset_t *ds; 2964 int error; 2965 uint64_t newval; 2966 2967 if (spa_version(dp->dp_spa) < SPA_VERSION_REFQUOTA) 2968 return (SET_ERROR(ENOTSUP)); 2969 2970 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 2971 if (error != 0) 2972 return (error); 2973 2974 if (ds->ds_is_snapshot) { 2975 dsl_dataset_rele(ds, FTAG); 2976 return (SET_ERROR(EINVAL)); 2977 } 2978 2979 error = dsl_prop_predict(ds->ds_dir, 2980 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 2981 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 2982 if (error != 0) { 2983 dsl_dataset_rele(ds, FTAG); 2984 return (error); 2985 } 2986 2987 if (newval == 0) { 2988 dsl_dataset_rele(ds, FTAG); 2989 return (0); 2990 } 2991 2992 if (newval < dsl_dataset_phys(ds)->ds_referenced_bytes || 2993 newval < ds->ds_reserved) { 2994 dsl_dataset_rele(ds, FTAG); 2995 return (SET_ERROR(ENOSPC)); 2996 } 2997 2998 dsl_dataset_rele(ds, FTAG); 2999 return (0); 3000 } 3001 3002 static void 3003 dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx) 3004 { 3005 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3006 dsl_pool_t *dp = dmu_tx_pool(tx); 3007 dsl_dataset_t *ds; 3008 uint64_t newval; 3009 3010 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3011 3012 dsl_prop_set_sync_impl(ds, 3013 zfs_prop_to_name(ZFS_PROP_REFQUOTA), 3014 ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1, 3015 &ddsqra->ddsqra_value, tx); 3016 3017 VERIFY0(dsl_prop_get_int_ds(ds, 3018 zfs_prop_to_name(ZFS_PROP_REFQUOTA), &newval)); 3019 3020 if (ds->ds_quota != newval) { 3021 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3022 ds->ds_quota = newval; 3023 } 3024 dsl_dataset_rele(ds, FTAG); 3025 } 3026 3027 int 3028 dsl_dataset_set_refquota(const char *dsname, zprop_source_t source, 3029 uint64_t refquota) 3030 { 3031 dsl_dataset_set_qr_arg_t ddsqra; 3032 3033 ddsqra.ddsqra_name = dsname; 3034 ddsqra.ddsqra_source = source; 3035 ddsqra.ddsqra_value = refquota; 3036 3037 return (dsl_sync_task(dsname, dsl_dataset_set_refquota_check, 3038 dsl_dataset_set_refquota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE)); 3039 } 3040 3041 static int 3042 dsl_dataset_set_refreservation_check(void *arg, dmu_tx_t *tx) 3043 { 3044 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3045 dsl_pool_t *dp = dmu_tx_pool(tx); 3046 dsl_dataset_t *ds; 3047 int error; 3048 uint64_t newval, unique; 3049 3050 if (spa_version(dp->dp_spa) < SPA_VERSION_REFRESERVATION) 3051 return (SET_ERROR(ENOTSUP)); 3052 3053 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds); 3054 if (error != 0) 3055 return (error); 3056 3057 if (ds->ds_is_snapshot) { 3058 dsl_dataset_rele(ds, FTAG); 3059 return (SET_ERROR(EINVAL)); 3060 } 3061 3062 error = dsl_prop_predict(ds->ds_dir, 3063 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3064 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval); 3065 if (error != 0) { 3066 dsl_dataset_rele(ds, FTAG); 3067 return (error); 3068 } 3069 3070 /* 3071 * If we are doing the preliminary check in open context, the 3072 * space estimates may be inaccurate. 3073 */ 3074 if (!dmu_tx_is_syncing(tx)) { 3075 dsl_dataset_rele(ds, FTAG); 3076 return (0); 3077 } 3078 3079 mutex_enter(&ds->ds_lock); 3080 if (!DS_UNIQUE_IS_ACCURATE(ds)) 3081 dsl_dataset_recalc_head_uniq(ds); 3082 unique = dsl_dataset_phys(ds)->ds_unique_bytes; 3083 mutex_exit(&ds->ds_lock); 3084 3085 if (MAX(unique, newval) > MAX(unique, ds->ds_reserved)) { 3086 uint64_t delta = MAX(unique, newval) - 3087 MAX(unique, ds->ds_reserved); 3088 3089 if (delta > 3090 dsl_dir_space_available(ds->ds_dir, NULL, 0, B_TRUE) || 3091 (ds->ds_quota > 0 && newval > ds->ds_quota)) { 3092 dsl_dataset_rele(ds, FTAG); 3093 return (SET_ERROR(ENOSPC)); 3094 } 3095 } 3096 3097 dsl_dataset_rele(ds, FTAG); 3098 return (0); 3099 } 3100 3101 void 3102 dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds, 3103 zprop_source_t source, uint64_t value, dmu_tx_t *tx) 3104 { 3105 uint64_t newval; 3106 uint64_t unique; 3107 int64_t delta; 3108 3109 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 3110 source, sizeof (value), 1, &value, tx); 3111 3112 VERIFY0(dsl_prop_get_int_ds(ds, 3113 zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &newval)); 3114 3115 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3116 mutex_enter(&ds->ds_dir->dd_lock); 3117 mutex_enter(&ds->ds_lock); 3118 ASSERT(DS_UNIQUE_IS_ACCURATE(ds)); 3119 unique = dsl_dataset_phys(ds)->ds_unique_bytes; 3120 delta = MAX(0, (int64_t)(newval - unique)) - 3121 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3122 ds->ds_reserved = newval; 3123 mutex_exit(&ds->ds_lock); 3124 3125 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3126 mutex_exit(&ds->ds_dir->dd_lock); 3127 } 3128 3129 static void 3130 dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx) 3131 { 3132 dsl_dataset_set_qr_arg_t *ddsqra = arg; 3133 dsl_pool_t *dp = dmu_tx_pool(tx); 3134 dsl_dataset_t *ds; 3135 3136 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds)); 3137 dsl_dataset_set_refreservation_sync_impl(ds, 3138 ddsqra->ddsqra_source, ddsqra->ddsqra_value, tx); 3139 dsl_dataset_rele(ds, FTAG); 3140 } 3141 3142 int 3143 dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source, 3144 uint64_t refreservation) 3145 { 3146 dsl_dataset_set_qr_arg_t ddsqra; 3147 3148 ddsqra.ddsqra_name = dsname; 3149 ddsqra.ddsqra_source = source; 3150 ddsqra.ddsqra_value = refreservation; 3151 3152 return (dsl_sync_task(dsname, dsl_dataset_set_refreservation_check, 3153 dsl_dataset_set_refreservation_sync, &ddsqra, 3154 0, ZFS_SPACE_CHECK_NONE)); 3155 } 3156 3157 /* 3158 * Return (in *usedp) the amount of space written in new that is not 3159 * present in oldsnap. New may be a snapshot or the head. Old must be 3160 * a snapshot before new, in new's filesystem (or its origin). If not then 3161 * fail and return EINVAL. 3162 * 3163 * The written space is calculated by considering two components: First, we 3164 * ignore any freed space, and calculate the written as new's used space 3165 * minus old's used space. Next, we add in the amount of space that was freed 3166 * between the two snapshots, thus reducing new's used space relative to old's. 3167 * Specifically, this is the space that was born before old->ds_creation_txg, 3168 * and freed before new (ie. on new's deadlist or a previous deadlist). 3169 * 3170 * space freed [---------------------] 3171 * snapshots ---O-------O--------O-------O------ 3172 * oldsnap new 3173 */ 3174 int 3175 dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new, 3176 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3177 { 3178 int err = 0; 3179 uint64_t snapobj; 3180 dsl_pool_t *dp = new->ds_dir->dd_pool; 3181 3182 ASSERT(dsl_pool_config_held(dp)); 3183 3184 *usedp = 0; 3185 *usedp += dsl_dataset_phys(new)->ds_referenced_bytes; 3186 *usedp -= dsl_dataset_phys(oldsnap)->ds_referenced_bytes; 3187 3188 *compp = 0; 3189 *compp += dsl_dataset_phys(new)->ds_compressed_bytes; 3190 *compp -= dsl_dataset_phys(oldsnap)->ds_compressed_bytes; 3191 3192 *uncompp = 0; 3193 *uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes; 3194 *uncompp -= dsl_dataset_phys(oldsnap)->ds_uncompressed_bytes; 3195 3196 snapobj = new->ds_object; 3197 while (snapobj != oldsnap->ds_object) { 3198 dsl_dataset_t *snap; 3199 uint64_t used, comp, uncomp; 3200 3201 if (snapobj == new->ds_object) { 3202 snap = new; 3203 } else { 3204 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap); 3205 if (err != 0) 3206 break; 3207 } 3208 3209 if (dsl_dataset_phys(snap)->ds_prev_snap_txg == 3210 dsl_dataset_phys(oldsnap)->ds_creation_txg) { 3211 /* 3212 * The blocks in the deadlist can not be born after 3213 * ds_prev_snap_txg, so get the whole deadlist space, 3214 * which is more efficient (especially for old-format 3215 * deadlists). Unfortunately the deadlist code 3216 * doesn't have enough information to make this 3217 * optimization itself. 3218 */ 3219 dsl_deadlist_space(&snap->ds_deadlist, 3220 &used, &comp, &uncomp); 3221 } else { 3222 dsl_deadlist_space_range(&snap->ds_deadlist, 3223 0, dsl_dataset_phys(oldsnap)->ds_creation_txg, 3224 &used, &comp, &uncomp); 3225 } 3226 *usedp += used; 3227 *compp += comp; 3228 *uncompp += uncomp; 3229 3230 /* 3231 * If we get to the beginning of the chain of snapshots 3232 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap 3233 * was not a snapshot of/before new. 3234 */ 3235 snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj; 3236 if (snap != new) 3237 dsl_dataset_rele(snap, FTAG); 3238 if (snapobj == 0) { 3239 err = SET_ERROR(EINVAL); 3240 break; 3241 } 3242 3243 } 3244 return (err); 3245 } 3246 3247 /* 3248 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap, 3249 * lastsnap, and all snapshots in between are deleted. 3250 * 3251 * blocks that would be freed [---------------------------] 3252 * snapshots ---O-------O--------O-------O--------O 3253 * firstsnap lastsnap 3254 * 3255 * This is the set of blocks that were born after the snap before firstsnap, 3256 * (birth > firstsnap->prev_snap_txg) and died before the snap after the 3257 * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist). 3258 * We calculate this by iterating over the relevant deadlists (from the snap 3259 * after lastsnap, backward to the snap after firstsnap), summing up the 3260 * space on the deadlist that was born after the snap before firstsnap. 3261 */ 3262 int 3263 dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, 3264 dsl_dataset_t *lastsnap, 3265 uint64_t *usedp, uint64_t *compp, uint64_t *uncompp) 3266 { 3267 int err = 0; 3268 uint64_t snapobj; 3269 dsl_pool_t *dp = firstsnap->ds_dir->dd_pool; 3270 3271 ASSERT(firstsnap->ds_is_snapshot); 3272 ASSERT(lastsnap->ds_is_snapshot); 3273 3274 /* 3275 * Check that the snapshots are in the same dsl_dir, and firstsnap 3276 * is before lastsnap. 3277 */ 3278 if (firstsnap->ds_dir != lastsnap->ds_dir || 3279 dsl_dataset_phys(firstsnap)->ds_creation_txg > 3280 dsl_dataset_phys(lastsnap)->ds_creation_txg) 3281 return (SET_ERROR(EINVAL)); 3282 3283 *usedp = *compp = *uncompp = 0; 3284 3285 snapobj = dsl_dataset_phys(lastsnap)->ds_next_snap_obj; 3286 while (snapobj != firstsnap->ds_object) { 3287 dsl_dataset_t *ds; 3288 uint64_t used, comp, uncomp; 3289 3290 err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds); 3291 if (err != 0) 3292 break; 3293 3294 dsl_deadlist_space_range(&ds->ds_deadlist, 3295 dsl_dataset_phys(firstsnap)->ds_prev_snap_txg, UINT64_MAX, 3296 &used, &comp, &uncomp); 3297 *usedp += used; 3298 *compp += comp; 3299 *uncompp += uncomp; 3300 3301 snapobj = dsl_dataset_phys(ds)->ds_prev_snap_obj; 3302 ASSERT3U(snapobj, !=, 0); 3303 dsl_dataset_rele(ds, FTAG); 3304 } 3305 return (err); 3306 } 3307 3308 /* 3309 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline. 3310 * For example, they could both be snapshots of the same filesystem, and 3311 * 'earlier' is before 'later'. Or 'earlier' could be the origin of 3312 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's 3313 * filesystem. Or 'earlier' could be the origin's origin. 3314 * 3315 * If non-zero, earlier_txg is used instead of earlier's ds_creation_txg. 3316 */ 3317 boolean_t 3318 dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier, 3319 uint64_t earlier_txg) 3320 { 3321 dsl_pool_t *dp = later->ds_dir->dd_pool; 3322 int error; 3323 boolean_t ret; 3324 3325 ASSERT(dsl_pool_config_held(dp)); 3326 ASSERT(earlier->ds_is_snapshot || earlier_txg != 0); 3327 3328 if (earlier_txg == 0) 3329 earlier_txg = dsl_dataset_phys(earlier)->ds_creation_txg; 3330 3331 if (later->ds_is_snapshot && 3332 earlier_txg >= dsl_dataset_phys(later)->ds_creation_txg) 3333 return (B_FALSE); 3334 3335 if (later->ds_dir == earlier->ds_dir) 3336 return (B_TRUE); 3337 if (!dsl_dir_is_clone(later->ds_dir)) 3338 return (B_FALSE); 3339 3340 if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object) 3341 return (B_TRUE); 3342 dsl_dataset_t *origin; 3343 error = dsl_dataset_hold_obj(dp, 3344 dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin); 3345 if (error != 0) 3346 return (B_FALSE); 3347 ret = dsl_dataset_is_before(origin, earlier, earlier_txg); 3348 dsl_dataset_rele(origin, FTAG); 3349 return (ret); 3350 } 3351 3352 void 3353 dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx) 3354 { 3355 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3356 dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx); 3357 } 3358