1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/dmu_objset.h> 27 #include <sys/dsl_dataset.h> 28 #include <sys/dsl_dir.h> 29 #include <sys/dsl_prop.h> 30 #include <sys/dsl_synctask.h> 31 #include <sys/dmu_traverse.h> 32 #include <sys/dmu_tx.h> 33 #include <sys/arc.h> 34 #include <sys/zio.h> 35 #include <sys/zap.h> 36 #include <sys/unique.h> 37 #include <sys/zfs_context.h> 38 #include <sys/zfs_ioctl.h> 39 #include <sys/spa.h> 40 #include <sys/zfs_znode.h> 41 #include <sys/sunddi.h> 42 43 static char *dsl_reaper = "the grim reaper"; 44 45 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 46 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 47 static dsl_checkfunc_t dsl_dataset_rollback_check; 48 static dsl_syncfunc_t dsl_dataset_rollback_sync; 49 static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 50 51 #define DS_REF_MAX (1ULL << 62) 52 53 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 54 55 #define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) 56 57 58 /* 59 * Figure out how much of this delta should be propogated to 
the dsl_dir 60 * layer. If there's a refreservation, that space has already been 61 * partially accounted for in our ancestors. 62 */ 63 static int64_t 64 parent_delta(dsl_dataset_t *ds, int64_t delta) 65 { 66 uint64_t old_bytes, new_bytes; 67 68 if (ds->ds_reserved == 0) 69 return (delta); 70 71 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 72 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 73 74 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 75 return (new_bytes - old_bytes); 76 } 77 78 void 79 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 80 { 81 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 82 int compressed = BP_GET_PSIZE(bp); 83 int uncompressed = BP_GET_UCSIZE(bp); 84 int64_t delta; 85 86 dprintf_bp(bp, "born, ds=%p\n", ds); 87 88 ASSERT(dmu_tx_is_syncing(tx)); 89 /* It could have been compressed away to nothing */ 90 if (BP_IS_HOLE(bp)) 91 return; 92 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 93 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 94 if (ds == NULL) { 95 /* 96 * Account for the meta-objset space in its placeholder 97 * dsl_dir. 
98 */ 99 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 100 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 101 used, compressed, uncompressed, tx); 102 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 103 return; 104 } 105 dmu_buf_will_dirty(ds->ds_dbuf, tx); 106 mutex_enter(&ds->ds_dir->dd_lock); 107 mutex_enter(&ds->ds_lock); 108 delta = parent_delta(ds, used); 109 ds->ds_phys->ds_used_bytes += used; 110 ds->ds_phys->ds_compressed_bytes += compressed; 111 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 112 ds->ds_phys->ds_unique_bytes += used; 113 mutex_exit(&ds->ds_lock); 114 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 115 compressed, uncompressed, tx); 116 dsl_dir_transfer_space(ds->ds_dir, used - delta, 117 DD_USED_REFRSRV, DD_USED_HEAD, tx); 118 mutex_exit(&ds->ds_dir->dd_lock); 119 } 120 121 int 122 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 123 dmu_tx_t *tx) 124 { 125 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 126 int compressed = BP_GET_PSIZE(bp); 127 int uncompressed = BP_GET_UCSIZE(bp); 128 129 ASSERT(pio != NULL); 130 ASSERT(dmu_tx_is_syncing(tx)); 131 /* No block pointer => nothing to free */ 132 if (BP_IS_HOLE(bp)) 133 return (0); 134 135 ASSERT(used > 0); 136 if (ds == NULL) { 137 int err; 138 /* 139 * Account for the meta-objset space in its placeholder 140 * dataset. 
141 */ 142 err = dsl_free(pio, tx->tx_pool, 143 tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT); 144 ASSERT(err == 0); 145 146 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 147 -used, -compressed, -uncompressed, tx); 148 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 149 return (used); 150 } 151 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 152 153 ASSERT(!dsl_dataset_is_snapshot(ds)); 154 dmu_buf_will_dirty(ds->ds_dbuf, tx); 155 156 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 157 int err; 158 int64_t delta; 159 160 dprintf_bp(bp, "freeing: %s", ""); 161 err = dsl_free(pio, tx->tx_pool, 162 tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT); 163 ASSERT(err == 0); 164 165 mutex_enter(&ds->ds_dir->dd_lock); 166 mutex_enter(&ds->ds_lock); 167 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 168 !DS_UNIQUE_IS_ACCURATE(ds)); 169 delta = parent_delta(ds, -used); 170 ds->ds_phys->ds_unique_bytes -= used; 171 mutex_exit(&ds->ds_lock); 172 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 173 delta, -compressed, -uncompressed, tx); 174 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 175 DD_USED_REFRSRV, DD_USED_HEAD, tx); 176 mutex_exit(&ds->ds_dir->dd_lock); 177 } else { 178 dprintf_bp(bp, "putting on dead list: %s", ""); 179 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 180 ASSERT3U(ds->ds_prev->ds_object, ==, 181 ds->ds_phys->ds_prev_snap_obj); 182 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 183 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 184 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 185 ds->ds_object && bp->blk_birth > 186 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 187 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 188 mutex_enter(&ds->ds_prev->ds_lock); 189 ds->ds_prev->ds_phys->ds_unique_bytes += used; 190 mutex_exit(&ds->ds_prev->ds_lock); 191 } 192 if (bp->blk_birth > ds->ds_origin_txg) { 193 dsl_dir_transfer_space(ds->ds_dir, used, 194 DD_USED_HEAD, DD_USED_SNAP, tx); 195 } 196 } 197 mutex_enter(&ds->ds_lock); 198 
ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 199 ds->ds_phys->ds_used_bytes -= used; 200 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 201 ds->ds_phys->ds_compressed_bytes -= compressed; 202 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 203 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 204 mutex_exit(&ds->ds_lock); 205 206 return (used); 207 } 208 209 uint64_t 210 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 211 { 212 uint64_t trysnap = 0; 213 214 if (ds == NULL) 215 return (0); 216 /* 217 * The snapshot creation could fail, but that would cause an 218 * incorrect FALSE return, which would only result in an 219 * overestimation of the amount of space that an operation would 220 * consume, which is OK. 221 * 222 * There's also a small window where we could miss a pending 223 * snapshot, because we could set the sync task in the quiescing 224 * phase. So this should only be used as a guess. 225 */ 226 if (ds->ds_trysnap_txg > 227 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 228 trysnap = ds->ds_trysnap_txg; 229 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 230 } 231 232 boolean_t 233 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 234 { 235 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 236 } 237 238 /* ARGSUSED */ 239 static void 240 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 241 { 242 dsl_dataset_t *ds = dsv; 243 244 ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); 245 246 dprintf_ds(ds, "evicting %s\n", ""); 247 248 unique_remove(ds->ds_fsid_guid); 249 250 if (ds->ds_user_ptr != NULL) 251 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 252 253 if (ds->ds_prev) { 254 dsl_dataset_drop_ref(ds->ds_prev, ds); 255 ds->ds_prev = NULL; 256 } 257 258 bplist_close(&ds->ds_deadlist); 259 if (ds->ds_dir) 260 dsl_dir_close(ds->ds_dir, ds); 261 262 ASSERT(!list_link_active(&ds->ds_synced_link)); 263 264 mutex_destroy(&ds->ds_lock); 265 mutex_destroy(&ds->ds_recvlock); 266 
mutex_destroy(&ds->ds_opening_lock); 267 mutex_destroy(&ds->ds_deadlist.bpl_lock); 268 rw_destroy(&ds->ds_rwlock); 269 cv_destroy(&ds->ds_exclusive_cv); 270 271 kmem_free(ds, sizeof (dsl_dataset_t)); 272 } 273 274 static int 275 dsl_dataset_get_snapname(dsl_dataset_t *ds) 276 { 277 dsl_dataset_phys_t *headphys; 278 int err; 279 dmu_buf_t *headdbuf; 280 dsl_pool_t *dp = ds->ds_dir->dd_pool; 281 objset_t *mos = dp->dp_meta_objset; 282 283 if (ds->ds_snapname[0]) 284 return (0); 285 if (ds->ds_phys->ds_next_snap_obj == 0) 286 return (0); 287 288 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 289 FTAG, &headdbuf); 290 if (err) 291 return (err); 292 headphys = headdbuf->db_data; 293 err = zap_value_search(dp->dp_meta_objset, 294 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 295 dmu_buf_rele(headdbuf, FTAG); 296 return (err); 297 } 298 299 static int 300 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 301 { 302 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 303 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 304 matchtype_t mt; 305 int err; 306 307 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 308 mt = MT_FIRST; 309 else 310 mt = MT_EXACT; 311 312 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 313 value, mt, NULL, 0, NULL); 314 if (err == ENOTSUP && mt == MT_FIRST) 315 err = zap_lookup(mos, snapobj, name, 8, 1, value); 316 return (err); 317 } 318 319 static int 320 dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) 321 { 322 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 323 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 324 matchtype_t mt; 325 int err; 326 327 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 328 mt = MT_FIRST; 329 else 330 mt = MT_EXACT; 331 332 err = zap_remove_norm(mos, snapobj, name, mt, tx); 333 if (err == ENOTSUP && mt == MT_FIRST) 334 err = zap_remove(mos, snapobj, name, tx); 335 return (err); 336 } 337 338 static int 339 
dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, 340 dsl_dataset_t **dsp) 341 { 342 objset_t *mos = dp->dp_meta_objset; 343 dmu_buf_t *dbuf; 344 dsl_dataset_t *ds; 345 int err; 346 347 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 348 dsl_pool_sync_context(dp)); 349 350 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 351 if (err) 352 return (err); 353 ds = dmu_buf_get_user(dbuf); 354 if (ds == NULL) { 355 dsl_dataset_t *winner; 356 357 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 358 ds->ds_dbuf = dbuf; 359 ds->ds_object = dsobj; 360 ds->ds_phys = dbuf->db_data; 361 362 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 363 mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); 364 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 365 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 366 NULL); 367 rw_init(&ds->ds_rwlock, 0, 0, 0); 368 cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); 369 370 err = bplist_open(&ds->ds_deadlist, 371 mos, ds->ds_phys->ds_deadlist_obj); 372 if (err == 0) { 373 err = dsl_dir_open_obj(dp, 374 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 375 } 376 if (err) { 377 /* 378 * we don't really need to close the blist if we 379 * just opened it. 
380 */ 381 mutex_destroy(&ds->ds_lock); 382 mutex_destroy(&ds->ds_recvlock); 383 mutex_destroy(&ds->ds_opening_lock); 384 mutex_destroy(&ds->ds_deadlist.bpl_lock); 385 rw_destroy(&ds->ds_rwlock); 386 cv_destroy(&ds->ds_exclusive_cv); 387 kmem_free(ds, sizeof (dsl_dataset_t)); 388 dmu_buf_rele(dbuf, tag); 389 return (err); 390 } 391 392 if (!dsl_dataset_is_snapshot(ds)) { 393 ds->ds_snapname[0] = '\0'; 394 if (ds->ds_phys->ds_prev_snap_obj) { 395 err = dsl_dataset_get_ref(dp, 396 ds->ds_phys->ds_prev_snap_obj, 397 ds, &ds->ds_prev); 398 } 399 400 if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) { 401 dsl_dataset_t *origin; 402 403 err = dsl_dataset_hold_obj(dp, 404 ds->ds_dir->dd_phys->dd_origin_obj, 405 FTAG, &origin); 406 if (err == 0) { 407 ds->ds_origin_txg = 408 origin->ds_phys->ds_creation_txg; 409 dsl_dataset_rele(origin, FTAG); 410 } 411 } 412 } else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) { 413 err = dsl_dataset_get_snapname(ds); 414 } 415 416 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 417 /* 418 * In sync context, we're called with either no lock 419 * or with the write lock. If we're not syncing, 420 * we're always called with the read lock held. 
421 */ 422 boolean_t need_lock = 423 !RW_WRITE_HELD(&dp->dp_config_rwlock) && 424 dsl_pool_sync_context(dp); 425 426 if (need_lock) 427 rw_enter(&dp->dp_config_rwlock, RW_READER); 428 429 err = dsl_prop_get_ds(ds, 430 "refreservation", sizeof (uint64_t), 1, 431 &ds->ds_reserved, NULL); 432 if (err == 0) { 433 err = dsl_prop_get_ds(ds, 434 "refquota", sizeof (uint64_t), 1, 435 &ds->ds_quota, NULL); 436 } 437 438 if (need_lock) 439 rw_exit(&dp->dp_config_rwlock); 440 } else { 441 ds->ds_reserved = ds->ds_quota = 0; 442 } 443 444 if (err == 0) { 445 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 446 dsl_dataset_evict); 447 } 448 if (err || winner) { 449 bplist_close(&ds->ds_deadlist); 450 if (ds->ds_prev) 451 dsl_dataset_drop_ref(ds->ds_prev, ds); 452 dsl_dir_close(ds->ds_dir, ds); 453 mutex_destroy(&ds->ds_lock); 454 mutex_destroy(&ds->ds_recvlock); 455 mutex_destroy(&ds->ds_opening_lock); 456 mutex_destroy(&ds->ds_deadlist.bpl_lock); 457 rw_destroy(&ds->ds_rwlock); 458 cv_destroy(&ds->ds_exclusive_cv); 459 kmem_free(ds, sizeof (dsl_dataset_t)); 460 if (err) { 461 dmu_buf_rele(dbuf, tag); 462 return (err); 463 } 464 ds = winner; 465 } else { 466 ds->ds_fsid_guid = 467 unique_insert(ds->ds_phys->ds_fsid_guid); 468 } 469 } 470 ASSERT3P(ds->ds_dbuf, ==, dbuf); 471 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 472 ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 473 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 474 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 475 mutex_enter(&ds->ds_lock); 476 if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { 477 mutex_exit(&ds->ds_lock); 478 dmu_buf_rele(ds->ds_dbuf, tag); 479 return (ENOENT); 480 } 481 mutex_exit(&ds->ds_lock); 482 *dsp = ds; 483 return (0); 484 } 485 486 static int 487 dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) 488 { 489 dsl_pool_t *dp = ds->ds_dir->dd_pool; 490 491 /* 492 * In syncing context we don't want the rwlock lock: there 493 * may be an existing writer waiting for sync 
phase to 494 * finish. We don't need to worry about such writers, since 495 * sync phase is single-threaded, so the writer can't be 496 * doing anything while we are active. 497 */ 498 if (dsl_pool_sync_context(dp)) { 499 ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 500 return (0); 501 } 502 503 /* 504 * Normal users will hold the ds_rwlock as a READER until they 505 * are finished (i.e., call dsl_dataset_rele()). "Owners" will 506 * drop their READER lock after they set the ds_owner field. 507 * 508 * If the dataset is being destroyed, the destroy thread will 509 * obtain a WRITER lock for exclusive access after it's done its 510 * open-context work and then change the ds_owner to 511 * dsl_reaper once destruction is assured. So threads 512 * may block here temporarily, until the "destructability" of 513 * the dataset is determined. 514 */ 515 ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); 516 mutex_enter(&ds->ds_lock); 517 while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { 518 rw_exit(&dp->dp_config_rwlock); 519 cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); 520 if (DSL_DATASET_IS_DESTROYED(ds)) { 521 mutex_exit(&ds->ds_lock); 522 dsl_dataset_drop_ref(ds, tag); 523 rw_enter(&dp->dp_config_rwlock, RW_READER); 524 return (ENOENT); 525 } 526 rw_enter(&dp->dp_config_rwlock, RW_READER); 527 } 528 mutex_exit(&ds->ds_lock); 529 return (0); 530 } 531 532 int 533 dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 534 dsl_dataset_t **dsp) 535 { 536 int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); 537 538 if (err) 539 return (err); 540 return (dsl_dataset_hold_ref(*dsp, tag)); 541 } 542 543 int 544 dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner, 545 dsl_dataset_t **dsp) 546 { 547 int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp); 548 549 ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER); 550 551 if (err) 552 return (err); 553 if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) { 554 dsl_dataset_rele(*dsp, owner); 555 *dsp = 
NULL; 556 return (EBUSY); 557 } 558 return (0); 559 } 560 561 int 562 dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) 563 { 564 dsl_dir_t *dd; 565 dsl_pool_t *dp; 566 const char *snapname; 567 uint64_t obj; 568 int err = 0; 569 570 err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); 571 if (err) 572 return (err); 573 574 dp = dd->dd_pool; 575 obj = dd->dd_phys->dd_head_dataset_obj; 576 rw_enter(&dp->dp_config_rwlock, RW_READER); 577 if (obj) 578 err = dsl_dataset_get_ref(dp, obj, tag, dsp); 579 else 580 err = ENOENT; 581 if (err) 582 goto out; 583 584 err = dsl_dataset_hold_ref(*dsp, tag); 585 586 /* we may be looking for a snapshot */ 587 if (err == 0 && snapname != NULL) { 588 dsl_dataset_t *ds = NULL; 589 590 if (*snapname++ != '@') { 591 dsl_dataset_rele(*dsp, tag); 592 err = ENOENT; 593 goto out; 594 } 595 596 dprintf("looking for snapshot '%s'\n", snapname); 597 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 598 if (err == 0) 599 err = dsl_dataset_get_ref(dp, obj, tag, &ds); 600 dsl_dataset_rele(*dsp, tag); 601 602 ASSERT3U((err == 0), ==, (ds != NULL)); 603 604 if (ds) { 605 mutex_enter(&ds->ds_lock); 606 if (ds->ds_snapname[0] == 0) 607 (void) strlcpy(ds->ds_snapname, snapname, 608 sizeof (ds->ds_snapname)); 609 mutex_exit(&ds->ds_lock); 610 err = dsl_dataset_hold_ref(ds, tag); 611 *dsp = err ? 
NULL : ds; 612 } 613 } 614 out: 615 rw_exit(&dp->dp_config_rwlock); 616 dsl_dir_close(dd, FTAG); 617 return (err); 618 } 619 620 int 621 dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp) 622 { 623 int err = dsl_dataset_hold(name, owner, dsp); 624 if (err) 625 return (err); 626 if ((*dsp)->ds_phys->ds_num_children > 0 && 627 !DS_MODE_IS_READONLY(flags)) { 628 dsl_dataset_rele(*dsp, owner); 629 return (EROFS); 630 } 631 if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) { 632 dsl_dataset_rele(*dsp, owner); 633 return (EBUSY); 634 } 635 return (0); 636 } 637 638 void 639 dsl_dataset_name(dsl_dataset_t *ds, char *name) 640 { 641 if (ds == NULL) { 642 (void) strcpy(name, "mos"); 643 } else { 644 dsl_dir_name(ds->ds_dir, name); 645 VERIFY(0 == dsl_dataset_get_snapname(ds)); 646 if (ds->ds_snapname[0]) { 647 (void) strcat(name, "@"); 648 /* 649 * We use a "recursive" mutex so that we 650 * can call dprintf_ds() with ds_lock held. 651 */ 652 if (!MUTEX_HELD(&ds->ds_lock)) { 653 mutex_enter(&ds->ds_lock); 654 (void) strcat(name, ds->ds_snapname); 655 mutex_exit(&ds->ds_lock); 656 } else { 657 (void) strcat(name, ds->ds_snapname); 658 } 659 } 660 } 661 } 662 663 static int 664 dsl_dataset_namelen(dsl_dataset_t *ds) 665 { 666 int result; 667 668 if (ds == NULL) { 669 result = 3; /* "mos" */ 670 } else { 671 result = dsl_dir_namelen(ds->ds_dir); 672 VERIFY(0 == dsl_dataset_get_snapname(ds)); 673 if (ds->ds_snapname[0]) { 674 ++result; /* adding one for the @-sign */ 675 if (!MUTEX_HELD(&ds->ds_lock)) { 676 mutex_enter(&ds->ds_lock); 677 result += strlen(ds->ds_snapname); 678 mutex_exit(&ds->ds_lock); 679 } else { 680 result += strlen(ds->ds_snapname); 681 } 682 } 683 } 684 685 return (result); 686 } 687 688 void 689 dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) 690 { 691 dmu_buf_rele(ds->ds_dbuf, tag); 692 } 693 694 void 695 dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 696 { 697 if 
(!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { 698 rw_exit(&ds->ds_rwlock); 699 } 700 dsl_dataset_drop_ref(ds, tag); 701 } 702 703 void 704 dsl_dataset_disown(dsl_dataset_t *ds, void *owner) 705 { 706 ASSERT((ds->ds_owner == owner && ds->ds_dbuf) || 707 (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); 708 709 mutex_enter(&ds->ds_lock); 710 ds->ds_owner = NULL; 711 if (RW_WRITE_HELD(&ds->ds_rwlock)) { 712 rw_exit(&ds->ds_rwlock); 713 cv_broadcast(&ds->ds_exclusive_cv); 714 } 715 mutex_exit(&ds->ds_lock); 716 if (ds->ds_dbuf) 717 dsl_dataset_drop_ref(ds, owner); 718 else 719 dsl_dataset_evict(ds->ds_dbuf, ds); 720 } 721 722 boolean_t 723 dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner) 724 { 725 boolean_t gotit = FALSE; 726 727 mutex_enter(&ds->ds_lock); 728 if (ds->ds_owner == NULL && 729 (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { 730 ds->ds_owner = owner; 731 if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) 732 rw_exit(&ds->ds_rwlock); 733 gotit = TRUE; 734 } 735 mutex_exit(&ds->ds_lock); 736 return (gotit); 737 } 738 739 void 740 dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) 741 { 742 ASSERT3P(owner, ==, ds->ds_owner); 743 if (!RW_WRITE_HELD(&ds->ds_rwlock)) 744 rw_enter(&ds->ds_rwlock, RW_WRITER); 745 } 746 747 uint64_t 748 dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 749 uint64_t flags, dmu_tx_t *tx) 750 { 751 dsl_pool_t *dp = dd->dd_pool; 752 dmu_buf_t *dbuf; 753 dsl_dataset_phys_t *dsphys; 754 uint64_t dsobj; 755 objset_t *mos = dp->dp_meta_objset; 756 757 if (origin == NULL) 758 origin = dp->dp_origin_snap; 759 760 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 761 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 762 ASSERT(dmu_tx_is_syncing(tx)); 763 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 764 765 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 766 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 767 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, 
&dbuf)); 768 dmu_buf_will_dirty(dbuf, tx); 769 dsphys = dbuf->db_data; 770 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 771 dsphys->ds_dir_obj = dd->dd_object; 772 dsphys->ds_flags = flags; 773 dsphys->ds_fsid_guid = unique_create(); 774 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 775 sizeof (dsphys->ds_guid)); 776 dsphys->ds_snapnames_zapobj = 777 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 778 DMU_OT_NONE, 0, tx); 779 dsphys->ds_creation_time = gethrestime_sec(); 780 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 781 dsphys->ds_deadlist_obj = 782 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 783 784 if (origin) { 785 dsphys->ds_prev_snap_obj = origin->ds_object; 786 dsphys->ds_prev_snap_txg = 787 origin->ds_phys->ds_creation_txg; 788 dsphys->ds_used_bytes = 789 origin->ds_phys->ds_used_bytes; 790 dsphys->ds_compressed_bytes = 791 origin->ds_phys->ds_compressed_bytes; 792 dsphys->ds_uncompressed_bytes = 793 origin->ds_phys->ds_uncompressed_bytes; 794 dsphys->ds_bp = origin->ds_phys->ds_bp; 795 dsphys->ds_flags |= origin->ds_phys->ds_flags; 796 797 dmu_buf_will_dirty(origin->ds_dbuf, tx); 798 origin->ds_phys->ds_num_children++; 799 800 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 801 if (origin->ds_phys->ds_next_clones_obj == 0) { 802 origin->ds_phys->ds_next_clones_obj = 803 zap_create(mos, 804 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 805 } 806 VERIFY(0 == zap_add_int(mos, 807 origin->ds_phys->ds_next_clones_obj, 808 dsobj, tx)); 809 } 810 811 dmu_buf_will_dirty(dd->dd_dbuf, tx); 812 dd->dd_phys->dd_origin_obj = origin->ds_object; 813 } 814 815 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 816 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 817 818 dmu_buf_rele(dbuf, FTAG); 819 820 dmu_buf_will_dirty(dd->dd_dbuf, tx); 821 dd->dd_phys->dd_head_dataset_obj = dsobj; 822 823 return (dsobj); 824 } 825 826 uint64_t 827 dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 828 
dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 829 { 830 dsl_pool_t *dp = pdd->dd_pool; 831 uint64_t dsobj, ddobj; 832 dsl_dir_t *dd; 833 834 ASSERT(lastname[0] != '@'); 835 836 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 837 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 838 839 dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); 840 841 dsl_deleg_set_create_perms(dd, tx, cr); 842 843 dsl_dir_close(dd, FTAG); 844 845 return (dsobj); 846 } 847 848 struct destroyarg { 849 dsl_sync_task_group_t *dstg; 850 char *snapname; 851 char *failed; 852 }; 853 854 static int 855 dsl_snapshot_destroy_one(char *name, void *arg) 856 { 857 struct destroyarg *da = arg; 858 dsl_dataset_t *ds; 859 char *cp; 860 int err; 861 862 (void) strcat(name, "@"); 863 (void) strcat(name, da->snapname); 864 err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT, 865 da->dstg, &ds); 866 cp = strchr(name, '@'); 867 *cp = '\0'; 868 if (err == 0) { 869 dsl_dataset_make_exclusive(ds, da->dstg); 870 if (ds->ds_user_ptr) { 871 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 872 ds->ds_user_ptr = NULL; 873 } 874 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 875 dsl_dataset_destroy_sync, ds, da->dstg, 0); 876 } else if (err == ENOENT) { 877 err = 0; 878 } else { 879 (void) strcpy(da->failed, name); 880 } 881 return (err); 882 } 883 884 /* 885 * Destroy 'snapname' in all descendants of 'fsname'. 
886 */ 887 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 888 int 889 dsl_snapshots_destroy(char *fsname, char *snapname) 890 { 891 int err; 892 struct destroyarg da; 893 dsl_sync_task_t *dst; 894 spa_t *spa; 895 896 err = spa_open(fsname, &spa, FTAG); 897 if (err) 898 return (err); 899 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 900 da.snapname = snapname; 901 da.failed = fsname; 902 903 err = dmu_objset_find(fsname, 904 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 905 906 if (err == 0) 907 err = dsl_sync_task_group_wait(da.dstg); 908 909 for (dst = list_head(&da.dstg->dstg_tasks); dst; 910 dst = list_next(&da.dstg->dstg_tasks, dst)) { 911 dsl_dataset_t *ds = dst->dst_arg1; 912 /* 913 * Return the file system name that triggered the error 914 */ 915 if (dst->dst_err) { 916 dsl_dataset_name(ds, fsname); 917 *strchr(fsname, '@') = '\0'; 918 } 919 dsl_dataset_disown(ds, da.dstg); 920 } 921 922 dsl_sync_task_group_destroy(da.dstg); 923 spa_close(spa, FTAG); 924 return (err); 925 } 926 927 /* 928 * ds must be opened as OWNER. On return (whether successful or not), 929 * ds will be closed and caller can no longer dereference it. 930 */ 931 int 932 dsl_dataset_destroy(dsl_dataset_t *ds, void *tag) 933 { 934 int err; 935 dsl_sync_task_group_t *dstg; 936 objset_t *os; 937 dsl_dir_t *dd; 938 uint64_t obj; 939 940 if (dsl_dataset_is_snapshot(ds)) { 941 /* Destroying a snapshot is simpler */ 942 dsl_dataset_make_exclusive(ds, tag); 943 944 if (ds->ds_user_ptr) { 945 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 946 ds->ds_user_ptr = NULL; 947 } 948 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 949 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 950 ds, tag, 0); 951 goto out; 952 } 953 954 dd = ds->ds_dir; 955 956 /* 957 * Check for errors and mark this ds as inconsistent, in 958 * case we crash while freeing the objects. 
959 */ 960 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 961 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 962 if (err) 963 goto out; 964 965 err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os); 966 if (err) 967 goto out; 968 969 /* 970 * remove the objects in open context, so that we won't 971 * have too much to do in syncing context. 972 */ 973 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 974 ds->ds_phys->ds_prev_snap_txg)) { 975 /* 976 * Ignore errors, if there is not enough disk space 977 * we will deal with it in dsl_dataset_destroy_sync(). 978 */ 979 (void) dmu_free_object(os, obj); 980 } 981 982 /* 983 * We need to sync out all in-flight IO before we try to evict 984 * (the dataset evict func is trying to clear the cached entries 985 * for this dataset in the ARC). 986 */ 987 txg_wait_synced(dd->dd_pool, 0); 988 989 /* 990 * If we managed to free all the objects in open 991 * context, the user space accounting should be zero. 992 */ 993 if (ds->ds_phys->ds_bp.blk_fill == 0 && 994 dmu_objset_userused_enabled(os->os)) { 995 uint64_t count; 996 997 ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || 998 count == 0); 999 ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || 1000 count == 0); 1001 } 1002 1003 dmu_objset_close(os); 1004 if (err != ESRCH) 1005 goto out; 1006 1007 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 1008 err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 1009 rw_exit(&dd->dd_pool->dp_config_rwlock); 1010 1011 if (err) 1012 goto out; 1013 1014 if (ds->ds_user_ptr) { 1015 /* 1016 * We need to sync out all in-flight IO before we try 1017 * to evict (the dataset evict func is trying to clear 1018 * the cached entries for this dataset in the ARC). 1019 */ 1020 txg_wait_synced(dd->dd_pool, 0); 1021 } 1022 1023 /* 1024 * Blow away the dsl_dir + head dataset. 
	 */
	dsl_dataset_make_exclusive(ds, tag);
	if (ds->ds_user_ptr) {
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}
	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, tag, 0);
	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
	    dsl_dir_destroy_sync, dd, FTAG, 0);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	/* if it is successful, dsl_dir_destroy_sync will close the dd */
	if (err)
		dsl_dir_close(dd, FTAG);
out:
	dsl_dataset_disown(ds, tag);
	return (err);
}

/*
 * Roll the dataset back by running the rollback check/sync pair as a
 * sync task.  The dataset must already have an owner; exclusive access
 * is taken on the owner's behalf for the duration of the task and
 * dropped again (waking anyone blocked on ds_exclusive_cv) before
 * returning.  'ost' is handed to the check and sync functions by
 * address.  Returns the error from the sync task.
 */
int
dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
{
	int err;

	ASSERT(ds->ds_owner);

	dsl_dataset_make_exclusive(ds, ds->ds_owner);
	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
	    ds, &ost, 0);
	/* drop exclusive access */
	mutex_enter(&ds->ds_lock);
	rw_exit(&ds->ds_rwlock);
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);
	return (err);
}

/*
 * Install 'p' and its eviction callback 'func' as the dataset's user
 * pointer, but only if no user pointer is currently set.  The test and
 * the update happen under ds_lock.  Returns the previous user pointer:
 * NULL means 'p' was installed, non-NULL means nothing was changed.
 */
void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

/*
 * Return the dataset's current user pointer (read without taking ds_lock).
 */
void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}

/*
 * Return a pointer to the root block pointer stored in the dataset's
 * ds_phys structure.
 */
blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

/*
 * Store *bp as the root block pointer.  Must be called with a syncing
 * tx.  A NULL 'ds' denotes the meta-objset, whose root block pointer
 * lives in the pool (dp_meta_rootbp); otherwise the dataset's dbuf is
 * dirtied and ds_phys->ds_bp is updated.
 */
void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

/*
 * Return the spa_t of the pool this dataset belongs to.
 */
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

/*
 * Put the dataset on the pool's dirty-dataset list for tx's txg.
 * A NULL 'ds' (the meta-objset) is ignored.  Panics if the dataset has
 * a next snapshot, i.e. if it is itself a snapshot.  When the dataset
 * is newly added to the list an extra reference is taken on its dbuf,
 * tagged with the dataset itself.
 */
void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot, that is still being used
 * in this file system, from the space currently in use.  To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
 *
 * The space freed since the snapshot is read from the head's deadlist.
 * The result is stored in ds_phys->ds_unique_bytes, and the value is
 * flagged as accurate when the pool version supports that flag.
 */
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
	else
		mrs_used = 0;

	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
	    &dluncomp));

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);

	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

/*
 * Return the dataset's unique byte count.  For a non-snapshot whose
 * stored value is not flagged accurate, the value is recomputed first.
 */
static uint64_t
dsl_dataset_unique(dsl_dataset_t *ds)
{
	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
		dsl_dataset_recalc_head_uniq(ds);

	return (ds->ds_phys->ds_unique_bytes);
}

/*
 * Context handed to kill_blkptr() through traverse_dataset(): the
 * dataset being emptied, the parent zio for the frees, and the tx.
 */
struct killarg {
	dsl_dataset_t *ds;
	zio_t *zio;
	dmu_tx_t *tx;
};

/*
 * traverse_dataset() callback that frees one block.  Blocks identified
 * as intent-log blocks are freed directly with dsl_free(); all others
 * go through dsl_dataset_block_kill() so the dataset's accounting is
 * updated.  A NULL bp is ignored.  Always returns 0.
 */
/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
    const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;

	if (bp == NULL)
		return (0);

	if ((zb->zb_level == -1ULL && zb->zb_blkid != 0) ||
	    (zb->zb_object != 0 && dnp == NULL)) {
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		VERIFY3U(0, ==, dsl_free(ka->zio, ka->tx->tx_pool,
		    ka->tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT));
	} else {
		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
	}

	return (0);
}

/*
 * Check function for dsl_dataset_rollback().  arg1 is the dataset,
 * arg2 points to the objset type.  Returns EINVAL if the rollback is
 * not permitted, EAGAIN if the dataset was modified in this txg, and
 * 0 otherwise.
 */
/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;

	/*
	 * We can only roll back to emptiness if it is a ZPL objset.
	 */
	if (*ost != DMU_OST_ZFS &&
	    ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}

/*
 * Sync function for dsl_dataset_rollback().  arg1 is the dataset, arg2
 * points to the objset type.  Evicts the user pointer, moves the space
 * recorded on the deadlist back to the head, replaces the deadlist with
 * an empty one, frees every block born after the previous snapshot, and
 * then either copies the previous snapshot's contents into the head or,
 * when there is no usable previous snapshot, recreates an empty objset.
 */
/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (ds->ds_user_ptr != NULL) {
		/*
		 * We need to make sure that the objset_impl_t is reopened after
		 * we do the rollback, otherwise it will have the wrong
		 * objset_phys_t.  Normally this would happen when this
		 * dataset-open is closed, thus causing the
		 * dataset to be immediately evicted.  But when doing "zfs recv
		 * -F", we reopen the objset before that, so that there is no
		 * window where the dataset is closed and inconsistent.
		 */
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}

	/* Transfer space that was freed since last snap back to the head. */
	{
		uint64_t used;

		VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist,
		    ds->ds_origin_txg, UINT64_MAX, &used));
		dsl_dir_transfer_space(ds->ds_dir, used,
		    DD_USED_SNAP, DD_USED_HEAD, tx);
	}

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/*
		 * Free blkptrs that we gave birth to - this covers
		 * claimed but not played log blocks too.
		 */
		zio_t *zio;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		(void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);
	}

	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);

	if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) {
		/* Change our contents to that of the prev snapshot */

		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT3U(ds->ds_phys->ds_used_bytes, <=,
		    ds->ds_prev->ds_phys->ds_used_bytes);

		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
		ds->ds_phys->ds_used_bytes =
		    ds->ds_prev->ds_phys->ds_used_bytes;
		ds->ds_phys->ds_compressed_bytes =
		    ds->ds_prev->ds_phys->ds_compressed_bytes;
		ds->ds_phys->ds_uncompressed_bytes =
		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;

		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
		}
	} else {
		/* No usable previous snapshot: start over with an empty objset */
		objset_impl_t *osi;

		ASSERT(*ost != DMU_OST_ZVOL);
		ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0);
		ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0);
		ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0);

		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
		ds->ds_phys->ds_flags = 0;
		ds->ds_phys->ds_unique_bytes = 0;
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
		    SPA_VERSION_UNIQUE_ACCURATE)
			ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

		osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
		    &ds->ds_phys->ds_bp, *ost, tx);
#ifdef _KERNEL
		zfs_create_fs(&osi->os, kcred, NULL, tx);
#endif
	}

	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", ds->ds_object);
}

/*
 * Check function paired with dsl_dataset_destroy_begin_sync().  arg1 is
 * the dataset.  Returns EINVAL if the head still has snapshots of its
 * own, EEXIST if its dsl_dir has child directories, the zap_count()
 * error if that lookup fails, and 0 otherwise.
 */
/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * This is really a dsl_dir thing, but check it here so that
	 * we'll be less likely to leave this dataset inconsistent &
	 * nearly destroyed.
	 */
	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}

/*
 * Sync function paired with dsl_dataset_destroy_begin_check().  Sets
 * DS_FLAG_INCONSISTENT in the dataset's on-disk flags and records the
 * event in the pool history.
 */
/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
}

/*
 * Check function paired with dsl_dataset_destroy_sync().  arg1 is the
 * dataset.  Returns EEXIST if the dataset is a branch point, EINVAL if
 * it is a head that still has snapshots of its own, EAGAIN if it was
 * modified in this txg, and 0 otherwise.
 */
/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* we have an owner hold, so no one else can destroy us */
	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking... */
	return (0);
}

/*
 * Rendezvous state shared between dsl_dataset_drain_refs() and the
 * dsl_dataset_refs_gone() callback: 'gone' is set and 'cv' signalled,
 * both under 'lock', when the callback runs.
 */
struct refsarg {
	kmutex_t lock;
	boolean_t gone;
	kcondvar_t cv;
};

/*
 * Callback registered on the dataset's dbuf by dsl_dataset_drain_refs().
 * Marks the refsarg as gone and wakes the waiter.
 */
/* ARGSUSED */
static void
dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
{
	struct refsarg *arg = argv;

	mutex_enter(&arg->lock);
	arg->gone = TRUE;
	cv_signal(&arg->cv);
	mutex_exit(&arg->lock);
}

/*
 * Release our hold ('tag') on the dataset's dbuf and block until
 * dsl_dataset_refs_gone() has been called for it.  The callback is
 * installed in place of the dataset as the dbuf's user before the hold
 * is dropped.  On return ds_dbuf and ds_phys are NULL.
 */
static void
dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
{
	struct refsarg arg;

	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
	arg.gone = FALSE;
	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
	    dsl_dataset_refs_gone);
	dmu_buf_rele(ds->ds_dbuf, tag);
	mutex_enter(&arg.lock);
	while (!arg.gone)
		cv_wait(&arg.cv, &arg.lock);
	ASSERT(arg.gone);
	mutex_exit(&arg.lock);
	ds->ds_dbuf = NULL;
	ds->ds_phys = NULL;
	mutex_destroy(&arg.lock);
	cv_destroy(&arg.cv);
}

/*
 * Sync function paired with dsl_dataset_destroy_check().  arg1 is the
 * dataset to destroy and 'tag' is the tag of the hold that is released
 * by dsl_dataset_drain_refs() at the end.  Runs with the pool config
 * lock held for writing.
 *
 * In order, this: marks the dataset as destroyed and wakes waiters,
 * removes any reservation, unlinks the dataset from its previous
 * snapshot, then either splices a snapshot out of the snapshot chain
 * (merging deadlists and fixing up unique-space accounting) or frees
 * everything a head dataset references, removes the dataset from the
 * dir or snapshot namespace, and finally frees its MOS objects.
 */
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(ds->ds_owner);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	/* signal any waiters that this dataset is going away */
	mutex_enter(&ds->ds_lock);
	ds->ds_owner = dsl_reaper;
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_pool_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	/* Unlink ourselves from the previous snapshot, if there is one. */
	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY(0 == zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;
		int64_t used = 0, compressed = 0, uncompressed = 0;

		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		old_unique = dsl_dataset_unique(ds_next);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) dsl_free(zio, dp, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);

		/* change snapused */
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -compressed, -uncompressed, tx);

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		bplist_close(&ds->ds_deadlist);
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;
			uint64_t space;

			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj,
			    FTAG, &ds_after_next));

			VERIFY(0 ==
			    bplist_space_birthrange(&ds_after_next->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_creation_txg, &space));
			ds_next->ds_phys->ds_unique_bytes += space;

			dsl_dataset_rele(ds_after_next, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			/*
			 * next has no next snapshot of its own; repoint
			 * its ds_prev reference from us to our prev.
			 */
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
			ds_next->ds_prev = NULL;
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds_next, &ds_next->ds_prev));
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir,
				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
			}
		}
		dsl_dataset_rele(ds_next, FTAG);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * NB: this should be very quick, because we already
		 * freed all the objects in open context.
		 */
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
		    ds->ds_phys->ds_unique_bytes == 0);
	}

	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dir */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;

			err = dsl_dataset_snap_lookup(ds_head,
			    ds->ds_snapname, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_rele(ds_head, FTAG);
	}

	/* Only release ds_prev if we took our own hold on it above. */
	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT(0 == zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
		VERIFY(0 == dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	dsl_dir_close(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dsl_dataset_drain_refs(ds, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));
}

/*
 * Space check used when taking a snapshot.  Does nothing (returns 0)
 * unless tx is syncing.  Returns ENOSPC if the portion of the dataset's
 * unique data that is covered by its reservation does not fit in the
 * space available to the dsl_dir; otherwise registers that amount with
 * dsl_dir_willuse_space() and returns 0.
 */
static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
		return (ENOSPC);

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

/*
 * Check function paired with dsl_dataset_snapshot_sync().  arg1 is the
 * dataset to snapshot, arg2 the snapshot name.  Returns EAGAIN if a
 * snapshot already exists for this txg, EEXIST if the name is taken,
 * ENAMETOOLONG if the full name would not fit in MAXNAMELEN, or an
 * error from the name lookup or the space check.  On success records
 * the txg in ds_trysnap_txg and returns 0.
 */
/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/*
	 * Check that the dataset's name is not too long.  Name consists
	 * of the dataset's length + 1 for the @-sign + snapshot name's length
	 */
	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
		return (ENAMETOOLONG);

	err = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (err)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

/*
 * Sync function paired with dsl_dataset_snapshot_check().  arg1 is the
 * dataset to snapshot, arg2 the snapshot name.  Allocates a new dataset
 * object in the MOS, fills it in as a copy of the head's current state
 * (taking over the head's deadlist object), links it between the head
 * and the head's previous snapshot, gives the head a fresh empty
 * deadlist, and adds the name to the head's snapshot-name ZAP.  Runs
 * with the pool config lock held for writing.
 */
void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    next_clones_obj, dsphys->ds_next_snap_obj, tx));
			VERIFY3U(0, ==, zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    add, 0, 0, tx);
	}

	/* The new snapshot owns the old deadlist; give the head a new one. */
	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = crtxg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	/* Repoint the head's ds_prev reference at the new snapshot. */
	if (ds->ds_prev)
		dsl_dataset_drop_ref(ds->ds_prev, ds);
	VERIFY(0 == dsl_dataset_get_ref(dp,
	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_pool_ds_snapshotted(ds, tx);

	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
	    "dataset = %llu", dsobj);
}

/*
 * Write out a dirty dataset in syncing context: copy the in-core
 * fsid guid to disk, dirty the dsl_dir, and sync the objset stored in
 * the user pointer under the given parent zio.  The dataset must not
 * be a snapshot and must have a user pointer.
 */
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

	dsl_dir_dirty(ds->ds_dir, tx);
	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
}

/*
 * Add this dataset's properties to 'nv': the dsl_dir's stats first,
 * then available/referenced space, creation time and txg, refquota,
 * refreservation and guid.  For a snapshot, "used" and the compression
 * ratio are added from the snapshot's own ds_phys values.
 */
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	uint64_t refd, avail, uobjs, aobjs;

	dsl_dir_stats(ds->ds_dir, nv);

	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    ds->ds_phys->ds_creation_time);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    ds->ds_phys->ds_creation_txg);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
	    ds->ds_quota);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
	    ds->ds_reserved);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
	    ds->ds_phys->ds_guid);

	if (ds->ds_phys->ds_next_snap_obj) {
		/*
		 * This is a snapshot; override the dd's space used with
		 * our unique space and compression ratio.
		 */
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
		    ds->ds_phys->ds_unique_bytes);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
		    ds->ds_phys->ds_compressed_bytes));
	}
}

/*
 * Fill in 'stat' from the dataset: creation txg, inconsistent flag,
 * guid, whether it is a snapshot and how many clones it has, and the
 * name of the clone origin (empty string if the dsl_dir is not a
 * clone).  The origin lookup is done with the pool config lock held
 * as reader.
 */
void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	stat->dds_guid = ds->ds_phys->ds_guid;
	if (ds->ds_phys->ds_next_snap_obj) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	} else {
		stat->dds_is_snapshot = B_FALSE;
		stat->dds_num_clones = 0;
	}

	/* clone origin is really a dsl_dir thing... */
	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
	if (dsl_dir_is_clone(ds->ds_dir)) {
		dsl_dataset_t *ods;

		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
		dsl_dataset_name(ods, stat->dds_origin);
		dsl_dataset_drop_ref(ods, FTAG);
	} else {
		stat->dds_origin[0] = '\0';
	}
	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}

/*
 * Return the dataset's in-core fsid guid.
 */
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

/*
 * Report space and object usage for the dataset.
 *
 *	*refdbytesp	bytes referenced (ds_used_bytes)
 *	*availbytesp	bytes available from the dsl_dir, plus any part
 *			of the reservation not yet consumed by unique
 *			data, then capped by the remaining quota when
 *			ds_quota is set
 *	*usedobjsp	fill count of the root block pointer
 *	*availobjsp	DN_MAX_OBJECT minus *usedobjsp
 */
void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_used_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}

boolean_t 2034 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 2035 { 2036 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2037 2038 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 2039 dsl_pool_sync_context(dp)); 2040 if (ds->ds_prev == NULL) 2041 return (B_FALSE); 2042 if (ds->ds_phys->ds_bp.blk_birth > 2043 ds->ds_prev->ds_phys->ds_creation_txg) 2044 return (B_TRUE); 2045 return (B_FALSE); 2046 } 2047 2048 /* ARGSUSED */ 2049 static int 2050 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 2051 { 2052 dsl_dataset_t *ds = arg1; 2053 char *newsnapname = arg2; 2054 dsl_dir_t *dd = ds->ds_dir; 2055 dsl_dataset_t *hds; 2056 uint64_t val; 2057 int err; 2058 2059 err = dsl_dataset_hold_obj(dd->dd_pool, 2060 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 2061 if (err) 2062 return (err); 2063 2064 /* new name better not be in use */ 2065 err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 2066 dsl_dataset_rele(hds, FTAG); 2067 2068 if (err == 0) 2069 err = EEXIST; 2070 else if (err == ENOENT) 2071 err = 0; 2072 2073 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2074 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 2075 err = ENAMETOOLONG; 2076 2077 return (err); 2078 } 2079 2080 static void 2081 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 2082 cred_t *cr, dmu_tx_t *tx) 2083 { 2084 dsl_dataset_t *ds = arg1; 2085 const char *newsnapname = arg2; 2086 dsl_dir_t *dd = ds->ds_dir; 2087 objset_t *mos = dd->dd_pool->dp_meta_objset; 2088 dsl_dataset_t *hds; 2089 int err; 2090 2091 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 2092 2093 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, 2094 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 2095 2096 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2097 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); 2098 ASSERT3U(err, ==, 0); 2099 mutex_enter(&ds->ds_lock); 2100 (void) strcpy(ds->ds_snapname, newsnapname); 2101 mutex_exit(&ds->ds_lock); 2102 err = 
zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
	ASSERT3U(err, ==, 0);

	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
	dsl_dataset_rele(hds, FTAG);
}

/*
 * State shared by a recursive snapshot rename (dsl_recursive_rename()
 * and its per-dataset callback dsl_snapshot_rename_one()).
 */
struct renamesnaparg {
	dsl_sync_task_group_t *dstg;	/* collects one rename task per ds */
	char failed[MAXPATHLEN];	/* name reported back on failure */
	char *oldsnap;			/* text after '@' in the old name */
	char *newsnap;			/* text after '@' in the new name */
};

/*
 * dmu_objset_find() callback used by dsl_recursive_rename().
 *
 * Appends "@<oldsnap>" to 'name' in place, checks rename permission on
 * the resulting snapshot name, holds the snapshot (tagged with the task
 * group) and queues a rename task for it.  A snapshot that does not
 * exist (ENOENT from either the permission check or the hold) is
 * silently skipped.  Any other error is returned after recording 'name'
 * in ra->failed.
 *
 * NOTE(review): the copy into 'name' is unbounded, so the caller's
 * buffer must have room for the appended component.  'name' is only
 * truncated back to the filesystem name after the hold attempt; the
 * early returns taken on the permission check leave "@<oldsnap>"
 * appended.
 */
static int
dsl_snapshot_rename_one(char *name, void *arg)
{
	struct renamesnaparg *ra = arg;
	dsl_dataset_t *ds = NULL;
	char *cp;
	int err;

	/* build "<name>@<oldsnap>" in the caller's buffer */
	cp = name + strlen(name);
	*cp = '@';
	(void) strcpy(cp + 1, ra->oldsnap);

	/*
	 * For recursive snapshot renames the parent won't be changing
	 * so we just pass name for both the to/from argument.
	 */
	err = zfs_secpolicy_rename_perms(name, name, CRED());
	if (err == ENOENT) {
		return (0);
	} else if (err) {
		(void) strcpy(ra->failed, name);
		return (err);
	}

#ifdef _KERNEL
	/*
	 * For all filesystems undergoing rename, we'll need to unmount it.
	 */
	(void) zfs_unmount_snap(name, NULL);
#endif
	err = dsl_dataset_hold(name, ra->dstg, &ds);
	/* restore the caller's string to the bare filesystem name */
	*cp = '\0';
	if (err == ENOENT) {
		return (0);
	} else if (err) {
		(void) strcpy(ra->failed, name);
		return (err);
	}

	/* the hold taken above is dropped by dsl_recursive_rename() */
	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);

	return (0);
}

/*
 * Rename snapshot "<fs>@<old>" to "<fs>@<new>" on <fs> and on every
 * child visited by dmu_objset_find(..., DS_FIND_CHILDREN), as one sync
 * task group.
 *
 * Both names are used as if they contain '@' (the strchr() results are
 * not checked); dsl_dataset_rename() checks for it before calling.
 *
 * Returns 0 or an errno.  On failure 'oldname' is overwritten with the
 * contents of ra->failed, which may be the empty string if no dataset
 * name was recorded.
 */
static int
dsl_recursive_rename(char *oldname, const char *newname)
{
	int err;
	struct renamesnaparg *ra;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp, *fsname = spa_strdup(oldname);
	int len = strlen(oldname);

	/* truncate the snapshot name to get the fsname */
	cp = strchr(fsname, '@');
	*cp = '\0';

	err = spa_open(fsname, &spa, FTAG);
	if (err) {
		/* free with the pre-truncation length of the copy */
		kmem_free(fsname, len + 1);
		return (err);
	}
	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));

	ra->oldsnap = strchr(oldname, '@') + 1;
	ra->newsnap = strchr(newname, '@') + 1;
	*ra->failed = '\0';

	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
	    DS_FIND_CHILDREN);
	kmem_free(fsname, len + 1);

	if (err == 0) {
		err = dsl_sync_task_group_wait(ra->dstg);
	}

	/*
	 * Walk every queued task, whether or not the group was run:
	 * note the name of a task that failed and drop the hold that
	 * dsl_snapshot_rename_one() took on its dataset.
	 */
	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			dsl_dir_name(ds->ds_dir, ra->failed);
			(void) strcat(ra->failed, "@");
			(void) strcat(ra->failed, ra->newsnap);
		}
		dsl_dataset_rele(ds, ra->dstg);
	}

	if (err)
		(void) strcpy(oldname, ra->failed);

	dsl_sync_task_group_destroy(ra->dstg);
	kmem_free(ra, sizeof (struct renamesnaparg));
	spa_close(spa, FTAG);
	return (err);
}

/*
 * dmu_objset_find() callback used by dsl_dataset_rename(): 'arg' points
 * to the number of characters the name will grow by.  Fails with
 * ENAMETOOLONG if the grown name would reach MAXNAMELEN.
 */
static int
dsl_valid_rename(char *oldname, void *arg)
{
	int delta = *(int *)arg;

	if (strlen(oldname) + delta >= MAXNAMELEN)
		return (ENAMETOOLONG);

	return (0);
}

/*
 * Rename a dataset.
 *
 * If 'oldname' resolves completely to a dsl_dir (no leftover tail), the
 * directory is renamed via dsl_dir_rename(), after first verifying that
 * no descendant or snapshot name would become too long when the name
 * grows.  If the leftover tail begins with '@', a snapshot is renamed:
 * 'newname' must name a snapshot of the same filesystem (EINVAL if it
 * has no '@', EXDEV if the part up to and including '@' differs), and
 * the rename is applied to this snapshot only or, if 'recursive', via
 * dsl_recursive_rename().  Any other leftover tail yields ENOENT.
 *
 * Also exported as dmu_objset_rename through the weak alias below.
 */
#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	const char *tail;
	int err;

	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
	if (err)
		return (err);
	/*
	 * If there are more than 2 references there may be holds
	 * hanging around that haven't been cleared out yet.
	 */
	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
		txg_wait_synced(dd->dd_pool, 0);
	if (tail == NULL) {
		int delta = strlen(newname) - strlen(oldname);

		/* if we're growing, validate child name lengths */
		if (delta > 0)
			err = dmu_objset_find(oldname, dsl_valid_rename,
			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);

		if (!err)
			err = dsl_dir_rename(dd, newname);
		dsl_dir_close(dd, FTAG);
		return (err);
	}
	if (tail[0] != '@') {
		/* the name ended in a nonexistent component */
		dsl_dir_close(dd, FTAG);
		return (ENOENT);
	}

	dsl_dir_close(dd, FTAG);

	/* new name must be snapshot in same filesystem */
	tail = strchr(newname, '@');
	if (tail == NULL)
		return (EINVAL);
	tail++;
	/* compare everything up to and including the '@' */
	if (strncmp(oldname, newname, tail - newname) != 0)
		return (EXDEV);

	if (recursive) {
		err = dsl_recursive_rename(oldname, newname);
	} else {
		err = dsl_dataset_hold(oldname, FTAG, &ds);
		if (err)
			return (err);

		/* 'tail' now points at the new snapshot component */
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_snapshot_rename_check,
		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);

		dsl_dataset_rele(ds, FTAG);
	}

	return (err);
}

/* One element of a snapshot list built by snaplist_make(). */
struct promotenode {
	list_node_t link;
	dsl_dataset_t *ds;
};

/*
 * Argument block handed from dsl_dataset_promote() to the promote
 * check/sync functions.  The three lists are filled in by
 * dsl_dataset_promote(); the space totals are computed by
 * dsl_dataset_promote_check() and consumed by
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	list_t shared_snaps, origin_snaps, clone_snaps;
	dsl_dataset_t *origin_origin, *origin_head;
	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
};

static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);

/*
 * Sync-task check function for promote.  arg1 is the clone's head
 * dataset, arg2 the struct promotearg.
 *
 * In open context only the "is this a clone" test is performed.  In
 * syncing context it also rejects datasets flagged DS_FLAG_NOPROMOTE
 * (EXDEV), rejects snapshot-name conflicts (EEXIST), checks that the
 * space can be transferred, and fills in the space totals in 'pa'.
 */
/* ARGSUSED */
static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	/* head of shared_snaps is the clone's current origin snapshot */
	struct promotenode *snap = list_head(&pa->shared_snaps);
	dsl_dataset_t *origin_ds = snap->ds;
	int err;

	/* Check that it is a real clone */
	if (!dsl_dir_is_clone(hds->ds_dir))
		return (EINVAL);

	/* Since this is so expensive, don't do the preliminary check */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
		return (EXDEV);

	/* compute origin's new unique space */
	snap = list_tail(&pa->clone_snaps);
	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
	err = bplist_space_birthrange(&snap->ds->ds_deadlist,
	    origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique);
	if (err)
		return (err);

	/*
	 * Walk the snapshots that we are moving
	 *
	 * Compute space to transfer.  Consider the incremental changes
	 * to used for each snapshot:
	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
	 * So each snapshot gave birth to:
	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
	 * So a sequence would look like:
	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
	 * Which simplifies to:
	 * uN + kN + kN-1 + ... + k1 + k0
	 * Note however, if we stop before we reach the ORIGIN we get:
	 * uN + kN + kN-1 + ... + kM - uM-1
	 */
	pa->used = origin_ds->ds_phys->ds_used_bytes;
	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
	for (snap = list_head(&pa->shared_snaps); snap;
	    snap = list_next(&pa->shared_snaps, snap)) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *ds = snap->ds;

		/* Check that the snapshot name does not conflict */
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
		if (err == 0)
			return (EEXIST);
		if (err != ENOENT)
			return (err);

		/* The very first snapshot does not have a deadlist */
		if (ds->ds_phys->ds_prev_snap_obj == 0)
			continue;

		if (err = bplist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp))
			return (err);
		pa->used += dlused;
		pa->comp += dlcomp;
		pa->uncomp += dluncomp;
	}

	/*
	 * If we are a clone of a clone then we never reached ORIGIN,
	 * so we need to subtract out the clone origin's used space.
	 */
	if (pa->origin_origin) {
		pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
		pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
		pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
	}

	/* Check that there is enough space here */
	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
	    pa->used);
	if (err)
		return (err);

	/*
	 * Compute the amounts of space that will be used by snapshots
	 * after the promotion (for both origin and clone).  For each,
	 * it is the amount of space that will be on all of their
	 * deadlists (that was not born before their new origin).
	 */
	if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		uint64_t space;

		/*
		 * Note, typically this will not be a clone of a clone,
		 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so
		 * these snaplist_space() -> bplist_space_birthrange()
		 * calls will be fast because they do not have to
		 * iterate over all bps.
		 */
		snap = list_head(&pa->origin_snaps);
		err = snaplist_space(&pa->shared_snaps,
		    snap->ds->ds_origin_txg, &pa->cloneusedsnap);
		if (err)
			return (err);

		err = snaplist_space(&pa->clone_snaps,
		    snap->ds->ds_origin_txg, &space);
		if (err)
			return (err);
		pa->cloneusedsnap += space;
	}
	if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		err = snaplist_space(&pa->origin_snaps,
		    origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
		if (err)
			return (err);
	}

	return (0);
}

/*
 * Sync-task sync function for promote.  arg1 is the clone's head
 * dataset, arg2 the struct promotearg populated by
 * dsl_dataset_promote() and dsl_dataset_promote_check().
 *
 * Re-links the origin snapshot to the clone's snapshot chain, swaps the
 * origin relationship between the two dsl_dirs, moves every snapshot on
 * pa->shared_snaps (name entry and containing dsl_dir) into the clone's
 * dsl_dir, and applies the space accounting computed by the check
 * function.
 */
static void
dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	struct promotenode *snap = list_head(&pa->shared_snaps);
	dsl_dataset_t *origin_ds = snap->ds;
	dsl_dataset_t *origin_head;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *odd = NULL;
	uint64_t oldnext_obj;
	int64_t delta;

	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));

	/* head of origin_snaps is the origin filesystem's head dataset */
	snap = list_head(&pa->origin_snaps);
	origin_head = snap->ds;

	/*
	 * We need to explicitly open odd, since origin_ds's dd will be
	 * changing.
	 */
	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
	    NULL, FTAG, &odd));

	/* change origin's next snap */
	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
	oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
	snap = list_tail(&pa->clone_snaps);
	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
	origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;

	/* change the origin's next clone */
	if (origin_ds->ds_phys->ds_next_clones_obj) {
		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
		    origin_ds->ds_phys->ds_next_clones_obj,
		    origin_ds->ds_phys->ds_next_snap_obj, tx));
		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
		    origin_ds->ds_phys->ds_next_clones_obj,
		    oldnext_obj, tx));
	}

	/* change origin */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
	hds->ds_origin_txg = origin_head->ds_origin_txg;
	dmu_buf_will_dirty(odd->dd_dbuf, tx);
	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
	origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg;

	/* move snapshots to this dir */
	for (snap = list_head(&pa->shared_snaps); snap;
	    snap = list_next(&pa->shared_snaps, snap)) {
		dsl_dataset_t *ds = snap->ds;

		/* unregister props as dsl_dir is changing */
		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		/* move snap name entry */
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		VERIFY(0 == dsl_dataset_snap_remove(origin_head,
		    ds->ds_snapname, tx));
		VERIFY(0 == zap_add(dp->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));
		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
		ds->ds_phys->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, odd);
		dsl_dir_close(ds->ds_dir, ds);
		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
	}

	/*
	 * Change space accounting.
	 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
	 * both be valid, or both be 0 (resulting in delta == 0).  This
	 * is true for each of {clone,origin} independently.
	 */

	/* the clone's dir gains the transferred space ... */
	delta = pa->cloneusedsnap -
	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, >=, 0);
	ASSERT3U(pa->used, >=, delta);
	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(dd, DD_USED_HEAD,
	    pa->used - delta, pa->comp, pa->uncomp, tx);

	/* ... and the origin's dir gives up the same amount */
	delta = pa->originusedsnap -
	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, <=, 0);
	ASSERT3U(pa->used, >=, -delta);
	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(odd, DD_USED_HEAD,
	    -pa->used - delta, -pa->comp, -pa->uncomp, tx);

	origin_ds->ds_phys->ds_unique_bytes = pa->unique;

	/* log history record */
	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
	    cr, "dataset = %llu", hds->ds_object);

	dsl_dir_close(odd, FTAG);
}

/* hold/own tag used for every dataset placed on a snaplist */
static char *snaplist_tag = "snaplist";
/*
 * Make a list of dsl_dataset_t's for the snapshots between first_obj
 * (exclusive) and last_obj (inclusive).  The list will be in reverse
 * order (last_obj will be the list_head()).  If first_obj == 0, do all
 * snapshots back to this dataset's origin.
2550 */ 2551 static int 2552 snaplist_make(dsl_pool_t *dp, boolean_t own, 2553 uint64_t first_obj, uint64_t last_obj, list_t *l) 2554 { 2555 uint64_t obj = last_obj; 2556 2557 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2558 2559 list_create(l, sizeof (struct promotenode), 2560 offsetof(struct promotenode, link)); 2561 2562 while (obj != first_obj) { 2563 dsl_dataset_t *ds; 2564 struct promotenode *snap; 2565 int err; 2566 2567 if (own) { 2568 err = dsl_dataset_own_obj(dp, obj, 2569 0, snaplist_tag, &ds); 2570 if (err == 0) 2571 dsl_dataset_make_exclusive(ds, snaplist_tag); 2572 } else { 2573 err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2574 } 2575 if (err == ENOENT) { 2576 /* lost race with snapshot destroy */ 2577 struct promotenode *last = list_tail(l); 2578 ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2579 obj = last->ds->ds_phys->ds_prev_snap_obj; 2580 continue; 2581 } else if (err) { 2582 return (err); 2583 } 2584 2585 if (first_obj == 0) 2586 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2587 2588 snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2589 snap->ds = ds; 2590 list_insert_tail(l, snap); 2591 obj = ds->ds_phys->ds_prev_snap_obj; 2592 } 2593 2594 return (0); 2595 } 2596 2597 static int 2598 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2599 { 2600 struct promotenode *snap; 2601 2602 *spacep = 0; 2603 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2604 uint64_t used; 2605 int err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2606 mintxg, UINT64_MAX, &used); 2607 if (err) 2608 return (err); 2609 *spacep += used; 2610 } 2611 return (0); 2612 } 2613 2614 static void 2615 snaplist_destroy(list_t *l, boolean_t own) 2616 { 2617 struct promotenode *snap; 2618 2619 if (!l || !list_link_active(&l->list_head)) 2620 return; 2621 2622 while ((snap = list_tail(l)) != NULL) { 2623 list_remove(l, snap); 2624 if (own) 2625 dsl_dataset_disown(snap->ds, snaplist_tag); 2626 else 2627 
dsl_dataset_rele(snap->ds, snaplist_tag); 2628 kmem_free(snap, sizeof (struct promotenode)); 2629 } 2630 list_destroy(l); 2631 } 2632 2633 /* 2634 * Promote a clone. Nomenclature note: 2635 * "clone" or "cds": the original clone which is being promoted 2636 * "origin" or "ods": the snapshot which is originally clone's origin 2637 * "origin head" or "ohds": the dataset which is the head 2638 * (filesystem/volume) for the origin 2639 * "origin origin": the origin of the origin's filesystem (typically 2640 * NULL, indicating that the clone is not a clone of a clone). 2641 */ 2642 int 2643 dsl_dataset_promote(const char *name) 2644 { 2645 dsl_dataset_t *ds; 2646 dsl_dir_t *dd; 2647 dsl_pool_t *dp; 2648 dmu_object_info_t doi; 2649 struct promotearg pa = { 0 }; 2650 struct promotenode *snap; 2651 int err; 2652 2653 err = dsl_dataset_hold(name, FTAG, &ds); 2654 if (err) 2655 return (err); 2656 dd = ds->ds_dir; 2657 dp = dd->dd_pool; 2658 2659 err = dmu_object_info(dp->dp_meta_objset, 2660 ds->ds_phys->ds_snapnames_zapobj, &doi); 2661 if (err) { 2662 dsl_dataset_rele(ds, FTAG); 2663 return (err); 2664 } 2665 2666 if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 2667 dsl_dataset_rele(ds, FTAG); 2668 return (EINVAL); 2669 } 2670 2671 /* 2672 * We are going to inherit all the snapshots taken before our 2673 * origin (i.e., our new origin will be our parent's origin). 2674 * Take ownership of them so that we can rename them into our 2675 * namespace. 
2676 */ 2677 rw_enter(&dp->dp_config_rwlock, RW_READER); 2678 2679 err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 2680 &pa.shared_snaps); 2681 if (err != 0) 2682 goto out; 2683 2684 err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 2685 if (err != 0) 2686 goto out; 2687 2688 snap = list_head(&pa.shared_snaps); 2689 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2690 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 2691 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 2692 if (err != 0) 2693 goto out; 2694 2695 if (dsl_dir_is_clone(snap->ds->ds_dir)) { 2696 err = dsl_dataset_own_obj(dp, 2697 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2698 0, FTAG, &pa.origin_origin); 2699 if (err != 0) 2700 goto out; 2701 } 2702 2703 out: 2704 rw_exit(&dp->dp_config_rwlock); 2705 2706 /* 2707 * Add in 128x the snapnames zapobj size, since we will be moving 2708 * a bunch of snapnames to the promoted ds, and dirtying their 2709 * bonus buffers. 
2710 */ 2711 if (err == 0) { 2712 err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 2713 dsl_dataset_promote_sync, ds, &pa, 2714 2 + 2 * doi.doi_physical_blks); 2715 } 2716 2717 snaplist_destroy(&pa.shared_snaps, B_TRUE); 2718 snaplist_destroy(&pa.clone_snaps, B_FALSE); 2719 snaplist_destroy(&pa.origin_snaps, B_FALSE); 2720 if (pa.origin_origin) 2721 dsl_dataset_disown(pa.origin_origin, FTAG); 2722 dsl_dataset_rele(ds, FTAG); 2723 return (err); 2724 } 2725 2726 struct cloneswaparg { 2727 dsl_dataset_t *cds; /* clone dataset */ 2728 dsl_dataset_t *ohds; /* origin's head dataset */ 2729 boolean_t force; 2730 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 2731 }; 2732 2733 /* ARGSUSED */ 2734 static int 2735 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2736 { 2737 struct cloneswaparg *csa = arg1; 2738 2739 /* they should both be heads */ 2740 if (dsl_dataset_is_snapshot(csa->cds) || 2741 dsl_dataset_is_snapshot(csa->ohds)) 2742 return (EINVAL); 2743 2744 /* the branch point should be just before them */ 2745 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2746 return (EINVAL); 2747 2748 /* cds should be the clone */ 2749 if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != 2750 csa->ohds->ds_object) 2751 return (EINVAL); 2752 2753 /* the clone should be a child of the origin */ 2754 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2755 return (EINVAL); 2756 2757 /* ohds shouldn't be modified unless 'force' */ 2758 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2759 return (ETXTBSY); 2760 2761 /* adjust amount of any unconsumed refreservation */ 2762 csa->unused_refres_delta = 2763 (int64_t)MIN(csa->ohds->ds_reserved, 2764 csa->ohds->ds_phys->ds_unique_bytes) - 2765 (int64_t)MIN(csa->ohds->ds_reserved, 2766 csa->cds->ds_phys->ds_unique_bytes); 2767 2768 if (csa->unused_refres_delta > 0 && 2769 csa->unused_refres_delta > 2770 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2771 return 
(ENOSPC); 2772 2773 return (0); 2774 } 2775 2776 /* ARGSUSED */ 2777 static void 2778 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2779 { 2780 struct cloneswaparg *csa = arg1; 2781 dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 2782 2783 ASSERT(csa->cds->ds_reserved == 0); 2784 ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2785 2786 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2787 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2788 dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); 2789 2790 if (csa->cds->ds_user_ptr != NULL) { 2791 csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); 2792 csa->cds->ds_user_ptr = NULL; 2793 } 2794 2795 if (csa->ohds->ds_user_ptr != NULL) { 2796 csa->ohds->ds_user_evict_func(csa->ohds, 2797 csa->ohds->ds_user_ptr); 2798 csa->ohds->ds_user_ptr = NULL; 2799 } 2800 2801 /* reset origin's unique bytes */ 2802 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2803 csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2804 &csa->cds->ds_prev->ds_phys->ds_unique_bytes)); 2805 2806 /* swap blkptrs */ 2807 { 2808 blkptr_t tmp; 2809 tmp = csa->ohds->ds_phys->ds_bp; 2810 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2811 csa->cds->ds_phys->ds_bp = tmp; 2812 } 2813 2814 /* set dd_*_bytes */ 2815 { 2816 int64_t dused, dcomp, duncomp; 2817 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2818 uint64_t odl_used, odl_comp, odl_uncomp; 2819 2820 ASSERT3U(csa->cds->ds_dir->dd_phys-> 2821 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2822 2823 VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2824 &cdl_comp, &cdl_uncomp)); 2825 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2826 &odl_comp, &odl_uncomp)); 2827 2828 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2829 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2830 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2831 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2832 duncomp = 
csa->cds->ds_phys->ds_uncompressed_bytes + 2833 cdl_uncomp - 2834 (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2835 2836 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, 2837 dused, dcomp, duncomp, tx); 2838 dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, 2839 -dused, -dcomp, -duncomp, tx); 2840 2841 /* 2842 * The difference in the space used by snapshots is the 2843 * difference in snapshot space due to the head's 2844 * deadlist (since that's the only thing that's 2845 * changing that affects the snapused). 2846 */ 2847 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2848 csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used)); 2849 VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist, 2850 csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used)); 2851 dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, 2852 DD_USED_HEAD, DD_USED_SNAP, tx); 2853 } 2854 2855 #define SWITCH64(x, y) \ 2856 { \ 2857 uint64_t __tmp = (x); \ 2858 (x) = (y); \ 2859 (y) = __tmp; \ 2860 } 2861 2862 /* swap ds_*_bytes */ 2863 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2864 csa->cds->ds_phys->ds_used_bytes); 2865 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2866 csa->cds->ds_phys->ds_compressed_bytes); 2867 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2868 csa->cds->ds_phys->ds_uncompressed_bytes); 2869 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2870 csa->cds->ds_phys->ds_unique_bytes); 2871 2872 /* apply any parent delta for change in unconsumed refreservation */ 2873 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, 2874 csa->unused_refres_delta, 0, 0, tx); 2875 2876 /* swap deadlists */ 2877 bplist_close(&csa->cds->ds_deadlist); 2878 bplist_close(&csa->ohds->ds_deadlist); 2879 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2880 csa->cds->ds_phys->ds_deadlist_obj); 2881 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2882 csa->cds->ds_phys->ds_deadlist_obj)); 2883 VERIFY(0 == 
bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2884 csa->ohds->ds_phys->ds_deadlist_obj)); 2885 2886 dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx); 2887 } 2888 2889 /* 2890 * Swap 'clone' with its origin head file system. Used at the end 2891 * of "online recv" to swizzle the file system to the new version. 2892 */ 2893 int 2894 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2895 boolean_t force) 2896 { 2897 struct cloneswaparg csa; 2898 int error; 2899 2900 ASSERT(clone->ds_owner); 2901 ASSERT(origin_head->ds_owner); 2902 retry: 2903 /* Need exclusive access for the swap */ 2904 rw_enter(&clone->ds_rwlock, RW_WRITER); 2905 if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { 2906 rw_exit(&clone->ds_rwlock); 2907 rw_enter(&origin_head->ds_rwlock, RW_WRITER); 2908 if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { 2909 rw_exit(&origin_head->ds_rwlock); 2910 goto retry; 2911 } 2912 } 2913 csa.cds = clone; 2914 csa.ohds = origin_head; 2915 csa.force = force; 2916 error = dsl_sync_task_do(clone->ds_dir->dd_pool, 2917 dsl_dataset_clone_swap_check, 2918 dsl_dataset_clone_swap_sync, &csa, NULL, 9); 2919 return (error); 2920 } 2921 2922 /* 2923 * Given a pool name and a dataset object number in that pool, 2924 * return the name of that dataset. 
2925 */ 2926 int 2927 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2928 { 2929 spa_t *spa; 2930 dsl_pool_t *dp; 2931 dsl_dataset_t *ds; 2932 int error; 2933 2934 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2935 return (error); 2936 dp = spa_get_dsl(spa); 2937 rw_enter(&dp->dp_config_rwlock, RW_READER); 2938 if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 2939 dsl_dataset_name(ds, buf); 2940 dsl_dataset_rele(ds, FTAG); 2941 } 2942 rw_exit(&dp->dp_config_rwlock); 2943 spa_close(spa, FTAG); 2944 2945 return (error); 2946 } 2947 2948 int 2949 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2950 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2951 { 2952 int error = 0; 2953 2954 ASSERT3S(asize, >, 0); 2955 2956 /* 2957 * *ref_rsrv is the portion of asize that will come from any 2958 * unconsumed refreservation space. 2959 */ 2960 *ref_rsrv = 0; 2961 2962 mutex_enter(&ds->ds_lock); 2963 /* 2964 * Make a space adjustment for reserved bytes. 2965 */ 2966 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2967 ASSERT3U(*used, >=, 2968 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2969 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2970 *ref_rsrv = 2971 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2972 } 2973 2974 if (!check_quota || ds->ds_quota == 0) { 2975 mutex_exit(&ds->ds_lock); 2976 return (0); 2977 } 2978 /* 2979 * If they are requesting more space, and our current estimate 2980 * is over quota, they get to try again unless the actual 2981 * on-disk is over quota and there are no pending changes (which 2982 * may free up space for us). 
2983 */ 2984 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 2985 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 2986 error = ERESTART; 2987 else 2988 error = EDQUOT; 2989 } 2990 mutex_exit(&ds->ds_lock); 2991 2992 return (error); 2993 } 2994 2995 /* ARGSUSED */ 2996 static int 2997 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 2998 { 2999 dsl_dataset_t *ds = arg1; 3000 uint64_t *quotap = arg2; 3001 uint64_t new_quota = *quotap; 3002 3003 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 3004 return (ENOTSUP); 3005 3006 if (new_quota == 0) 3007 return (0); 3008 3009 if (new_quota < ds->ds_phys->ds_used_bytes || 3010 new_quota < ds->ds_reserved) 3011 return (ENOSPC); 3012 3013 return (0); 3014 } 3015 3016 /* ARGSUSED */ 3017 void 3018 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3019 { 3020 dsl_dataset_t *ds = arg1; 3021 uint64_t *quotap = arg2; 3022 uint64_t new_quota = *quotap; 3023 3024 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3025 3026 ds->ds_quota = new_quota; 3027 3028 dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 3029 3030 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 3031 tx, cr, "%lld dataset = %llu ", 3032 (longlong_t)new_quota, ds->ds_object); 3033 } 3034 3035 int 3036 dsl_dataset_set_quota(const char *dsname, uint64_t quota) 3037 { 3038 dsl_dataset_t *ds; 3039 int err; 3040 3041 err = dsl_dataset_hold(dsname, FTAG, &ds); 3042 if (err) 3043 return (err); 3044 3045 if (quota != ds->ds_quota) { 3046 /* 3047 * If someone removes a file, then tries to set the quota, we 3048 * want to make sure the file freeing takes effect. 
3049 */ 3050 txg_wait_open(ds->ds_dir->dd_pool, 0); 3051 3052 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3053 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 3054 ds, "a, 0); 3055 } 3056 dsl_dataset_rele(ds, FTAG); 3057 return (err); 3058 } 3059 3060 static int 3061 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 3062 { 3063 dsl_dataset_t *ds = arg1; 3064 uint64_t *reservationp = arg2; 3065 uint64_t new_reservation = *reservationp; 3066 uint64_t unique; 3067 3068 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 3069 SPA_VERSION_REFRESERVATION) 3070 return (ENOTSUP); 3071 3072 if (dsl_dataset_is_snapshot(ds)) 3073 return (EINVAL); 3074 3075 /* 3076 * If we are doing the preliminary check in open context, the 3077 * space estimates may be inaccurate. 3078 */ 3079 if (!dmu_tx_is_syncing(tx)) 3080 return (0); 3081 3082 mutex_enter(&ds->ds_lock); 3083 unique = dsl_dataset_unique(ds); 3084 mutex_exit(&ds->ds_lock); 3085 3086 if (MAX(unique, new_reservation) > MAX(unique, ds->ds_reserved)) { 3087 uint64_t delta = MAX(unique, new_reservation) - 3088 MAX(unique, ds->ds_reserved); 3089 3090 if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 3091 return (ENOSPC); 3092 if (ds->ds_quota > 0 && 3093 new_reservation > ds->ds_quota) 3094 return (ENOSPC); 3095 } 3096 3097 return (0); 3098 } 3099 3100 /* ARGSUSED */ 3101 static void 3102 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 3103 dmu_tx_t *tx) 3104 { 3105 dsl_dataset_t *ds = arg1; 3106 uint64_t *reservationp = arg2; 3107 uint64_t new_reservation = *reservationp; 3108 uint64_t unique; 3109 int64_t delta; 3110 3111 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3112 3113 mutex_enter(&ds->ds_dir->dd_lock); 3114 mutex_enter(&ds->ds_lock); 3115 unique = dsl_dataset_unique(ds); 3116 delta = MAX(0, (int64_t)(new_reservation - unique)) - 3117 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3118 ds->ds_reserved = new_reservation; 3119 mutex_exit(&ds->ds_lock); 3120 3121 
dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3122 mutex_exit(&ds->ds_dir->dd_lock); 3123 dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation", 3124 new_reservation, cr, tx); 3125 3126 spa_history_internal_log(LOG_DS_REFRESERV, 3127 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 3128 (longlong_t)new_reservation, ds->ds_object); 3129 } 3130 3131 int 3132 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 3133 { 3134 dsl_dataset_t *ds; 3135 int err; 3136 3137 err = dsl_dataset_hold(dsname, FTAG, &ds); 3138 if (err) 3139 return (err); 3140 3141 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3142 dsl_dataset_set_reservation_check, 3143 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 3144 dsl_dataset_rele(ds, FTAG); 3145 return (err); 3146 } 3147