/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/sunddi.h>

static char *dsl_reaper = "the grim reaper";

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_syncfunc_t dsl_dataset_set_reservation_sync;

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

#define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)

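/*
 * Example of the refreservation arithmetic in parent_delta() below:
 * with ds_reserved == 1000 and ds_unique_bytes == 400, a delta of +300
 * returns 0 -- the growth from 400 to 700 unique bytes is still covered
 * by the 1000 bytes of refreservation already charged to our ancestors.
 * With ds_unique_bytes == 900, the same +300 returns +200: only the
 * portion that pushes us past the reservation (1200 - 1000) is new to
 * the dsl_dir layer.
 */
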
/*
 * Figure out how much of this delta should be propagated to the
 * dsl_dir layer.  If there's a refreservation, that space has already
 * been partially accounted for in our ancestors.
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}

void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}

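/*
 * Free (or defer freeing of) the given block.  If the block was born
 * after the most recent snapshot (e.g., snapshot taken at txg 100,
 * block born at txg 150), only the head references it and it can be
 * freed right away.  If it was born at or before that snapshot (e.g.,
 * at txg 80), the snapshot still references it, so it goes on the
 * head's deadlist to be dealt with when that snapshot is destroyed.
 */
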
int
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
    dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(pio != NULL);
	ASSERT(dmu_tx_is_syncing(tx));
	/* No block pointer => nothing to free */
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(used > 0);
	if (ds == NULL) {
		int err;
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int err;
		int64_t delta;

		dprintf_bp(bp, "freeing: %s", "");
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		mutex_enter(&ds->ds_dir->dd_lock);
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
		mutex_exit(&ds->ds_dir->dd_lock);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}

uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

boolean_t
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;

	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));

	dprintf_ds(ds, "evicting %s\n", "");

	unique_remove(ds->ds_fsid_guid);

	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_drop_ref(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	if (ds->ds_dir)
		dsl_dir_close(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_deadlist.bpl_lock);
	rw_destroy(&ds->ds_rwlock);
	cv_destroy(&ds->ds_exclusive_cv);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}

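/*
 * The snapshot-name lookup/removal below honors case-insensitive
 * datasets: with DS_FLAG_CI_DATASET set, a normalized (MT_FIRST) match
 * is tried first, so that e.g. "Monday" finds a snapshot stored as
 * "monday"; if the ZAP doesn't support normalized matching (ENOTSUP),
 * we fall back to an exact match.
 */
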
static int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}

static int
dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_remove(mos, snapobj, name, tx);
	return (err);
}

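/*
 * Note on instantiation: dsl_dataset_get_ref() speculatively builds a
 * dsl_dataset_t and then publishes it with dmu_buf_set_user_ie().  If
 * another thread won the race, that "winner" is returned, our local
 * copy is torn down, and the winner is used instead; the dbuf hold
 * taken by dmu_bonus_hold() serves as the caller's reference either
 * way.
 */
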
static int
dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);
		rw_init(&ds->ds_rwlock, 0, 0, 0);
		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the blist if we
			 * just opened it.
			 */
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}

			if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
				dsl_dataset_t *origin;

				err = dsl_dataset_hold_obj(dp,
				    ds->ds_dir->dd_phys->dd_origin_obj,
				    FTAG, &origin);
				if (err == 0) {
					ds->ds_origin_txg =
					    origin->ds_phys->ds_creation_txg;
					dsl_dataset_rele(origin, FTAG);
				}
			}
		} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
			err = dsl_dataset_get_snapname(ds);
		}

		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
			/*
			 * In sync context, we're called with either no lock
			 * or with the write lock.  If we're not syncing,
			 * we're always called with the read lock held.
			 */
			boolean_t need_lock =
			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
			    dsl_pool_sync_context(dp);

			if (need_lock)
				rw_enter(&dp->dp_config_rwlock, RW_READER);

			err = dsl_prop_get_ds(ds,
			    "refreservation", sizeof (uint64_t), 1,
			    &ds->ds_reserved, NULL);
			if (err == 0) {
				err = dsl_prop_get_ds(ds,
				    "refquota", sizeof (uint64_t), 1,
				    &ds->ds_quota, NULL);
			}

			if (need_lock)
				rw_exit(&dp->dp_config_rwlock);
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev)
				dsl_dataset_drop_ref(ds->ds_prev, ds);
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	mutex_enter(&ds->ds_lock);
	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
		mutex_exit(&ds->ds_lock);
		dmu_buf_rele(ds->ds_dbuf, tag);
		return (ENOENT);
	}
	mutex_exit(&ds->ds_lock);
	*dsp = ds;
	return (0);
}

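/*
 * Reference-counting overview.  Most consumers take a passive hold and
 * release it when done, e.g.:
 *
 *	dsl_dataset_t *ds;
 *	if (dsl_dataset_hold("tank/fs", FTAG, &ds) == 0) {
 *		... use ds ...
 *		dsl_dataset_rele(ds, FTAG);
 *	}
 *
 * ("tank/fs" is just an example name.)  Consumers that need exclusive
 * use -- notably dsl_dataset_destroy(), whose argument must be opened
 * as OWNER -- use dsl_dataset_own() / dsl_dataset_own_obj() and release
 * with dsl_dataset_disown().  Only one owner can exist at a time;
 * dsl_dataset_tryown() fails if the dataset is already owned.
 */
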
static int
dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/*
	 * In syncing context we don't want the rwlock: there
	 * may be an existing writer waiting for sync phase to
	 * finish.  We don't need to worry about such writers, since
	 * sync phase is single-threaded, so the writer can't be
	 * doing anything while we are active.
	 */
	if (dsl_pool_sync_context(dp)) {
		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
		return (0);
	}

	/*
	 * Normal users will hold the ds_rwlock as a READER until they
	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
	 * drop their READER lock after they set the ds_owner field.
	 *
	 * If the dataset is being destroyed, the destroy thread will
	 * obtain a WRITER lock for exclusive access after it's done its
	 * open-context work and then change the ds_owner to
	 * dsl_reaper once destruction is assured.  So threads
	 * may block here temporarily, until the "destructability" of
	 * the dataset is determined.
	 */
	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
	mutex_enter(&ds->ds_lock);
	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
		rw_exit(&dp->dp_config_rwlock);
		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
		if (DSL_DATASET_IS_DESTROYED(ds)) {
			mutex_exit(&ds->ds_lock);
			dsl_dataset_drop_ref(ds, tag);
			rw_enter(&dp->dp_config_rwlock, RW_READER);
			return (ENOENT);
		}
		rw_enter(&dp->dp_config_rwlock, RW_READER);
	}
	mutex_exit(&ds->ds_lock);
	return (0);
}

int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);

	if (err)
		return (err);
	return (dsl_dataset_hold_ref(*dsp, tag));
}

int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);

	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);

	if (err)
		return (err);
	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
		dsl_dataset_rele(*dsp, owner);
		*dsp = NULL;
		return (EBUSY);
	}
	return (0);
}

int
dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *snapname;
	uint64_t obj;
	int err = 0;

	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj)
		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
	else
		err = ENOENT;
	if (err)
		goto out;

	err = dsl_dataset_hold_ref(*dsp, tag);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *ds = NULL;

		if (*snapname++ != '@') {
			dsl_dataset_rele(*dsp, tag);
			err = ENOENT;
			goto out;
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
		if (err == 0)
			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
		dsl_dataset_rele(*dsp, tag);

		ASSERT3U((err == 0), ==, (ds != NULL));

		if (ds) {
			mutex_enter(&ds->ds_lock);
			if (ds->ds_snapname[0] == 0)
				(void) strlcpy(ds->ds_snapname, snapname,
				    sizeof (ds->ds_snapname));
			mutex_exit(&ds->ds_lock);
			err = dsl_dataset_hold_ref(ds, tag);
			*dsp = err ? NULL : ds;
		}
	}
out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);
	return (err);
}

int
dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold(name, owner, dsp);
	if (err)
		return (err);
	if ((*dsp)->ds_phys->ds_num_children > 0 &&
	    !DS_MODE_IS_READONLY(flags)) {
		dsl_dataset_rele(*dsp, owner);
		return (EROFS);
	}
	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
		dsl_dataset_rele(*dsp, owner);
		return (EBUSY);
	}
	return (0);
}

void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

static int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	int result;

	if (ds == NULL) {
		result = 3;	/* "mos" */
	} else {
		result = dsl_dir_namelen(ds->ds_dir);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			++result;	/* adding one for the @-sign */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				result += strlen(ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				result += strlen(ds->ds_snapname);
			}
		}
	}

	return (result);
}

void
dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
		rw_exit(&ds->ds_rwlock);
	}
	dsl_dataset_drop_ref(ds, tag);
}

void
dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
{
	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
		rw_exit(&ds->ds_rwlock);
		cv_broadcast(&ds->ds_exclusive_cv);
	}
	mutex_exit(&ds->ds_lock);
	if (ds->ds_dbuf)
		dsl_dataset_drop_ref(ds, owner);
	else
		dsl_dataset_evict(ds->ds_dbuf, ds);
}

boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
{
	boolean_t gotit = FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL &&
	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
		ds->ds_owner = owner;
		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
			rw_exit(&ds->ds_rwlock);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}

void
dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
{
	ASSERT3P(owner, ==, ds->ds_owner);
	if (!RW_WRITE_HELD(&ds->ds_rwlock))
		rw_enter(&ds->ds_rwlock, RW_WRITER);
}

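/*
 * Dataset creation.  dsl_dataset_create_sync_dd() fills in a fresh
 * dsl_dataset_phys_t under an existing dsl_dir.  When an origin is
 * supplied (or defaulted to dp_origin_snap), the new dataset is a
 * clone: it starts out pointing at the origin's root blkptr and byte
 * counts, the origin's ds_num_children is bumped, and on pools at
 * SPA_VERSION_NEXT_CLONES or later the clone is also recorded in the
 * origin's ds_next_clones_obj ZAP.
 */
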
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);

	if (origin) {
		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    origin->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY(0 == zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj,
			    dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(lastname[0] != '@');

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	dsl_dir_close(dd, FTAG);

	return (dsobj);
}

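/*
 * Recursive snapshot destruction: dmu_objset_find() walks every
 * descendant filesystem and calls dsl_snapshot_destroy_one() on each,
 * collecting one sync task per snapshot into a single group.  E.g.,
 * dsl_snapshots_destroy("tank", "monday") attempts to destroy
 * tank@monday and every descendant's @monday in one pass ("tank" and
 * "monday" being example names); a filesystem that lacks the snapshot
 * (ENOENT) is simply skipped.
 */
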
struct destroyarg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	char *failed;
};

static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	char *cp;
	int err;

	(void) strcat(name, "@");
	(void) strcat(name, da->snapname);
	err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    da->dstg, &ds);
	cp = strchr(name, '@');
	*cp = '\0';
	if (err == 0) {
		dsl_dataset_make_exclusive(ds, da->dstg);
		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, ds, da->dstg, 0);
	} else if (err == ENOENT) {
		err = 0;
	} else {
		(void) strcpy(da->failed, name);
	}
	return (err);
}

/*
 * Destroy 'snapname' in all descendants of 'fsname'.
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.failed = fsname;

	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		/*
		 * Return the file system name that triggered the error
		 */
		if (dst->dst_err) {
			dsl_dataset_name(ds, fsname);
			*strchr(fsname, '@') = '\0';
		}
		dsl_dataset_disown(ds, da.dstg);
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}

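/*
 * Head-dataset destruction proceeds in stages: (1) a "destroy begin"
 * sync task marks the dataset DS_FLAG_INCONSISTENT so a crash midway
 * is recoverable; (2) the bulk of the objects are freed from open
 * context via dmu_free_object(), keeping the final sync task short;
 * (3) txg_wait_synced() flushes in-flight I/O; (4) a sync task group
 * then destroys the dataset itself along with its now-empty dsl_dir.
 */
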
/*
 * ds must be opened as OWNER.  On return (whether successful or not),
 * ds will be closed and caller can no longer dereference it.
 */
int
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dir_t *dd;
	uint64_t obj;

	if (dsl_dataset_is_snapshot(ds)) {
		/* Destroying a snapshot is simpler */
		dsl_dataset_make_exclusive(ds, tag);

		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    ds, tag, 0);
		goto out;
	}

	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err)
		goto out;

	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
	if (err)
		goto out;

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
	    ds->ds_phys->ds_prev_snap_txg)) {
		/*
		 * Ignore errors, if there is not enough disk space
		 * we will deal with it in dsl_dataset_destroy_sync().
		 */
		(void) dmu_free_object(os, obj);
	}

	/*
	 * We need to sync out all in-flight IO before we try to evict
	 * (the dataset evict func is trying to clear the cached entries
	 * for this dataset in the ARC).
	 */
	txg_wait_synced(dd->dd_pool, 0);

	/*
	 * If we managed to free all the objects in open
	 * context, the user space accounting should be zero.
	 */
	if (ds->ds_phys->ds_bp.blk_fill == 0 &&
	    dmu_objset_userused_enabled(os->os)) {
		uint64_t count;

		ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
		    count == 0);
		ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
		    count == 0);
	}

	dmu_objset_close(os);
	if (err != ESRCH)
		goto out;

	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
	rw_exit(&dd->dd_pool->dp_config_rwlock);

	if (err)
		goto out;

	if (ds->ds_user_ptr) {
		/*
		 * We need to sync out all in-flight IO before we try
		 * to evict (the dataset evict func is trying to clear
		 * the cached entries for this dataset in the ARC).
		 */
		txg_wait_synced(dd->dd_pool, 0);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dsl_dataset_make_exclusive(ds, tag);
	if (ds->ds_user_ptr) {
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}
	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, tag, 0);
	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
	    dsl_dir_destroy_sync, dd, FTAG, 0);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	/* if it is successful, dsl_dir_destroy_sync will close the dd */
	if (err)
		dsl_dir_close(dd, FTAG);
out:
	dsl_dataset_disown(ds, tag);
	return (err);
}

int
dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
{
	int err;

	ASSERT(ds->ds_owner);

	dsl_dataset_make_exclusive(ds, ds->ds_owner);
	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
	    ds, &ost, 0);
	/* drop exclusive access */
	mutex_enter(&ds->ds_lock);
	rw_exit(&ds->ds_rwlock);
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);
	return (err);
}

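/*
 * The opaque user pointer (in practice the open objset) is installed
 * only if none is present; a non-NULL return means another thread got
 * there first and its value remains, e.g.:
 *
 *	old = dsl_dataset_set_user_ptr(ds, os, evict_cb);
 *	if (old != NULL)
 *		... lost the race; the existing pointer stays ...
 *
 * (evict_cb stands for whatever dsl_dataset_evict_func_t the caller
 * registers; it is invoked from dsl_dataset_evict().)
 */
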
void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}

blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

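/*
 * Worked example for the calculation below: if the head references
 * 500MB (ds_used_bytes), the most recent snapshot references 300MB
 * (mrs_used), and 120MB have been freed from the head since that
 * snapshot was taken (deadlist dlused), then 300 - 120 = 180MB of the
 * snapshot is still shared with the head, and the head's unique space
 * is 500 - 180 = 320MB.
 */
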
/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot, that is still being used
 * in this file system, from the space currently in use.  To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
 */
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
	else
		mrs_used = 0;

	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
	    &dluncomp));

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);

	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

static uint64_t
dsl_dataset_unique(dsl_dataset_t *ds)
{
	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
		dsl_dataset_recalc_head_uniq(ds);

	return (ds->ds_phys->ds_unique_bytes);
}

struct killarg {
	dsl_dataset_t *ds;
	zio_t *zio;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
    const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;

	if (bp == NULL)
		return (0);

	if ((zb->zb_level == -1ULL && zb->zb_blkid != 0) ||
	    (zb->zb_object != 0 && dnp == NULL)) {
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		VERIFY3U(0, ==, dsl_free(ka->zio, ka->tx->tx_pool,
		    ka->tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT));
	} else {
		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
	}

	return (0);
}

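/*
 * Rollback, in outline: the check below rejects snapshots and (for
 * non-ZPL objsets) rollback to emptiness; the sync func then evicts
 * the cached objset, transfers the space freed since the origin back
 * to the head, resets the deadlist, frees every block born after the
 * previous snapshot via traverse_dataset()/kill_blkptr(), and finally
 * adopts the previous snapshot's root blkptr and space accounting (or
 * an empty objset if there is no usable previous snapshot).
 */
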
/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;

	/*
	 * We can only roll back to emptiness if it is a ZPL objset.
	 */
	if (*ost != DMU_OST_ZFS &&
	    ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (ds->ds_user_ptr != NULL) {
		/*
		 * We need to make sure that the objset_impl_t is reopened
		 * after we do the rollback, otherwise it will have the wrong
		 * objset_phys_t.  Normally this would happen when this
		 * dataset-open is closed, thus causing the dataset to be
		 * immediately evicted.  But when doing "zfs recv -F", we
		 * reopen the objset before that, so that there is no window
		 * where the dataset is closed and inconsistent.
		 */
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}

	/* Transfer space that was freed since last snap back to the head. */
	{
		uint64_t used;

		VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist,
		    ds->ds_origin_txg, UINT64_MAX, &used));
		dsl_dir_transfer_space(ds->ds_dir, used,
		    DD_USED_SNAP, DD_USED_HEAD, tx);
	}

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/*
		 * Free blkptrs that we gave birth to - this covers
		 * claimed but not played log blocks too.
		 */
		zio_t *zio;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		(void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);
	}

	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);

	if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) {
		/* Change our contents to that of the prev snapshot */

		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT3U(ds->ds_phys->ds_used_bytes, <=,
		    ds->ds_prev->ds_phys->ds_used_bytes);

		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
		ds->ds_phys->ds_used_bytes =
		    ds->ds_prev->ds_phys->ds_used_bytes;
		ds->ds_phys->ds_compressed_bytes =
		    ds->ds_prev->ds_phys->ds_compressed_bytes;
		ds->ds_phys->ds_uncompressed_bytes =
		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;

		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
		}
	} else {
		objset_impl_t *osi;

		ASSERT(*ost != DMU_OST_ZVOL);
		ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0);
		ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0);
		ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0);

		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
		ds->ds_phys->ds_flags = 0;
		ds->ds_phys->ds_unique_bytes = 0;
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
		    SPA_VERSION_UNIQUE_ACCURATE)
			ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

		osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
		    &ds->ds_phys->ds_bp, *ost, tx);
#ifdef _KERNEL
		zfs_create_fs(&osi->os, kcred, NULL, tx);
#endif
	}

	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", ds->ds_object);
}

/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * This is really a dsl_dir thing, but check it here so that
	 * we'll be less likely to leave this dataset inconsistent &
	 * nearly destroyed.
	 */
	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
}

/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* we have an owner hold, so no one else can destroy us */
	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking... */
	return (0);
}

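/*
 * Waiting out the last references: dsl_dataset_drain_refs() swaps in a
 * temporary dbuf-user record whose evict callback
 * (dsl_dataset_refs_gone) merely signals a CV, drops its own hold, and
 * then sleeps until every remaining hold has been released and the
 * callback fires.
 */
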
struct refsarg {
	kmutex_t lock;
	boolean_t gone;
	kcondvar_t cv;
};

/* ARGSUSED */
static void
dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
{
	struct refsarg *arg = argv;

	mutex_enter(&arg->lock);
	arg->gone = TRUE;
	cv_signal(&arg->cv);
	mutex_exit(&arg->lock);
}

static void
dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
{
	struct refsarg arg;

	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
	arg.gone = FALSE;
	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
	    dsl_dataset_refs_gone);
	dmu_buf_rele(ds->ds_dbuf, tag);
	mutex_enter(&arg.lock);
	while (!arg.gone)
		cv_wait(&arg.cv, &arg.lock);
	ASSERT(arg.gone);
	mutex_exit(&arg.lock);
	ds->ds_dbuf = NULL;
	ds->ds_phys = NULL;
	mutex_destroy(&arg.lock);
	cv_destroy(&arg.cv);
}

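/*
 * Deadlist handling when a snapshot is destroyed, by example: with
 * snapshots P (txg 100), S (txg 200), and N (txg 300), destroying S
 * walks N's deadlist.  An entry born at txg 50 (<= P's creation txg)
 * is still referenced by P, so it moves to S's deadlist, which then
 * becomes N's new deadlist; an entry born at txg 150 was referenced
 * only by S and is freed outright.
 */
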
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(ds->ds_owner);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	/* signal any waiters that this dataset is going away */
	mutex_enter(&ds->ds_lock);
	ds->ds_owner = dsl_reaper;
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_pool_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY(0 == zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;
		int64_t used = 0, compressed = 0, uncompressed = 0;

		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		old_unique = dsl_dataset_unique(ds_next);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) dsl_free(zio, dp, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);

		/* change snapused */
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -compressed, -uncompressed, tx);

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		bplist_close(&ds->ds_deadlist);
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;
			uint64_t space;

			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj,
			    FTAG, &ds_after_next));

			VERIFY(0 ==
			    bplist_space_birthrange(&ds_after_next->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_creation_txg, &space));
			ds_next->ds_phys->ds_unique_bytes += space;

			dsl_dataset_rele(ds_after_next, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
			ds_next->ds_prev = NULL;
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds_next, &ds_next->ds_prev));
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir,
				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
			}
		}
		dsl_dataset_rele(ds_next, FTAG);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * NB: this should be very quick, because we already
		 * freed all the objects in open context.
		 */
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
		    ds->ds_phys->ds_unique_bytes == 0);
	}

	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dir */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;

			err = dsl_dataset_snap_lookup(ds_head,
			    ds->ds_snapname, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_rele(ds_head, FTAG);
	}

	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT(0 == zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
		VERIFY(0 == dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	dsl_dir_close(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dsl_dataset_drain_refs(ds, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));
}

static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
		return (ENOSPC);

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}

/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/*
	 * Check that the snapshot's full name is not too long.  It
	 * consists of the dataset name's length + 1 for the @-sign +
	 * the snapshot name's length.
	 */
	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
		return (ENAMETOOLONG);

	err = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (err)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

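/*
 * Creating a snapshot, in short: the new dsl_dataset_phys_t is a copy
 * of the head's (same root blkptr and byte counts), it takes over the
 * head's current deadlist, and the head is given a fresh empty one.
 * The head's ds_unique_bytes then drops to 0, since every block it
 * references is now shared with the new snapshot.
 */
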
void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    next_clones_obj, dsphys->ds_next_snap_obj, tx));
			VERIFY3U(0, ==, zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    add, 0, 0, tx);
	}

	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = crtxg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	if (ds->ds_prev)
		dsl_dataset_drop_ref(ds->ds_prev, ds);
	VERIFY(0 == dsl_dataset_get_ref(dp,
	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_pool_ds_snapshotted(ds, tx);

	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
	    "dataset = %llu", dsobj);
}

void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

	dsl_dir_dirty(ds->ds_dir, tx);
	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
}

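/*
 * Note on the compressratio reported below: it is expressed as
 * uncompressed * 100 / compressed, so e.g. 300MB of data stored in
 * 100MB reports 300 (a 3.00x ratio), and fully incompressible data
 * reports 100.
 */
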
100 : 1966 (ds->ds_phys->ds_uncompressed_bytes * 100 / 1967 ds->ds_phys->ds_compressed_bytes)); 1968 } 1969 } 1970 1971 void 1972 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 1973 { 1974 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 1975 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 1976 stat->dds_guid = ds->ds_phys->ds_guid; 1977 if (ds->ds_phys->ds_next_snap_obj) { 1978 stat->dds_is_snapshot = B_TRUE; 1979 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 1980 } else { 1981 stat->dds_is_snapshot = B_FALSE; 1982 stat->dds_num_clones = 0; 1983 } 1984 1985 /* clone origin is really a dsl_dir thing... */ 1986 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 1987 if (dsl_dir_is_clone(ds->ds_dir)) { 1988 dsl_dataset_t *ods; 1989 1990 VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, 1991 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 1992 dsl_dataset_name(ods, stat->dds_origin); 1993 dsl_dataset_drop_ref(ods, FTAG); 1994 } else { 1995 stat->dds_origin[0] = '\0'; 1996 } 1997 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 1998 } 1999 2000 uint64_t 2001 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 2002 { 2003 return (ds->ds_fsid_guid); 2004 } 2005 2006 void 2007 dsl_dataset_space(dsl_dataset_t *ds, 2008 uint64_t *refdbytesp, uint64_t *availbytesp, 2009 uint64_t *usedobjsp, uint64_t *availobjsp) 2010 { 2011 *refdbytesp = ds->ds_phys->ds_used_bytes; 2012 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 2013 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 2014 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 2015 if (ds->ds_quota != 0) { 2016 /* 2017 * Adjust available bytes according to refquota 2018 */ 2019 if (*refdbytesp < ds->ds_quota) 2020 *availbytesp = MIN(*availbytesp, 2021 ds->ds_quota - *refdbytesp); 2022 else 2023 *availbytesp = 0; 2024 } 2025 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 2026 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 2027 } 2028 2029 boolean_t 2030 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 2031 { 2032 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2033 2034 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 2035 dsl_pool_sync_context(dp)); 2036 if (ds->ds_prev == NULL) 2037 return (B_FALSE); 2038 if (ds->ds_phys->ds_bp.blk_birth > 2039 ds->ds_prev->ds_phys->ds_creation_txg) 2040 return (B_TRUE); 2041 return (B_FALSE); 2042 } 2043 2044 /* ARGSUSED */ 2045 static int 2046 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 2047 { 2048 dsl_dataset_t *ds = arg1; 2049 char *newsnapname = arg2; 2050 dsl_dir_t *dd = ds->ds_dir; 2051 dsl_dataset_t *hds; 2052 uint64_t val; 2053 int err; 2054 2055 err = dsl_dataset_hold_obj(dd->dd_pool, 2056 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 2057 if (err) 2058 return (err); 2059 2060 /* new name better not be in use */ 2061 err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 2062 dsl_dataset_rele(hds, FTAG); 2063 2064 if (err == 0) 2065 err = EEXIST; 2066 else if (err == ENOENT) 2067 err = 0; 2068 2069 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2070 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 2071 err = ENAMETOOLONG; 2072 2073 return (err); 2074 } 2075 2076 static void 2077 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 2078 cred_t *cr, dmu_tx_t *tx) 2079 { 2080 dsl_dataset_t *ds = arg1; 2081 const char *newsnapname = arg2; 2082 dsl_dir_t *dd = ds->ds_dir; 2083 objset_t *mos = dd->dd_pool->dp_meta_objset; 2084 dsl_dataset_t *hds; 2085 int 
err; 2086 2087 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 2088 2089 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, 2090 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 2091 2092 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2093 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); 2094 ASSERT3U(err, ==, 0); 2095 mutex_enter(&ds->ds_lock); 2096 (void) strcpy(ds->ds_snapname, newsnapname); 2097 mutex_exit(&ds->ds_lock); 2098 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 2099 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 2100 ASSERT3U(err, ==, 0); 2101 2102 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 2103 cr, "dataset = %llu", ds->ds_object); 2104 dsl_dataset_rele(hds, FTAG); 2105 } 2106 2107 struct renamesnaparg { 2108 dsl_sync_task_group_t *dstg; 2109 char failed[MAXPATHLEN]; 2110 char *oldsnap; 2111 char *newsnap; 2112 }; 2113 2114 static int 2115 dsl_snapshot_rename_one(char *name, void *arg) 2116 { 2117 struct renamesnaparg *ra = arg; 2118 dsl_dataset_t *ds = NULL; 2119 char *cp; 2120 int err; 2121 2122 cp = name + strlen(name); 2123 *cp = '@'; 2124 (void) strcpy(cp + 1, ra->oldsnap); 2125 2126 /* 2127 * For recursive snapshot renames the parent won't be changing 2128 * so we just pass name for both the to/from arguments. 2129 */ 2130 err = zfs_secpolicy_rename_perms(name, name, CRED()); 2131 if (err == ENOENT) { 2132 return (0); 2133 } else if (err) { 2134 (void) strcpy(ra->failed, name); 2135 return (err); 2136 } 2137 2138 #ifdef _KERNEL 2139 /* 2140 * For each filesystem undergoing rename, we'll need to unmount it. 2141 */ 2142 (void) zfs_unmount_snap(name, NULL); 2143 #endif 2144 err = dsl_dataset_hold(name, ra->dstg, &ds); 2145 *cp = '\0'; 2146 if (err == ENOENT) { 2147 return (0); 2148 } else if (err) { 2149 (void) strcpy(ra->failed, name); 2150 return (err); 2151 } 2152 2153 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 2154 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 2155 2156 return (0); 2157 } 2158 2159 static int 2160 dsl_recursive_rename(char *oldname, const char *newname) 2161 { 2162 int err; 2163 struct renamesnaparg *ra; 2164 dsl_sync_task_t *dst; 2165 spa_t *spa; 2166 char *cp, *fsname = spa_strdup(oldname); 2167 int len = strlen(oldname); 2168 2169 /* truncate the snapshot name to get the fsname */ 2170 cp = strchr(fsname, '@'); 2171 *cp = '\0'; 2172 2173 err = spa_open(fsname, &spa, FTAG); 2174 if (err) { 2175 kmem_free(fsname, len + 1); 2176 return (err); 2177 } 2178 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 2179 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 2180 2181 ra->oldsnap = strchr(oldname, '@') + 1; 2182 ra->newsnap = strchr(newname, '@') + 1; 2183 *ra->failed = '\0'; 2184 2185 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 2186 DS_FIND_CHILDREN); 2187 kmem_free(fsname, len + 1); 2188 2189 if (err == 0) { 2190 err = dsl_sync_task_group_wait(ra->dstg); 2191 } 2192 2193 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 2194 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 2195 dsl_dataset_t *ds = dst->dst_arg1; 2196 if (dst->dst_err) { 2197 dsl_dir_name(ds->ds_dir, ra->failed); 2198 (void) strcat(ra->failed, "@"); 2199 (void) strcat(ra->failed, ra->newsnap); 2200 } 2201 dsl_dataset_rele(ds, ra->dstg); 2202 } 2203 2204 if (err) 2205 (void) strcpy(oldname, ra->failed); 2206 2207 dsl_sync_task_group_destroy(ra->dstg); 2208 kmem_free(ra, sizeof (struct renamesnaparg)); 2209 spa_close(spa, FTAG); 2210 return (err); 2211 } 2212 2213 static int 2214 dsl_valid_rename(char
*oldname, void *arg) 2215 { 2216 int delta = *(int *)arg; 2217 2218 if (strlen(oldname) + delta >= MAXNAMELEN) 2219 return (ENAMETOOLONG); 2220 2221 return (0); 2222 } 2223 2224 #pragma weak dmu_objset_rename = dsl_dataset_rename 2225 int 2226 dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) 2227 { 2228 dsl_dir_t *dd; 2229 dsl_dataset_t *ds; 2230 const char *tail; 2231 int err; 2232 2233 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2234 if (err) 2235 return (err); 2236 /* 2237 * If there are more than 2 references there may be holds 2238 * hanging around that haven't been cleared out yet. 2239 */ 2240 if (dmu_buf_refcount(dd->dd_dbuf) > 2) 2241 txg_wait_synced(dd->dd_pool, 0); 2242 if (tail == NULL) { 2243 int delta = strlen(newname) - strlen(oldname); 2244 2245 /* if we're growing, validate child name lengths */ 2246 if (delta > 0) 2247 err = dmu_objset_find(oldname, dsl_valid_rename, 2248 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2249 2250 if (!err) 2251 err = dsl_dir_rename(dd, newname); 2252 dsl_dir_close(dd, FTAG); 2253 return (err); 2254 } 2255 if (tail[0] != '@') { 2256 /* the name ended in a nonexistent component */ 2257 dsl_dir_close(dd, FTAG); 2258 return (ENOENT); 2259 } 2260 2261 dsl_dir_close(dd, FTAG); 2262 2263 /* new name must be snapshot in same filesystem */ 2264 tail = strchr(newname, '@'); 2265 if (tail == NULL) 2266 return (EINVAL); 2267 tail++; 2268 if (strncmp(oldname, newname, tail - newname) != 0) 2269 return (EXDEV); 2270 2271 if (recursive) { 2272 err = dsl_recursive_rename(oldname, newname); 2273 } else { 2274 err = dsl_dataset_hold(oldname, FTAG, &ds); 2275 if (err) 2276 return (err); 2277 2278 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2279 dsl_dataset_snapshot_rename_check, 2280 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2281 2282 dsl_dataset_rele(ds, FTAG); 2283 } 2284 2285 return (err); 2286 } 2287 2288 struct promotenode { 2289 list_node_t link; 2290 dsl_dataset_t *ds; 2291 }; 2292 2293 struct promotearg { 2294 list_t shared_snaps, origin_snaps, clone_snaps; 2295 dsl_dataset_t *origin_origin, *origin_head; 2296 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2297 }; 2298 2299 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2300 2301 /* ARGSUSED */ 2302 static int 2303 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 2304 { 2305 dsl_dataset_t *hds = arg1; 2306 struct promotearg *pa = arg2; 2307 struct promotenode *snap = list_head(&pa->shared_snaps); 2308 dsl_dataset_t *origin_ds = snap->ds; 2309 int err; 2310 2311 /* Check that it is a real clone */ 2312 if (!dsl_dir_is_clone(hds->ds_dir)) 2313 return (EINVAL); 2314 2315 /* Since this is so expensive, don't do the preliminary check */ 2316 if (!dmu_tx_is_syncing(tx)) 2317 return (0); 2318 2319 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 2320 return (EXDEV); 2321 2322 /* compute origin's new unique space */ 2323 snap = list_tail(&pa->clone_snaps); 2324 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2325 err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2326 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique); 2327 if (err) 2328 return (err); 2329 2330 /* 2331 * Walk the snapshots that we are moving 2332 * 2333 * Compute space to transfer.
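 * (Worked instance, with made-up numbers, of the result derived
 * below: three snapshots with used u0..u2 = 1G, 3G, 4G and deadlist
 * totals k0..k2 = 0, 1G, 2G transfer u2 + k2 + k1 + k0 = 7G.)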
Consider the incremental changes 2334 * to used for each snapshot: 2335 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2336 * So each snapshot gave birth to: 2337 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2338 * So a sequence would look like: 2339 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2340 * Which simplifies to: 2341 * uN + kN + kN-1 + ... + k1 + k0 2342 * Note however, if we stop before we reach the ORIGIN we get: 2343 * uN + kN + kN-1 + ... + kM - uM-1 2344 */ 2345 pa->used = origin_ds->ds_phys->ds_used_bytes; 2346 pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2347 pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2348 for (snap = list_head(&pa->shared_snaps); snap; 2349 snap = list_next(&pa->shared_snaps, snap)) { 2350 uint64_t val, dlused, dlcomp, dluncomp; 2351 dsl_dataset_t *ds = snap->ds; 2352 2353 /* Check that the snapshot name does not conflict */ 2354 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2355 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2356 if (err == 0) 2357 return (EEXIST); 2358 if (err != ENOENT) 2359 return (err); 2360 2361 /* The very first snapshot does not have a deadlist */ 2362 if (ds->ds_phys->ds_prev_snap_obj == 0) 2363 continue; 2364 2365 if (err = bplist_space(&ds->ds_deadlist, 2366 &dlused, &dlcomp, &dluncomp)) 2367 return (err); 2368 pa->used += dlused; 2369 pa->comp += dlcomp; 2370 pa->uncomp += dluncomp; 2371 } 2372 2373 /* 2374 * If we are a clone of a clone then we never reached ORIGIN, 2375 * so we need to subtract out the clone origin's used space. 2376 */ 2377 if (pa->origin_origin) { 2378 pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; 2379 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 2380 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 2381 } 2382 2383 /* Check that there is enough space here */ 2384 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2385 pa->used); 2386 if (err) 2387 return (err); 2388 2389 /* 2390 * Compute the amounts of space that will be used by snapshots 2391 * after the promotion (for both origin and clone). For each, 2392 * it is the amount of space that will be on all of their 2393 * deadlists (that was not born before their new origin). 2394 */ 2395 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2396 uint64_t space; 2397 2398 /* 2399 * Note, typically this will not be a clone of a clone, 2400 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so 2401 * these snaplist_space() -> bplist_space_birthrange() 2402 * calls will be fast because they do not have to 2403 * iterate over all bps. 
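 *
 * (ds_origin_txg < TXG_INITIAL makes the requested birth range
 * effectively [0, UINT64_MAX], i.e. the whole deadlist, which is
 * why no per-bp iteration is needed in that case.)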
2404 */ 2405 snap = list_head(&pa->origin_snaps); 2406 err = snaplist_space(&pa->shared_snaps, 2407 snap->ds->ds_origin_txg, &pa->cloneusedsnap); 2408 if (err) 2409 return (err); 2410 2411 err = snaplist_space(&pa->clone_snaps, 2412 snap->ds->ds_origin_txg, &space); 2413 if (err) 2414 return (err); 2415 pa->cloneusedsnap += space; 2416 } 2417 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2418 err = snaplist_space(&pa->origin_snaps, 2419 origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); 2420 if (err) 2421 return (err); 2422 } 2423 2424 return (0); 2425 } 2426 2427 static void 2428 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2429 { 2430 dsl_dataset_t *hds = arg1; 2431 struct promotearg *pa = arg2; 2432 struct promotenode *snap = list_head(&pa->shared_snaps); 2433 dsl_dataset_t *origin_ds = snap->ds; 2434 dsl_dataset_t *origin_head; 2435 dsl_dir_t *dd = hds->ds_dir; 2436 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2437 dsl_dir_t *odd = NULL; 2438 uint64_t oldnext_obj; 2439 int64_t delta; 2440 2441 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2442 2443 snap = list_head(&pa->origin_snaps); 2444 origin_head = snap->ds; 2445 2446 /* 2447 * We need to explicitly open odd, since origin_ds's dd will be 2448 * changing. 2449 */ 2450 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2451 NULL, FTAG, &odd)); 2452 2453 /* change origin's next snap */ 2454 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2455 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2456 snap = list_tail(&pa->clone_snaps); 2457 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2458 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2459 2460 /* change the origin's next clone */ 2461 if (origin_ds->ds_phys->ds_next_clones_obj) { 2462 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 2463 origin_ds->ds_phys->ds_next_clones_obj, 2464 origin_ds->ds_phys->ds_next_snap_obj, tx)); 2465 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2466 origin_ds->ds_phys->ds_next_clones_obj, 2467 oldnext_obj, tx)); 2468 } 2469 2470 /* change origin */ 2471 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2472 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2473 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2474 hds->ds_origin_txg = origin_head->ds_origin_txg; 2475 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2476 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2477 origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg; 2478 2479 /* move snapshots to this dir */ 2480 for (snap = list_head(&pa->shared_snaps); snap; 2481 snap = list_next(&pa->shared_snaps, snap)) { 2482 dsl_dataset_t *ds = snap->ds; 2483 2484 /* unregister props as dsl_dir is changing */ 2485 if (ds->ds_user_ptr) { 2486 ds->ds_user_evict_func(ds, ds->ds_user_ptr); 2487 ds->ds_user_ptr = NULL; 2488 } 2489 /* move snap name entry */ 2490 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2491 VERIFY(0 == dsl_dataset_snap_remove(origin_head, 2492 ds->ds_snapname, tx)); 2493 VERIFY(0 == zap_add(dp->dp_meta_objset, 2494 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2495 8, 1, &ds->ds_object, tx)); 2496 /* change containing dsl_dir */ 2497 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2498 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2499 ds->ds_phys->ds_dir_obj = dd->dd_object; 2500 ASSERT3P(ds->ds_dir, ==, odd); 2501 dsl_dir_close(ds->ds_dir, ds); 2502 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2503 NULL, ds, &ds->ds_dir)); 2504 2505 
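		/*
		 * Any property callbacks were unregistered when the
		 * user pointer was evicted above, so none should
		 * remain on this snapshot.
		 */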
ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2506 } 2507 2508 /* 2509 * Change space accounting. 2510 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2511 * both be valid, or both be 0 (resulting in delta == 0). This 2512 * is true for each of {clone,origin} independently. 2513 */ 2514 2515 delta = pa->cloneusedsnap - 2516 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2517 ASSERT3S(delta, >=, 0); 2518 ASSERT3U(pa->used, >=, delta); 2519 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2520 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2521 pa->used - delta, pa->comp, pa->uncomp, tx); 2522 2523 delta = pa->originusedsnap - 2524 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2525 ASSERT3S(delta, <=, 0); 2526 ASSERT3U(pa->used, >=, -delta); 2527 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2528 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2529 -pa->used - delta, -pa->comp, -pa->uncomp, tx); 2530 2531 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2532 2533 /* log history record */ 2534 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2535 cr, "dataset = %llu", hds->ds_object); 2536 2537 dsl_dir_close(odd, FTAG); 2538 } 2539 2540 static char *snaplist_tag = "snaplist"; 2541 /* 2542 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2543 * (exclusive) and last_obj (inclusive). The list will be in reverse 2544 * order (last_obj will be the list_head()). If first_obj == 0, do all 2545 * snapshots back to this dataset's origin. 2546 */ 2547 static int 2548 snaplist_make(dsl_pool_t *dp, boolean_t own, 2549 uint64_t first_obj, uint64_t last_obj, list_t *l) 2550 { 2551 uint64_t obj = last_obj; 2552 2553 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2554 2555 list_create(l, sizeof (struct promotenode), 2556 offsetof(struct promotenode, link)); 2557 2558 while (obj != first_obj) { 2559 dsl_dataset_t *ds; 2560 struct promotenode *snap; 2561 int err; 2562 2563 if (own) { 2564 err = dsl_dataset_own_obj(dp, obj, 2565 0, snaplist_tag, &ds); 2566 if (err == 0) 2567 dsl_dataset_make_exclusive(ds, snaplist_tag); 2568 } else { 2569 err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2570 } 2571 if (err == ENOENT) { 2572 /* lost race with snapshot destroy */ 2573 struct promotenode *last = list_tail(l); 2574 ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2575 obj = last->ds->ds_phys->ds_prev_snap_obj; 2576 continue; 2577 } else if (err) { 2578 return (err); 2579 } 2580 2581 if (first_obj == 0) 2582 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2583 2584 snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2585 snap->ds = ds; 2586 list_insert_tail(l, snap); 2587 obj = ds->ds_phys->ds_prev_snap_obj; 2588 } 2589 2590 return (0); 2591 } 2592 2593 static int 2594 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2595 { 2596 struct promotenode *snap; 2597 2598 *spacep = 0; 2599 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2600 uint64_t used; 2601 int err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2602 mintxg, UINT64_MAX, &used); 2603 if (err) 2604 return (err); 2605 *spacep += used; 2606 } 2607 return (0); 2608 } 2609 2610 static void 2611 snaplist_destroy(list_t *l, boolean_t own) 2612 { 2613 struct promotenode *snap; 2614 2615 if (!l || !list_link_active(&l->list_head)) 2616 return; 2617 2618 while ((snap = list_tail(l)) != NULL) { 2619 list_remove(l, snap); 2620 if (own) 2621 dsl_dataset_disown(snap->ds, snaplist_tag); 2622 else 2623 dsl_dataset_rele(snap->ds, snaplist_tag); 2624 kmem_free(snap, sizeof (struct 
promotenode)); 2625 } 2626 list_destroy(l); 2627 } 2628 2629 /* 2630 * Promote a clone. Nomenclature note: 2631 * "clone" or "cds": the original clone which is being promoted 2632 * "origin" or "ods": the snapshot which is originally the clone's origin 2633 * "origin head" or "ohds": the dataset which is the head 2634 * (filesystem/volume) for the origin 2635 * "origin origin": the origin of the origin's filesystem (typically 2636 * NULL, indicating that the clone is not a clone of a clone). 2637 */ 2638 int 2639 dsl_dataset_promote(const char *name) 2640 { 2641 dsl_dataset_t *ds; 2642 dsl_dir_t *dd; 2643 dsl_pool_t *dp; 2644 dmu_object_info_t doi; 2645 struct promotearg pa = { 0 }; 2646 struct promotenode *snap; 2647 int err; 2648 2649 err = dsl_dataset_hold(name, FTAG, &ds); 2650 if (err) 2651 return (err); 2652 dd = ds->ds_dir; 2653 dp = dd->dd_pool; 2654 2655 err = dmu_object_info(dp->dp_meta_objset, 2656 ds->ds_phys->ds_snapnames_zapobj, &doi); 2657 if (err) { 2658 dsl_dataset_rele(ds, FTAG); 2659 return (err); 2660 } 2661 2662 if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 2663 dsl_dataset_rele(ds, FTAG); 2664 return (EINVAL); 2665 } 2666 2667 /* 2668 * We are going to inherit all the snapshots taken before our 2669 * origin (i.e., our new origin will be our parent's origin). 2670 * Take ownership of them so that we can rename them into our 2671 * namespace. 2672 */ 2673 rw_enter(&dp->dp_config_rwlock, RW_READER); 2674 2675 err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 2676 &pa.shared_snaps); 2677 if (err != 0) 2678 goto out; 2679 2680 err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 2681 if (err != 0) 2682 goto out; 2683 2684 snap = list_head(&pa.shared_snaps); 2685 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2686 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 2687 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 2688 if (err != 0) 2689 goto out; 2690 2691 if (dsl_dir_is_clone(snap->ds->ds_dir)) { 2692 err = dsl_dataset_own_obj(dp, 2693 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2694 0, FTAG, &pa.origin_origin); 2695 if (err != 0) 2696 goto out; 2697 } 2698 2699 out: 2700 rw_exit(&dp->dp_config_rwlock); 2701 2702 /* 2703 * Add in 128x the snapnames zapobj size, since we will be moving 2704 * a bunch of snapnames to the promoted ds, and dirtying their 2705 * bonus buffers.
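 * (This estimate is what the "2 + 2 * doi.doi_physical_blks"
 * argument to dsl_sync_task_do() below expresses, using the object
 * info sampled near the top of this function.)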
2706 */ 2707 if (err == 0) { 2708 err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 2709 dsl_dataset_promote_sync, ds, &pa, 2710 2 + 2 * doi.doi_physical_blks); 2711 } 2712 2713 snaplist_destroy(&pa.shared_snaps, B_TRUE); 2714 snaplist_destroy(&pa.clone_snaps, B_FALSE); 2715 snaplist_destroy(&pa.origin_snaps, B_FALSE); 2716 if (pa.origin_origin) 2717 dsl_dataset_disown(pa.origin_origin, FTAG); 2718 dsl_dataset_rele(ds, FTAG); 2719 return (err); 2720 } 2721 2722 struct cloneswaparg { 2723 dsl_dataset_t *cds; /* clone dataset */ 2724 dsl_dataset_t *ohds; /* origin's head dataset */ 2725 boolean_t force; 2726 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 2727 }; 2728 2729 /* ARGSUSED */ 2730 static int 2731 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2732 { 2733 struct cloneswaparg *csa = arg1; 2734 2735 /* they should both be heads */ 2736 if (dsl_dataset_is_snapshot(csa->cds) || 2737 dsl_dataset_is_snapshot(csa->ohds)) 2738 return (EINVAL); 2739 2740 /* the branch point should be just before them */ 2741 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2742 return (EINVAL); 2743 2744 /* cds should be the clone */ 2745 if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj != 2746 csa->ohds->ds_object) 2747 return (EINVAL); 2748 2749 /* the clone should be a child of the origin */ 2750 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2751 return (EINVAL); 2752 2753 /* ohds shouldn't be modified unless 'force' */ 2754 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2755 return (ETXTBSY); 2756 2757 /* adjust amount of any unconsumed refreservation */ 2758 csa->unused_refres_delta = 2759 (int64_t)MIN(csa->ohds->ds_reserved, 2760 csa->ohds->ds_phys->ds_unique_bytes) - 2761 (int64_t)MIN(csa->ohds->ds_reserved, 2762 csa->cds->ds_phys->ds_unique_bytes); 2763 2764 if (csa->unused_refres_delta > 0 && 2765 csa->unused_refres_delta > 2766 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2767 return (ENOSPC); 2768 2769 return (0); 2770 } 2771 2772 /* ARGSUSED */ 2773 static void 2774 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2775 { 2776 struct cloneswaparg *csa = arg1; 2777 dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 2778 2779 ASSERT(csa->cds->ds_reserved == 0); 2780 ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2781 2782 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2783 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2784 dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx); 2785 2786 if (csa->cds->ds_user_ptr != NULL) { 2787 csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr); 2788 csa->cds->ds_user_ptr = NULL; 2789 } 2790 2791 if (csa->ohds->ds_user_ptr != NULL) { 2792 csa->ohds->ds_user_evict_func(csa->ohds, 2793 csa->ohds->ds_user_ptr); 2794 csa->ohds->ds_user_ptr = NULL; 2795 } 2796 2797 /* reset origin's unique bytes */ 2798 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2799 csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2800 &csa->cds->ds_prev->ds_phys->ds_unique_bytes)); 2801 2802 /* swap blkptrs */ 2803 { 2804 blkptr_t tmp; 2805 tmp = csa->ohds->ds_phys->ds_bp; 2806 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2807 csa->cds->ds_phys->ds_bp = tmp; 2808 } 2809 2810 /* set dd_*_bytes */ 2811 { 2812 int64_t dused, dcomp, duncomp; 2813 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2814 uint64_t odl_used, odl_comp, odl_uncomp; 2815 2816 ASSERT3U(csa->cds->ds_dir->dd_phys-> 2817 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2818 2819 VERIFY(0 == 
bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2820 &cdl_comp, &cdl_uncomp)); 2821 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2822 &odl_comp, &odl_uncomp)); 2823 2824 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2825 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2826 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2827 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2828 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 2829 cdl_uncomp - 2830 (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2831 2832 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, 2833 dused, dcomp, duncomp, tx); 2834 dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, 2835 -dused, -dcomp, -duncomp, tx); 2836 2837 /* 2838 * The difference in the space used by snapshots is the 2839 * difference in snapshot space due to the head's 2840 * deadlist (since that's the only thing that's 2841 * changing that affects the snapused). 2842 */ 2843 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2844 csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used)); 2845 VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist, 2846 csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used)); 2847 dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, 2848 DD_USED_HEAD, DD_USED_SNAP, tx); 2849 } 2850 2851 #define SWITCH64(x, y) \ 2852 { \ 2853 uint64_t __tmp = (x); \ 2854 (x) = (y); \ 2855 (y) = __tmp; \ 2856 } 2857 2858 /* swap ds_*_bytes */ 2859 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2860 csa->cds->ds_phys->ds_used_bytes); 2861 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2862 csa->cds->ds_phys->ds_compressed_bytes); 2863 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2864 csa->cds->ds_phys->ds_uncompressed_bytes); 2865 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2866 csa->cds->ds_phys->ds_unique_bytes); 2867 2868 /* apply any parent delta for change in unconsumed refreservation */ 2869 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, 2870 csa->unused_refres_delta, 0, 0, tx); 2871 2872 /* swap deadlists */ 2873 bplist_close(&csa->cds->ds_deadlist); 2874 bplist_close(&csa->ohds->ds_deadlist); 2875 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2876 csa->cds->ds_phys->ds_deadlist_obj); 2877 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2878 csa->cds->ds_phys->ds_deadlist_obj)); 2879 VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2880 csa->ohds->ds_phys->ds_deadlist_obj)); 2881 2882 dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx); 2883 } 2884 2885 /* 2886 * Swap 'clone' with its origin head file system. Used at the end 2887 * of "online recv" to swizzle the file system to the new version. 
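 *
 * Hedged usage sketch (hypothetical caller and names; both datasets
 * must already be owned, as the ASSERTs below verify):
 *
 *	error = dsl_dataset_clone_swap(recv_clone, fs_head, B_TRUE);
 *
 * force == B_TRUE permits the swap even if fs_head has been modified
 * since its last snapshot; otherwise such modification is ETXTBSY.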
2888 */ 2889 int 2890 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2891 boolean_t force) 2892 { 2893 struct cloneswaparg csa; 2894 int error; 2895 2896 ASSERT(clone->ds_owner); 2897 ASSERT(origin_head->ds_owner); 2898 retry: 2899 /* Need exclusive access for the swap */ 2900 rw_enter(&clone->ds_rwlock, RW_WRITER); 2901 if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { 2902 rw_exit(&clone->ds_rwlock); 2903 rw_enter(&origin_head->ds_rwlock, RW_WRITER); 2904 if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { 2905 rw_exit(&origin_head->ds_rwlock); 2906 goto retry; 2907 } 2908 } 2909 csa.cds = clone; 2910 csa.ohds = origin_head; 2911 csa.force = force; 2912 error = dsl_sync_task_do(clone->ds_dir->dd_pool, 2913 dsl_dataset_clone_swap_check, 2914 dsl_dataset_clone_swap_sync, &csa, NULL, 9); 2915 return (error); 2916 } 2917 2918 /* 2919 * Given a pool name and a dataset object number in that pool, 2920 * return the name of that dataset. 2921 */ 2922 int 2923 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2924 { 2925 spa_t *spa; 2926 dsl_pool_t *dp; 2927 dsl_dataset_t *ds; 2928 int error; 2929 2930 if ((error = spa_open(pname, &spa, FTAG)) != 0) 2931 return (error); 2932 dp = spa_get_dsl(spa); 2933 rw_enter(&dp->dp_config_rwlock, RW_READER); 2934 if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 2935 dsl_dataset_name(ds, buf); 2936 dsl_dataset_rele(ds, FTAG); 2937 } 2938 rw_exit(&dp->dp_config_rwlock); 2939 spa_close(spa, FTAG); 2940 2941 return (error); 2942 } 2943 2944 int 2945 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 2946 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 2947 { 2948 int error = 0; 2949 2950 ASSERT3S(asize, >, 0); 2951 2952 /* 2953 * *ref_rsrv is the portion of asize that will come from any 2954 * unconsumed refreservation space. 2955 */ 2956 *ref_rsrv = 0; 2957 2958 mutex_enter(&ds->ds_lock); 2959 /* 2960 * Make a space adjustment for reserved bytes. 2961 */ 2962 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 2963 ASSERT3U(*used, >=, 2964 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2965 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 2966 *ref_rsrv = 2967 asize - MIN(asize, parent_delta(ds, asize + inflight)); 2968 } 2969 2970 if (!check_quota || ds->ds_quota == 0) { 2971 mutex_exit(&ds->ds_lock); 2972 return (0); 2973 } 2974 /* 2975 * If they are requesting more space, and our current estimate 2976 * is over quota, they get to try again unless the actual 2977 * on-disk is over quota and there are no pending changes (which 2978 * may free up space for us). 
2979 */ 2980 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 2981 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 2982 error = ERESTART; 2983 else 2984 error = EDQUOT; 2985 } 2986 mutex_exit(&ds->ds_lock); 2987 2988 return (error); 2989 } 2990 2991 /* ARGSUSED */ 2992 static int 2993 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 2994 { 2995 dsl_dataset_t *ds = arg1; 2996 uint64_t *quotap = arg2; 2997 uint64_t new_quota = *quotap; 2998 2999 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 3000 return (ENOTSUP); 3001 3002 if (new_quota == 0) 3003 return (0); 3004 3005 if (new_quota < ds->ds_phys->ds_used_bytes || 3006 new_quota < ds->ds_reserved) 3007 return (ENOSPC); 3008 3009 return (0); 3010 } 3011 3012 /* ARGSUSED */ 3013 void 3014 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3015 { 3016 dsl_dataset_t *ds = arg1; 3017 uint64_t *quotap = arg2; 3018 uint64_t new_quota = *quotap; 3019 3020 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3021 3022 ds->ds_quota = new_quota; 3023 3024 dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 3025 3026 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 3027 tx, cr, "%lld dataset = %llu ", 3028 (longlong_t)new_quota, ds->ds_object); 3029 } 3030 3031 int 3032 dsl_dataset_set_quota(const char *dsname, uint64_t quota) 3033 { 3034 dsl_dataset_t *ds; 3035 int err; 3036 3037 err = dsl_dataset_hold(dsname, FTAG, &ds); 3038 if (err) 3039 return (err); 3040 3041 if (quota != ds->ds_quota) { 3042 /* 3043 * If someone removes a file, then tries to set the quota, we 3044 * want to make sure the file freeing takes effect. 3045 */ 3046 txg_wait_open(ds->ds_dir->dd_pool, 0); 3047 3048 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3049 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 3050 ds, "a, 0); 3051 } 3052 dsl_dataset_rele(ds, FTAG); 3053 return (err); 3054 } 3055 3056 static int 3057 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 3058 { 3059 dsl_dataset_t *ds = arg1; 3060 uint64_t *reservationp = arg2; 3061 uint64_t new_reservation = *reservationp; 3062 uint64_t unique; 3063 3064 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 3065 SPA_VERSION_REFRESERVATION) 3066 return (ENOTSUP); 3067 3068 if (dsl_dataset_is_snapshot(ds)) 3069 return (EINVAL); 3070 3071 /* 3072 * If we are doing the preliminary check in open context, the 3073 * space estimates may be inaccurate. 
3074 */ 3075 if (!dmu_tx_is_syncing(tx)) 3076 return (0); 3077 3078 mutex_enter(&ds->ds_lock); 3079 unique = dsl_dataset_unique(ds); 3080 mutex_exit(&ds->ds_lock); 3081 3082 if (MAX(unique, new_reservation) > MAX(unique, ds->ds_reserved)) { 3083 uint64_t delta = MAX(unique, new_reservation) - 3084 MAX(unique, ds->ds_reserved); 3085 3086 if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 3087 return (ENOSPC); 3088 if (ds->ds_quota > 0 && 3089 new_reservation > ds->ds_quota) 3090 return (ENOSPC); 3091 } 3092 3093 return (0); 3094 } 3095 3096 /* ARGSUSED */ 3097 static void 3098 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 3099 dmu_tx_t *tx) 3100 { 3101 dsl_dataset_t *ds = arg1; 3102 uint64_t *reservationp = arg2; 3103 uint64_t new_reservation = *reservationp; 3104 uint64_t unique; 3105 int64_t delta; 3106 3107 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3108 3109 mutex_enter(&ds->ds_dir->dd_lock); 3110 mutex_enter(&ds->ds_lock); 3111 unique = dsl_dataset_unique(ds); 3112 delta = MAX(0, (int64_t)(new_reservation - unique)) - 3113 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3114 ds->ds_reserved = new_reservation; 3115 mutex_exit(&ds->ds_lock); 3116 3117 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3118 mutex_exit(&ds->ds_dir->dd_lock); 3119 dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation", 3120 new_reservation, cr, tx); 3121 3122 spa_history_internal_log(LOG_DS_REFRESERV, 3123 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 3124 (longlong_t)new_reservation, ds->ds_object); 3125 } 3126 3127 int 3128 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 3129 { 3130 dsl_dataset_t *ds; 3131 int err; 3132 3133 err = dsl_dataset_hold(dsname, FTAG, &ds); 3134 if (err) 3135 return (err); 3136 3137 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3138 dsl_dataset_set_reservation_check, 3139 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 3140 dsl_dataset_rele(ds, FTAG); 3141 return (err); 3142 } 3143
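/*
 * Illustrative sketch (not part of the original file) of driving the
 * two property setters above; the dataset name is made up:
 *
 *	uint64_t quota = 10ULL << 30;	(10G refquota)
 *	int err;
 *
 *	err = dsl_dataset_set_quota("tank/fs", quota);
 *	if (err == 0)
 *		err = dsl_dataset_set_reservation("tank/fs", 1ULL << 30);
 *
 * Each setter funnels the change through dsl_sync_task_do(), so its
 * check and sync functions run in syncing context.
 */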