/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_tx.h>
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/zfs_znode.h>
#include <sys/sunddi.h>

static char *dsl_reaper = "the grim reaper";

static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
static dsl_checkfunc_t dsl_dataset_rollback_check;
static dsl_syncfunc_t dsl_dataset_rollback_sync;
static dsl_syncfunc_t dsl_dataset_set_reservation_sync;

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

#define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)


/*
 * Figure out how much of this delta should be propagated to the dsl_dir
 * layer.  If there's a refreservation, that space has already been
 * partially accounted for in our ancestors.
 */
static int64_t
parent_delta(dsl_dataset_t *ds, int64_t delta)
{
	uint64_t old_bytes, new_bytes;

	if (ds->ds_reserved == 0)
		return (delta);

	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);

	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
	return (new_bytes - old_bytes);
}
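/*
 * Worked example for parent_delta() (illustrative numbers, not from any
 * real pool): with ds_reserved = 100 and ds_unique_bytes = 80, a delta
 * of +30 gives old_bytes = MAX(80, 100) = 100 and new_bytes =
 * MAX(110, 100) = 110, so only +10 is propagated to the dsl_dir; the
 * first 20 bytes of growth were already charged to our ancestors by the
 * refreservation.
 */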
void
dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);
	int64_t delta;

	dprintf_bp(bp, "born, ds=%p\n", ds);

	ASSERT(dmu_tx_is_syncing(tx));
	/* It could have been compressed away to nothing */
	if (BP_IS_HOLE(bp))
		return;
	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
	if (ds == NULL) {
		/*
		 * Account for the meta-objset space in its placeholder
		 * dsl_dir.
		 */
		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    used, compressed, uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return;
	}
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	delta = parent_delta(ds, used);
	ds->ds_phys->ds_used_bytes += used;
	ds->ds_phys->ds_compressed_bytes += compressed;
	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
	ds->ds_phys->ds_unique_bytes += used;
	mutex_exit(&ds->ds_lock);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
	    compressed, uncompressed, tx);
	dsl_dir_transfer_space(ds->ds_dir, used - delta,
	    DD_USED_REFRSRV, DD_USED_HEAD, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
}

int
dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
    dmu_tx_t *tx)
{
	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
	int compressed = BP_GET_PSIZE(bp);
	int uncompressed = BP_GET_UCSIZE(bp);

	ASSERT(pio != NULL);
	ASSERT(dmu_tx_is_syncing(tx));
	/* No block pointer => nothing to free */
	if (BP_IS_HOLE(bp))
		return (0);

	ASSERT(used > 0);
	if (ds == NULL) {
		int err;
		/*
		 * Account for the meta-objset space in its placeholder
		 * dataset.
		 */
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
		    -used, -compressed, -uncompressed, tx);
		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
		return (used);
	}
	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

	ASSERT(!dsl_dataset_is_snapshot(ds));
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		int err;
		int64_t delta;

		dprintf_bp(bp, "freeing: %s", "");
		err = dsl_free(pio, tx->tx_pool,
		    tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT);
		ASSERT(err == 0);

		mutex_enter(&ds->ds_dir->dd_lock);
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
		    !DS_UNIQUE_IS_ACCURATE(ds));
		delta = parent_delta(ds, -used);
		ds->ds_phys->ds_unique_bytes -= used;
		mutex_exit(&ds->ds_lock);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
		    delta, -compressed, -uncompressed, tx);
		dsl_dir_transfer_space(ds->ds_dir, -used - delta,
		    DD_USED_REFRSRV, DD_USED_HEAD, tx);
		mutex_exit(&ds->ds_dir->dd_lock);
	} else {
		dprintf_bp(bp, "putting on dead list: %s", "");
		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object && bp->blk_birth >
		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			mutex_enter(&ds->ds_prev->ds_lock);
			ds->ds_prev->ds_phys->ds_unique_bytes += used;
			mutex_exit(&ds->ds_prev->ds_lock);
		}
		if (bp->blk_birth > ds->ds_origin_txg) {
			dsl_dir_transfer_space(ds->ds_dir, used,
			    DD_USED_HEAD, DD_USED_SNAP, tx);
		}
	}
	mutex_enter(&ds->ds_lock);
	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
	ds->ds_phys->ds_used_bytes -= used;
	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
	ds->ds_phys->ds_compressed_bytes -= compressed;
	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
	mutex_exit(&ds->ds_lock);

	return (used);
}
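/*
 * A note on the two paths in dsl_dataset_block_kill() above (summarizing
 * the logic, not adding to it): a block born after the most recent
 * snapshot (blk_birth > ds_prev_snap_txg) is referenced only by the head,
 * so it can be freed immediately; a block born before that snapshot is
 * still referenced by the snapshot, so it is queued on the head's
 * deadlist instead, and space born after the clone origin is reclassified
 * from head space to snapshot space.
 */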
uint64_t
dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
{
	uint64_t trysnap = 0;

	if (ds == NULL)
		return (0);
	/*
	 * The snapshot creation could fail, but that would cause an
	 * incorrect FALSE return, which would only result in an
	 * overestimation of the amount of space that an operation would
	 * consume, which is OK.
	 *
	 * There's also a small window where we could miss a pending
	 * snapshot, because we could set the sync task in the quiescing
	 * phase.  So this should only be used as a guess.
	 */
	if (ds->ds_trysnap_txg >
	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
		trysnap = ds->ds_trysnap_txg;
	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
}

int
dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
{
	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
}

/* ARGSUSED */
static void
dsl_dataset_evict(dmu_buf_t *db, void *dsv)
{
	dsl_dataset_t *ds = dsv;

	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));

	dprintf_ds(ds, "evicting %s\n", "");

	unique_remove(ds->ds_fsid_guid);

	if (ds->ds_user_ptr != NULL)
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);

	if (ds->ds_prev) {
		dsl_dataset_drop_ref(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	bplist_close(&ds->ds_deadlist);
	if (ds->ds_dir)
		dsl_dir_close(ds->ds_dir, ds);

	ASSERT(!list_link_active(&ds->ds_synced_link));

	mutex_destroy(&ds->ds_lock);
	mutex_destroy(&ds->ds_opening_lock);
	mutex_destroy(&ds->ds_deadlist.bpl_lock);
	rw_destroy(&ds->ds_rwlock);
	cv_destroy(&ds->ds_exclusive_cv);

	kmem_free(ds, sizeof (dsl_dataset_t));
}

static int
dsl_dataset_get_snapname(dsl_dataset_t *ds)
{
	dsl_dataset_phys_t *headphys;
	int err;
	dmu_buf_t *headdbuf;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;

	if (ds->ds_snapname[0])
		return (0);
	if (ds->ds_phys->ds_next_snap_obj == 0)
		return (0);

	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
	    FTAG, &headdbuf);
	if (err)
		return (err);
	headphys = headdbuf->db_data;
	err = zap_value_search(dp->dp_meta_objset,
	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
	dmu_buf_rele(headdbuf, FTAG);
	return (err);
}
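/*
 * The two helpers below look up and remove entries in the dataset's
 * snapnames ZAP.  For a case-insensitive dataset (DS_FLAG_CI_DATASET)
 * the normalizing forms (zap_lookup_norm()/zap_remove_norm() with
 * MT_FIRST) are tried first; if the ZAP does not support normalization
 * they return ENOTSUP and we fall back to an exact-match lookup.
 */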
static int
dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
	    value, mt, NULL, 0, NULL);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_lookup(mos, snapobj, name, 8, 1, value);
	return (err);
}

static int
dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
	matchtype_t mt;
	int err;

	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
		mt = MT_FIRST;
	else
		mt = MT_EXACT;

	err = zap_remove_norm(mos, snapobj, name, mt, tx);
	if (err == ENOTSUP && mt == MT_FIRST)
		err = zap_remove(mos, snapobj, name, tx);
	return (err);
}

static int
dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	objset_t *mos = dp->dp_meta_objset;
	dmu_buf_t *dbuf;
	dsl_dataset_t *ds;
	int err;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));

	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
	if (err)
		return (err);
	ds = dmu_buf_get_user(dbuf);
	if (ds == NULL) {
		dsl_dataset_t *winner;

		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
		ds->ds_dbuf = dbuf;
		ds->ds_object = dsobj;
		ds->ds_phys = dbuf->db_data;

		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
		    NULL);
		rw_init(&ds->ds_rwlock, 0, 0, 0);
		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);

		err = bplist_open(&ds->ds_deadlist,
		    mos, ds->ds_phys->ds_deadlist_obj);
		if (err == 0) {
			err = dsl_dir_open_obj(dp,
			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
		}
		if (err) {
			/*
			 * we don't really need to close the bplist if we
			 * just opened it.
			 */
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			dmu_buf_rele(dbuf, tag);
			return (err);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			ds->ds_snapname[0] = '\0';
			if (ds->ds_phys->ds_prev_snap_obj) {
				err = dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds, &ds->ds_prev);
			}

			if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) {
				dsl_dataset_t *origin;

				err = dsl_dataset_hold_obj(dp,
				    ds->ds_dir->dd_phys->dd_origin_obj,
				    FTAG, &origin);
				if (err == 0) {
					ds->ds_origin_txg =
					    origin->ds_phys->ds_creation_txg;
					dsl_dataset_rele(origin, FTAG);
				}
			}
		} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
			err = dsl_dataset_get_snapname(ds);
		}

		if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
			/*
			 * In sync context, we're called with either no lock
			 * or with the write lock.  If we're not syncing,
			 * we're always called with the read lock held.
			 */
			boolean_t need_lock =
			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
			    dsl_pool_sync_context(dp);

			if (need_lock)
				rw_enter(&dp->dp_config_rwlock, RW_READER);

			err = dsl_prop_get_ds(ds,
			    "refreservation", sizeof (uint64_t), 1,
			    &ds->ds_reserved, NULL);
			if (err == 0) {
				err = dsl_prop_get_ds(ds,
				    "refquota", sizeof (uint64_t), 1,
				    &ds->ds_quota, NULL);
			}

			if (need_lock)
				rw_exit(&dp->dp_config_rwlock);
		} else {
			ds->ds_reserved = ds->ds_quota = 0;
		}

		if (err == 0) {
			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
			    dsl_dataset_evict);
		}
		if (err || winner) {
			bplist_close(&ds->ds_deadlist);
			if (ds->ds_prev)
				dsl_dataset_drop_ref(ds->ds_prev, ds);
			dsl_dir_close(ds->ds_dir, ds);
			mutex_destroy(&ds->ds_lock);
			mutex_destroy(&ds->ds_opening_lock);
			mutex_destroy(&ds->ds_deadlist.bpl_lock);
			rw_destroy(&ds->ds_rwlock);
			cv_destroy(&ds->ds_exclusive_cv);
			kmem_free(ds, sizeof (dsl_dataset_t));
			if (err) {
				dmu_buf_rele(dbuf, tag);
				return (err);
			}
			ds = winner;
		} else {
			ds->ds_fsid_guid =
			    unique_insert(ds->ds_phys->ds_fsid_guid);
		}
	}
	ASSERT3P(ds->ds_dbuf, ==, dbuf);
	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
	ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
	    spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
	    dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
	mutex_enter(&ds->ds_lock);
	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
		mutex_exit(&ds->ds_lock);
		dmu_buf_rele(ds->ds_dbuf, tag);
		return (ENOENT);
	}
	mutex_exit(&ds->ds_lock);
	*dsp = ds;
	return (0);
}
static int
dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/*
	 * In syncing context we don't want the rwlock: there
	 * may be an existing writer waiting for sync phase to
	 * finish.  We don't need to worry about such writers, since
	 * sync phase is single-threaded, so the writer can't be
	 * doing anything while we are active.
	 */
	if (dsl_pool_sync_context(dp)) {
		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
		return (0);
	}

	/*
	 * Normal users will hold the ds_rwlock as a READER until they
	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
	 * drop their READER lock after they set the ds_owner field.
	 *
	 * If the dataset is being destroyed, the destroy thread will
	 * obtain a WRITER lock for exclusive access after it's done its
	 * open-context work and then change the ds_owner to
	 * dsl_reaper once destruction is assured.  So threads
	 * may block here temporarily, until the "destructability" of
	 * the dataset is determined.
	 */
	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
	mutex_enter(&ds->ds_lock);
	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
		rw_exit(&dp->dp_config_rwlock);
		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
		if (DSL_DATASET_IS_DESTROYED(ds)) {
			mutex_exit(&ds->ds_lock);
			dsl_dataset_drop_ref(ds, tag);
			rw_enter(&dp->dp_config_rwlock, RW_READER);
			return (ENOENT);
		}
		rw_enter(&dp->dp_config_rwlock, RW_READER);
	}
	mutex_exit(&ds->ds_lock);
	return (0);
}

int
dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);

	if (err)
		return (err);
	return (dsl_dataset_hold_ref(*dsp, tag));
}

int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
    dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);

	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);

	if (err)
		return (err);
	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
		dsl_dataset_rele(*dsp, owner);
		*dsp = NULL;
		return (EBUSY);
	}
	return (0);
}

int
dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	const char *snapname;
	uint64_t obj;
	int err = 0;

	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
	if (err)
		return (err);

	dp = dd->dd_pool;
	obj = dd->dd_phys->dd_head_dataset_obj;
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if (obj)
		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
	else
		err = ENOENT;
	if (err)
		goto out;

	err = dsl_dataset_hold_ref(*dsp, tag);

	/* we may be looking for a snapshot */
	if (err == 0 && snapname != NULL) {
		dsl_dataset_t *ds = NULL;

		if (*snapname++ != '@') {
			dsl_dataset_rele(*dsp, tag);
			err = ENOENT;
			goto out;
		}

		dprintf("looking for snapshot '%s'\n", snapname);
		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
		if (err == 0)
			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
		dsl_dataset_rele(*dsp, tag);

		ASSERT3U((err == 0), ==, (ds != NULL));

		if (ds) {
			mutex_enter(&ds->ds_lock);
			if (ds->ds_snapname[0] == 0)
				(void) strlcpy(ds->ds_snapname, snapname,
				    sizeof (ds->ds_snapname));
			mutex_exit(&ds->ds_lock);
			err = dsl_dataset_hold_ref(ds, tag);
			*dsp = err ? NULL : ds;
		}
	}
out:
	rw_exit(&dp->dp_config_rwlock);
	dsl_dir_close(dd, FTAG);
	return (err);
}

int
dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
{
	int err = dsl_dataset_hold(name, owner, dsp);
	if (err)
		return (err);
	if ((*dsp)->ds_phys->ds_num_children > 0 &&
	    !DS_MODE_IS_READONLY(flags)) {
		dsl_dataset_rele(*dsp, owner);
		return (EROFS);
	}
	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
		dsl_dataset_rele(*dsp, owner);
		return (EBUSY);
	}
	return (0);
}
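/*
 * A brief sketch of the reference model implied by the functions above
 * and below (a reading aid, not new behavior): a plain hold
 * (dsl_dataset_hold()/dsl_dataset_hold_obj()) takes a dbuf reference plus
 * ds_rwlock as READER, and is paired with dsl_dataset_rele().  Ownership
 * (dsl_dataset_own()/dsl_dataset_own_obj()) additionally sets ds_owner
 * via dsl_dataset_tryown() and drops the READER lock, and is paired with
 * dsl_dataset_disown().  At most one owner can exist at a time, which is
 * what lets destroy and rollback coordinate exclusive access safely.
 */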
void
dsl_dataset_name(dsl_dataset_t *ds, char *name)
{
	if (ds == NULL) {
		(void) strcpy(name, "mos");
	} else {
		dsl_dir_name(ds->ds_dir, name);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			(void) strcat(name, "@");
			/*
			 * We use a "recursive" mutex so that we
			 * can call dprintf_ds() with ds_lock held.
			 */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				(void) strcat(name, ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				(void) strcat(name, ds->ds_snapname);
			}
		}
	}
}

static int
dsl_dataset_namelen(dsl_dataset_t *ds)
{
	int result;

	if (ds == NULL) {
		result = 3;	/* "mos" */
	} else {
		result = dsl_dir_namelen(ds->ds_dir);
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		if (ds->ds_snapname[0]) {
			++result;	/* adding one for the @-sign */
			if (!MUTEX_HELD(&ds->ds_lock)) {
				mutex_enter(&ds->ds_lock);
				result += strlen(ds->ds_snapname);
				mutex_exit(&ds->ds_lock);
			} else {
				result += strlen(ds->ds_snapname);
			}
		}
	}

	return (result);
}

void
dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
{
	dmu_buf_rele(ds->ds_dbuf, tag);
}

void
dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
{
	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
		rw_exit(&ds->ds_rwlock);
	}
	dsl_dataset_drop_ref(ds, tag);
}

void
dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
{
	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));

	mutex_enter(&ds->ds_lock);
	ds->ds_owner = NULL;
	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
		rw_exit(&ds->ds_rwlock);
		cv_broadcast(&ds->ds_exclusive_cv);
	}
	mutex_exit(&ds->ds_lock);
	if (ds->ds_dbuf)
		dsl_dataset_drop_ref(ds, owner);
	else
		dsl_dataset_evict(ds->ds_dbuf, ds);
}

boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
{
	boolean_t gotit = FALSE;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_owner == NULL &&
	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
		ds->ds_owner = owner;
		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
			rw_exit(&ds->ds_rwlock);
		gotit = TRUE;
	}
	mutex_exit(&ds->ds_lock);
	return (gotit);
}

void
dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
{
	ASSERT3P(owner, ==, ds->ds_owner);
	if (!RW_WRITE_HELD(&ds->ds_rwlock))
		rw_enter(&ds->ds_rwlock, RW_WRITER);
}
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;

	if (origin == NULL)
		origin = dp->dp_origin_snap;

	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = dd->dd_object;
	dsphys->ds_flags = flags;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_snapnames_zapobj =
	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
	    DMU_OT_NONE, 0, tx);
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
	dsphys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);

	if (origin) {
		dsphys->ds_prev_snap_obj = origin->ds_object;
		dsphys->ds_prev_snap_txg =
		    origin->ds_phys->ds_creation_txg;
		dsphys->ds_used_bytes =
		    origin->ds_phys->ds_used_bytes;
		dsphys->ds_compressed_bytes =
		    origin->ds_phys->ds_compressed_bytes;
		dsphys->ds_uncompressed_bytes =
		    origin->ds_phys->ds_uncompressed_bytes;
		dsphys->ds_bp = origin->ds_phys->ds_bp;
		dsphys->ds_flags |= origin->ds_phys->ds_flags;

		dmu_buf_will_dirty(origin->ds_dbuf, tx);
		origin->ds_phys->ds_num_children++;

		if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
			if (origin->ds_phys->ds_next_clones_obj == 0) {
				origin->ds_phys->ds_next_clones_obj =
				    zap_create(mos,
				    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
			}
			VERIFY(0 == zap_add_int(mos,
			    origin->ds_phys->ds_next_clones_obj,
			    dsobj, tx));
		}

		dmu_buf_will_dirty(dd->dd_dbuf, tx);
		dd->dd_phys->dd_origin_obj = origin->ds_object;
	}

	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

	dmu_buf_rele(dbuf, FTAG);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_head_dataset_obj = dsobj;

	return (dsobj);
}

uint64_t
dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
    dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
{
	dsl_pool_t *dp = pdd->dd_pool;
	uint64_t dsobj, ddobj;
	dsl_dir_t *dd;

	ASSERT(lastname[0] != '@');

	ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));

	dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);

	dsl_deleg_set_create_perms(dd, tx, cr);

	dsl_dir_close(dd, FTAG);

	return (dsobj);
}
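/*
 * Note on clone creation (summarizing dsl_dataset_create_sync_dd()
 * above): when an origin is supplied, the new dataset starts life
 * pointing at the origin's block tree (ds_bp) and inherits its space
 * accounting, while the origin's ds_num_children is bumped and, on pools
 * at or above SPA_VERSION_NEXT_CLONES, the clone is recorded in the
 * origin's next-clones ZAP so it can be found later (e.g., during
 * snapshot destroy).
 */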
struct destroyarg {
	dsl_sync_task_group_t *dstg;
	char *snapname;
	char *failed;
};

static int
dsl_snapshot_destroy_one(char *name, void *arg)
{
	struct destroyarg *da = arg;
	dsl_dataset_t *ds;
	char *cp;
	int err;

	(void) strcat(name, "@");
	(void) strcat(name, da->snapname);
	err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
	    da->dstg, &ds);
	cp = strchr(name, '@');
	*cp = '\0';
	if (err == 0) {
		dsl_dataset_make_exclusive(ds, da->dstg);
		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
		    dsl_dataset_destroy_sync, ds, da->dstg, 0);
	} else if (err == ENOENT) {
		err = 0;
	} else {
		(void) strcpy(da->failed, name);
	}
	return (err);
}

/*
 * Destroy 'snapname' in all descendants of 'fsname'.
 */
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
int
dsl_snapshots_destroy(char *fsname, char *snapname)
{
	int err;
	struct destroyarg da;
	dsl_sync_task_t *dst;
	spa_t *spa;

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);
	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	da.snapname = snapname;
	da.failed = fsname;

	err = dmu_objset_find(fsname,
	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);

	if (err == 0)
		err = dsl_sync_task_group_wait(da.dstg);

	for (dst = list_head(&da.dstg->dstg_tasks); dst;
	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		/*
		 * Return the file system name that triggered the error
		 */
		if (dst->dst_err) {
			dsl_dataset_name(ds, fsname);
			*strchr(fsname, '@') = '\0';
		}
		dsl_dataset_disown(ds, da.dstg);
	}

	dsl_sync_task_group_destroy(da.dstg);
	spa_close(spa, FTAG);
	return (err);
}
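/*
 * Note that dsl_snapshots_destroy() reuses the caller's fsname buffer as
 * da.failed: on error, the name of the file system whose snapshot failed
 * is copied back over fsname so the caller can report it.  This relies on
 * the caller passing a buffer large enough for any dataset name (at least
 * MAXNAMELEN bytes), which the ioctl path is assumed to guarantee.
 */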
/*
 * ds must be opened as OWNER.  On return (whether successful or not),
 * ds will be closed and caller can no longer dereference it.
 */
int
dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
{
	int err;
	dsl_sync_task_group_t *dstg;
	objset_t *os;
	dsl_dir_t *dd;
	uint64_t obj;

	if (dsl_dataset_is_snapshot(ds)) {
		/* Destroying a snapshot is simpler */
		dsl_dataset_make_exclusive(ds, tag);

		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
		    ds, tag, 0);
		goto out;
	}

	dd = ds->ds_dir;

	/*
	 * Check for errors and mark this ds as inconsistent, in
	 * case we crash while freeing the objects.
	 */
	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
	if (err)
		goto out;

	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
	if (err)
		goto out;

	/*
	 * remove the objects in open context, so that we won't
	 * have too much to do in syncing context.
	 */
	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
	    ds->ds_phys->ds_prev_snap_txg)) {
		/*
		 * Ignore errors, if there is not enough disk space
		 * we will deal with it in dsl_dataset_destroy_sync().
		 */
		(void) dmu_free_object(os, obj);
	}

	dmu_objset_close(os);
	if (err != ESRCH)
		goto out;

	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
	rw_exit(&dd->dd_pool->dp_config_rwlock);

	if (err)
		goto out;

	if (ds->ds_user_ptr) {
		/*
		 * We need to sync out all in-flight IO before we try
		 * to evict (the dataset evict func is trying to clear
		 * the cached entries for this dataset in the ARC).
		 */
		txg_wait_synced(dd->dd_pool, 0);
	}

	/*
	 * Blow away the dsl_dir + head dataset.
	 */
	dsl_dataset_make_exclusive(ds, tag);
	if (ds->ds_user_ptr) {
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}
	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
	    dsl_dataset_destroy_sync, ds, tag, 0);
	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
	    dsl_dir_destroy_sync, dd, FTAG, 0);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	/* if it is successful, dsl_dir_destroy_sync will close the dd */
	if (err)
		dsl_dir_close(dd, FTAG);
out:
	dsl_dataset_disown(ds, tag);
	return (err);
}

int
dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
{
	int err;

	ASSERT(ds->ds_owner);

	dsl_dataset_make_exclusive(ds, ds->ds_owner);
	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
	    ds, &ost, 0);
	/* drop exclusive access */
	mutex_enter(&ds->ds_lock);
	rw_exit(&ds->ds_rwlock);
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);
	return (err);
}

void *
dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
    void *p, dsl_dataset_evict_func_t func)
{
	void *old;

	mutex_enter(&ds->ds_lock);
	old = ds->ds_user_ptr;
	if (old == NULL) {
		ds->ds_user_ptr = p;
		ds->ds_user_evict_func = func;
	}
	mutex_exit(&ds->ds_lock);
	return (old);
}

void *
dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
{
	return (ds->ds_user_ptr);
}


blkptr_t *
dsl_dataset_get_blkptr(dsl_dataset_t *ds)
{
	return (&ds->ds_phys->ds_bp);
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	/* If it's the meta-objset, set dp_meta_rootbp */
	if (ds == NULL) {
		tx->tx_pool->dp_meta_rootbp = *bp;
	} else {
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_bp = *bp;
	}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
	return (ds->ds_dir->dd_pool->dp_spa);
}

void
dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp;

	if (ds == NULL) /* this is the meta-objset */
		return;

	ASSERT(ds->ds_user_ptr != NULL);

	if (ds->ds_phys->ds_next_snap_obj != 0)
		panic("dirtying snapshot!");

	dp = ds->ds_dir->dd_pool;

	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(ds->ds_dbuf, ds);
	}
}

/*
 * The unique space in the head dataset can be calculated by subtracting
 * the space used in the most recent snapshot, that is still being used
 * in this file system, from the space currently in use.  To figure out
 * the space in the most recent snapshot still in use, we need to take
 * the total space used in the snapshot and subtract out the space that
 * has been freed up since the snapshot was taken.
 */
static void
dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
{
	uint64_t mrs_used;
	uint64_t dlused, dlcomp, dluncomp;

	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);

	if (ds->ds_phys->ds_prev_snap_obj != 0)
		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
	else
		mrs_used = 0;

	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
	    &dluncomp));

	ASSERT3U(dlused, <=, mrs_used);
	ds->ds_phys->ds_unique_bytes =
	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);

	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
	    SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
}

static uint64_t
dsl_dataset_unique(dsl_dataset_t *ds)
{
	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
		dsl_dataset_recalc_head_uniq(ds);

	return (ds->ds_phys->ds_unique_bytes);
}
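/*
 * Worked example for dsl_dataset_recalc_head_uniq() above (illustrative
 * numbers only): if the head currently references used_bytes = 10G, the
 * most recent snapshot references mrs_used = 8G, and the head's deadlist
 * holds dlused = 3G of blocks freed since that snapshot, then the
 * snapshot still shares 8G - 3G = 5G with the head, so
 * unique_bytes = 10G - 5G = 5G.
 */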
struct killarg {
	dsl_dataset_t *ds;
	zio_t *zio;
	dmu_tx_t *tx;
};

/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
    const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;

	if (bp == NULL)
		return (0);

	if ((zb->zb_level == -1ULL && zb->zb_blkid != 0) ||
	    (zb->zb_object != 0 && dnp == NULL)) {
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		VERIFY3U(0, ==, dsl_free(ka->zio, ka->tx->tx_pool,
		    ka->tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT));
	} else {
		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
	}

	return (0);
}

/* ARGSUSED */
static int
dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;

	/*
	 * We can only roll back to emptiness if it is a ZPL objset.
	 */
	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
		return (EINVAL);

	/*
	 * This must not be a snapshot.
	 */
	if (ds->ds_phys->ds_next_snap_obj != 0)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	return (0);
}
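/*
 * The EAGAIN above is normally not surfaced to the caller: the sync task
 * framework is expected to retry the check in a later txg, by which time
 * this txg's changes will be visible to traverse_dataset().  The same
 * convention applies to the other EAGAIN checks in this file.
 */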
/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	if (ds->ds_user_ptr != NULL) {
		/*
		 * We need to make sure that the objset_impl_t is reopened
		 * after we do the rollback, otherwise it will have the
		 * wrong objset_phys_t.  Normally this would happen when
		 * this dataset-open is closed, thus causing the dataset to
		 * be immediately evicted.  But when doing "zfs recv -F",
		 * we reopen the objset before that, so that there is no
		 * window where the dataset is closed and inconsistent.
		 */
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}

	/* Transfer space that was freed since last snap back to the head. */
	{
		uint64_t used;

		VERIFY(0 == bplist_space_birthrange(&ds->ds_deadlist,
		    ds->ds_origin_txg, UINT64_MAX, &used));
		dsl_dir_transfer_space(ds->ds_dir, used,
		    DD_USED_SNAP, DD_USED_HEAD, tx);
	}

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/*
		 * Free blkptrs that we gave birth to - this covers
		 * claimed but not played log blocks too.
		 */
		zio_t *zio;
		struct killarg ka;

		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		(void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);
	}

	ASSERT(!(ds->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) ||
	    ds->ds_phys->ds_unique_bytes == 0);

	if (ds->ds_prev && ds->ds_prev != ds->ds_dir->dd_pool->dp_origin_snap) {
		/* Change our contents to that of the prev snapshot */

		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ASSERT3U(ds->ds_phys->ds_used_bytes, <=,
		    ds->ds_prev->ds_phys->ds_used_bytes);

		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
		ds->ds_phys->ds_used_bytes =
		    ds->ds_prev->ds_phys->ds_used_bytes;
		ds->ds_phys->ds_compressed_bytes =
		    ds->ds_prev->ds_phys->ds_compressed_bytes;
		ds->ds_phys->ds_uncompressed_bytes =
		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;

		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
		}
	} else {
		objset_impl_t *osi;

		ASSERT3U(ds->ds_phys->ds_used_bytes, ==, 0);
		ASSERT3U(ds->ds_phys->ds_compressed_bytes, ==, 0);
		ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, ==, 0);

		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
		ds->ds_phys->ds_flags = 0;
		ds->ds_phys->ds_unique_bytes = 0;
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
		    SPA_VERSION_UNIQUE_ACCURATE)
			ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

		osi = dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
		    &ds->ds_phys->ds_bp, *ost, tx);
#ifdef _KERNEL
		zfs_create_fs(&osi->os, kcred, NULL, tx);
#endif
	}

	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", ds->ds_object);
}
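/*
 * Overview of head-dataset destroy (the pieces below, plus
 * dsl_dataset_destroy() above): a first sync task
 * (dsl_dataset_destroy_begin_*) validates the request and sets
 * DS_FLAG_INCONSISTENT on disk, the bulk of the objects are then freed
 * in open context, and a final sync task group
 * (dsl_dataset_destroy_check()/dsl_dataset_destroy_sync()) removes what
 * remains along with the dsl_dir.  The inconsistent flag means a crash
 * mid-destroy leaves a dataset that is recognizably partially destroyed
 * rather than one that looks healthy.
 */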
/* ARGSUSED */
static int
dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	uint64_t count;
	int err;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * This is really a dsl_dir thing, but check it here so that
	 * we'll be less likely to leave this dataset inconsistent &
	 * nearly destroyed.
	 */
	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
	if (err)
		return (err);
	if (count != 0)
		return (EEXIST);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
}

/* ARGSUSED */
int
dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;

	/* we have an owner hold, so no one else can destroy us */
	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));

	/* Can't delete a branch point. */
	if (ds->ds_phys->ds_num_children > 1)
		return (EEXIST);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EINVAL);

	/*
	 * If we made changes this txg, traverse_dsl_dataset won't find
	 * them.  Try again.
	 */
	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
		return (EAGAIN);

	/* XXX we should do some i/o error checking... */
	return (0);
}

struct refsarg {
	kmutex_t lock;
	boolean_t gone;
	kcondvar_t cv;
};

/* ARGSUSED */
static void
dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
{
	struct refsarg *arg = argv;

	mutex_enter(&arg->lock);
	arg->gone = TRUE;
	cv_signal(&arg->cv);
	mutex_exit(&arg->lock);
}

static void
dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
{
	struct refsarg arg;

	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
	arg.gone = FALSE;
	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
	    dsl_dataset_refs_gone);
	dmu_buf_rele(ds->ds_dbuf, tag);
	mutex_enter(&arg.lock);
	while (!arg.gone)
		cv_wait(&arg.cv, &arg.lock);
	ASSERT(arg.gone);
	mutex_exit(&arg.lock);
	ds->ds_dbuf = NULL;
	ds->ds_phys = NULL;
	mutex_destroy(&arg.lock);
	cv_destroy(&arg.cv);
}
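/*
 * dsl_dataset_drain_refs() above works by swapping the dbuf's user
 * record: dmu_buf_update_user() replaces the dsl_dataset_t's eviction
 * callback with dsl_dataset_refs_gone(), which fires once the last dbuf
 * hold is released.  Dropping our own hold and then cv_wait()ing on
 * arg.cv therefore blocks until every other reference to the dataset is
 * gone.
 */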
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(ds->ds_owner);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	/* signal any waiters that this dataset is going away */
	mutex_enter(&ds->ds_lock);
	ds->ds_owner = dsl_reaper;
	cv_broadcast(&ds->ds_exclusive_cv);
	mutex_exit(&ds->ds_lock);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_pool_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		}
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			VERIFY(0 == zap_remove_int(mos,
			    ds_prev->ds_phys->ds_next_clones_obj, obj, tx));
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY(0 == zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;
		int64_t used = 0, compressed = 0, uncompressed = 0;

		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		old_unique = dsl_dataset_unique(ds_next);

		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) dsl_free(zio, dp, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);

		/* change snapused */
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -compressed, -uncompressed, tx);

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		bplist_close(&ds->ds_deadlist);
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;
			uint64_t space;

			VERIFY(0 == dsl_dataset_hold_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj,
			    FTAG, &ds_after_next));

			VERIFY(0 ==
			    bplist_space_birthrange(&ds_after_next->ds_deadlist,
			    ds->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_creation_txg, &space));
			ds_next->ds_phys->ds_unique_bytes += space;

			dsl_dataset_rele(ds_after_next, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
			ds_next->ds_prev = NULL;
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_get_ref(dp,
				    ds->ds_phys->ds_prev_snap_obj,
				    ds_next, &ds_next->ds_prev));
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir,
				    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
			}
		}
		dsl_dataset_rele(ds_next, FTAG);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * NB: this should be very quick, because we already
		 * freed all the objects in open context.
		 */
		ka.ds = ds;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    TRAVERSE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(spa_version(dp->dp_spa) < SPA_VERSION_UNIQUE_ACCURATE ||
		    ds->ds_phys->ds_unique_bytes == 0);
	}

	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dir */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
		VERIFY(0 == dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			uint64_t val;

			err = dsl_dataset_snap_lookup(ds_head,
			    ds->ds_snapname, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_rele(ds_head, FTAG);
	}

	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;
		ASSERT(0 == zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
		VERIFY(0 == dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	dsl_dir_close(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dsl_dataset_drain_refs(ds, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));
}
static int
dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t asize;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	/*
	 * If there's an fs-only reservation, any blocks that might become
	 * owned by the snapshot dataset must be accommodated by space
	 * outside of the reservation.
	 */
	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
		return (ENOSPC);

	/*
	 * Propagate any reserved space for this snapshot to other
	 * snapshot checks in this sync group.
	 */
	if (asize > 0)
		dsl_dir_willuse_space(ds->ds_dir, asize, tx);

	return (0);
}
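/*
 * Example of the check above (illustrative numbers only): with a
 * refreservation of 5G and 2G of unique data in the head, taking a
 * snapshot moves that 2G under the snapshot while the head must still be
 * able to re-dirty it within its reservation, so
 * asize = MIN(2G, 5G) = 2G of additional space must be available outside
 * the reservation or the snapshot fails with ENOSPC.
 */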
/* ARGSUSED */
int
dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	int err;
	uint64_t value;

	/*
	 * We don't allow multiple snapshots of the same txg.  If there
	 * is already one, try again.
	 */
	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
		return (EAGAIN);

	/*
	 * Check for a conflicting snapshot name.
	 */
	err = dsl_dataset_snap_lookup(ds, snapname, &value);
	if (err == 0)
		return (EEXIST);
	if (err != ENOENT)
		return (err);

	/*
	 * Check that the dataset's name is not too long.  Name consists
	 * of the dataset's length + 1 for the @-sign + snapshot name's length
	 */
	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
		return (ENAMETOOLONG);

	err = dsl_dataset_snapshot_reserve_space(ds, tx);
	if (err)
		return (err);

	ds->ds_trysnap_txg = tx->tx_txg;
	return (0);
}

void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj, crtxg;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/*
	 * The origin's ds_creation_txg has to be < TXG_INITIAL
	 */
	if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
		crtxg = 1;
	else
		crtxg = tx->tx_txg;

	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	bzero(dsphys, sizeof (dsl_dataset_phys_t));
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = crtxg;
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		uint64_t next_clones_obj =
		    ds->ds_prev->ds_phys->ds_next_clones_obj;
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		} else if (next_clones_obj != 0) {
			VERIFY3U(0, ==, zap_remove_int(mos,
			    next_clones_obj, dsphys->ds_next_snap_obj, tx));
			VERIFY3U(0, ==, zap_add_int(mos,
			    next_clones_obj, dsobj, tx));
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
		    add, 0, 0, tx);
	}

	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = crtxg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	if (ds->ds_prev)
		dsl_dataset_drop_ref(ds->ds_prev, ds);
	VERIFY(0 == dsl_dataset_get_ref(dp,
	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));

	dsl_pool_ds_snapshotted(ds, tx);

	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
	    "dataset = %llu", dsobj);
}

void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(ds->ds_user_ptr != NULL);
	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

	/*
	 * in case we had to change ds_fsid_guid when we opened it,
	 * sync it out now.
	 */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

	dsl_dir_dirty(ds->ds_dir, tx);
	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
}

void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
	uint64_t refd, avail, uobjs, aobjs;

	dsl_dir_stats(ds->ds_dir, nv);

	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);

	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
	    ds->ds_phys->ds_creation_time);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
	    ds->ds_phys->ds_creation_txg);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
	    ds->ds_quota);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
	    ds->ds_reserved);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
	    ds->ds_phys->ds_guid);

	if (ds->ds_phys->ds_next_snap_obj) {
		/*
		 * This is a snapshot; override the dd's space used with
		 * our unique space and compression ratio.
		 */
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
		    ds->ds_phys->ds_unique_bytes);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
		    ds->ds_phys->ds_compressed_bytes));
	}
}
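/*
 * The compression ratio above is reported as a percentage scaled by 100:
 * for example (illustrative numbers), 300MB of uncompressed data stored
 * in 100MB of compressed blocks yields 300, which userland presents as a
 * 3.00x ratio.  A dataset with no compressed bytes reports 100 (1.00x)
 * rather than dividing by zero.
 */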
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
		    ds->ds_phys->ds_compressed_bytes));
	}
}

void
dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
{
	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
	stat->dds_guid = ds->ds_phys->ds_guid;
	if (ds->ds_phys->ds_next_snap_obj) {
		stat->dds_is_snapshot = B_TRUE;
		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
	} else {
		stat->dds_is_snapshot = B_FALSE;
		stat->dds_num_clones = 0;
	}

	/* clone origin is really a dsl_dir thing... */
	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
	if (dsl_dir_is_clone(ds->ds_dir)) {
		dsl_dataset_t *ods;

		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
		dsl_dataset_name(ods, stat->dds_origin);
		dsl_dataset_drop_ref(ods, FTAG);
	} else {
		stat->dds_origin[0] = '\0';
	}
	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
}

uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
	return (ds->ds_fsid_guid);
}

void
dsl_dataset_space(dsl_dataset_t *ds,
    uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	*refdbytesp = ds->ds_phys->ds_used_bytes;
	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
	if (ds->ds_quota != 0) {
		/*
		 * Adjust available bytes according to refquota
		 */
		if (*refdbytesp < ds->ds_quota)
			*availbytesp = MIN(*availbytesp,
			    ds->ds_quota - *refdbytesp);
		else
			*availbytesp = 0;
	}
	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}

boolean_t
dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
	    dsl_pool_sync_context(dp));
	if (ds->ds_prev == NULL)
		return (B_FALSE);
	if (ds->ds_phys->ds_bp.blk_birth >
	    ds->ds_prev->ds_phys->ds_creation_txg)
		return (B_TRUE);
	return (B_FALSE);
}

/* ARGSUSED */
static int
dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	dsl_dataset_t *hds;
	uint64_t val;
	int err;

	err = dsl_dataset_hold_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
	if (err)
		return (err);

	/* new name better not be in use */
	err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
	dsl_dataset_rele(hds, FTAG);

	if (err == 0)
		err = EEXIST;
	else if (err == ENOENT)
		err = 0;

	/* dataset name + 1 for the "@" + the new snapshot name must fit */
	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
		err = ENAMETOOLONG;

	return (err);
}

static void
dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
    cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *newsnapname = arg2;
	dsl_dir_t *dd = ds->ds_dir;
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dsl_dataset_t *hds;
	int err;

	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);

	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));

	VERIFY(0 == dsl_dataset_get_snapname(ds));
	err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
	ASSERT3U(err, ==, 0);
	mutex_enter(&ds->ds_lock);
	(void) strcpy(ds->ds_snapname, newsnapname);
	mutex_exit(&ds->ds_lock);
	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
	ASSERT3U(err, ==, 0);

	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);
	dsl_dataset_rele(hds, FTAG);
}

struct renamesnaparg {
	dsl_sync_task_group_t *dstg;
	char failed[MAXPATHLEN];
	char *oldsnap;
	char *newsnap;
};

static int
dsl_snapshot_rename_one(char *name, void *arg)
{
	struct renamesnaparg *ra = arg;
	dsl_dataset_t *ds = NULL;
	char *cp;
	int err;

	cp = name + strlen(name);
	*cp = '@';
	(void) strcpy(cp + 1, ra->oldsnap);

	/*
	 * For recursive snapshot renames the parent won't be changing
	 * so we just pass name for both the to/from argument.
	 */
	err = zfs_secpolicy_rename_perms(name, name, CRED());
	if (err == ENOENT) {
		return (0);
	} else if (err) {
		(void) strcpy(ra->failed, name);
		return (err);
	}

#ifdef _KERNEL
	/*
	 * Each snapshot undergoing rename must be unmounted first.
	 */
	(void) zfs_unmount_snap(name, NULL);
#endif
	err = dsl_dataset_hold(name, ra->dstg, &ds);
	*cp = '\0';
	if (err == ENOENT) {
		return (0);
	} else if (err) {
		(void) strcpy(ra->failed, name);
		return (err);
	}

	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);

	return (0);
}

static int
dsl_recursive_rename(char *oldname, const char *newname)
{
	int err;
	struct renamesnaparg *ra;
	dsl_sync_task_t *dst;
	spa_t *spa;
	char *cp, *fsname = spa_strdup(oldname);
	int len = strlen(oldname);

	/* truncate the snapshot name to get the fsname */
	cp = strchr(fsname, '@');
	*cp = '\0';

	err = spa_open(fsname, &spa, FTAG);
	if (err) {
		kmem_free(fsname, len + 1);
		return (err);
	}
	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));

	ra->oldsnap = strchr(oldname, '@') + 1;
	ra->newsnap = strchr(newname, '@') + 1;
	*ra->failed = '\0';

	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
	    DS_FIND_CHILDREN);
	kmem_free(fsname, len + 1);

	if (err == 0) {
		err = dsl_sync_task_group_wait(ra->dstg);
	}

	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err) {
			dsl_dir_name(ds->ds_dir, ra->failed);
			(void) strcat(ra->failed, "@");
			(void) strcat(ra->failed, ra->newsnap);
		}
		dsl_dataset_rele(ds, ra->dstg);
	}

	if (err)
		(void) strcpy(oldname, ra->failed);

	dsl_sync_task_group_destroy(ra->dstg);
	kmem_free(ra, sizeof (struct renamesnaparg));
	spa_close(spa, FTAG);
	return (err);
}
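
/*
 * A sketch of the flow above (example names, not from the source):
 * renaming tank/fs@old to tank/fs@new recursively makes
 * dmu_objset_find() visit tank/fs and every descendant filesystem;
 * dsl_snapshot_rename_one() queues one rename sync task per "old"
 * snapshot it finds, and dsl_sync_task_group_wait() then commits the
 * whole group together, so the renames land in a single txg.
 */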

static int
dsl_valid_rename(char *oldname, void *arg)
{
	int delta = *(int *)arg;

	if (strlen(oldname) + delta >= MAXNAMELEN)
		return (ENAMETOOLONG);

	return (0);
}

#pragma weak dmu_objset_rename = dsl_dataset_rename
int
dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	const char *tail;
	int err;

	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
	if (err)
		return (err);
	/*
	 * If there are more than 2 references there may be holds
	 * hanging around that haven't been cleared out yet.
	 */
	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
		txg_wait_synced(dd->dd_pool, 0);
	if (tail == NULL) {
		int delta = strlen(newname) - strlen(oldname);

		/* if we're growing, validate child name lengths */
		if (delta > 0)
			err = dmu_objset_find(oldname, dsl_valid_rename,
			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);

		if (!err)
			err = dsl_dir_rename(dd, newname);
		dsl_dir_close(dd, FTAG);
		return (err);
	}
	if (tail[0] != '@') {
		/* the name ended in a nonexistent component */
		dsl_dir_close(dd, FTAG);
		return (ENOENT);
	}

	dsl_dir_close(dd, FTAG);

	/* the new name must be a snapshot in the same filesystem */
	tail = strchr(newname, '@');
	if (tail == NULL)
		return (EINVAL);
	tail++;
	if (strncmp(oldname, newname, tail - newname) != 0)
		return (EXDEV);

	if (recursive) {
		err = dsl_recursive_rename(oldname, newname);
	} else {
		err = dsl_dataset_hold(oldname, FTAG, &ds);
		if (err)
			return (err);

		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_snapshot_rename_check,
		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);

		dsl_dataset_rele(ds, FTAG);
	}

	return (err);
}

struct promotenode {
	list_node_t link;
	dsl_dataset_t *ds;
};

struct promotearg {
	list_t shared_snaps, origin_snaps, clone_snaps;
	dsl_dataset_t *origin_origin, *origin_head;
	uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
};

static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);

/* ARGSUSED */
static int
dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	struct promotenode *snap = list_head(&pa->shared_snaps);
	dsl_dataset_t *origin_ds = snap->ds;
	int err;

	/* Check that it is a real clone */
	if (!dsl_dir_is_clone(hds->ds_dir))
		return (EINVAL);

	/* Since this is so expensive, don't do the preliminary check */
	if (!dmu_tx_is_syncing(tx))
		return (0);

	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
		return (EXDEV);

	/* compute origin's new unique space */
	snap = list_tail(&pa->clone_snaps);
	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
	err = bplist_space_birthrange(&snap->ds->ds_deadlist,
	    origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique);
	if (err)
		return (err);

	/*
	 * Walk the snapshots that we are moving.
	 *
	 * Compute space to transfer.  Consider the incremental changes
	 * to used for each snapshot:
	 * (my used) = (prev's used) + (blocks born) - (blocks killed)
	 * So each snapshot gave birth to:
	 * (blocks born) = (my used) - (prev's used) + (blocks killed)
	 * So a sequence would look like:
	 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
	 * Which simplifies to:
	 * uN + kN + kN-1 + ... + k1 + k0
	 * Note however, if we stop before we reach the ORIGIN we get:
	 * uN + kN + kN-1 + ... + kM - uM-1
	 */
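	/*
	 * Worked example (made-up figures): with two snapshots being
	 * moved, where the newest one (the origin) references 10G and
	 * the two deadlists hold 1G and 2G of killed blocks, the loop
	 * below computes used = 10G + 1G + 2G = 13G -- the
	 * "uN + kN + ... + k0" form of the sum above.
	 */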
	pa->used = origin_ds->ds_phys->ds_used_bytes;
	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
	for (snap = list_head(&pa->shared_snaps); snap;
	    snap = list_next(&pa->shared_snaps, snap)) {
		uint64_t val, dlused, dlcomp, dluncomp;
		dsl_dataset_t *ds = snap->ds;

		/* Check that the snapshot name does not conflict */
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
		if (err == 0)
			return (EEXIST);
		if (err != ENOENT)
			return (err);

		/* The very first snapshot does not have a deadlist */
		if (ds->ds_phys->ds_prev_snap_obj == 0)
			continue;

		if (err = bplist_space(&ds->ds_deadlist,
		    &dlused, &dlcomp, &dluncomp))
			return (err);
		pa->used += dlused;
		pa->comp += dlcomp;
		pa->uncomp += dluncomp;
	}

	/*
	 * If we are a clone of a clone then we never reached ORIGIN,
	 * so we need to subtract out the clone origin's used space.
	 */
	if (pa->origin_origin) {
		pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
		pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
		pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
	}

	/* Check that there is enough space here */
	err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
	    pa->used);
	if (err)
		return (err);

	/*
	 * Compute the amounts of space that will be used by snapshots
	 * after the promotion (for both origin and clone).  For each,
	 * it is the amount of space that will be on all of their
	 * deadlists (that was not born before their new origin).
	 */
	if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		uint64_t space;

		/*
		 * Note, typically this will not be a clone of a clone,
		 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so
		 * these snaplist_space() -> bplist_space_birthrange()
		 * calls will be fast because they do not have to
		 * iterate over all bps.
		 */
		snap = list_head(&pa->origin_snaps);
		err = snaplist_space(&pa->shared_snaps,
		    snap->ds->ds_origin_txg, &pa->cloneusedsnap);
		if (err)
			return (err);

		err = snaplist_space(&pa->clone_snaps,
		    snap->ds->ds_origin_txg, &space);
		if (err)
			return (err);
		pa->cloneusedsnap += space;
	}
	if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		err = snaplist_space(&pa->origin_snaps,
		    origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
		if (err)
			return (err);
	}

	return (0);
}

static void
dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *hds = arg1;
	struct promotearg *pa = arg2;
	struct promotenode *snap = list_head(&pa->shared_snaps);
	dsl_dataset_t *origin_ds = snap->ds;
	dsl_dataset_t *origin_head;
	dsl_dir_t *dd = hds->ds_dir;
	dsl_pool_t *dp = hds->ds_dir->dd_pool;
	dsl_dir_t *odd = NULL;
	uint64_t oldnext_obj;
	int64_t delta;

	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));

	snap = list_head(&pa->origin_snaps);
	origin_head = snap->ds;

	/*
	 * We need to explicitly open odd, since origin_ds's dd will be
	 * changing.
	 */
	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
	    NULL, FTAG, &odd));

	/* change origin's next snap */
	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
	oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
	snap = list_tail(&pa->clone_snaps);
	ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
	origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;

	/* change the origin's next clone */
	if (origin_ds->ds_phys->ds_next_clones_obj) {
		VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
		    origin_ds->ds_phys->ds_next_clones_obj,
		    origin_ds->ds_phys->ds_next_snap_obj, tx));
		VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
		    origin_ds->ds_phys->ds_next_clones_obj,
		    oldnext_obj, tx));
	}

	/* change origin */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
	hds->ds_origin_txg = origin_head->ds_origin_txg;
	dmu_buf_will_dirty(odd->dd_dbuf, tx);
	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
	origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg;

	/* move snapshots to this dir */
	for (snap = list_head(&pa->shared_snaps); snap;
	    snap = list_next(&pa->shared_snaps, snap)) {
		dsl_dataset_t *ds = snap->ds;

		/* unregister props as dsl_dir is changing */
		if (ds->ds_user_ptr) {
			ds->ds_user_evict_func(ds, ds->ds_user_ptr);
			ds->ds_user_ptr = NULL;
		}
		/* move snap name entry */
		VERIFY(0 == dsl_dataset_get_snapname(ds));
		VERIFY(0 == dsl_dataset_snap_remove(origin_head,
		    ds->ds_snapname, tx));
		VERIFY(0 == zap_add(dp->dp_meta_objset,
		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
		    8, 1, &ds->ds_object, tx));
		/* change containing dsl_dir */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
		ds->ds_phys->ds_dir_obj = dd->dd_object;
		ASSERT3P(ds->ds_dir, ==, odd);
		dsl_dir_close(ds->ds_dir, ds);
		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
		    NULL, ds, &ds->ds_dir));

		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
	}

	/*
	 * Change space accounting.
	 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
	 * both be valid, or both be 0 (resulting in delta == 0).  This
	 * is true for each of {clone,origin} independently.
	 */

	delta = pa->cloneusedsnap -
	    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, >=, 0);
	ASSERT3U(pa->used, >=, delta);
	dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(dd, DD_USED_HEAD,
	    pa->used - delta, pa->comp, pa->uncomp, tx);

	delta = pa->originusedsnap -
	    odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
	ASSERT3S(delta, <=, 0);
	ASSERT3U(pa->used, >=, -delta);
	dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
	dsl_dir_diduse_space(odd, DD_USED_HEAD,
	    -pa->used - delta, -pa->comp, -pa->uncomp, tx);

	origin_ds->ds_phys->ds_unique_bytes = pa->unique;

	/* log history record */
	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
	    cr, "dataset = %llu", hds->ds_object);

	dsl_dir_close(odd, FTAG);
}

static char *snaplist_tag = "snaplist";
/*
 * Make a list of dsl_dataset_t's for the snapshots between first_obj
 * (exclusive) and last_obj (inclusive).  The list will be in reverse
 * order (last_obj will be the list_head()).  If first_obj == 0, do all
 * snapshots back to this dataset's origin.
 */
static int
snaplist_make(dsl_pool_t *dp, boolean_t own,
    uint64_t first_obj, uint64_t last_obj, list_t *l)
{
	uint64_t obj = last_obj;

	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));

	list_create(l, sizeof (struct promotenode),
	    offsetof(struct promotenode, link));

	while (obj != first_obj) {
		dsl_dataset_t *ds;
		struct promotenode *snap;
		int err;

		if (own) {
			err = dsl_dataset_own_obj(dp, obj,
			    0, snaplist_tag, &ds);
			if (err == 0)
				dsl_dataset_make_exclusive(ds, snaplist_tag);
		} else {
			err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
		}
		if (err == ENOENT) {
			/* lost race with snapshot destroy */
			struct promotenode *last = list_tail(l);
			ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
			obj = last->ds->ds_phys->ds_prev_snap_obj;
			continue;
		} else if (err) {
			return (err);
		}

		if (first_obj == 0)
			first_obj = ds->ds_dir->dd_phys->dd_origin_obj;

		snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
		snap->ds = ds;
		list_insert_tail(l, snap);
		obj = ds->ds_phys->ds_prev_snap_obj;
	}

	return (0);
}

static int
snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
{
	struct promotenode *snap;

	*spacep = 0;
	for (snap = list_head(l); snap; snap = list_next(l, snap)) {
		uint64_t used;
		int err = bplist_space_birthrange(&snap->ds->ds_deadlist,
		    mintxg, UINT64_MAX, &used);
		if (err)
			return (err);
		*spacep += used;
	}
	return (0);
}

static void
snaplist_destroy(list_t *l, boolean_t own)
{
	struct promotenode *snap;

	if (!l || !list_link_active(&l->list_head))
		return;

	while ((snap = list_tail(l)) != NULL) {
		list_remove(l, snap);
		if (own)
			dsl_dataset_disown(snap->ds, snaplist_tag);
		else
			dsl_dataset_rele(snap->ds, snaplist_tag);
		kmem_free(snap, sizeof (struct promotenode));
	}
	list_destroy(l);
}

/*
 * Promote a clone.  Nomenclature note:
 * "clone" or "cds": the original clone which is being promoted
 * "origin" or "ods": the snapshot which was originally the clone's origin
 * "origin head" or "ohds": the dataset which is the head
 * (filesystem/volume) for the origin
 * "origin origin": the origin of the origin's filesystem (typically
 * NULL, indicating that the clone is not a clone of a clone).
 */
int
dsl_dataset_promote(const char *name)
{
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	dsl_pool_t *dp;
	dmu_object_info_t doi;
	struct promotearg pa = { 0 };
	struct promotenode *snap;
	int err;

	err = dsl_dataset_hold(name, FTAG, &ds);
	if (err)
		return (err);
	dd = ds->ds_dir;
	dp = dd->dd_pool;

	err = dmu_object_info(dp->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, &doi);
	if (err) {
		dsl_dataset_rele(ds, FTAG);
		return (err);
	}

	if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
		dsl_dataset_rele(ds, FTAG);
		return (EINVAL);
	}

	/*
	 * We are going to inherit all the snapshots taken before our
	 * origin (i.e., our new origin will be our parent's origin).
	 * Take ownership of them so that we can rename them into our
	 * namespace.
	 */
	rw_enter(&dp->dp_config_rwlock, RW_READER);

	err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
	    &pa.shared_snaps);
	if (err != 0)
		goto out;

	err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
	if (err != 0)
		goto out;

	snap = list_head(&pa.shared_snaps);
	ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
	err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
	if (err != 0)
		goto out;

	if (dsl_dir_is_clone(snap->ds->ds_dir)) {
		err = dsl_dataset_own_obj(dp,
		    snap->ds->ds_dir->dd_phys->dd_origin_obj,
		    0, FTAG, &pa.origin_origin);
		if (err != 0)
			goto out;
	}

out:
	rw_exit(&dp->dp_config_rwlock);

	/*
	 * Add in 128x the snapnames zapobj size, since we will be moving
	 * a bunch of snapnames to the promoted ds, and dirtying their
	 * bonus buffers.
	 */
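	/*
	 * (The "2 + 2 * doi.doi_physical_blks" passed below is the
	 * blocks-modified estimate handed to the sync task -- a rough
	 * sizing sketch, assuming dsl_sync_task_do() scales that count
	 * by an average block size when reserving space for the txg.)
	 */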
	if (err == 0) {
		err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
		    dsl_dataset_promote_sync, ds, &pa,
		    2 + 2 * doi.doi_physical_blks);
	}

	snaplist_destroy(&pa.shared_snaps, B_TRUE);
	snaplist_destroy(&pa.clone_snaps, B_FALSE);
	snaplist_destroy(&pa.origin_snaps, B_FALSE);
	if (pa.origin_origin)
		dsl_dataset_disown(pa.origin_origin, FTAG);
	dsl_dataset_rele(ds, FTAG);
	return (err);
}

struct cloneswaparg {
	dsl_dataset_t *cds; /* clone dataset */
	dsl_dataset_t *ohds; /* origin's head dataset */
	boolean_t force;
	int64_t unused_refres_delta; /* change in unconsumed refreservation */
};

/* ARGSUSED */
static int
dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	struct cloneswaparg *csa = arg1;

	/* they should both be heads */
	if (dsl_dataset_is_snapshot(csa->cds) ||
	    dsl_dataset_is_snapshot(csa->ohds))
		return (EINVAL);

	/* the branch point should be just before them */
	if (csa->cds->ds_prev != csa->ohds->ds_prev)
		return (EINVAL);

	/* cds should be the clone */
	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
	    csa->ohds->ds_object)
		return (EINVAL);

	/* the clone should be a child of the origin */
	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
		return (EINVAL);

	/* ohds shouldn't be modified unless 'force' */
	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
		return (ETXTBSY);

	/* adjust amount of any unconsumed refreservation */
	csa->unused_refres_delta =
	    (int64_t)MIN(csa->ohds->ds_reserved,
	    csa->ohds->ds_phys->ds_unique_bytes) -
	    (int64_t)MIN(csa->ohds->ds_reserved,
	    csa->cds->ds_phys->ds_unique_bytes);

	if (csa->unused_refres_delta > 0 &&
	    csa->unused_refres_delta >
	    dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
		return (ENOSPC);

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	struct cloneswaparg *csa = arg1;
	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;

	ASSERT(csa->cds->ds_reserved == 0);
	ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota);

	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);

	if (csa->cds->ds_user_ptr != NULL) {
		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
		csa->cds->ds_user_ptr = NULL;
	}

	if (csa->ohds->ds_user_ptr != NULL) {
		csa->ohds->ds_user_evict_func(csa->ohds,
		    csa->ohds->ds_user_ptr);
		csa->ohds->ds_user_ptr = NULL;
	}

	/* reset origin's unique bytes */
	VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
	    csa->cds->ds_prev->ds_phys->ds_prev_snap_txg, UINT64_MAX,
	    &csa->cds->ds_prev->ds_phys->ds_unique_bytes));

	/* swap blkptrs */
	{
		blkptr_t tmp;
		tmp = csa->ohds->ds_phys->ds_bp;
		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
		csa->cds->ds_phys->ds_bp = tmp;
	}

	/* set dd_*_bytes */
	{
		int64_t dused, dcomp, duncomp;
		uint64_t cdl_used, cdl_comp, cdl_uncomp;
		uint64_t odl_used, odl_comp, odl_uncomp;

		ASSERT3U(csa->cds->ds_dir->dd_phys->
		    dd_used_breakdown[DD_USED_SNAP], ==, 0);

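		/*
		 * Example of the deltas computed below (hypothetical
		 * numbers): if the clone references 12G with 1G on its
		 * deadlist, and the head references 10G with 2G on its
		 * deadlist, then dused = (12G + 1G) - (10G + 2G) = 1G
		 * is charged to the head's dir and refunded from the
		 * clone's dir.
		 */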
		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
		    &cdl_comp, &cdl_uncomp));
		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
		    &odl_comp, &odl_uncomp));

		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
		    cdl_uncomp -
		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);

		dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
		    dused, dcomp, duncomp, tx);
		dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
		    -dused, -dcomp, -duncomp, tx);

		/*
		 * The difference in the space used by snapshots is the
		 * difference in snapshot space due to the head's
		 * deadlist (since that's the only thing that's
		 * changing that affects the snapused).
		 */
		VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist,
		    csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used));
		VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist,
		    csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used));
		dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
		    DD_USED_HEAD, DD_USED_SNAP, tx);
	}

#define	SWITCH64(x, y) \
	{ \
		uint64_t __tmp = (x); \
		(x) = (y); \
		(y) = __tmp; \
	}

	/* swap ds_*_bytes */
	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
	    csa->cds->ds_phys->ds_used_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
	    csa->cds->ds_phys->ds_compressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
	    csa->cds->ds_phys->ds_uncompressed_bytes);
	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
	    csa->cds->ds_phys->ds_unique_bytes);

	/* apply any parent delta for change in unconsumed refreservation */
	dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
	    csa->unused_refres_delta, 0, 0, tx);

	/* swap deadlists */
	bplist_close(&csa->cds->ds_deadlist);
	bplist_close(&csa->ohds->ds_deadlist);
	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
	    csa->cds->ds_phys->ds_deadlist_obj);
	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
	    csa->cds->ds_phys->ds_deadlist_obj));
	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
	    csa->ohds->ds_phys->ds_deadlist_obj));

	dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx);
}

/*
 * Swap 'clone' with its origin head file system.  Used at the end
 * of "online recv" to swizzle the file system to the new version.
 */
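/*
 * Note on the locking below: both ds_rwlocks must be held as writer
 * before the sync task runs, but taking them in a fixed order could
 * deadlock against another thread grabbing them in the opposite
 * order, so the retry loop backs off (drops the first lock) whenever
 * the second cannot be acquired with rw_tryenter().
 */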
int
dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
    boolean_t force)
{
	struct cloneswaparg csa;
	int error;

	ASSERT(clone->ds_owner);
	ASSERT(origin_head->ds_owner);
retry:
	/* Need exclusive access for the swap */
	rw_enter(&clone->ds_rwlock, RW_WRITER);
	if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
		rw_exit(&clone->ds_rwlock);
		rw_enter(&origin_head->ds_rwlock, RW_WRITER);
		if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
			rw_exit(&origin_head->ds_rwlock);
			goto retry;
		}
	}
	csa.cds = clone;
	csa.ohds = origin_head;
	csa.force = force;
	error = dsl_sync_task_do(clone->ds_dir->dd_pool,
	    dsl_dataset_clone_swap_check,
	    dsl_dataset_clone_swap_sync, &csa, NULL, 9);
	return (error);
}

/*
 * Given a pool name and a dataset object number in that pool,
 * return the name of that dataset.
 */
int
dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
{
	spa_t *spa;
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int error;

	if ((error = spa_open(pname, &spa, FTAG)) != 0)
		return (error);
	dp = spa_get_dsl(spa);
	rw_enter(&dp->dp_config_rwlock, RW_READER);
	if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
		dsl_dataset_name(ds, buf);
		dsl_dataset_rele(ds, FTAG);
	}
	rw_exit(&dp->dp_config_rwlock);
	spa_close(spa, FTAG);

	return (error);
}

int
dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
    uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
{
	int error = 0;

	ASSERT3S(asize, >, 0);

	/*
	 * *ref_rsrv is the portion of asize that will come from any
	 * unconsumed refreservation space.
	 */
	*ref_rsrv = 0;

	mutex_enter(&ds->ds_lock);
	/*
	 * Make a space adjustment for reserved bytes.
	 */
	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
		ASSERT3U(*used, >=,
		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
		*ref_rsrv =
		    asize - MIN(asize, parent_delta(ds, asize + inflight));
	}

	if (!check_quota || ds->ds_quota == 0) {
		mutex_exit(&ds->ds_lock);
		return (0);
	}
	/*
	 * If they are requesting more space, and our current estimate
	 * is over quota, they get to try again unless the actual
	 * on-disk is over quota and there are no pending changes (which
	 * may free up space for us).
	 */
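	/*
	 * Illustrative case (numbers invented): with ds_quota = 10G,
	 * 9G actually on disk and 2G in flight, the 11G estimate is
	 * over quota, but inflight > 0 so we return ERESTART and let
	 * the caller retry once the pending changes (and any frees)
	 * have synced; EDQUOT is returned only when the on-disk usage
	 * alone already exceeds the quota and nothing is in flight.
	 */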
	if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
		if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
			error = ERESTART;
		else
			error = EDQUOT;
	}
	mutex_exit(&ds->ds_lock);

	return (error);
}

/* ARGSUSED */
static int
dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t *quotap = arg2;
	uint64_t new_quota = *quotap;

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
		return (ENOTSUP);

	if (new_quota == 0)
		return (0);

	if (new_quota < ds->ds_phys->ds_used_bytes ||
	    new_quota < ds->ds_reserved)
		return (ENOSPC);

	return (0);
}

/* ARGSUSED */
void
dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t *quotap = arg2;
	uint64_t new_quota = *quotap;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	ds->ds_quota = new_quota;

	dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);

	spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
	    tx, cr, "%lld dataset = %llu",
	    (longlong_t)new_quota, ds->ds_object);
}

int
dsl_dataset_set_quota(const char *dsname, uint64_t quota)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_hold(dsname, FTAG, &ds);
	if (err)
		return (err);

	if (quota != ds->ds_quota) {
		/*
		 * If someone removes a file, then tries to set the quota, we
		 * want to make sure the file freeing takes effect.
		 */
		txg_wait_open(ds->ds_dir->dd_pool, 0);

		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
		    dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
		    ds, &quota, 0);
	}
	dsl_dataset_rele(ds, FTAG);
	return (err);
}

static int
dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t *reservationp = arg2;
	uint64_t new_reservation = *reservationp;
	uint64_t unique;

	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
	    SPA_VERSION_REFRESERVATION)
		return (ENOTSUP);

	if (dsl_dataset_is_snapshot(ds))
		return (EINVAL);

	/*
	 * If we are doing the preliminary check in open context, the
	 * space estimates may be inaccurate.
	 */
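	/*
	 * Sizing example for the syncing-context check below (made-up
	 * values): with unique = 2G, current refreservation = 1G and a
	 * requested refreservation of 5G, the required delta is
	 * MAX(2G, 5G) - MAX(2G, 1G) = 3G of additional space that must
	 * be available in the dsl_dir.
	 */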
	if (!dmu_tx_is_syncing(tx))
		return (0);

	mutex_enter(&ds->ds_lock);
	unique = dsl_dataset_unique(ds);
	mutex_exit(&ds->ds_lock);

	if (MAX(unique, new_reservation) > MAX(unique, ds->ds_reserved)) {
		uint64_t delta = MAX(unique, new_reservation) -
		    MAX(unique, ds->ds_reserved);

		if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
			return (ENOSPC);
		if (ds->ds_quota > 0 &&
		    new_reservation > ds->ds_quota)
			return (ENOSPC);
	}

	return (0);
}

/* ARGSUSED */
static void
dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
    dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	uint64_t *reservationp = arg2;
	uint64_t new_reservation = *reservationp;
	uint64_t unique;
	int64_t delta;

	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	mutex_enter(&ds->ds_dir->dd_lock);
	mutex_enter(&ds->ds_lock);
	unique = dsl_dataset_unique(ds);
	delta = MAX(0, (int64_t)(new_reservation - unique)) -
	    MAX(0, (int64_t)(ds->ds_reserved - unique));
	ds->ds_reserved = new_reservation;
	mutex_exit(&ds->ds_lock);

	dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
	mutex_exit(&ds->ds_dir->dd_lock);
	dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
	    new_reservation, cr, tx);

	spa_history_internal_log(LOG_DS_REFRESERV,
	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
	    (longlong_t)new_reservation, ds->ds_object);
}

int
dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
{
	dsl_dataset_t *ds;
	int err;

	err = dsl_dataset_hold(dsname, FTAG, &ds);
	if (err)
		return (err);

	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
	    dsl_dataset_set_reservation_check,
	    dsl_dataset_set_reservation_sync, ds, &reservation, 0);
	dsl_dataset_rele(ds, FTAG);
	return (err);
}