1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/dmu_objset.h> 27 #include <sys/dsl_dataset.h> 28 #include <sys/dsl_dir.h> 29 #include <sys/dsl_prop.h> 30 #include <sys/dsl_synctask.h> 31 #include <sys/dmu_traverse.h> 32 #include <sys/dmu_tx.h> 33 #include <sys/arc.h> 34 #include <sys/zio.h> 35 #include <sys/zap.h> 36 #include <sys/unique.h> 37 #include <sys/zfs_context.h> 38 #include <sys/zfs_ioctl.h> 39 #include <sys/spa.h> 40 #include <sys/zfs_znode.h> 41 #include <sys/sunddi.h> 42 #include <sys/zvol.h> 43 44 static char *dsl_reaper = "the grim reaper"; 45 46 static dsl_checkfunc_t dsl_dataset_destroy_begin_check; 47 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync; 48 static dsl_syncfunc_t dsl_dataset_set_reservation_sync; 49 50 #define DS_REF_MAX (1ULL << 62) 51 52 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE 53 54 #define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper) 55 56 57 /* 58 * Figure out how much of this delta should be propogated to the dsl_dir 59 * layer. If there's a refreservation, that space has already been 60 * partially accounted for in our ancestors. 61 */ 62 static int64_t 63 parent_delta(dsl_dataset_t *ds, int64_t delta) 64 { 65 uint64_t old_bytes, new_bytes; 66 67 if (ds->ds_reserved == 0) 68 return (delta); 69 70 old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved); 71 new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved); 72 73 ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta)); 74 return (new_bytes - old_bytes); 75 } 76 77 void 78 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 79 { 80 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 81 int compressed = BP_GET_PSIZE(bp); 82 int uncompressed = BP_GET_UCSIZE(bp); 83 int64_t delta; 84 85 dprintf_bp(bp, "born, ds=%p\n", ds); 86 87 ASSERT(dmu_tx_is_syncing(tx)); 88 /* It could have been compressed away to nothing */ 89 if (BP_IS_HOLE(bp)) 90 return; 91 ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE); 92 ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES); 93 if (ds == NULL) { 94 /* 95 * Account for the meta-objset space in its placeholder 96 * dsl_dir. 97 */ 98 ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */ 99 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 100 used, compressed, uncompressed, tx); 101 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 102 return; 103 } 104 dmu_buf_will_dirty(ds->ds_dbuf, tx); 105 mutex_enter(&ds->ds_dir->dd_lock); 106 mutex_enter(&ds->ds_lock); 107 delta = parent_delta(ds, used); 108 ds->ds_phys->ds_used_bytes += used; 109 ds->ds_phys->ds_compressed_bytes += compressed; 110 ds->ds_phys->ds_uncompressed_bytes += uncompressed; 111 ds->ds_phys->ds_unique_bytes += used; 112 mutex_exit(&ds->ds_lock); 113 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta, 114 compressed, uncompressed, tx); 115 dsl_dir_transfer_space(ds->ds_dir, used - delta, 116 DD_USED_REFRSRV, DD_USED_HEAD, tx); 117 mutex_exit(&ds->ds_dir->dd_lock); 118 } 119 120 int 121 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio, 122 dmu_tx_t *tx) 123 { 124 int used = bp_get_dasize(tx->tx_pool->dp_spa, bp); 125 int compressed = BP_GET_PSIZE(bp); 126 int uncompressed = BP_GET_UCSIZE(bp); 127 128 ASSERT(pio != NULL); 129 ASSERT(dmu_tx_is_syncing(tx)); 130 /* No block pointer => nothing to free */ 131 if (BP_IS_HOLE(bp)) 132 return (0); 133 134 ASSERT(used > 0); 135 if (ds == NULL) { 136 int err; 137 /* 138 * Account for the meta-objset space in its placeholder 139 * dataset. 140 */ 141 err = dsl_free(pio, tx->tx_pool, 142 tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT); 143 ASSERT(err == 0); 144 145 dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD, 146 -used, -compressed, -uncompressed, tx); 147 dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx); 148 return (used); 149 } 150 ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool); 151 152 ASSERT(!dsl_dataset_is_snapshot(ds)); 153 dmu_buf_will_dirty(ds->ds_dbuf, tx); 154 155 if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) { 156 int err; 157 int64_t delta; 158 159 dprintf_bp(bp, "freeing: %s", ""); 160 err = dsl_free(pio, tx->tx_pool, 161 tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT); 162 ASSERT(err == 0); 163 164 mutex_enter(&ds->ds_dir->dd_lock); 165 mutex_enter(&ds->ds_lock); 166 ASSERT(ds->ds_phys->ds_unique_bytes >= used || 167 !DS_UNIQUE_IS_ACCURATE(ds)); 168 delta = parent_delta(ds, -used); 169 ds->ds_phys->ds_unique_bytes -= used; 170 mutex_exit(&ds->ds_lock); 171 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 172 delta, -compressed, -uncompressed, tx); 173 dsl_dir_transfer_space(ds->ds_dir, -used - delta, 174 DD_USED_REFRSRV, DD_USED_HEAD, tx); 175 mutex_exit(&ds->ds_dir->dd_lock); 176 } else { 177 dprintf_bp(bp, "putting on dead list: %s", ""); 178 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx)); 179 ASSERT3U(ds->ds_prev->ds_object, ==, 180 ds->ds_phys->ds_prev_snap_obj); 181 ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0); 182 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */ 183 if (ds->ds_prev->ds_phys->ds_next_snap_obj == 184 ds->ds_object && bp->blk_birth > 185 ds->ds_prev->ds_phys->ds_prev_snap_txg) { 186 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 187 mutex_enter(&ds->ds_prev->ds_lock); 188 ds->ds_prev->ds_phys->ds_unique_bytes += used; 189 mutex_exit(&ds->ds_prev->ds_lock); 190 } 191 if (bp->blk_birth > ds->ds_origin_txg) { 192 dsl_dir_transfer_space(ds->ds_dir, used, 193 DD_USED_HEAD, DD_USED_SNAP, tx); 194 } 195 } 196 mutex_enter(&ds->ds_lock); 197 ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used); 198 ds->ds_phys->ds_used_bytes -= used; 199 ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed); 200 ds->ds_phys->ds_compressed_bytes -= compressed; 201 ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed); 202 ds->ds_phys->ds_uncompressed_bytes -= uncompressed; 203 mutex_exit(&ds->ds_lock); 204 205 return (used); 206 } 207 208 uint64_t 209 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds) 210 { 211 uint64_t trysnap = 0; 212 213 if (ds == NULL) 214 return (0); 215 /* 216 * The snapshot creation could fail, but that would cause an 217 * incorrect FALSE return, which would only result in an 218 * overestimation of the amount of space that an operation would 219 * consume, which is OK. 220 * 221 * There's also a small window where we could miss a pending 222 * snapshot, because we could set the sync task in the quiescing 223 * phase. So this should only be used as a guess. 224 */ 225 if (ds->ds_trysnap_txg > 226 spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa)) 227 trysnap = ds->ds_trysnap_txg; 228 return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap)); 229 } 230 231 boolean_t 232 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth) 233 { 234 return (blk_birth > dsl_dataset_prev_snap_txg(ds)); 235 } 236 237 /* ARGSUSED */ 238 static void 239 dsl_dataset_evict(dmu_buf_t *db, void *dsv) 240 { 241 dsl_dataset_t *ds = dsv; 242 243 ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds)); 244 245 unique_remove(ds->ds_fsid_guid); 246 247 if (ds->ds_objset != NULL) 248 dmu_objset_evict(ds->ds_objset); 249 250 if (ds->ds_prev) { 251 dsl_dataset_drop_ref(ds->ds_prev, ds); 252 ds->ds_prev = NULL; 253 } 254 255 bplist_close(&ds->ds_deadlist); 256 if (ds->ds_dir) 257 dsl_dir_close(ds->ds_dir, ds); 258 259 ASSERT(!list_link_active(&ds->ds_synced_link)); 260 261 mutex_destroy(&ds->ds_lock); 262 mutex_destroy(&ds->ds_recvlock); 263 mutex_destroy(&ds->ds_opening_lock); 264 mutex_destroy(&ds->ds_deadlist.bpl_lock); 265 rw_destroy(&ds->ds_rwlock); 266 cv_destroy(&ds->ds_exclusive_cv); 267 268 kmem_free(ds, sizeof (dsl_dataset_t)); 269 } 270 271 static int 272 dsl_dataset_get_snapname(dsl_dataset_t *ds) 273 { 274 dsl_dataset_phys_t *headphys; 275 int err; 276 dmu_buf_t *headdbuf; 277 dsl_pool_t *dp = ds->ds_dir->dd_pool; 278 objset_t *mos = dp->dp_meta_objset; 279 280 if (ds->ds_snapname[0]) 281 return (0); 282 if (ds->ds_phys->ds_next_snap_obj == 0) 283 return (0); 284 285 err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj, 286 FTAG, &headdbuf); 287 if (err) 288 return (err); 289 headphys = headdbuf->db_data; 290 err = zap_value_search(dp->dp_meta_objset, 291 headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname); 292 dmu_buf_rele(headdbuf, FTAG); 293 return (err); 294 } 295 296 static int 297 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value) 298 { 299 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 300 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 301 matchtype_t mt; 302 int err; 303 304 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 305 mt = MT_FIRST; 306 else 307 mt = MT_EXACT; 308 309 err = zap_lookup_norm(mos, snapobj, name, 8, 1, 310 value, mt, NULL, 0, NULL); 311 if (err == ENOTSUP && mt == MT_FIRST) 312 err = zap_lookup(mos, snapobj, name, 8, 1, value); 313 return (err); 314 } 315 316 static int 317 dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx) 318 { 319 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 320 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj; 321 matchtype_t mt; 322 int err; 323 324 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) 325 mt = MT_FIRST; 326 else 327 mt = MT_EXACT; 328 329 err = zap_remove_norm(mos, snapobj, name, mt, tx); 330 if (err == ENOTSUP && mt == MT_FIRST) 331 err = zap_remove(mos, snapobj, name, tx); 332 return (err); 333 } 334 335 static int 336 dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag, 337 dsl_dataset_t **dsp) 338 { 339 objset_t *mos = dp->dp_meta_objset; 340 dmu_buf_t *dbuf; 341 dsl_dataset_t *ds; 342 int err; 343 344 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 345 dsl_pool_sync_context(dp)); 346 347 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf); 348 if (err) 349 return (err); 350 ds = dmu_buf_get_user(dbuf); 351 if (ds == NULL) { 352 dsl_dataset_t *winner; 353 354 ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP); 355 ds->ds_dbuf = dbuf; 356 ds->ds_object = dsobj; 357 ds->ds_phys = dbuf->db_data; 358 359 mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL); 360 mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL); 361 mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL); 362 mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT, 363 NULL); 364 rw_init(&ds->ds_rwlock, 0, 0, 0); 365 cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL); 366 367 err = bplist_open(&ds->ds_deadlist, 368 mos, ds->ds_phys->ds_deadlist_obj); 369 if (err == 0) { 370 err = dsl_dir_open_obj(dp, 371 ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir); 372 } 373 if (err) { 374 /* 375 * we don't really need to close the blist if we 376 * just opened it. 377 */ 378 mutex_destroy(&ds->ds_lock); 379 mutex_destroy(&ds->ds_recvlock); 380 mutex_destroy(&ds->ds_opening_lock); 381 mutex_destroy(&ds->ds_deadlist.bpl_lock); 382 rw_destroy(&ds->ds_rwlock); 383 cv_destroy(&ds->ds_exclusive_cv); 384 kmem_free(ds, sizeof (dsl_dataset_t)); 385 dmu_buf_rele(dbuf, tag); 386 return (err); 387 } 388 389 if (!dsl_dataset_is_snapshot(ds)) { 390 ds->ds_snapname[0] = '\0'; 391 if (ds->ds_phys->ds_prev_snap_obj) { 392 err = dsl_dataset_get_ref(dp, 393 ds->ds_phys->ds_prev_snap_obj, 394 ds, &ds->ds_prev); 395 } 396 397 if (err == 0 && dsl_dir_is_clone(ds->ds_dir)) { 398 dsl_dataset_t *origin; 399 400 err = dsl_dataset_hold_obj(dp, 401 ds->ds_dir->dd_phys->dd_origin_obj, 402 FTAG, &origin); 403 if (err == 0) { 404 ds->ds_origin_txg = 405 origin->ds_phys->ds_creation_txg; 406 dsl_dataset_rele(origin, FTAG); 407 } 408 } 409 } else { 410 if (zfs_flags & ZFS_DEBUG_SNAPNAMES) 411 err = dsl_dataset_get_snapname(ds); 412 if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) { 413 err = zap_count( 414 ds->ds_dir->dd_pool->dp_meta_objset, 415 ds->ds_phys->ds_userrefs_obj, 416 &ds->ds_userrefs); 417 } 418 } 419 420 if (err == 0 && !dsl_dataset_is_snapshot(ds)) { 421 /* 422 * In sync context, we're called with either no lock 423 * or with the write lock. If we're not syncing, 424 * we're always called with the read lock held. 425 */ 426 boolean_t need_lock = 427 !RW_WRITE_HELD(&dp->dp_config_rwlock) && 428 dsl_pool_sync_context(dp); 429 430 if (need_lock) 431 rw_enter(&dp->dp_config_rwlock, RW_READER); 432 433 err = dsl_prop_get_ds(ds, 434 "refreservation", sizeof (uint64_t), 1, 435 &ds->ds_reserved, NULL); 436 if (err == 0) { 437 err = dsl_prop_get_ds(ds, 438 "refquota", sizeof (uint64_t), 1, 439 &ds->ds_quota, NULL); 440 } 441 442 if (need_lock) 443 rw_exit(&dp->dp_config_rwlock); 444 } else { 445 ds->ds_reserved = ds->ds_quota = 0; 446 } 447 448 if (err == 0) { 449 winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys, 450 dsl_dataset_evict); 451 } 452 if (err || winner) { 453 bplist_close(&ds->ds_deadlist); 454 if (ds->ds_prev) 455 dsl_dataset_drop_ref(ds->ds_prev, ds); 456 dsl_dir_close(ds->ds_dir, ds); 457 mutex_destroy(&ds->ds_lock); 458 mutex_destroy(&ds->ds_recvlock); 459 mutex_destroy(&ds->ds_opening_lock); 460 mutex_destroy(&ds->ds_deadlist.bpl_lock); 461 rw_destroy(&ds->ds_rwlock); 462 cv_destroy(&ds->ds_exclusive_cv); 463 kmem_free(ds, sizeof (dsl_dataset_t)); 464 if (err) { 465 dmu_buf_rele(dbuf, tag); 466 return (err); 467 } 468 ds = winner; 469 } else { 470 ds->ds_fsid_guid = 471 unique_insert(ds->ds_phys->ds_fsid_guid); 472 } 473 } 474 ASSERT3P(ds->ds_dbuf, ==, dbuf); 475 ASSERT3P(ds->ds_phys, ==, dbuf->db_data); 476 ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 || 477 spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN || 478 dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap); 479 mutex_enter(&ds->ds_lock); 480 if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) { 481 mutex_exit(&ds->ds_lock); 482 dmu_buf_rele(ds->ds_dbuf, tag); 483 return (ENOENT); 484 } 485 mutex_exit(&ds->ds_lock); 486 *dsp = ds; 487 return (0); 488 } 489 490 static int 491 dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag) 492 { 493 dsl_pool_t *dp = ds->ds_dir->dd_pool; 494 495 /* 496 * In syncing context we don't want the rwlock lock: there 497 * may be an existing writer waiting for sync phase to 498 * finish. We don't need to worry about such writers, since 499 * sync phase is single-threaded, so the writer can't be 500 * doing anything while we are active. 501 */ 502 if (dsl_pool_sync_context(dp)) { 503 ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 504 return (0); 505 } 506 507 /* 508 * Normal users will hold the ds_rwlock as a READER until they 509 * are finished (i.e., call dsl_dataset_rele()). "Owners" will 510 * drop their READER lock after they set the ds_owner field. 511 * 512 * If the dataset is being destroyed, the destroy thread will 513 * obtain a WRITER lock for exclusive access after it's done its 514 * open-context work and then change the ds_owner to 515 * dsl_reaper once destruction is assured. So threads 516 * may block here temporarily, until the "destructability" of 517 * the dataset is determined. 518 */ 519 ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock)); 520 mutex_enter(&ds->ds_lock); 521 while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) { 522 rw_exit(&dp->dp_config_rwlock); 523 cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock); 524 if (DSL_DATASET_IS_DESTROYED(ds)) { 525 mutex_exit(&ds->ds_lock); 526 dsl_dataset_drop_ref(ds, tag); 527 rw_enter(&dp->dp_config_rwlock, RW_READER); 528 return (ENOENT); 529 } 530 rw_enter(&dp->dp_config_rwlock, RW_READER); 531 } 532 mutex_exit(&ds->ds_lock); 533 return (0); 534 } 535 536 int 537 dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, 538 dsl_dataset_t **dsp) 539 { 540 int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp); 541 542 if (err) 543 return (err); 544 return (dsl_dataset_hold_ref(*dsp, tag)); 545 } 546 547 int 548 dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok, 549 void *tag, dsl_dataset_t **dsp) 550 { 551 int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp); 552 if (err) 553 return (err); 554 if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { 555 dsl_dataset_rele(*dsp, tag); 556 *dsp = NULL; 557 return (EBUSY); 558 } 559 return (0); 560 } 561 562 int 563 dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp) 564 { 565 dsl_dir_t *dd; 566 dsl_pool_t *dp; 567 const char *snapname; 568 uint64_t obj; 569 int err = 0; 570 571 err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname); 572 if (err) 573 return (err); 574 575 dp = dd->dd_pool; 576 obj = dd->dd_phys->dd_head_dataset_obj; 577 rw_enter(&dp->dp_config_rwlock, RW_READER); 578 if (obj) 579 err = dsl_dataset_get_ref(dp, obj, tag, dsp); 580 else 581 err = ENOENT; 582 if (err) 583 goto out; 584 585 err = dsl_dataset_hold_ref(*dsp, tag); 586 587 /* we may be looking for a snapshot */ 588 if (err == 0 && snapname != NULL) { 589 dsl_dataset_t *ds = NULL; 590 591 if (*snapname++ != '@') { 592 dsl_dataset_rele(*dsp, tag); 593 err = ENOENT; 594 goto out; 595 } 596 597 dprintf("looking for snapshot '%s'\n", snapname); 598 err = dsl_dataset_snap_lookup(*dsp, snapname, &obj); 599 if (err == 0) 600 err = dsl_dataset_get_ref(dp, obj, tag, &ds); 601 dsl_dataset_rele(*dsp, tag); 602 603 ASSERT3U((err == 0), ==, (ds != NULL)); 604 605 if (ds) { 606 mutex_enter(&ds->ds_lock); 607 if (ds->ds_snapname[0] == 0) 608 (void) strlcpy(ds->ds_snapname, snapname, 609 sizeof (ds->ds_snapname)); 610 mutex_exit(&ds->ds_lock); 611 err = dsl_dataset_hold_ref(ds, tag); 612 *dsp = err ? NULL : ds; 613 } 614 } 615 out: 616 rw_exit(&dp->dp_config_rwlock); 617 dsl_dir_close(dd, FTAG); 618 return (err); 619 } 620 621 int 622 dsl_dataset_own(const char *name, boolean_t inconsistentok, 623 void *tag, dsl_dataset_t **dsp) 624 { 625 int err = dsl_dataset_hold(name, tag, dsp); 626 if (err) 627 return (err); 628 if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) { 629 dsl_dataset_rele(*dsp, tag); 630 return (EBUSY); 631 } 632 return (0); 633 } 634 635 void 636 dsl_dataset_name(dsl_dataset_t *ds, char *name) 637 { 638 if (ds == NULL) { 639 (void) strcpy(name, "mos"); 640 } else { 641 dsl_dir_name(ds->ds_dir, name); 642 VERIFY(0 == dsl_dataset_get_snapname(ds)); 643 if (ds->ds_snapname[0]) { 644 (void) strcat(name, "@"); 645 /* 646 * We use a "recursive" mutex so that we 647 * can call dprintf_ds() with ds_lock held. 648 */ 649 if (!MUTEX_HELD(&ds->ds_lock)) { 650 mutex_enter(&ds->ds_lock); 651 (void) strcat(name, ds->ds_snapname); 652 mutex_exit(&ds->ds_lock); 653 } else { 654 (void) strcat(name, ds->ds_snapname); 655 } 656 } 657 } 658 } 659 660 static int 661 dsl_dataset_namelen(dsl_dataset_t *ds) 662 { 663 int result; 664 665 if (ds == NULL) { 666 result = 3; /* "mos" */ 667 } else { 668 result = dsl_dir_namelen(ds->ds_dir); 669 VERIFY(0 == dsl_dataset_get_snapname(ds)); 670 if (ds->ds_snapname[0]) { 671 ++result; /* adding one for the @-sign */ 672 if (!MUTEX_HELD(&ds->ds_lock)) { 673 mutex_enter(&ds->ds_lock); 674 result += strlen(ds->ds_snapname); 675 mutex_exit(&ds->ds_lock); 676 } else { 677 result += strlen(ds->ds_snapname); 678 } 679 } 680 } 681 682 return (result); 683 } 684 685 void 686 dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag) 687 { 688 dmu_buf_rele(ds->ds_dbuf, tag); 689 } 690 691 void 692 dsl_dataset_rele(dsl_dataset_t *ds, void *tag) 693 { 694 if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) { 695 rw_exit(&ds->ds_rwlock); 696 } 697 dsl_dataset_drop_ref(ds, tag); 698 } 699 700 void 701 dsl_dataset_disown(dsl_dataset_t *ds, void *tag) 702 { 703 ASSERT((ds->ds_owner == tag && ds->ds_dbuf) || 704 (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL)); 705 706 mutex_enter(&ds->ds_lock); 707 ds->ds_owner = NULL; 708 if (RW_WRITE_HELD(&ds->ds_rwlock)) { 709 rw_exit(&ds->ds_rwlock); 710 cv_broadcast(&ds->ds_exclusive_cv); 711 } 712 mutex_exit(&ds->ds_lock); 713 if (ds->ds_dbuf) 714 dsl_dataset_drop_ref(ds, tag); 715 else 716 dsl_dataset_evict(ds->ds_dbuf, ds); 717 } 718 719 boolean_t 720 dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag) 721 { 722 boolean_t gotit = FALSE; 723 724 mutex_enter(&ds->ds_lock); 725 if (ds->ds_owner == NULL && 726 (!DS_IS_INCONSISTENT(ds) || inconsistentok)) { 727 ds->ds_owner = tag; 728 if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) 729 rw_exit(&ds->ds_rwlock); 730 gotit = TRUE; 731 } 732 mutex_exit(&ds->ds_lock); 733 return (gotit); 734 } 735 736 void 737 dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner) 738 { 739 ASSERT3P(owner, ==, ds->ds_owner); 740 if (!RW_WRITE_HELD(&ds->ds_rwlock)) 741 rw_enter(&ds->ds_rwlock, RW_WRITER); 742 } 743 744 uint64_t 745 dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin, 746 uint64_t flags, dmu_tx_t *tx) 747 { 748 dsl_pool_t *dp = dd->dd_pool; 749 dmu_buf_t *dbuf; 750 dsl_dataset_phys_t *dsphys; 751 uint64_t dsobj; 752 objset_t *mos = dp->dp_meta_objset; 753 754 if (origin == NULL) 755 origin = dp->dp_origin_snap; 756 757 ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp); 758 ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0); 759 ASSERT(dmu_tx_is_syncing(tx)); 760 ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 761 762 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 763 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 764 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 765 dmu_buf_will_dirty(dbuf, tx); 766 dsphys = dbuf->db_data; 767 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 768 dsphys->ds_dir_obj = dd->dd_object; 769 dsphys->ds_flags = flags; 770 dsphys->ds_fsid_guid = unique_create(); 771 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 772 sizeof (dsphys->ds_guid)); 773 dsphys->ds_snapnames_zapobj = 774 zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP, 775 DMU_OT_NONE, 0, tx); 776 dsphys->ds_creation_time = gethrestime_sec(); 777 dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg; 778 dsphys->ds_deadlist_obj = 779 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 780 781 if (origin) { 782 dsphys->ds_prev_snap_obj = origin->ds_object; 783 dsphys->ds_prev_snap_txg = 784 origin->ds_phys->ds_creation_txg; 785 dsphys->ds_used_bytes = 786 origin->ds_phys->ds_used_bytes; 787 dsphys->ds_compressed_bytes = 788 origin->ds_phys->ds_compressed_bytes; 789 dsphys->ds_uncompressed_bytes = 790 origin->ds_phys->ds_uncompressed_bytes; 791 dsphys->ds_bp = origin->ds_phys->ds_bp; 792 dsphys->ds_flags |= origin->ds_phys->ds_flags; 793 794 dmu_buf_will_dirty(origin->ds_dbuf, tx); 795 origin->ds_phys->ds_num_children++; 796 797 if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) { 798 if (origin->ds_phys->ds_next_clones_obj == 0) { 799 origin->ds_phys->ds_next_clones_obj = 800 zap_create(mos, 801 DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx); 802 } 803 VERIFY(0 == zap_add_int(mos, 804 origin->ds_phys->ds_next_clones_obj, 805 dsobj, tx)); 806 } 807 808 dmu_buf_will_dirty(dd->dd_dbuf, tx); 809 dd->dd_phys->dd_origin_obj = origin->ds_object; 810 } 811 812 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 813 dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 814 815 dmu_buf_rele(dbuf, FTAG); 816 817 dmu_buf_will_dirty(dd->dd_dbuf, tx); 818 dd->dd_phys->dd_head_dataset_obj = dsobj; 819 820 return (dsobj); 821 } 822 823 uint64_t 824 dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname, 825 dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx) 826 { 827 dsl_pool_t *dp = pdd->dd_pool; 828 uint64_t dsobj, ddobj; 829 dsl_dir_t *dd; 830 831 ASSERT(lastname[0] != '@'); 832 833 ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx); 834 VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd)); 835 836 dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx); 837 838 dsl_deleg_set_create_perms(dd, tx, cr); 839 840 dsl_dir_close(dd, FTAG); 841 842 return (dsobj); 843 } 844 845 struct destroyarg { 846 dsl_sync_task_group_t *dstg; 847 char *snapname; 848 char *failed; 849 boolean_t defer; 850 }; 851 852 static int 853 dsl_snapshot_destroy_one(char *name, void *arg) 854 { 855 struct destroyarg *da = arg; 856 dsl_dataset_t *ds; 857 int err; 858 char *dsname; 859 860 dsname = kmem_asprintf("%s@%s", name, da->snapname); 861 err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds); 862 strfree(dsname); 863 if (err == 0) { 864 struct dsl_ds_destroyarg *dsda; 865 866 dsl_dataset_make_exclusive(ds, da->dstg); 867 if (ds->ds_objset != NULL) { 868 dmu_objset_evict(ds->ds_objset); 869 ds->ds_objset = NULL; 870 } 871 dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP); 872 dsda->ds = ds; 873 dsda->defer = da->defer; 874 dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check, 875 dsl_dataset_destroy_sync, dsda, da->dstg, 0); 876 } else if (err == ENOENT) { 877 err = 0; 878 } else { 879 (void) strcpy(da->failed, name); 880 } 881 return (err); 882 } 883 884 /* 885 * Destroy 'snapname' in all descendants of 'fsname'. 886 */ 887 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy 888 int 889 dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer) 890 { 891 int err; 892 struct destroyarg da; 893 dsl_sync_task_t *dst; 894 spa_t *spa; 895 896 err = spa_open(fsname, &spa, FTAG); 897 if (err) 898 return (err); 899 da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 900 da.snapname = snapname; 901 da.failed = fsname; 902 da.defer = defer; 903 904 err = dmu_objset_find(fsname, 905 dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN); 906 907 if (err == 0) 908 err = dsl_sync_task_group_wait(da.dstg); 909 910 for (dst = list_head(&da.dstg->dstg_tasks); dst; 911 dst = list_next(&da.dstg->dstg_tasks, dst)) { 912 struct dsl_ds_destroyarg *dsda = dst->dst_arg1; 913 dsl_dataset_t *ds = dsda->ds; 914 915 /* 916 * Return the file system name that triggered the error 917 */ 918 if (dst->dst_err) { 919 dsl_dataset_name(ds, fsname); 920 *strchr(fsname, '@') = '\0'; 921 } 922 ASSERT3P(dsda->rm_origin, ==, NULL); 923 dsl_dataset_disown(ds, da.dstg); 924 kmem_free(dsda, sizeof (struct dsl_ds_destroyarg)); 925 } 926 927 dsl_sync_task_group_destroy(da.dstg); 928 spa_close(spa, FTAG); 929 return (err); 930 } 931 932 static boolean_t 933 dsl_dataset_might_destroy_origin(dsl_dataset_t *ds) 934 { 935 boolean_t might_destroy = B_FALSE; 936 937 mutex_enter(&ds->ds_lock); 938 if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 && 939 DS_IS_DEFER_DESTROY(ds)) 940 might_destroy = B_TRUE; 941 mutex_exit(&ds->ds_lock); 942 943 return (might_destroy); 944 } 945 946 #ifdef _KERNEL 947 static int 948 dsl_dataset_zvol_cleanup(dsl_dataset_t *ds, const char *name) 949 { 950 int error; 951 objset_t *os; 952 953 error = dmu_objset_from_ds(ds, &os); 954 if (error) 955 return (error); 956 957 if (dmu_objset_type(os) == DMU_OST_ZVOL) 958 error = zvol_remove_minor(name); 959 960 return (error); 961 } 962 #endif 963 964 /* 965 * If we're removing a clone, and these three conditions are true: 966 * 1) the clone's origin has no other children 967 * 2) the clone's origin has no user references 968 * 3) the clone's origin has been marked for deferred destruction 969 * Then, prepare to remove the origin as part of this sync task group. 970 */ 971 static int 972 dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag) 973 { 974 dsl_dataset_t *ds = dsda->ds; 975 dsl_dataset_t *origin = ds->ds_prev; 976 977 if (dsl_dataset_might_destroy_origin(origin)) { 978 char *name; 979 int namelen; 980 int error; 981 982 namelen = dsl_dataset_namelen(origin) + 1; 983 name = kmem_alloc(namelen, KM_SLEEP); 984 dsl_dataset_name(origin, name); 985 #ifdef _KERNEL 986 error = zfs_unmount_snap(name, NULL); 987 if (error) { 988 kmem_free(name, namelen); 989 return (error); 990 } 991 error = dsl_dataset_zvol_cleanup(origin, name); 992 if (error) { 993 kmem_free(name, namelen); 994 return (error); 995 } 996 #endif 997 error = dsl_dataset_own(name, B_TRUE, tag, &origin); 998 kmem_free(name, namelen); 999 if (error) 1000 return (error); 1001 dsda->rm_origin = origin; 1002 dsl_dataset_make_exclusive(origin, tag); 1003 } 1004 1005 return (0); 1006 } 1007 1008 /* 1009 * ds must be opened as OWNER. On return (whether successful or not), 1010 * ds will be closed and caller can no longer dereference it. 1011 */ 1012 int 1013 dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer) 1014 { 1015 int err; 1016 dsl_sync_task_group_t *dstg; 1017 objset_t *os; 1018 dsl_dir_t *dd; 1019 uint64_t obj; 1020 struct dsl_ds_destroyarg dsda = {0}; 1021 1022 dsda.ds = ds; 1023 1024 if (dsl_dataset_is_snapshot(ds)) { 1025 /* Destroying a snapshot is simpler */ 1026 dsl_dataset_make_exclusive(ds, tag); 1027 1028 if (ds->ds_objset != NULL) { 1029 dmu_objset_evict(ds->ds_objset); 1030 ds->ds_objset = NULL; 1031 } 1032 /* NOTE: defer is always B_FALSE for non-snapshots */ 1033 dsda.defer = defer; 1034 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 1035 dsl_dataset_destroy_check, dsl_dataset_destroy_sync, 1036 &dsda, tag, 0); 1037 ASSERT3P(dsda.rm_origin, ==, NULL); 1038 goto out; 1039 } 1040 1041 dd = ds->ds_dir; 1042 1043 /* 1044 * Check for errors and mark this ds as inconsistent, in 1045 * case we crash while freeing the objects. 1046 */ 1047 err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check, 1048 dsl_dataset_destroy_begin_sync, ds, NULL, 0); 1049 if (err) 1050 goto out; 1051 1052 err = dmu_objset_from_ds(ds, &os); 1053 if (err) 1054 goto out; 1055 1056 /* 1057 * remove the objects in open context, so that we won't 1058 * have too much to do in syncing context. 1059 */ 1060 for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 1061 ds->ds_phys->ds_prev_snap_txg)) { 1062 /* 1063 * Ignore errors, if there is not enough disk space 1064 * we will deal with it in dsl_dataset_destroy_sync(). 1065 */ 1066 (void) dmu_free_object(os, obj); 1067 } 1068 1069 /* 1070 * We need to sync out all in-flight IO before we try to evict 1071 * (the dataset evict func is trying to clear the cached entries 1072 * for this dataset in the ARC). 1073 */ 1074 txg_wait_synced(dd->dd_pool, 0); 1075 1076 /* 1077 * If we managed to free all the objects in open 1078 * context, the user space accounting should be zero. 1079 */ 1080 if (ds->ds_phys->ds_bp.blk_fill == 0 && 1081 dmu_objset_userused_enabled(os)) { 1082 uint64_t count; 1083 1084 ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 || 1085 count == 0); 1086 ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 || 1087 count == 0); 1088 } 1089 1090 if (err != ESRCH) 1091 goto out; 1092 1093 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 1094 err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd); 1095 rw_exit(&dd->dd_pool->dp_config_rwlock); 1096 1097 if (err) 1098 goto out; 1099 1100 if (ds->ds_objset) { 1101 /* 1102 * We need to sync out all in-flight IO before we try 1103 * to evict (the dataset evict func is trying to clear 1104 * the cached entries for this dataset in the ARC). 1105 */ 1106 txg_wait_synced(dd->dd_pool, 0); 1107 } 1108 1109 /* 1110 * Blow away the dsl_dir + head dataset. 1111 */ 1112 dsl_dataset_make_exclusive(ds, tag); 1113 if (ds->ds_objset) { 1114 dmu_objset_evict(ds->ds_objset); 1115 ds->ds_objset = NULL; 1116 } 1117 1118 /* 1119 * If we're removing a clone, we might also need to remove its 1120 * origin. 1121 */ 1122 do { 1123 dsda.need_prep = B_FALSE; 1124 if (dsl_dir_is_clone(dd)) { 1125 err = dsl_dataset_origin_rm_prep(&dsda, tag); 1126 if (err) { 1127 dsl_dir_close(dd, FTAG); 1128 goto out; 1129 } 1130 } 1131 1132 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); 1133 dsl_sync_task_create(dstg, dsl_dataset_destroy_check, 1134 dsl_dataset_destroy_sync, &dsda, tag, 0); 1135 dsl_sync_task_create(dstg, dsl_dir_destroy_check, 1136 dsl_dir_destroy_sync, dd, FTAG, 0); 1137 err = dsl_sync_task_group_wait(dstg); 1138 dsl_sync_task_group_destroy(dstg); 1139 1140 /* 1141 * We could be racing against 'zfs release' or 'zfs destroy -d' 1142 * on the origin snap, in which case we can get EBUSY if we 1143 * needed to destroy the origin snap but were not ready to 1144 * do so. 1145 */ 1146 if (dsda.need_prep) { 1147 ASSERT(err == EBUSY); 1148 ASSERT(dsl_dir_is_clone(dd)); 1149 ASSERT(dsda.rm_origin == NULL); 1150 } 1151 } while (dsda.need_prep); 1152 1153 if (dsda.rm_origin != NULL) 1154 dsl_dataset_disown(dsda.rm_origin, tag); 1155 1156 /* if it is successful, dsl_dir_destroy_sync will close the dd */ 1157 if (err) 1158 dsl_dir_close(dd, FTAG); 1159 out: 1160 dsl_dataset_disown(ds, tag); 1161 return (err); 1162 } 1163 1164 blkptr_t * 1165 dsl_dataset_get_blkptr(dsl_dataset_t *ds) 1166 { 1167 return (&ds->ds_phys->ds_bp); 1168 } 1169 1170 void 1171 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx) 1172 { 1173 ASSERT(dmu_tx_is_syncing(tx)); 1174 /* If it's the meta-objset, set dp_meta_rootbp */ 1175 if (ds == NULL) { 1176 tx->tx_pool->dp_meta_rootbp = *bp; 1177 } else { 1178 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1179 ds->ds_phys->ds_bp = *bp; 1180 } 1181 } 1182 1183 spa_t * 1184 dsl_dataset_get_spa(dsl_dataset_t *ds) 1185 { 1186 return (ds->ds_dir->dd_pool->dp_spa); 1187 } 1188 1189 void 1190 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx) 1191 { 1192 dsl_pool_t *dp; 1193 1194 if (ds == NULL) /* this is the meta-objset */ 1195 return; 1196 1197 ASSERT(ds->ds_objset != NULL); 1198 1199 if (ds->ds_phys->ds_next_snap_obj != 0) 1200 panic("dirtying snapshot!"); 1201 1202 dp = ds->ds_dir->dd_pool; 1203 1204 if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) { 1205 /* up the hold count until we can be written out */ 1206 dmu_buf_add_ref(ds->ds_dbuf, ds); 1207 } 1208 } 1209 1210 /* 1211 * The unique space in the head dataset can be calculated by subtracting 1212 * the space used in the most recent snapshot, that is still being used 1213 * in this file system, from the space currently in use. To figure out 1214 * the space in the most recent snapshot still in use, we need to take 1215 * the total space used in the snapshot and subtract out the space that 1216 * has been freed up since the snapshot was taken. 1217 */ 1218 static void 1219 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds) 1220 { 1221 uint64_t mrs_used; 1222 uint64_t dlused, dlcomp, dluncomp; 1223 1224 ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj); 1225 1226 if (ds->ds_phys->ds_prev_snap_obj != 0) 1227 mrs_used = ds->ds_prev->ds_phys->ds_used_bytes; 1228 else 1229 mrs_used = 0; 1230 1231 VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp, 1232 &dluncomp)); 1233 1234 ASSERT3U(dlused, <=, mrs_used); 1235 ds->ds_phys->ds_unique_bytes = 1236 ds->ds_phys->ds_used_bytes - (mrs_used - dlused); 1237 1238 if (!DS_UNIQUE_IS_ACCURATE(ds) && 1239 spa_version(ds->ds_dir->dd_pool->dp_spa) >= 1240 SPA_VERSION_UNIQUE_ACCURATE) 1241 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1242 } 1243 1244 static uint64_t 1245 dsl_dataset_unique(dsl_dataset_t *ds) 1246 { 1247 if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds)) 1248 dsl_dataset_recalc_head_uniq(ds); 1249 1250 return (ds->ds_phys->ds_unique_bytes); 1251 } 1252 1253 struct killarg { 1254 dsl_dataset_t *ds; 1255 zio_t *zio; 1256 dmu_tx_t *tx; 1257 }; 1258 1259 /* ARGSUSED */ 1260 static int 1261 kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb, 1262 const dnode_phys_t *dnp, void *arg) 1263 { 1264 struct killarg *ka = arg; 1265 1266 if (bp == NULL) 1267 return (0); 1268 1269 if ((zb->zb_level == -1ULL && zb->zb_blkid != 0) || 1270 (zb->zb_object != 0 && dnp == NULL)) { 1271 /* 1272 * It's a block in the intent log. It has no 1273 * accounting, so just free it. 1274 */ 1275 VERIFY3U(0, ==, dsl_free(ka->zio, ka->tx->tx_pool, 1276 ka->tx->tx_txg, bp, NULL, NULL, ARC_NOWAIT)); 1277 } else { 1278 ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); 1279 (void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx); 1280 } 1281 1282 return (0); 1283 } 1284 1285 /* ARGSUSED */ 1286 static int 1287 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx) 1288 { 1289 dsl_dataset_t *ds = arg1; 1290 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 1291 uint64_t count; 1292 int err; 1293 1294 /* 1295 * Can't delete a head dataset if there are snapshots of it. 1296 * (Except if the only snapshots are from the branch we cloned 1297 * from.) 1298 */ 1299 if (ds->ds_prev != NULL && 1300 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1301 return (EINVAL); 1302 1303 /* 1304 * This is really a dsl_dir thing, but check it here so that 1305 * we'll be less likely to leave this dataset inconsistent & 1306 * nearly destroyed. 1307 */ 1308 err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 1309 if (err) 1310 return (err); 1311 if (count != 0) 1312 return (EEXIST); 1313 1314 return (0); 1315 } 1316 1317 /* ARGSUSED */ 1318 static void 1319 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1320 { 1321 dsl_dataset_t *ds = arg1; 1322 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1323 1324 /* Mark it as inconsistent on-disk, in case we crash */ 1325 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1326 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 1327 1328 spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx, 1329 cr, "dataset = %llu", ds->ds_object); 1330 } 1331 1332 static int 1333 dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag, 1334 dmu_tx_t *tx) 1335 { 1336 dsl_dataset_t *ds = dsda->ds; 1337 dsl_dataset_t *ds_prev = ds->ds_prev; 1338 1339 if (dsl_dataset_might_destroy_origin(ds_prev)) { 1340 struct dsl_ds_destroyarg ndsda = {0}; 1341 1342 /* 1343 * If we're not prepared to remove the origin, don't remove 1344 * the clone either. 1345 */ 1346 if (dsda->rm_origin == NULL) { 1347 dsda->need_prep = B_TRUE; 1348 return (EBUSY); 1349 } 1350 1351 ndsda.ds = ds_prev; 1352 ndsda.is_origin_rm = B_TRUE; 1353 return (dsl_dataset_destroy_check(&ndsda, tag, tx)); 1354 } 1355 1356 /* 1357 * If we're not going to remove the origin after all, 1358 * undo the open context setup. 1359 */ 1360 if (dsda->rm_origin != NULL) { 1361 dsl_dataset_disown(dsda->rm_origin, tag); 1362 dsda->rm_origin = NULL; 1363 } 1364 1365 return (0); 1366 } 1367 1368 /* ARGSUSED */ 1369 int 1370 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 1371 { 1372 struct dsl_ds_destroyarg *dsda = arg1; 1373 dsl_dataset_t *ds = dsda->ds; 1374 1375 /* we have an owner hold, so noone else can destroy us */ 1376 ASSERT(!DSL_DATASET_IS_DESTROYED(ds)); 1377 1378 /* 1379 * Only allow deferred destroy on pools that support it. 1380 * NOTE: deferred destroy is only supported on snapshots. 1381 */ 1382 if (dsda->defer) { 1383 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 1384 SPA_VERSION_USERREFS) 1385 return (ENOTSUP); 1386 ASSERT(dsl_dataset_is_snapshot(ds)); 1387 return (0); 1388 } 1389 1390 /* 1391 * Can't delete a head dataset if there are snapshots of it. 1392 * (Except if the only snapshots are from the branch we cloned 1393 * from.) 1394 */ 1395 if (ds->ds_prev != NULL && 1396 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 1397 return (EINVAL); 1398 1399 /* 1400 * If we made changes this txg, traverse_dsl_dataset won't find 1401 * them. Try again. 1402 */ 1403 if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) 1404 return (EAGAIN); 1405 1406 if (dsl_dataset_is_snapshot(ds)) { 1407 /* 1408 * If this snapshot has an elevated user reference count, 1409 * we can't destroy it yet. 1410 */ 1411 if (ds->ds_userrefs > 0 && !dsda->releasing) 1412 return (EBUSY); 1413 1414 mutex_enter(&ds->ds_lock); 1415 /* 1416 * Can't delete a branch point. However, if we're destroying 1417 * a clone and removing its origin due to it having a user 1418 * hold count of 0 and having been marked for deferred destroy, 1419 * it's OK for the origin to have a single clone. 1420 */ 1421 if (ds->ds_phys->ds_num_children > 1422 (dsda->is_origin_rm ? 2 : 1)) { 1423 mutex_exit(&ds->ds_lock); 1424 return (EEXIST); 1425 } 1426 mutex_exit(&ds->ds_lock); 1427 } else if (dsl_dir_is_clone(ds->ds_dir)) { 1428 return (dsl_dataset_origin_check(dsda, arg2, tx)); 1429 } 1430 1431 /* XXX we should do some i/o error checking... */ 1432 return (0); 1433 } 1434 1435 struct refsarg { 1436 kmutex_t lock; 1437 boolean_t gone; 1438 kcondvar_t cv; 1439 }; 1440 1441 /* ARGSUSED */ 1442 static void 1443 dsl_dataset_refs_gone(dmu_buf_t *db, void *argv) 1444 { 1445 struct refsarg *arg = argv; 1446 1447 mutex_enter(&arg->lock); 1448 arg->gone = TRUE; 1449 cv_signal(&arg->cv); 1450 mutex_exit(&arg->lock); 1451 } 1452 1453 static void 1454 dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag) 1455 { 1456 struct refsarg arg; 1457 1458 mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL); 1459 cv_init(&arg.cv, NULL, CV_DEFAULT, NULL); 1460 arg.gone = FALSE; 1461 (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys, 1462 dsl_dataset_refs_gone); 1463 dmu_buf_rele(ds->ds_dbuf, tag); 1464 mutex_enter(&arg.lock); 1465 while (!arg.gone) 1466 cv_wait(&arg.cv, &arg.lock); 1467 ASSERT(arg.gone); 1468 mutex_exit(&arg.lock); 1469 ds->ds_dbuf = NULL; 1470 ds->ds_phys = NULL; 1471 mutex_destroy(&arg.lock); 1472 cv_destroy(&arg.cv); 1473 } 1474 1475 void 1476 dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 1477 { 1478 struct dsl_ds_destroyarg *dsda = arg1; 1479 dsl_dataset_t *ds = dsda->ds; 1480 zio_t *zio; 1481 int err; 1482 int after_branch_point = FALSE; 1483 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1484 objset_t *mos = dp->dp_meta_objset; 1485 dsl_dataset_t *ds_prev = NULL; 1486 uint64_t obj; 1487 1488 ASSERT(ds->ds_owner); 1489 ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1); 1490 ASSERT(ds->ds_prev == NULL || 1491 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 1492 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 1493 1494 if (dsda->defer) { 1495 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 1496 if (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1) { 1497 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1498 ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; 1499 return; 1500 } 1501 } 1502 1503 /* signal any waiters that this dataset is going away */ 1504 mutex_enter(&ds->ds_lock); 1505 ds->ds_owner = dsl_reaper; 1506 cv_broadcast(&ds->ds_exclusive_cv); 1507 mutex_exit(&ds->ds_lock); 1508 1509 /* Remove our reservation */ 1510 if (ds->ds_reserved != 0) { 1511 uint64_t val = 0; 1512 dsl_dataset_set_reservation_sync(ds, &val, cr, tx); 1513 ASSERT3U(ds->ds_reserved, ==, 0); 1514 } 1515 1516 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1517 1518 dsl_pool_ds_destroyed(ds, tx); 1519 1520 obj = ds->ds_object; 1521 1522 if (ds->ds_phys->ds_prev_snap_obj != 0) { 1523 if (ds->ds_prev) { 1524 ds_prev = ds->ds_prev; 1525 } else { 1526 VERIFY(0 == dsl_dataset_hold_obj(dp, 1527 ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); 1528 } 1529 after_branch_point = 1530 (ds_prev->ds_phys->ds_next_snap_obj != obj); 1531 1532 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 1533 if (after_branch_point && 1534 ds_prev->ds_phys->ds_next_clones_obj != 0) { 1535 VERIFY3U(0, ==, zap_remove_int(mos, 1536 ds_prev->ds_phys->ds_next_clones_obj, obj, tx)); 1537 if (ds->ds_phys->ds_next_snap_obj != 0) { 1538 VERIFY(0 == zap_add_int(mos, 1539 ds_prev->ds_phys->ds_next_clones_obj, 1540 ds->ds_phys->ds_next_snap_obj, tx)); 1541 } 1542 } 1543 if (after_branch_point && 1544 ds->ds_phys->ds_next_snap_obj == 0) { 1545 /* This clone is toast. */ 1546 ASSERT(ds_prev->ds_phys->ds_num_children > 1); 1547 ds_prev->ds_phys->ds_num_children--; 1548 1549 /* 1550 * If the clone's origin has no other clones, no 1551 * user holds, and has been marked for deferred 1552 * deletion, then we should have done the necessary 1553 * destroy setup for it. 1554 */ 1555 if (ds_prev->ds_phys->ds_num_children == 1 && 1556 ds_prev->ds_userrefs == 0 && 1557 DS_IS_DEFER_DESTROY(ds_prev)) { 1558 ASSERT3P(dsda->rm_origin, !=, NULL); 1559 } else { 1560 ASSERT3P(dsda->rm_origin, ==, NULL); 1561 } 1562 } else if (!after_branch_point) { 1563 ds_prev->ds_phys->ds_next_snap_obj = 1564 ds->ds_phys->ds_next_snap_obj; 1565 } 1566 } 1567 1568 zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 1569 1570 if (ds->ds_phys->ds_next_snap_obj != 0) { 1571 blkptr_t bp; 1572 dsl_dataset_t *ds_next; 1573 uint64_t itor = 0; 1574 uint64_t old_unique; 1575 int64_t used = 0, compressed = 0, uncompressed = 0; 1576 1577 VERIFY(0 == dsl_dataset_hold_obj(dp, 1578 ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); 1579 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 1580 1581 old_unique = dsl_dataset_unique(ds_next); 1582 1583 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 1584 ds_next->ds_phys->ds_prev_snap_obj = 1585 ds->ds_phys->ds_prev_snap_obj; 1586 ds_next->ds_phys->ds_prev_snap_txg = 1587 ds->ds_phys->ds_prev_snap_txg; 1588 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1589 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 1590 1591 /* 1592 * Transfer to our deadlist (which will become next's 1593 * new deadlist) any entries from next's current 1594 * deadlist which were born before prev, and free the 1595 * other entries. 1596 * 1597 * XXX we're doing this long task with the config lock held 1598 */ 1599 while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) { 1600 if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) { 1601 VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, 1602 &bp, tx)); 1603 if (ds_prev && !after_branch_point && 1604 bp.blk_birth > 1605 ds_prev->ds_phys->ds_prev_snap_txg) { 1606 ds_prev->ds_phys->ds_unique_bytes += 1607 bp_get_dasize(dp->dp_spa, &bp); 1608 } 1609 } else { 1610 used += bp_get_dasize(dp->dp_spa, &bp); 1611 compressed += BP_GET_PSIZE(&bp); 1612 uncompressed += BP_GET_UCSIZE(&bp); 1613 /* XXX check return value? */ 1614 (void) dsl_free(zio, dp, tx->tx_txg, 1615 &bp, NULL, NULL, ARC_NOWAIT); 1616 } 1617 } 1618 1619 ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes); 1620 1621 /* change snapused */ 1622 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 1623 -used, -compressed, -uncompressed, tx); 1624 1625 /* free next's deadlist */ 1626 bplist_close(&ds_next->ds_deadlist); 1627 bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx); 1628 1629 /* set next's deadlist to our deadlist */ 1630 bplist_close(&ds->ds_deadlist); 1631 ds_next->ds_phys->ds_deadlist_obj = 1632 ds->ds_phys->ds_deadlist_obj; 1633 VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos, 1634 ds_next->ds_phys->ds_deadlist_obj)); 1635 ds->ds_phys->ds_deadlist_obj = 0; 1636 1637 if (ds_next->ds_phys->ds_next_snap_obj != 0) { 1638 /* 1639 * Update next's unique to include blocks which 1640 * were previously shared by only this snapshot 1641 * and it. Those blocks will be born after the 1642 * prev snap and before this snap, and will have 1643 * died after the next snap and before the one 1644 * after that (ie. be on the snap after next's 1645 * deadlist). 1646 * 1647 * XXX we're doing this long task with the 1648 * config lock held 1649 */ 1650 dsl_dataset_t *ds_after_next; 1651 uint64_t space; 1652 1653 VERIFY(0 == dsl_dataset_hold_obj(dp, 1654 ds_next->ds_phys->ds_next_snap_obj, 1655 FTAG, &ds_after_next)); 1656 1657 VERIFY(0 == 1658 bplist_space_birthrange(&ds_after_next->ds_deadlist, 1659 ds->ds_phys->ds_prev_snap_txg, 1660 ds->ds_phys->ds_creation_txg, &space)); 1661 ds_next->ds_phys->ds_unique_bytes += space; 1662 1663 dsl_dataset_rele(ds_after_next, FTAG); 1664 ASSERT3P(ds_next->ds_prev, ==, NULL); 1665 } else { 1666 ASSERT3P(ds_next->ds_prev, ==, ds); 1667 dsl_dataset_drop_ref(ds_next->ds_prev, ds_next); 1668 ds_next->ds_prev = NULL; 1669 if (ds_prev) { 1670 VERIFY(0 == dsl_dataset_get_ref(dp, 1671 ds->ds_phys->ds_prev_snap_obj, 1672 ds_next, &ds_next->ds_prev)); 1673 } 1674 1675 dsl_dataset_recalc_head_uniq(ds_next); 1676 1677 /* 1678 * Reduce the amount of our unconsmed refreservation 1679 * being charged to our parent by the amount of 1680 * new unique data we have gained. 1681 */ 1682 if (old_unique < ds_next->ds_reserved) { 1683 int64_t mrsdelta; 1684 uint64_t new_unique = 1685 ds_next->ds_phys->ds_unique_bytes; 1686 1687 ASSERT(old_unique <= new_unique); 1688 mrsdelta = MIN(new_unique - old_unique, 1689 ds_next->ds_reserved - old_unique); 1690 dsl_dir_diduse_space(ds->ds_dir, 1691 DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); 1692 } 1693 } 1694 dsl_dataset_rele(ds_next, FTAG); 1695 } else { 1696 /* 1697 * There's no next snapshot, so this is a head dataset. 1698 * Destroy the deadlist. Unless it's a clone, the 1699 * deadlist should be empty. (If it's a clone, it's 1700 * safe to ignore the deadlist contents.) 1701 */ 1702 struct killarg ka; 1703 1704 ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist)); 1705 bplist_close(&ds->ds_deadlist); 1706 bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx); 1707 ds->ds_phys->ds_deadlist_obj = 0; 1708 1709 /* 1710 * Free everything that we point to (that's born after 1711 * the previous snapshot, if we are a clone) 1712 * 1713 * NB: this should be very quick, because we already 1714 * freed all the objects in open context. 1715 */ 1716 ka.ds = ds; 1717 ka.zio = zio; 1718 ka.tx = tx; 1719 err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg, 1720 TRAVERSE_POST, kill_blkptr, &ka); 1721 ASSERT3U(err, ==, 0); 1722 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 1723 ds->ds_phys->ds_unique_bytes == 0); 1724 } 1725 1726 err = zio_wait(zio); 1727 ASSERT3U(err, ==, 0); 1728 1729 if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) { 1730 /* Erase the link in the dir */ 1731 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 1732 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 1733 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); 1734 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx); 1735 ASSERT(err == 0); 1736 } else { 1737 /* remove from snapshot namespace */ 1738 dsl_dataset_t *ds_head; 1739 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); 1740 VERIFY(0 == dsl_dataset_hold_obj(dp, 1741 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); 1742 VERIFY(0 == dsl_dataset_get_snapname(ds)); 1743 #ifdef ZFS_DEBUG 1744 { 1745 uint64_t val; 1746 1747 err = dsl_dataset_snap_lookup(ds_head, 1748 ds->ds_snapname, &val); 1749 ASSERT3U(err, ==, 0); 1750 ASSERT3U(val, ==, obj); 1751 } 1752 #endif 1753 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx); 1754 ASSERT(err == 0); 1755 dsl_dataset_rele(ds_head, FTAG); 1756 } 1757 1758 if (ds_prev && ds->ds_prev != ds_prev) 1759 dsl_dataset_rele(ds_prev, FTAG); 1760 1761 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 1762 spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx, 1763 cr, "dataset = %llu", ds->ds_object); 1764 1765 if (ds->ds_phys->ds_next_clones_obj != 0) { 1766 uint64_t count; 1767 ASSERT(0 == zap_count(mos, 1768 ds->ds_phys->ds_next_clones_obj, &count) && count == 0); 1769 VERIFY(0 == dmu_object_free(mos, 1770 ds->ds_phys->ds_next_clones_obj, tx)); 1771 } 1772 if (ds->ds_phys->ds_props_obj != 0) 1773 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); 1774 if (ds->ds_phys->ds_userrefs_obj != 0) 1775 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); 1776 dsl_dir_close(ds->ds_dir, ds); 1777 ds->ds_dir = NULL; 1778 dsl_dataset_drain_refs(ds, tag); 1779 VERIFY(0 == dmu_object_free(mos, obj, tx)); 1780 1781 if (dsda->rm_origin) { 1782 /* 1783 * Remove the origin of the clone we just destroyed. 1784 */ 1785 dsl_dataset_t *origin = ds->ds_prev; 1786 struct dsl_ds_destroyarg ndsda = {0}; 1787 1788 ASSERT3P(origin, ==, dsda->rm_origin); 1789 if (origin->ds_objset) { 1790 dmu_objset_evict(origin->ds_objset); 1791 origin->ds_objset = NULL; 1792 } 1793 1794 dsl_dataset_rele(ds->ds_prev, ds); 1795 ds->ds_prev = NULL; 1796 1797 ndsda.ds = origin; 1798 dsl_dataset_destroy_sync(&ndsda, tag, cr, tx); 1799 } 1800 } 1801 1802 static int 1803 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx) 1804 { 1805 uint64_t asize; 1806 1807 if (!dmu_tx_is_syncing(tx)) 1808 return (0); 1809 1810 /* 1811 * If there's an fs-only reservation, any blocks that might become 1812 * owned by the snapshot dataset must be accommodated by space 1813 * outside of the reservation. 1814 */ 1815 asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1816 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE)) 1817 return (ENOSPC); 1818 1819 /* 1820 * Propogate any reserved space for this snapshot to other 1821 * snapshot checks in this sync group. 1822 */ 1823 if (asize > 0) 1824 dsl_dir_willuse_space(ds->ds_dir, asize, tx); 1825 1826 return (0); 1827 } 1828 1829 /* ARGSUSED */ 1830 int 1831 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx) 1832 { 1833 dsl_dataset_t *ds = arg1; 1834 const char *snapname = arg2; 1835 int err; 1836 uint64_t value; 1837 1838 /* 1839 * We don't allow multiple snapshots of the same txg. If there 1840 * is already one, try again. 1841 */ 1842 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg) 1843 return (EAGAIN); 1844 1845 /* 1846 * Check for conflicting name snapshot name. 1847 */ 1848 err = dsl_dataset_snap_lookup(ds, snapname, &value); 1849 if (err == 0) 1850 return (EEXIST); 1851 if (err != ENOENT) 1852 return (err); 1853 1854 /* 1855 * Check that the dataset's name is not too long. Name consists 1856 * of the dataset's length + 1 for the @-sign + snapshot name's length 1857 */ 1858 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) 1859 return (ENAMETOOLONG); 1860 1861 err = dsl_dataset_snapshot_reserve_space(ds, tx); 1862 if (err) 1863 return (err); 1864 1865 ds->ds_trysnap_txg = tx->tx_txg; 1866 return (0); 1867 } 1868 1869 void 1870 dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 1871 { 1872 dsl_dataset_t *ds = arg1; 1873 const char *snapname = arg2; 1874 dsl_pool_t *dp = ds->ds_dir->dd_pool; 1875 dmu_buf_t *dbuf; 1876 dsl_dataset_phys_t *dsphys; 1877 uint64_t dsobj, crtxg; 1878 objset_t *mos = dp->dp_meta_objset; 1879 int err; 1880 1881 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); 1882 1883 /* 1884 * The origin's ds_creation_txg has to be < TXG_INITIAL 1885 */ 1886 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) 1887 crtxg = 1; 1888 else 1889 crtxg = tx->tx_txg; 1890 1891 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 1892 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx); 1893 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf)); 1894 dmu_buf_will_dirty(dbuf, tx); 1895 dsphys = dbuf->db_data; 1896 bzero(dsphys, sizeof (dsl_dataset_phys_t)); 1897 dsphys->ds_dir_obj = ds->ds_dir->dd_object; 1898 dsphys->ds_fsid_guid = unique_create(); 1899 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid, 1900 sizeof (dsphys->ds_guid)); 1901 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj; 1902 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg; 1903 dsphys->ds_next_snap_obj = ds->ds_object; 1904 dsphys->ds_num_children = 1; 1905 dsphys->ds_creation_time = gethrestime_sec(); 1906 dsphys->ds_creation_txg = crtxg; 1907 dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj; 1908 dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes; 1909 dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes; 1910 dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes; 1911 dsphys->ds_flags = ds->ds_phys->ds_flags; 1912 dsphys->ds_bp = ds->ds_phys->ds_bp; 1913 dmu_buf_rele(dbuf, FTAG); 1914 1915 ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0); 1916 if (ds->ds_prev) { 1917 uint64_t next_clones_obj = 1918 ds->ds_prev->ds_phys->ds_next_clones_obj; 1919 ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj == 1920 ds->ds_object || 1921 ds->ds_prev->ds_phys->ds_num_children > 1); 1922 if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) { 1923 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 1924 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 1925 ds->ds_prev->ds_phys->ds_creation_txg); 1926 ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj; 1927 } else if (next_clones_obj != 0) { 1928 VERIFY3U(0, ==, zap_remove_int(mos, 1929 next_clones_obj, dsphys->ds_next_snap_obj, tx)); 1930 VERIFY3U(0, ==, zap_add_int(mos, 1931 next_clones_obj, dsobj, tx)); 1932 } 1933 } 1934 1935 /* 1936 * If we have a reference-reservation on this dataset, we will 1937 * need to increase the amount of refreservation being charged 1938 * since our unique space is going to zero. 1939 */ 1940 if (ds->ds_reserved) { 1941 int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved); 1942 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, 1943 add, 0, 0, tx); 1944 } 1945 1946 bplist_close(&ds->ds_deadlist); 1947 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1948 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg); 1949 ds->ds_phys->ds_prev_snap_obj = dsobj; 1950 ds->ds_phys->ds_prev_snap_txg = crtxg; 1951 ds->ds_phys->ds_unique_bytes = 0; 1952 if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE) 1953 ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE; 1954 ds->ds_phys->ds_deadlist_obj = 1955 bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx); 1956 VERIFY(0 == bplist_open(&ds->ds_deadlist, mos, 1957 ds->ds_phys->ds_deadlist_obj)); 1958 1959 dprintf("snap '%s' -> obj %llu\n", snapname, dsobj); 1960 err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj, 1961 snapname, 8, 1, &dsobj, tx); 1962 ASSERT(err == 0); 1963 1964 if (ds->ds_prev) 1965 dsl_dataset_drop_ref(ds->ds_prev, ds); 1966 VERIFY(0 == dsl_dataset_get_ref(dp, 1967 ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev)); 1968 1969 dsl_pool_ds_snapshotted(ds, tx); 1970 1971 spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr, 1972 "dataset = %llu", dsobj); 1973 } 1974 1975 void 1976 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) 1977 { 1978 ASSERT(dmu_tx_is_syncing(tx)); 1979 ASSERT(ds->ds_objset != NULL); 1980 ASSERT(ds->ds_phys->ds_next_snap_obj == 0); 1981 1982 /* 1983 * in case we had to change ds_fsid_guid when we opened it, 1984 * sync it out now. 1985 */ 1986 dmu_buf_will_dirty(ds->ds_dbuf, tx); 1987 ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid; 1988 1989 dsl_dir_dirty(ds->ds_dir, tx); 1990 dmu_objset_sync(ds->ds_objset, zio, tx); 1991 } 1992 1993 void 1994 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) 1995 { 1996 uint64_t refd, avail, uobjs, aobjs; 1997 1998 dsl_dir_stats(ds->ds_dir, nv); 1999 2000 dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs); 2001 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail); 2002 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd); 2003 2004 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION, 2005 ds->ds_phys->ds_creation_time); 2006 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG, 2007 ds->ds_phys->ds_creation_txg); 2008 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA, 2009 ds->ds_quota); 2010 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION, 2011 ds->ds_reserved); 2012 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID, 2013 ds->ds_phys->ds_guid); 2014 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS, ds->ds_userrefs); 2015 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY, 2016 DS_IS_DEFER_DESTROY(ds) ? 1 : 0); 2017 2018 if (ds->ds_phys->ds_next_snap_obj) { 2019 /* 2020 * This is a snapshot; override the dd's space used with 2021 * our unique space and compression ratio. 2022 */ 2023 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED, 2024 ds->ds_phys->ds_unique_bytes); 2025 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, 2026 ds->ds_phys->ds_compressed_bytes == 0 ? 100 : 2027 (ds->ds_phys->ds_uncompressed_bytes * 100 / 2028 ds->ds_phys->ds_compressed_bytes)); 2029 } 2030 } 2031 2032 void 2033 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat) 2034 { 2035 stat->dds_creation_txg = ds->ds_phys->ds_creation_txg; 2036 stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT; 2037 stat->dds_guid = ds->ds_phys->ds_guid; 2038 if (ds->ds_phys->ds_next_snap_obj) { 2039 stat->dds_is_snapshot = B_TRUE; 2040 stat->dds_num_clones = ds->ds_phys->ds_num_children - 1; 2041 } else { 2042 stat->dds_is_snapshot = B_FALSE; 2043 stat->dds_num_clones = 0; 2044 } 2045 2046 /* clone origin is really a dsl_dir thing... */ 2047 rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER); 2048 if (dsl_dir_is_clone(ds->ds_dir)) { 2049 dsl_dataset_t *ods; 2050 2051 VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool, 2052 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods)); 2053 dsl_dataset_name(ods, stat->dds_origin); 2054 dsl_dataset_drop_ref(ods, FTAG); 2055 } else { 2056 stat->dds_origin[0] = '\0'; 2057 } 2058 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock); 2059 } 2060 2061 uint64_t 2062 dsl_dataset_fsid_guid(dsl_dataset_t *ds) 2063 { 2064 return (ds->ds_fsid_guid); 2065 } 2066 2067 void 2068 dsl_dataset_space(dsl_dataset_t *ds, 2069 uint64_t *refdbytesp, uint64_t *availbytesp, 2070 uint64_t *usedobjsp, uint64_t *availobjsp) 2071 { 2072 *refdbytesp = ds->ds_phys->ds_used_bytes; 2073 *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE); 2074 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) 2075 *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes; 2076 if (ds->ds_quota != 0) { 2077 /* 2078 * Adjust available bytes according to refquota 2079 */ 2080 if (*refdbytesp < ds->ds_quota) 2081 *availbytesp = MIN(*availbytesp, 2082 ds->ds_quota - *refdbytesp); 2083 else 2084 *availbytesp = 0; 2085 } 2086 *usedobjsp = ds->ds_phys->ds_bp.blk_fill; 2087 *availobjsp = DN_MAX_OBJECT - *usedobjsp; 2088 } 2089 2090 boolean_t 2091 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds) 2092 { 2093 dsl_pool_t *dp = ds->ds_dir->dd_pool; 2094 2095 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 2096 dsl_pool_sync_context(dp)); 2097 if (ds->ds_prev == NULL) 2098 return (B_FALSE); 2099 if (ds->ds_phys->ds_bp.blk_birth > 2100 ds->ds_prev->ds_phys->ds_creation_txg) 2101 return (B_TRUE); 2102 return (B_FALSE); 2103 } 2104 2105 /* ARGSUSED */ 2106 static int 2107 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 2108 { 2109 dsl_dataset_t *ds = arg1; 2110 char *newsnapname = arg2; 2111 dsl_dir_t *dd = ds->ds_dir; 2112 dsl_dataset_t *hds; 2113 uint64_t val; 2114 int err; 2115 2116 err = dsl_dataset_hold_obj(dd->dd_pool, 2117 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds); 2118 if (err) 2119 return (err); 2120 2121 /* new name better not be in use */ 2122 err = dsl_dataset_snap_lookup(hds, newsnapname, &val); 2123 dsl_dataset_rele(hds, FTAG); 2124 2125 if (err == 0) 2126 err = EEXIST; 2127 else if (err == ENOENT) 2128 err = 0; 2129 2130 /* dataset name + 1 for the "@" + the new snapshot name must fit */ 2131 if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN) 2132 err = ENAMETOOLONG; 2133 2134 return (err); 2135 } 2136 2137 static void 2138 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, 2139 cred_t *cr, dmu_tx_t *tx) 2140 { 2141 dsl_dataset_t *ds = arg1; 2142 const char *newsnapname = arg2; 2143 dsl_dir_t *dd = ds->ds_dir; 2144 objset_t *mos = dd->dd_pool->dp_meta_objset; 2145 dsl_dataset_t *hds; 2146 int err; 2147 2148 ASSERT(ds->ds_phys->ds_next_snap_obj != 0); 2149 2150 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, 2151 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 2152 2153 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2154 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx); 2155 ASSERT3U(err, ==, 0); 2156 mutex_enter(&ds->ds_lock); 2157 (void) strcpy(ds->ds_snapname, newsnapname); 2158 mutex_exit(&ds->ds_lock); 2159 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj, 2160 ds->ds_snapname, 8, 1, &ds->ds_object, tx); 2161 ASSERT3U(err, ==, 0); 2162 2163 spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx, 2164 cr, "dataset = %llu", ds->ds_object); 2165 dsl_dataset_rele(hds, FTAG); 2166 } 2167 2168 struct renamesnaparg { 2169 dsl_sync_task_group_t *dstg; 2170 char failed[MAXPATHLEN]; 2171 char *oldsnap; 2172 char *newsnap; 2173 }; 2174 2175 static int 2176 dsl_snapshot_rename_one(char *name, void *arg) 2177 { 2178 struct renamesnaparg *ra = arg; 2179 dsl_dataset_t *ds = NULL; 2180 char *cp; 2181 int err; 2182 2183 cp = name + strlen(name); 2184 *cp = '@'; 2185 (void) strcpy(cp + 1, ra->oldsnap); 2186 2187 /* 2188 * For recursive snapshot renames the parent won't be changing 2189 * so we just pass name for both the to/from argument. 2190 */ 2191 err = zfs_secpolicy_rename_perms(name, name, CRED()); 2192 if (err == ENOENT) { 2193 return (0); 2194 } else if (err) { 2195 (void) strcpy(ra->failed, name); 2196 return (err); 2197 } 2198 2199 #ifdef _KERNEL 2200 /* 2201 * For all filesystems undergoing rename, we'll need to unmount it. 2202 */ 2203 (void) zfs_unmount_snap(name, NULL); 2204 #endif 2205 err = dsl_dataset_hold(name, ra->dstg, &ds); 2206 *cp = '\0'; 2207 if (err == ENOENT) { 2208 return (0); 2209 } else if (err) { 2210 (void) strcpy(ra->failed, name); 2211 return (err); 2212 } 2213 2214 dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check, 2215 dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0); 2216 2217 return (0); 2218 } 2219 2220 static int 2221 dsl_recursive_rename(char *oldname, const char *newname) 2222 { 2223 int err; 2224 struct renamesnaparg *ra; 2225 dsl_sync_task_t *dst; 2226 spa_t *spa; 2227 char *cp, *fsname = spa_strdup(oldname); 2228 int len = strlen(oldname); 2229 2230 /* truncate the snapshot name to get the fsname */ 2231 cp = strchr(fsname, '@'); 2232 *cp = '\0'; 2233 2234 err = spa_open(fsname, &spa, FTAG); 2235 if (err) { 2236 kmem_free(fsname, len + 1); 2237 return (err); 2238 } 2239 ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP); 2240 ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 2241 2242 ra->oldsnap = strchr(oldname, '@') + 1; 2243 ra->newsnap = strchr(newname, '@') + 1; 2244 *ra->failed = '\0'; 2245 2246 err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra, 2247 DS_FIND_CHILDREN); 2248 kmem_free(fsname, len + 1); 2249 2250 if (err == 0) { 2251 err = dsl_sync_task_group_wait(ra->dstg); 2252 } 2253 2254 for (dst = list_head(&ra->dstg->dstg_tasks); dst; 2255 dst = list_next(&ra->dstg->dstg_tasks, dst)) { 2256 dsl_dataset_t *ds = dst->dst_arg1; 2257 if (dst->dst_err) { 2258 dsl_dir_name(ds->ds_dir, ra->failed); 2259 (void) strcat(ra->failed, "@"); 2260 (void) strcat(ra->failed, ra->newsnap); 2261 } 2262 dsl_dataset_rele(ds, ra->dstg); 2263 } 2264 2265 if (err) 2266 (void) strcpy(oldname, ra->failed); 2267 2268 dsl_sync_task_group_destroy(ra->dstg); 2269 kmem_free(ra, sizeof (struct renamesnaparg)); 2270 spa_close(spa, FTAG); 2271 return (err); 2272 } 2273 2274 static int 2275 dsl_valid_rename(char *oldname, void *arg) 2276 { 2277 int delta = *(int *)arg; 2278 2279 if (strlen(oldname) + delta >= MAXNAMELEN) 2280 return (ENAMETOOLONG); 2281 2282 return (0); 2283 } 2284 2285 #pragma weak dmu_objset_rename = dsl_dataset_rename 2286 int 2287 dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive) 2288 { 2289 dsl_dir_t *dd; 2290 dsl_dataset_t *ds; 2291 const char *tail; 2292 int err; 2293 2294 err = dsl_dir_open(oldname, FTAG, &dd, &tail); 2295 if (err) 2296 return (err); 2297 /* 2298 * If there are more than 2 references there may be holds 2299 * hanging around that haven't been cleared out yet. 2300 */ 2301 if (dmu_buf_refcount(dd->dd_dbuf) > 2) 2302 txg_wait_synced(dd->dd_pool, 0); 2303 if (tail == NULL) { 2304 int delta = strlen(newname) - strlen(oldname); 2305 2306 /* if we're growing, validate child name lengths */ 2307 if (delta > 0) 2308 err = dmu_objset_find(oldname, dsl_valid_rename, 2309 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS); 2310 2311 if (!err) 2312 err = dsl_dir_rename(dd, newname); 2313 dsl_dir_close(dd, FTAG); 2314 return (err); 2315 } 2316 if (tail[0] != '@') { 2317 /* the name ended in a nonexistant component */ 2318 dsl_dir_close(dd, FTAG); 2319 return (ENOENT); 2320 } 2321 2322 dsl_dir_close(dd, FTAG); 2323 2324 /* new name must be snapshot in same filesystem */ 2325 tail = strchr(newname, '@'); 2326 if (tail == NULL) 2327 return (EINVAL); 2328 tail++; 2329 if (strncmp(oldname, newname, tail - newname) != 0) 2330 return (EXDEV); 2331 2332 if (recursive) { 2333 err = dsl_recursive_rename(oldname, newname); 2334 } else { 2335 err = dsl_dataset_hold(oldname, FTAG, &ds); 2336 if (err) 2337 return (err); 2338 2339 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 2340 dsl_dataset_snapshot_rename_check, 2341 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1); 2342 2343 dsl_dataset_rele(ds, FTAG); 2344 } 2345 2346 return (err); 2347 } 2348 2349 struct promotenode { 2350 list_node_t link; 2351 dsl_dataset_t *ds; 2352 }; 2353 2354 struct promotearg { 2355 list_t shared_snaps, origin_snaps, clone_snaps; 2356 dsl_dataset_t *origin_origin, *origin_head; 2357 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap; 2358 }; 2359 2360 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep); 2361 2362 /* ARGSUSED */ 2363 static int 2364 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx) 2365 { 2366 dsl_dataset_t *hds = arg1; 2367 struct promotearg *pa = arg2; 2368 struct promotenode *snap = list_head(&pa->shared_snaps); 2369 dsl_dataset_t *origin_ds = snap->ds; 2370 int err; 2371 2372 /* Check that it is a real clone */ 2373 if (!dsl_dir_is_clone(hds->ds_dir)) 2374 return (EINVAL); 2375 2376 /* Since this is so expensive, don't do the preliminary check */ 2377 if (!dmu_tx_is_syncing(tx)) 2378 return (0); 2379 2380 if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) 2381 return (EXDEV); 2382 2383 /* compute origin's new unique space */ 2384 snap = list_tail(&pa->clone_snaps); 2385 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2386 err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2387 origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, &pa->unique); 2388 if (err) 2389 return (err); 2390 2391 /* 2392 * Walk the snapshots that we are moving 2393 * 2394 * Compute space to transfer. Consider the incremental changes 2395 * to used for each snapshot: 2396 * (my used) = (prev's used) + (blocks born) - (blocks killed) 2397 * So each snapshot gave birth to: 2398 * (blocks born) = (my used) - (prev's used) + (blocks killed) 2399 * So a sequence would look like: 2400 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0) 2401 * Which simplifies to: 2402 * uN + kN + kN-1 + ... + k1 + k0 2403 * Note however, if we stop before we reach the ORIGIN we get: 2404 * uN + kN + kN-1 + ... + kM - uM-1 2405 */ 2406 pa->used = origin_ds->ds_phys->ds_used_bytes; 2407 pa->comp = origin_ds->ds_phys->ds_compressed_bytes; 2408 pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes; 2409 for (snap = list_head(&pa->shared_snaps); snap; 2410 snap = list_next(&pa->shared_snaps, snap)) { 2411 uint64_t val, dlused, dlcomp, dluncomp; 2412 dsl_dataset_t *ds = snap->ds; 2413 2414 /* Check that the snapshot name does not conflict */ 2415 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2416 err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val); 2417 if (err == 0) 2418 return (EEXIST); 2419 if (err != ENOENT) 2420 return (err); 2421 2422 /* The very first snapshot does not have a deadlist */ 2423 if (ds->ds_phys->ds_prev_snap_obj == 0) 2424 continue; 2425 2426 if (err = bplist_space(&ds->ds_deadlist, 2427 &dlused, &dlcomp, &dluncomp)) 2428 return (err); 2429 pa->used += dlused; 2430 pa->comp += dlcomp; 2431 pa->uncomp += dluncomp; 2432 } 2433 2434 /* 2435 * If we are a clone of a clone then we never reached ORIGIN, 2436 * so we need to subtract out the clone origin's used space. 2437 */ 2438 if (pa->origin_origin) { 2439 pa->used -= pa->origin_origin->ds_phys->ds_used_bytes; 2440 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; 2441 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; 2442 } 2443 2444 /* Check that there is enough space here */ 2445 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, 2446 pa->used); 2447 if (err) 2448 return (err); 2449 2450 /* 2451 * Compute the amounts of space that will be used by snapshots 2452 * after the promotion (for both origin and clone). For each, 2453 * it is the amount of space that will be on all of their 2454 * deadlists (that was not born before their new origin). 2455 */ 2456 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2457 uint64_t space; 2458 2459 /* 2460 * Note, typically this will not be a clone of a clone, 2461 * so snap->ds->ds_origin_txg will be < TXG_INITIAL, so 2462 * these snaplist_space() -> bplist_space_birthrange() 2463 * calls will be fast because they do not have to 2464 * iterate over all bps. 2465 */ 2466 snap = list_head(&pa->origin_snaps); 2467 err = snaplist_space(&pa->shared_snaps, 2468 snap->ds->ds_origin_txg, &pa->cloneusedsnap); 2469 if (err) 2470 return (err); 2471 2472 err = snaplist_space(&pa->clone_snaps, 2473 snap->ds->ds_origin_txg, &space); 2474 if (err) 2475 return (err); 2476 pa->cloneusedsnap += space; 2477 } 2478 if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) { 2479 err = snaplist_space(&pa->origin_snaps, 2480 origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap); 2481 if (err) 2482 return (err); 2483 } 2484 2485 return (0); 2486 } 2487 2488 static void 2489 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2490 { 2491 dsl_dataset_t *hds = arg1; 2492 struct promotearg *pa = arg2; 2493 struct promotenode *snap = list_head(&pa->shared_snaps); 2494 dsl_dataset_t *origin_ds = snap->ds; 2495 dsl_dataset_t *origin_head; 2496 dsl_dir_t *dd = hds->ds_dir; 2497 dsl_pool_t *dp = hds->ds_dir->dd_pool; 2498 dsl_dir_t *odd = NULL; 2499 uint64_t oldnext_obj; 2500 int64_t delta; 2501 2502 ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)); 2503 2504 snap = list_head(&pa->origin_snaps); 2505 origin_head = snap->ds; 2506 2507 /* 2508 * We need to explicitly open odd, since origin_ds's dd will be 2509 * changing. 2510 */ 2511 VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object, 2512 NULL, FTAG, &odd)); 2513 2514 /* change origin's next snap */ 2515 dmu_buf_will_dirty(origin_ds->ds_dbuf, tx); 2516 oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj; 2517 snap = list_tail(&pa->clone_snaps); 2518 ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object); 2519 origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object; 2520 2521 /* change the origin's next clone */ 2522 if (origin_ds->ds_phys->ds_next_clones_obj) { 2523 VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset, 2524 origin_ds->ds_phys->ds_next_clones_obj, 2525 origin_ds->ds_phys->ds_next_snap_obj, tx)); 2526 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset, 2527 origin_ds->ds_phys->ds_next_clones_obj, 2528 oldnext_obj, tx)); 2529 } 2530 2531 /* change origin */ 2532 dmu_buf_will_dirty(dd->dd_dbuf, tx); 2533 ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object); 2534 dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj; 2535 hds->ds_origin_txg = origin_head->ds_origin_txg; 2536 dmu_buf_will_dirty(odd->dd_dbuf, tx); 2537 odd->dd_phys->dd_origin_obj = origin_ds->ds_object; 2538 origin_head->ds_origin_txg = origin_ds->ds_phys->ds_creation_txg; 2539 2540 /* move snapshots to this dir */ 2541 for (snap = list_head(&pa->shared_snaps); snap; 2542 snap = list_next(&pa->shared_snaps, snap)) { 2543 dsl_dataset_t *ds = snap->ds; 2544 2545 /* unregister props as dsl_dir is changing */ 2546 if (ds->ds_objset) { 2547 dmu_objset_evict(ds->ds_objset); 2548 ds->ds_objset = NULL; 2549 } 2550 /* move snap name entry */ 2551 VERIFY(0 == dsl_dataset_get_snapname(ds)); 2552 VERIFY(0 == dsl_dataset_snap_remove(origin_head, 2553 ds->ds_snapname, tx)); 2554 VERIFY(0 == zap_add(dp->dp_meta_objset, 2555 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 2556 8, 1, &ds->ds_object, tx)); 2557 /* change containing dsl_dir */ 2558 dmu_buf_will_dirty(ds->ds_dbuf, tx); 2559 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); 2560 ds->ds_phys->ds_dir_obj = dd->dd_object; 2561 ASSERT3P(ds->ds_dir, ==, odd); 2562 dsl_dir_close(ds->ds_dir, ds); 2563 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object, 2564 NULL, ds, &ds->ds_dir)); 2565 2566 ASSERT3U(dsl_prop_numcb(ds), ==, 0); 2567 } 2568 2569 /* 2570 * Change space accounting. 2571 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either 2572 * both be valid, or both be 0 (resulting in delta == 0). This 2573 * is true for each of {clone,origin} independently. 2574 */ 2575 2576 delta = pa->cloneusedsnap - 2577 dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2578 ASSERT3S(delta, >=, 0); 2579 ASSERT3U(pa->used, >=, delta); 2580 dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx); 2581 dsl_dir_diduse_space(dd, DD_USED_HEAD, 2582 pa->used - delta, pa->comp, pa->uncomp, tx); 2583 2584 delta = pa->originusedsnap - 2585 odd->dd_phys->dd_used_breakdown[DD_USED_SNAP]; 2586 ASSERT3S(delta, <=, 0); 2587 ASSERT3U(pa->used, >=, -delta); 2588 dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx); 2589 dsl_dir_diduse_space(odd, DD_USED_HEAD, 2590 -pa->used - delta, -pa->comp, -pa->uncomp, tx); 2591 2592 origin_ds->ds_phys->ds_unique_bytes = pa->unique; 2593 2594 /* log history record */ 2595 spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx, 2596 cr, "dataset = %llu", hds->ds_object); 2597 2598 dsl_dir_close(odd, FTAG); 2599 } 2600 2601 static char *snaplist_tag = "snaplist"; 2602 /* 2603 * Make a list of dsl_dataset_t's for the snapshots between first_obj 2604 * (exclusive) and last_obj (inclusive). The list will be in reverse 2605 * order (last_obj will be the list_head()). If first_obj == 0, do all 2606 * snapshots back to this dataset's origin. 2607 */ 2608 static int 2609 snaplist_make(dsl_pool_t *dp, boolean_t own, 2610 uint64_t first_obj, uint64_t last_obj, list_t *l) 2611 { 2612 uint64_t obj = last_obj; 2613 2614 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock)); 2615 2616 list_create(l, sizeof (struct promotenode), 2617 offsetof(struct promotenode, link)); 2618 2619 while (obj != first_obj) { 2620 dsl_dataset_t *ds; 2621 struct promotenode *snap; 2622 int err; 2623 2624 if (own) { 2625 err = dsl_dataset_own_obj(dp, obj, 2626 0, snaplist_tag, &ds); 2627 if (err == 0) 2628 dsl_dataset_make_exclusive(ds, snaplist_tag); 2629 } else { 2630 err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds); 2631 } 2632 if (err == ENOENT) { 2633 /* lost race with snapshot destroy */ 2634 struct promotenode *last = list_tail(l); 2635 ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj); 2636 obj = last->ds->ds_phys->ds_prev_snap_obj; 2637 continue; 2638 } else if (err) { 2639 return (err); 2640 } 2641 2642 if (first_obj == 0) 2643 first_obj = ds->ds_dir->dd_phys->dd_origin_obj; 2644 2645 snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP); 2646 snap->ds = ds; 2647 list_insert_tail(l, snap); 2648 obj = ds->ds_phys->ds_prev_snap_obj; 2649 } 2650 2651 return (0); 2652 } 2653 2654 static int 2655 snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep) 2656 { 2657 struct promotenode *snap; 2658 2659 *spacep = 0; 2660 for (snap = list_head(l); snap; snap = list_next(l, snap)) { 2661 uint64_t used; 2662 int err = bplist_space_birthrange(&snap->ds->ds_deadlist, 2663 mintxg, UINT64_MAX, &used); 2664 if (err) 2665 return (err); 2666 *spacep += used; 2667 } 2668 return (0); 2669 } 2670 2671 static void 2672 snaplist_destroy(list_t *l, boolean_t own) 2673 { 2674 struct promotenode *snap; 2675 2676 if (!l || !list_link_active(&l->list_head)) 2677 return; 2678 2679 while ((snap = list_tail(l)) != NULL) { 2680 list_remove(l, snap); 2681 if (own) 2682 dsl_dataset_disown(snap->ds, snaplist_tag); 2683 else 2684 dsl_dataset_rele(snap->ds, snaplist_tag); 2685 kmem_free(snap, sizeof (struct promotenode)); 2686 } 2687 list_destroy(l); 2688 } 2689 2690 /* 2691 * Promote a clone. Nomenclature note: 2692 * "clone" or "cds": the original clone which is being promoted 2693 * "origin" or "ods": the snapshot which is originally clone's origin 2694 * "origin head" or "ohds": the dataset which is the head 2695 * (filesystem/volume) for the origin 2696 * "origin origin": the origin of the origin's filesystem (typically 2697 * NULL, indicating that the clone is not a clone of a clone). 2698 */ 2699 int 2700 dsl_dataset_promote(const char *name) 2701 { 2702 dsl_dataset_t *ds; 2703 dsl_dir_t *dd; 2704 dsl_pool_t *dp; 2705 dmu_object_info_t doi; 2706 struct promotearg pa = { 0 }; 2707 struct promotenode *snap; 2708 int err; 2709 2710 err = dsl_dataset_hold(name, FTAG, &ds); 2711 if (err) 2712 return (err); 2713 dd = ds->ds_dir; 2714 dp = dd->dd_pool; 2715 2716 err = dmu_object_info(dp->dp_meta_objset, 2717 ds->ds_phys->ds_snapnames_zapobj, &doi); 2718 if (err) { 2719 dsl_dataset_rele(ds, FTAG); 2720 return (err); 2721 } 2722 2723 if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) { 2724 dsl_dataset_rele(ds, FTAG); 2725 return (EINVAL); 2726 } 2727 2728 /* 2729 * We are going to inherit all the snapshots taken before our 2730 * origin (i.e., our new origin will be our parent's origin). 2731 * Take ownership of them so that we can rename them into our 2732 * namespace. 2733 */ 2734 rw_enter(&dp->dp_config_rwlock, RW_READER); 2735 2736 err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj, 2737 &pa.shared_snaps); 2738 if (err != 0) 2739 goto out; 2740 2741 err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps); 2742 if (err != 0) 2743 goto out; 2744 2745 snap = list_head(&pa.shared_snaps); 2746 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj); 2747 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj, 2748 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps); 2749 if (err != 0) 2750 goto out; 2751 2752 if (dsl_dir_is_clone(snap->ds->ds_dir)) { 2753 err = dsl_dataset_own_obj(dp, 2754 snap->ds->ds_dir->dd_phys->dd_origin_obj, 2755 0, FTAG, &pa.origin_origin); 2756 if (err != 0) 2757 goto out; 2758 } 2759 2760 out: 2761 rw_exit(&dp->dp_config_rwlock); 2762 2763 /* 2764 * Add in 128x the snapnames zapobj size, since we will be moving 2765 * a bunch of snapnames to the promoted ds, and dirtying their 2766 * bonus buffers. 2767 */ 2768 if (err == 0) { 2769 err = dsl_sync_task_do(dp, dsl_dataset_promote_check, 2770 dsl_dataset_promote_sync, ds, &pa, 2771 2 + 2 * doi.doi_physical_blks); 2772 } 2773 2774 snaplist_destroy(&pa.shared_snaps, B_TRUE); 2775 snaplist_destroy(&pa.clone_snaps, B_FALSE); 2776 snaplist_destroy(&pa.origin_snaps, B_FALSE); 2777 if (pa.origin_origin) 2778 dsl_dataset_disown(pa.origin_origin, FTAG); 2779 dsl_dataset_rele(ds, FTAG); 2780 return (err); 2781 } 2782 2783 struct cloneswaparg { 2784 dsl_dataset_t *cds; /* clone dataset */ 2785 dsl_dataset_t *ohds; /* origin's head dataset */ 2786 boolean_t force; 2787 int64_t unused_refres_delta; /* change in unconsumed refreservation */ 2788 }; 2789 2790 /* ARGSUSED */ 2791 static int 2792 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx) 2793 { 2794 struct cloneswaparg *csa = arg1; 2795 2796 /* they should both be heads */ 2797 if (dsl_dataset_is_snapshot(csa->cds) || 2798 dsl_dataset_is_snapshot(csa->ohds)) 2799 return (EINVAL); 2800 2801 /* the branch point should be just before them */ 2802 if (csa->cds->ds_prev != csa->ohds->ds_prev) 2803 return (EINVAL); 2804 2805 /* cds should be the clone (unless they are unrelated) */ 2806 if (csa->cds->ds_prev != NULL && 2807 csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap && 2808 csa->ohds->ds_object != 2809 csa->cds->ds_prev->ds_phys->ds_next_snap_obj) 2810 return (EINVAL); 2811 2812 /* the clone should be a child of the origin */ 2813 if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir) 2814 return (EINVAL); 2815 2816 /* ohds shouldn't be modified unless 'force' */ 2817 if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds)) 2818 return (ETXTBSY); 2819 2820 /* adjust amount of any unconsumed refreservation */ 2821 csa->unused_refres_delta = 2822 (int64_t)MIN(csa->ohds->ds_reserved, 2823 csa->ohds->ds_phys->ds_unique_bytes) - 2824 (int64_t)MIN(csa->ohds->ds_reserved, 2825 csa->cds->ds_phys->ds_unique_bytes); 2826 2827 if (csa->unused_refres_delta > 0 && 2828 csa->unused_refres_delta > 2829 dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE)) 2830 return (ENOSPC); 2831 2832 return (0); 2833 } 2834 2835 /* ARGSUSED */ 2836 static void 2837 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 2838 { 2839 struct cloneswaparg *csa = arg1; 2840 dsl_pool_t *dp = csa->cds->ds_dir->dd_pool; 2841 2842 ASSERT(csa->cds->ds_reserved == 0); 2843 ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota); 2844 2845 dmu_buf_will_dirty(csa->cds->ds_dbuf, tx); 2846 dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx); 2847 2848 if (csa->cds->ds_objset != NULL) { 2849 dmu_objset_evict(csa->cds->ds_objset); 2850 csa->cds->ds_objset = NULL; 2851 } 2852 2853 if (csa->ohds->ds_objset != NULL) { 2854 dmu_objset_evict(csa->ohds->ds_objset); 2855 csa->ohds->ds_objset = NULL; 2856 } 2857 2858 /* 2859 * Reset origin's unique bytes, if it exists. 2860 */ 2861 if (csa->cds->ds_prev) { 2862 dsl_dataset_t *origin = csa->cds->ds_prev; 2863 dmu_buf_will_dirty(origin->ds_dbuf, tx); 2864 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2865 origin->ds_phys->ds_prev_snap_txg, UINT64_MAX, 2866 &origin->ds_phys->ds_unique_bytes)); 2867 } 2868 2869 /* swap blkptrs */ 2870 { 2871 blkptr_t tmp; 2872 tmp = csa->ohds->ds_phys->ds_bp; 2873 csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp; 2874 csa->cds->ds_phys->ds_bp = tmp; 2875 } 2876 2877 /* set dd_*_bytes */ 2878 { 2879 int64_t dused, dcomp, duncomp; 2880 uint64_t cdl_used, cdl_comp, cdl_uncomp; 2881 uint64_t odl_used, odl_comp, odl_uncomp; 2882 2883 ASSERT3U(csa->cds->ds_dir->dd_phys-> 2884 dd_used_breakdown[DD_USED_SNAP], ==, 0); 2885 2886 VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used, 2887 &cdl_comp, &cdl_uncomp)); 2888 VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used, 2889 &odl_comp, &odl_uncomp)); 2890 2891 dused = csa->cds->ds_phys->ds_used_bytes + cdl_used - 2892 (csa->ohds->ds_phys->ds_used_bytes + odl_used); 2893 dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp - 2894 (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp); 2895 duncomp = csa->cds->ds_phys->ds_uncompressed_bytes + 2896 cdl_uncomp - 2897 (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp); 2898 2899 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD, 2900 dused, dcomp, duncomp, tx); 2901 dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD, 2902 -dused, -dcomp, -duncomp, tx); 2903 2904 /* 2905 * The difference in the space used by snapshots is the 2906 * difference in snapshot space due to the head's 2907 * deadlist (since that's the only thing that's 2908 * changing that affects the snapused). 2909 */ 2910 VERIFY(0 == bplist_space_birthrange(&csa->cds->ds_deadlist, 2911 csa->ohds->ds_origin_txg, UINT64_MAX, &cdl_used)); 2912 VERIFY(0 == bplist_space_birthrange(&csa->ohds->ds_deadlist, 2913 csa->ohds->ds_origin_txg, UINT64_MAX, &odl_used)); 2914 dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used, 2915 DD_USED_HEAD, DD_USED_SNAP, tx); 2916 } 2917 2918 #define SWITCH64(x, y) \ 2919 { \ 2920 uint64_t __tmp = (x); \ 2921 (x) = (y); \ 2922 (y) = __tmp; \ 2923 } 2924 2925 /* swap ds_*_bytes */ 2926 SWITCH64(csa->ohds->ds_phys->ds_used_bytes, 2927 csa->cds->ds_phys->ds_used_bytes); 2928 SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes, 2929 csa->cds->ds_phys->ds_compressed_bytes); 2930 SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes, 2931 csa->cds->ds_phys->ds_uncompressed_bytes); 2932 SWITCH64(csa->ohds->ds_phys->ds_unique_bytes, 2933 csa->cds->ds_phys->ds_unique_bytes); 2934 2935 /* apply any parent delta for change in unconsumed refreservation */ 2936 dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV, 2937 csa->unused_refres_delta, 0, 0, tx); 2938 2939 /* swap deadlists */ 2940 bplist_close(&csa->cds->ds_deadlist); 2941 bplist_close(&csa->ohds->ds_deadlist); 2942 SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj, 2943 csa->cds->ds_phys->ds_deadlist_obj); 2944 VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset, 2945 csa->cds->ds_phys->ds_deadlist_obj)); 2946 VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset, 2947 csa->ohds->ds_phys->ds_deadlist_obj)); 2948 2949 dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx); 2950 } 2951 2952 /* 2953 * Swap 'clone' with its origin head datasets. Used at the end of "zfs 2954 * recv" into an existing fs to swizzle the file system to the new 2955 * version, and by "zfs rollback". Can also be used to swap two 2956 * independent head datasets if neither has any snapshots. 2957 */ 2958 int 2959 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head, 2960 boolean_t force) 2961 { 2962 struct cloneswaparg csa; 2963 int error; 2964 2965 ASSERT(clone->ds_owner); 2966 ASSERT(origin_head->ds_owner); 2967 retry: 2968 /* Need exclusive access for the swap */ 2969 rw_enter(&clone->ds_rwlock, RW_WRITER); 2970 if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) { 2971 rw_exit(&clone->ds_rwlock); 2972 rw_enter(&origin_head->ds_rwlock, RW_WRITER); 2973 if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) { 2974 rw_exit(&origin_head->ds_rwlock); 2975 goto retry; 2976 } 2977 } 2978 csa.cds = clone; 2979 csa.ohds = origin_head; 2980 csa.force = force; 2981 error = dsl_sync_task_do(clone->ds_dir->dd_pool, 2982 dsl_dataset_clone_swap_check, 2983 dsl_dataset_clone_swap_sync, &csa, NULL, 9); 2984 return (error); 2985 } 2986 2987 /* 2988 * Given a pool name and a dataset object number in that pool, 2989 * return the name of that dataset. 2990 */ 2991 int 2992 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf) 2993 { 2994 spa_t *spa; 2995 dsl_pool_t *dp; 2996 dsl_dataset_t *ds; 2997 int error; 2998 2999 if ((error = spa_open(pname, &spa, FTAG)) != 0) 3000 return (error); 3001 dp = spa_get_dsl(spa); 3002 rw_enter(&dp->dp_config_rwlock, RW_READER); 3003 if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) { 3004 dsl_dataset_name(ds, buf); 3005 dsl_dataset_rele(ds, FTAG); 3006 } 3007 rw_exit(&dp->dp_config_rwlock); 3008 spa_close(spa, FTAG); 3009 3010 return (error); 3011 } 3012 3013 int 3014 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota, 3015 uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv) 3016 { 3017 int error = 0; 3018 3019 ASSERT3S(asize, >, 0); 3020 3021 /* 3022 * *ref_rsrv is the portion of asize that will come from any 3023 * unconsumed refreservation space. 3024 */ 3025 *ref_rsrv = 0; 3026 3027 mutex_enter(&ds->ds_lock); 3028 /* 3029 * Make a space adjustment for reserved bytes. 3030 */ 3031 if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) { 3032 ASSERT3U(*used, >=, 3033 ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3034 *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes); 3035 *ref_rsrv = 3036 asize - MIN(asize, parent_delta(ds, asize + inflight)); 3037 } 3038 3039 if (!check_quota || ds->ds_quota == 0) { 3040 mutex_exit(&ds->ds_lock); 3041 return (0); 3042 } 3043 /* 3044 * If they are requesting more space, and our current estimate 3045 * is over quota, they get to try again unless the actual 3046 * on-disk is over quota and there are no pending changes (which 3047 * may free up space for us). 3048 */ 3049 if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) { 3050 if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota) 3051 error = ERESTART; 3052 else 3053 error = EDQUOT; 3054 } 3055 mutex_exit(&ds->ds_lock); 3056 3057 return (error); 3058 } 3059 3060 /* ARGSUSED */ 3061 static int 3062 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 3063 { 3064 dsl_dataset_t *ds = arg1; 3065 uint64_t *quotap = arg2; 3066 uint64_t new_quota = *quotap; 3067 3068 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA) 3069 return (ENOTSUP); 3070 3071 if (new_quota == 0) 3072 return (0); 3073 3074 if (new_quota < ds->ds_phys->ds_used_bytes || 3075 new_quota < ds->ds_reserved) 3076 return (ENOSPC); 3077 3078 return (0); 3079 } 3080 3081 /* ARGSUSED */ 3082 void 3083 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3084 { 3085 dsl_dataset_t *ds = arg1; 3086 uint64_t *quotap = arg2; 3087 uint64_t new_quota = *quotap; 3088 3089 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3090 3091 ds->ds_quota = new_quota; 3092 3093 dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx); 3094 3095 spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa, 3096 tx, cr, "%lld dataset = %llu ", 3097 (longlong_t)new_quota, ds->ds_object); 3098 } 3099 3100 int 3101 dsl_dataset_set_quota(const char *dsname, uint64_t quota) 3102 { 3103 dsl_dataset_t *ds; 3104 int err; 3105 3106 err = dsl_dataset_hold(dsname, FTAG, &ds); 3107 if (err) 3108 return (err); 3109 3110 if (quota != ds->ds_quota) { 3111 /* 3112 * If someone removes a file, then tries to set the quota, we 3113 * want to make sure the file freeing takes effect. 3114 */ 3115 txg_wait_open(ds->ds_dir->dd_pool, 0); 3116 3117 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3118 dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync, 3119 ds, "a, 0); 3120 } 3121 dsl_dataset_rele(ds, FTAG); 3122 return (err); 3123 } 3124 3125 static int 3126 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 3127 { 3128 dsl_dataset_t *ds = arg1; 3129 uint64_t *reservationp = arg2; 3130 uint64_t new_reservation = *reservationp; 3131 uint64_t unique; 3132 3133 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 3134 SPA_VERSION_REFRESERVATION) 3135 return (ENOTSUP); 3136 3137 if (dsl_dataset_is_snapshot(ds)) 3138 return (EINVAL); 3139 3140 /* 3141 * If we are doing the preliminary check in open context, the 3142 * space estimates may be inaccurate. 3143 */ 3144 if (!dmu_tx_is_syncing(tx)) 3145 return (0); 3146 3147 mutex_enter(&ds->ds_lock); 3148 unique = dsl_dataset_unique(ds); 3149 mutex_exit(&ds->ds_lock); 3150 3151 if (MAX(unique, new_reservation) > MAX(unique, ds->ds_reserved)) { 3152 uint64_t delta = MAX(unique, new_reservation) - 3153 MAX(unique, ds->ds_reserved); 3154 3155 if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE)) 3156 return (ENOSPC); 3157 if (ds->ds_quota > 0 && 3158 new_reservation > ds->ds_quota) 3159 return (ENOSPC); 3160 } 3161 3162 return (0); 3163 } 3164 3165 /* ARGSUSED */ 3166 static void 3167 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr, 3168 dmu_tx_t *tx) 3169 { 3170 dsl_dataset_t *ds = arg1; 3171 uint64_t *reservationp = arg2; 3172 uint64_t new_reservation = *reservationp; 3173 uint64_t unique; 3174 int64_t delta; 3175 3176 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3177 3178 mutex_enter(&ds->ds_dir->dd_lock); 3179 mutex_enter(&ds->ds_lock); 3180 unique = dsl_dataset_unique(ds); 3181 delta = MAX(0, (int64_t)(new_reservation - unique)) - 3182 MAX(0, (int64_t)(ds->ds_reserved - unique)); 3183 ds->ds_reserved = new_reservation; 3184 mutex_exit(&ds->ds_lock); 3185 3186 dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx); 3187 mutex_exit(&ds->ds_dir->dd_lock); 3188 dsl_dir_prop_set_uint64_sync(ds->ds_dir, "refreservation", 3189 new_reservation, cr, tx); 3190 3191 spa_history_internal_log(LOG_DS_REFRESERV, 3192 ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu", 3193 (longlong_t)new_reservation, ds->ds_object); 3194 } 3195 3196 int 3197 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation) 3198 { 3199 dsl_dataset_t *ds; 3200 int err; 3201 3202 err = dsl_dataset_hold(dsname, FTAG, &ds); 3203 if (err) 3204 return (err); 3205 3206 err = dsl_sync_task_do(ds->ds_dir->dd_pool, 3207 dsl_dataset_set_reservation_check, 3208 dsl_dataset_set_reservation_sync, ds, &reservation, 0); 3209 dsl_dataset_rele(ds, FTAG); 3210 return (err); 3211 } 3212 3213 static int 3214 dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx) 3215 { 3216 dsl_dataset_t *ds = arg1; 3217 char *htag = arg2; 3218 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3219 int error = 0; 3220 3221 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3222 return (ENOTSUP); 3223 3224 if (!dsl_dataset_is_snapshot(ds)) 3225 return (EINVAL); 3226 3227 if (strlen(htag) >= ZAP_MAXNAMELEN) 3228 return (ENAMETOOLONG); 3229 3230 /* tags must be unique */ 3231 mutex_enter(&ds->ds_lock); 3232 if (ds->ds_phys->ds_userrefs_obj) { 3233 error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag, 3234 8, 1, tx); 3235 if (error == 0) 3236 error = EEXIST; 3237 else if (error == ENOENT) 3238 error = 0; 3239 } 3240 mutex_exit(&ds->ds_lock); 3241 3242 return (error); 3243 } 3244 3245 static void 3246 dsl_dataset_user_hold_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx) 3247 { 3248 dsl_dataset_t *ds = arg1; 3249 char *htag = arg2; 3250 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3251 time_t now = gethrestime_sec(); 3252 uint64_t zapobj; 3253 3254 mutex_enter(&ds->ds_lock); 3255 if (ds->ds_phys->ds_userrefs_obj == 0) { 3256 /* 3257 * This is the first user hold for this dataset. Create 3258 * the userrefs zap object. 3259 */ 3260 dmu_buf_will_dirty(ds->ds_dbuf, tx); 3261 zapobj = ds->ds_phys->ds_userrefs_obj = 3262 zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx); 3263 } else { 3264 zapobj = ds->ds_phys->ds_userrefs_obj; 3265 } 3266 ds->ds_userrefs++; 3267 mutex_exit(&ds->ds_lock); 3268 3269 VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx)); 3270 3271 spa_history_internal_log(LOG_DS_USER_HOLD, 3272 ds->ds_dir->dd_pool->dp_spa, tx, cr, "<%s> dataset = %llu", 3273 htag, ds->ds_object); 3274 } 3275 3276 struct dsl_ds_holdarg { 3277 dsl_sync_task_group_t *dstg; 3278 char *htag; 3279 char *snapname; 3280 boolean_t recursive; 3281 boolean_t gotone; 3282 char failed[MAXPATHLEN]; 3283 }; 3284 3285 static int 3286 dsl_dataset_user_hold_one(char *dsname, void *arg) 3287 { 3288 struct dsl_ds_holdarg *ha = arg; 3289 dsl_dataset_t *ds; 3290 int error; 3291 char *name; 3292 3293 /* alloc a buffer to hold dsname@snapname plus terminating NULL */ 3294 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3295 error = dsl_dataset_hold(name, ha->dstg, &ds); 3296 strfree(name); 3297 if (error == 0) { 3298 ha->gotone = B_TRUE; 3299 dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check, 3300 dsl_dataset_user_hold_sync, ds, ha->htag, 0); 3301 } else if (error == ENOENT && ha->recursive) { 3302 error = 0; 3303 } else { 3304 (void) strcpy(ha->failed, dsname); 3305 } 3306 return (error); 3307 } 3308 3309 int 3310 dsl_dataset_user_hold(char *dsname, char *snapname, char *htag, 3311 boolean_t recursive) 3312 { 3313 struct dsl_ds_holdarg *ha; 3314 dsl_sync_task_t *dst; 3315 spa_t *spa; 3316 int error; 3317 3318 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3319 3320 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3321 3322 error = spa_open(dsname, &spa, FTAG); 3323 if (error) { 3324 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3325 return (error); 3326 } 3327 3328 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3329 ha->htag = htag; 3330 ha->snapname = snapname; 3331 ha->recursive = recursive; 3332 if (recursive) { 3333 error = dmu_objset_find(dsname, dsl_dataset_user_hold_one, 3334 ha, DS_FIND_CHILDREN); 3335 } else { 3336 error = dsl_dataset_user_hold_one(dsname, ha); 3337 } 3338 if (error == 0) 3339 error = dsl_sync_task_group_wait(ha->dstg); 3340 3341 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3342 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3343 dsl_dataset_t *ds = dst->dst_arg1; 3344 3345 if (dst->dst_err) { 3346 dsl_dataset_name(ds, ha->failed); 3347 *strchr(ha->failed, '@') = '\0'; 3348 } 3349 dsl_dataset_rele(ds, ha->dstg); 3350 } 3351 3352 if (error == 0 && recursive && !ha->gotone) 3353 error = ENOENT; 3354 3355 if (error) 3356 (void) strcpy(dsname, ha->failed); 3357 3358 dsl_sync_task_group_destroy(ha->dstg); 3359 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3360 spa_close(spa, FTAG); 3361 return (error); 3362 } 3363 3364 struct dsl_ds_releasearg { 3365 dsl_dataset_t *ds; 3366 const char *htag; 3367 boolean_t own; /* do we own or just hold ds? */ 3368 }; 3369 3370 static int 3371 dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag, 3372 boolean_t *might_destroy) 3373 { 3374 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3375 uint64_t zapobj; 3376 uint64_t tmp; 3377 int error; 3378 3379 *might_destroy = B_FALSE; 3380 3381 mutex_enter(&ds->ds_lock); 3382 zapobj = ds->ds_phys->ds_userrefs_obj; 3383 if (zapobj == 0) { 3384 /* The tag can't possibly exist */ 3385 mutex_exit(&ds->ds_lock); 3386 return (ESRCH); 3387 } 3388 3389 /* Make sure the tag exists */ 3390 error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp); 3391 if (error) { 3392 mutex_exit(&ds->ds_lock); 3393 if (error == ENOENT) 3394 error = ESRCH; 3395 return (error); 3396 } 3397 3398 if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 && 3399 DS_IS_DEFER_DESTROY(ds)) 3400 *might_destroy = B_TRUE; 3401 3402 mutex_exit(&ds->ds_lock); 3403 return (0); 3404 } 3405 3406 static int 3407 dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx) 3408 { 3409 struct dsl_ds_releasearg *ra = arg1; 3410 dsl_dataset_t *ds = ra->ds; 3411 boolean_t might_destroy; 3412 int error; 3413 3414 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS) 3415 return (ENOTSUP); 3416 3417 error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy); 3418 if (error) 3419 return (error); 3420 3421 if (might_destroy) { 3422 struct dsl_ds_destroyarg dsda = {0}; 3423 3424 if (dmu_tx_is_syncing(tx)) { 3425 /* 3426 * If we're not prepared to remove the snapshot, 3427 * we can't allow the release to happen right now. 3428 */ 3429 if (!ra->own) 3430 return (EBUSY); 3431 if (ds->ds_objset) { 3432 dmu_objset_evict(ds->ds_objset); 3433 ds->ds_objset = NULL; 3434 } 3435 } 3436 dsda.ds = ds; 3437 dsda.releasing = B_TRUE; 3438 return (dsl_dataset_destroy_check(&dsda, tag, tx)); 3439 } 3440 3441 return (0); 3442 } 3443 3444 static void 3445 dsl_dataset_user_release_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx) 3446 { 3447 struct dsl_ds_releasearg *ra = arg1; 3448 dsl_dataset_t *ds = ra->ds; 3449 spa_t *spa = ds->ds_dir->dd_pool->dp_spa; 3450 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 3451 uint64_t zapobj; 3452 uint64_t dsobj = ds->ds_object; 3453 uint64_t refs; 3454 3455 mutex_enter(&ds->ds_lock); 3456 ds->ds_userrefs--; 3457 refs = ds->ds_userrefs; 3458 mutex_exit(&ds->ds_lock); 3459 zapobj = ds->ds_phys->ds_userrefs_obj; 3460 VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx)); 3461 if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 && 3462 DS_IS_DEFER_DESTROY(ds)) { 3463 struct dsl_ds_destroyarg dsda = {0}; 3464 3465 ASSERT(ra->own); 3466 dsda.ds = ds; 3467 dsda.releasing = B_TRUE; 3468 /* We already did the destroy_check */ 3469 dsl_dataset_destroy_sync(&dsda, tag, cr, tx); 3470 } 3471 3472 spa_history_internal_log(LOG_DS_USER_RELEASE, 3473 spa, tx, cr, "<%s> %lld dataset = %llu", 3474 ra->htag, (longlong_t)refs, dsobj); 3475 } 3476 3477 static int 3478 dsl_dataset_user_release_one(char *dsname, void *arg) 3479 { 3480 struct dsl_ds_holdarg *ha = arg; 3481 struct dsl_ds_releasearg *ra; 3482 dsl_dataset_t *ds; 3483 int error; 3484 void *dtag = ha->dstg; 3485 char *name; 3486 boolean_t own = B_FALSE; 3487 boolean_t might_destroy; 3488 3489 if (strlen(ha->htag) >= ZAP_MAXNAMELEN) 3490 return (ENAMETOOLONG); 3491 3492 /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */ 3493 name = kmem_asprintf("%s@%s", dsname, ha->snapname); 3494 error = dsl_dataset_hold(name, dtag, &ds); 3495 strfree(name); 3496 if (error == ENOENT && ha->recursive) 3497 return (0); 3498 (void) strcpy(ha->failed, dsname); 3499 if (error) 3500 return (error); 3501 3502 ha->gotone = B_TRUE; 3503 3504 ASSERT(dsl_dataset_is_snapshot(ds)); 3505 3506 error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy); 3507 if (error) { 3508 dsl_dataset_rele(ds, dtag); 3509 return (error); 3510 } 3511 3512 if (might_destroy) { 3513 #ifdef _KERNEL 3514 error = zfs_unmount_snap(name, NULL); 3515 if (error) { 3516 dsl_dataset_rele(ds, dtag); 3517 return (error); 3518 } 3519 error = dsl_dataset_zvol_cleanup(ds, name); 3520 if (error) { 3521 dsl_dataset_rele(ds, dtag); 3522 return (error); 3523 } 3524 #endif 3525 if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) { 3526 dsl_dataset_rele(ds, dtag); 3527 return (EBUSY); 3528 } else { 3529 own = B_TRUE; 3530 dsl_dataset_make_exclusive(ds, dtag); 3531 } 3532 } 3533 3534 ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP); 3535 ra->ds = ds; 3536 ra->htag = ha->htag; 3537 ra->own = own; 3538 dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check, 3539 dsl_dataset_user_release_sync, ra, dtag, 0); 3540 3541 return (0); 3542 } 3543 3544 int 3545 dsl_dataset_user_release(char *dsname, char *snapname, char *htag, 3546 boolean_t recursive) 3547 { 3548 struct dsl_ds_holdarg *ha; 3549 dsl_sync_task_t *dst; 3550 spa_t *spa; 3551 int error; 3552 3553 ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP); 3554 3555 (void) strlcpy(ha->failed, dsname, sizeof (ha->failed)); 3556 3557 error = spa_open(dsname, &spa, FTAG); 3558 if (error) { 3559 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3560 return (error); 3561 } 3562 3563 ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa)); 3564 ha->htag = htag; 3565 ha->snapname = snapname; 3566 ha->recursive = recursive; 3567 if (recursive) { 3568 error = dmu_objset_find(dsname, dsl_dataset_user_release_one, 3569 ha, DS_FIND_CHILDREN); 3570 } else { 3571 error = dsl_dataset_user_release_one(dsname, ha); 3572 } 3573 if (error == 0) 3574 error = dsl_sync_task_group_wait(ha->dstg); 3575 3576 for (dst = list_head(&ha->dstg->dstg_tasks); dst; 3577 dst = list_next(&ha->dstg->dstg_tasks, dst)) { 3578 struct dsl_ds_releasearg *ra = dst->dst_arg1; 3579 dsl_dataset_t *ds = ra->ds; 3580 3581 if (dst->dst_err) 3582 dsl_dataset_name(ds, ha->failed); 3583 3584 if (ra->own) 3585 dsl_dataset_disown(ds, ha->dstg); 3586 else 3587 dsl_dataset_rele(ds, ha->dstg); 3588 3589 kmem_free(ra, sizeof (struct dsl_ds_releasearg)); 3590 } 3591 3592 if (error == 0 && recursive && !ha->gotone) 3593 error = ENOENT; 3594 3595 if (error) 3596 (void) strcpy(dsname, ha->failed); 3597 3598 dsl_sync_task_group_destroy(ha->dstg); 3599 kmem_free(ha, sizeof (struct dsl_ds_holdarg)); 3600 spa_close(spa, FTAG); 3601 return (error); 3602 } 3603 3604 int 3605 dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp) 3606 { 3607 dsl_dataset_t *ds; 3608 int err; 3609 3610 err = dsl_dataset_hold(dsname, FTAG, &ds); 3611 if (err) 3612 return (err); 3613 3614 VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP)); 3615 if (ds->ds_phys->ds_userrefs_obj != 0) { 3616 zap_attribute_t *za; 3617 zap_cursor_t zc; 3618 3619 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 3620 for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset, 3621 ds->ds_phys->ds_userrefs_obj); 3622 zap_cursor_retrieve(&zc, za) == 0; 3623 zap_cursor_advance(&zc)) { 3624 VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name, 3625 za->za_first_integer)); 3626 } 3627 zap_cursor_fini(&zc); 3628 kmem_free(za, sizeof (zap_attribute_t)); 3629 } 3630 dsl_dataset_rele(ds, FTAG); 3631 return (0); 3632 } 3633 3634 /* 3635 * Note, this fuction is used as the callback for dmu_objset_find(). We 3636 * always return 0 so that we will continue to find and process 3637 * inconsistent datasets, even if we encounter an error trying to 3638 * process one of them. 3639 */ 3640 /* ARGSUSED */ 3641 int 3642 dsl_destroy_inconsistent(char *dsname, void *arg) 3643 { 3644 dsl_dataset_t *ds; 3645 3646 if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) { 3647 if (DS_IS_INCONSISTENT(ds)) 3648 (void) dsl_dataset_destroy(ds, FTAG, B_FALSE); 3649 else 3650 dsl_dataset_disown(ds, FTAG); 3651 } 3652 return (0); 3653 } 3654