1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013 by Delphix. All rights reserved. 24 * Copyright (c) 2013 Steven Hartland. All rights reserved. 25 */ 26 27 #include <sys/zfs_context.h> 28 #include <sys/dsl_userhold.h> 29 #include <sys/dsl_dataset.h> 30 #include <sys/dsl_synctask.h> 31 #include <sys/dmu_tx.h> 32 #include <sys/dsl_pool.h> 33 #include <sys/dsl_dir.h> 34 #include <sys/dmu_traverse.h> 35 #include <sys/dsl_scan.h> 36 #include <sys/dmu_objset.h> 37 #include <sys/zap.h> 38 #include <sys/zfeature.h> 39 #include <sys/zfs_ioctl.h> 40 #include <sys/dsl_deleg.h> 41 42 typedef struct dmu_snapshots_destroy_arg { 43 nvlist_t *dsda_snaps; 44 nvlist_t *dsda_successful_snaps; 45 boolean_t dsda_defer; 46 nvlist_t *dsda_errlist; 47 } dmu_snapshots_destroy_arg_t; 48 49 int 50 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) 51 { 52 if (!dsl_dataset_is_snapshot(ds)) 53 return (SET_ERROR(EINVAL)); 54 55 if (dsl_dataset_long_held(ds)) 56 return (SET_ERROR(EBUSY)); 57 58 /* 59 * Only allow deferred destroy on pools that support it. 60 * NOTE: deferred destroy is only supported on snapshots. 61 */ 62 if (defer) { 63 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 64 SPA_VERSION_USERREFS) 65 return (SET_ERROR(ENOTSUP)); 66 return (0); 67 } 68 69 /* 70 * If this snapshot has an elevated user reference count, 71 * we can't destroy it yet. 72 */ 73 if (ds->ds_userrefs > 0) 74 return (SET_ERROR(EBUSY)); 75 76 /* 77 * Can't delete a branch point. 78 */ 79 if (ds->ds_phys->ds_num_children > 1) 80 return (SET_ERROR(EEXIST)); 81 82 return (0); 83 } 84 85 static int 86 dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) 87 { 88 dmu_snapshots_destroy_arg_t *dsda = arg; 89 dsl_pool_t *dp = dmu_tx_pool(tx); 90 nvpair_t *pair; 91 int error = 0; 92 93 if (!dmu_tx_is_syncing(tx)) 94 return (0); 95 96 for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL); 97 pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) { 98 dsl_dataset_t *ds; 99 100 error = dsl_dataset_hold(dp, nvpair_name(pair), 101 FTAG, &ds); 102 103 /* 104 * If the snapshot does not exist, silently ignore it 105 * (it's "already destroyed"). 106 */ 107 if (error == ENOENT) 108 continue; 109 110 if (error == 0) { 111 error = dsl_destroy_snapshot_check_impl(ds, 112 dsda->dsda_defer); 113 dsl_dataset_rele(ds, FTAG); 114 } 115 116 if (error == 0) { 117 fnvlist_add_boolean(dsda->dsda_successful_snaps, 118 nvpair_name(pair)); 119 } else { 120 fnvlist_add_int32(dsda->dsda_errlist, 121 nvpair_name(pair), error); 122 } 123 } 124 125 pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL); 126 if (pair != NULL) 127 return (fnvpair_value_int32(pair)); 128 129 return (0); 130 } 131 132 struct process_old_arg { 133 dsl_dataset_t *ds; 134 dsl_dataset_t *ds_prev; 135 boolean_t after_branch_point; 136 zio_t *pio; 137 uint64_t used, comp, uncomp; 138 }; 139 140 static int 141 process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 142 { 143 struct process_old_arg *poa = arg; 144 dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; 145 146 if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) { 147 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); 148 if (poa->ds_prev && !poa->after_branch_point && 149 bp->blk_birth > 150 poa->ds_prev->ds_phys->ds_prev_snap_txg) { 151 poa->ds_prev->ds_phys->ds_unique_bytes += 152 bp_get_dsize_sync(dp->dp_spa, bp); 153 } 154 } else { 155 poa->used += bp_get_dsize_sync(dp->dp_spa, bp); 156 poa->comp += BP_GET_PSIZE(bp); 157 poa->uncomp += BP_GET_UCSIZE(bp); 158 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); 159 } 160 return (0); 161 } 162 163 static void 164 process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, 165 dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) 166 { 167 struct process_old_arg poa = { 0 }; 168 dsl_pool_t *dp = ds->ds_dir->dd_pool; 169 objset_t *mos = dp->dp_meta_objset; 170 uint64_t deadlist_obj; 171 172 ASSERT(ds->ds_deadlist.dl_oldfmt); 173 ASSERT(ds_next->ds_deadlist.dl_oldfmt); 174 175 poa.ds = ds; 176 poa.ds_prev = ds_prev; 177 poa.after_branch_point = after_branch_point; 178 poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 179 VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, 180 process_old_cb, &poa, tx)); 181 VERIFY0(zio_wait(poa.pio)); 182 ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes); 183 184 /* change snapused */ 185 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 186 -poa.used, -poa.comp, -poa.uncomp, tx); 187 188 /* swap next's deadlist to our deadlist */ 189 dsl_deadlist_close(&ds->ds_deadlist); 190 dsl_deadlist_close(&ds_next->ds_deadlist); 191 deadlist_obj = ds->ds_phys->ds_deadlist_obj; 192 ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj; 193 ds_next->ds_phys->ds_deadlist_obj = deadlist_obj; 194 dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj); 195 dsl_deadlist_open(&ds_next->ds_deadlist, mos, 196 ds_next->ds_phys->ds_deadlist_obj); 197 } 198 199 static void 200 dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) 201 { 202 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 203 zap_cursor_t zc; 204 zap_attribute_t za; 205 206 /* 207 * If it is the old version, dd_clones doesn't exist so we can't 208 * find the clones, but dsl_deadlist_remove_key() is a no-op so it 209 * doesn't matter. 210 */ 211 if (ds->ds_dir->dd_phys->dd_clones == 0) 212 return; 213 214 for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones); 215 zap_cursor_retrieve(&zc, &za) == 0; 216 zap_cursor_advance(&zc)) { 217 dsl_dataset_t *clone; 218 219 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 220 za.za_first_integer, FTAG, &clone)); 221 if (clone->ds_dir->dd_origin_txg > mintxg) { 222 dsl_deadlist_remove_key(&clone->ds_deadlist, 223 mintxg, tx); 224 dsl_dataset_remove_clones_key(clone, mintxg, tx); 225 } 226 dsl_dataset_rele(clone, FTAG); 227 } 228 zap_cursor_fini(&zc); 229 } 230 231 void 232 dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) 233 { 234 int err; 235 int after_branch_point = FALSE; 236 dsl_pool_t *dp = ds->ds_dir->dd_pool; 237 objset_t *mos = dp->dp_meta_objset; 238 dsl_dataset_t *ds_prev = NULL; 239 uint64_t obj; 240 241 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 242 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 243 ASSERT(refcount_is_zero(&ds->ds_longholds)); 244 245 if (defer && 246 (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) { 247 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 248 dmu_buf_will_dirty(ds->ds_dbuf, tx); 249 ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY; 250 spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); 251 return; 252 } 253 254 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 255 256 /* We need to log before removing it from the namespace. */ 257 spa_history_log_internal_ds(ds, "destroy", tx, ""); 258 259 dsl_scan_ds_destroyed(ds, tx); 260 261 obj = ds->ds_object; 262 263 if (ds->ds_phys->ds_prev_snap_obj != 0) { 264 ASSERT3P(ds->ds_prev, ==, NULL); 265 VERIFY0(dsl_dataset_hold_obj(dp, 266 ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev)); 267 after_branch_point = 268 (ds_prev->ds_phys->ds_next_snap_obj != obj); 269 270 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 271 if (after_branch_point && 272 ds_prev->ds_phys->ds_next_clones_obj != 0) { 273 dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); 274 if (ds->ds_phys->ds_next_snap_obj != 0) { 275 VERIFY0(zap_add_int(mos, 276 ds_prev->ds_phys->ds_next_clones_obj, 277 ds->ds_phys->ds_next_snap_obj, tx)); 278 } 279 } 280 if (!after_branch_point) { 281 ds_prev->ds_phys->ds_next_snap_obj = 282 ds->ds_phys->ds_next_snap_obj; 283 } 284 } 285 286 dsl_dataset_t *ds_next; 287 uint64_t old_unique; 288 uint64_t used = 0, comp = 0, uncomp = 0; 289 290 VERIFY0(dsl_dataset_hold_obj(dp, 291 ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next)); 292 ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj); 293 294 old_unique = ds_next->ds_phys->ds_unique_bytes; 295 296 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 297 ds_next->ds_phys->ds_prev_snap_obj = 298 ds->ds_phys->ds_prev_snap_obj; 299 ds_next->ds_phys->ds_prev_snap_txg = 300 ds->ds_phys->ds_prev_snap_txg; 301 ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 302 ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0); 303 304 if (ds_next->ds_deadlist.dl_oldfmt) { 305 process_old_deadlist(ds, ds_prev, ds_next, 306 after_branch_point, tx); 307 } else { 308 /* Adjust prev's unique space. */ 309 if (ds_prev && !after_branch_point) { 310 dsl_deadlist_space_range(&ds_next->ds_deadlist, 311 ds_prev->ds_phys->ds_prev_snap_txg, 312 ds->ds_phys->ds_prev_snap_txg, 313 &used, &comp, &uncomp); 314 ds_prev->ds_phys->ds_unique_bytes += used; 315 } 316 317 /* Adjust snapused. */ 318 dsl_deadlist_space_range(&ds_next->ds_deadlist, 319 ds->ds_phys->ds_prev_snap_txg, UINT64_MAX, 320 &used, &comp, &uncomp); 321 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 322 -used, -comp, -uncomp, tx); 323 324 /* Move blocks to be freed to pool's free list. */ 325 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, 326 &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg, 327 tx); 328 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, 329 DD_USED_HEAD, used, comp, uncomp, tx); 330 331 /* Merge our deadlist into next's and free it. */ 332 dsl_deadlist_merge(&ds_next->ds_deadlist, 333 ds->ds_phys->ds_deadlist_obj, tx); 334 } 335 dsl_deadlist_close(&ds->ds_deadlist); 336 dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); 337 dmu_buf_will_dirty(ds->ds_dbuf, tx); 338 ds->ds_phys->ds_deadlist_obj = 0; 339 340 /* Collapse range in clone heads */ 341 dsl_dataset_remove_clones_key(ds, 342 ds->ds_phys->ds_creation_txg, tx); 343 344 if (dsl_dataset_is_snapshot(ds_next)) { 345 dsl_dataset_t *ds_nextnext; 346 347 /* 348 * Update next's unique to include blocks which 349 * were previously shared by only this snapshot 350 * and it. Those blocks will be born after the 351 * prev snap and before this snap, and will have 352 * died after the next snap and before the one 353 * after that (ie. be on the snap after next's 354 * deadlist). 355 */ 356 VERIFY0(dsl_dataset_hold_obj(dp, 357 ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext)); 358 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, 359 ds->ds_phys->ds_prev_snap_txg, 360 ds->ds_phys->ds_creation_txg, 361 &used, &comp, &uncomp); 362 ds_next->ds_phys->ds_unique_bytes += used; 363 dsl_dataset_rele(ds_nextnext, FTAG); 364 ASSERT3P(ds_next->ds_prev, ==, NULL); 365 366 /* Collapse range in this head. */ 367 dsl_dataset_t *hds; 368 VERIFY0(dsl_dataset_hold_obj(dp, 369 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds)); 370 dsl_deadlist_remove_key(&hds->ds_deadlist, 371 ds->ds_phys->ds_creation_txg, tx); 372 dsl_dataset_rele(hds, FTAG); 373 374 } else { 375 ASSERT3P(ds_next->ds_prev, ==, ds); 376 dsl_dataset_rele(ds_next->ds_prev, ds_next); 377 ds_next->ds_prev = NULL; 378 if (ds_prev) { 379 VERIFY0(dsl_dataset_hold_obj(dp, 380 ds->ds_phys->ds_prev_snap_obj, 381 ds_next, &ds_next->ds_prev)); 382 } 383 384 dsl_dataset_recalc_head_uniq(ds_next); 385 386 /* 387 * Reduce the amount of our unconsumed refreservation 388 * being charged to our parent by the amount of 389 * new unique data we have gained. 390 */ 391 if (old_unique < ds_next->ds_reserved) { 392 int64_t mrsdelta; 393 uint64_t new_unique = 394 ds_next->ds_phys->ds_unique_bytes; 395 396 ASSERT(old_unique <= new_unique); 397 mrsdelta = MIN(new_unique - old_unique, 398 ds_next->ds_reserved - old_unique); 399 dsl_dir_diduse_space(ds->ds_dir, 400 DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); 401 } 402 } 403 dsl_dataset_rele(ds_next, FTAG); 404 405 /* 406 * This must be done after the dsl_traverse(), because it will 407 * re-open the objset. 408 */ 409 if (ds->ds_objset) { 410 dmu_objset_evict(ds->ds_objset); 411 ds->ds_objset = NULL; 412 } 413 414 /* remove from snapshot namespace */ 415 dsl_dataset_t *ds_head; 416 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0); 417 VERIFY0(dsl_dataset_hold_obj(dp, 418 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head)); 419 VERIFY0(dsl_dataset_get_snapname(ds)); 420 #ifdef ZFS_DEBUG 421 { 422 uint64_t val; 423 424 err = dsl_dataset_snap_lookup(ds_head, 425 ds->ds_snapname, &val); 426 ASSERT0(err); 427 ASSERT3U(val, ==, obj); 428 } 429 #endif 430 VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx)); 431 dsl_dataset_rele(ds_head, FTAG); 432 433 if (ds_prev != NULL) 434 dsl_dataset_rele(ds_prev, FTAG); 435 436 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 437 438 if (ds->ds_phys->ds_next_clones_obj != 0) { 439 uint64_t count; 440 ASSERT0(zap_count(mos, 441 ds->ds_phys->ds_next_clones_obj, &count) && count == 0); 442 VERIFY0(dmu_object_free(mos, 443 ds->ds_phys->ds_next_clones_obj, tx)); 444 } 445 if (ds->ds_phys->ds_props_obj != 0) 446 VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx)); 447 if (ds->ds_phys->ds_userrefs_obj != 0) 448 VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx)); 449 dsl_dir_rele(ds->ds_dir, ds); 450 ds->ds_dir = NULL; 451 VERIFY0(dmu_object_free(mos, obj, tx)); 452 } 453 454 static void 455 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx) 456 { 457 dmu_snapshots_destroy_arg_t *dsda = arg; 458 dsl_pool_t *dp = dmu_tx_pool(tx); 459 nvpair_t *pair; 460 461 for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL); 462 pair != NULL; 463 pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) { 464 dsl_dataset_t *ds; 465 466 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds)); 467 468 dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx); 469 dsl_dataset_rele(ds, FTAG); 470 } 471 } 472 473 /* 474 * The semantics of this function are described in the comment above 475 * lzc_destroy_snaps(). To summarize: 476 * 477 * The snapshots must all be in the same pool. 478 * 479 * Snapshots that don't exist will be silently ignored (considered to be 480 * "already deleted"). 481 * 482 * On success, all snaps will be destroyed and this will return 0. 483 * On failure, no snaps will be destroyed, the errlist will be filled in, 484 * and this will return an errno. 485 */ 486 int 487 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, 488 nvlist_t *errlist) 489 { 490 dmu_snapshots_destroy_arg_t dsda; 491 int error; 492 nvpair_t *pair; 493 494 pair = nvlist_next_nvpair(snaps, NULL); 495 if (pair == NULL) 496 return (0); 497 498 dsda.dsda_snaps = snaps; 499 dsda.dsda_successful_snaps = fnvlist_alloc(); 500 dsda.dsda_defer = defer; 501 dsda.dsda_errlist = errlist; 502 503 error = dsl_sync_task(nvpair_name(pair), 504 dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync, 505 &dsda, 0); 506 fnvlist_free(dsda.dsda_successful_snaps); 507 508 return (error); 509 } 510 511 int 512 dsl_destroy_snapshot(const char *name, boolean_t defer) 513 { 514 int error; 515 nvlist_t *nvl = fnvlist_alloc(); 516 nvlist_t *errlist = fnvlist_alloc(); 517 518 fnvlist_add_boolean(nvl, name); 519 error = dsl_destroy_snapshots_nvl(nvl, defer, errlist); 520 fnvlist_free(errlist); 521 fnvlist_free(nvl); 522 return (error); 523 } 524 525 struct killarg { 526 dsl_dataset_t *ds; 527 dmu_tx_t *tx; 528 }; 529 530 /* ARGSUSED */ 531 static int 532 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 533 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 534 { 535 struct killarg *ka = arg; 536 dmu_tx_t *tx = ka->tx; 537 538 if (bp == NULL) 539 return (0); 540 541 if (zb->zb_level == ZB_ZIL_LEVEL) { 542 ASSERT(zilog != NULL); 543 /* 544 * It's a block in the intent log. It has no 545 * accounting, so just free it. 546 */ 547 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); 548 } else { 549 ASSERT(zilog == NULL); 550 ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg); 551 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); 552 } 553 554 return (0); 555 } 556 557 static void 558 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) 559 { 560 struct killarg ka; 561 562 /* 563 * Free everything that we point to (that's born after 564 * the previous snapshot, if we are a clone) 565 * 566 * NB: this should be very quick, because we already 567 * freed all the objects in open context. 568 */ 569 ka.ds = ds; 570 ka.tx = tx; 571 VERIFY0(traverse_dataset(ds, 572 ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST, 573 kill_blkptr, &ka)); 574 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0); 575 } 576 577 typedef struct dsl_destroy_head_arg { 578 const char *ddha_name; 579 } dsl_destroy_head_arg_t; 580 581 int 582 dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) 583 { 584 int error; 585 uint64_t count; 586 objset_t *mos; 587 588 if (dsl_dataset_is_snapshot(ds)) 589 return (SET_ERROR(EINVAL)); 590 591 if (refcount_count(&ds->ds_longholds) != expected_holds) 592 return (SET_ERROR(EBUSY)); 593 594 mos = ds->ds_dir->dd_pool->dp_meta_objset; 595 596 /* 597 * Can't delete a head dataset if there are snapshots of it. 598 * (Except if the only snapshots are from the branch we cloned 599 * from.) 600 */ 601 if (ds->ds_prev != NULL && 602 ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) 603 return (SET_ERROR(EBUSY)); 604 605 /* 606 * Can't delete if there are children of this fs. 607 */ 608 error = zap_count(mos, 609 ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count); 610 if (error != 0) 611 return (error); 612 if (count != 0) 613 return (SET_ERROR(EEXIST)); 614 615 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && 616 ds->ds_prev->ds_phys->ds_num_children == 2 && 617 ds->ds_prev->ds_userrefs == 0) { 618 /* We need to remove the origin snapshot as well. */ 619 if (!refcount_is_zero(&ds->ds_prev->ds_longholds)) 620 return (SET_ERROR(EBUSY)); 621 } 622 return (0); 623 } 624 625 static int 626 dsl_destroy_head_check(void *arg, dmu_tx_t *tx) 627 { 628 dsl_destroy_head_arg_t *ddha = arg; 629 dsl_pool_t *dp = dmu_tx_pool(tx); 630 dsl_dataset_t *ds; 631 int error; 632 633 error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds); 634 if (error != 0) 635 return (error); 636 637 error = dsl_destroy_head_check_impl(ds, 0); 638 dsl_dataset_rele(ds, FTAG); 639 return (error); 640 } 641 642 static void 643 dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) 644 { 645 dsl_dir_t *dd; 646 dsl_pool_t *dp = dmu_tx_pool(tx); 647 objset_t *mos = dp->dp_meta_objset; 648 dd_used_t t; 649 650 ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock)); 651 652 VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); 653 654 ASSERT0(dd->dd_phys->dd_head_dataset_obj); 655 656 /* 657 * Remove our reservation. The impl() routine avoids setting the 658 * actual property, which would require the (already destroyed) ds. 659 */ 660 dsl_dir_set_reservation_sync_impl(dd, 0, tx); 661 662 ASSERT0(dd->dd_phys->dd_used_bytes); 663 ASSERT0(dd->dd_phys->dd_reserved); 664 for (t = 0; t < DD_USED_NUM; t++) 665 ASSERT0(dd->dd_phys->dd_used_breakdown[t]); 666 667 VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); 668 VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); 669 VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx)); 670 VERIFY0(zap_remove(mos, 671 dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); 672 673 dsl_dir_rele(dd, FTAG); 674 VERIFY0(dmu_object_free(mos, ddobj, tx)); 675 } 676 677 void 678 dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) 679 { 680 dsl_pool_t *dp = dmu_tx_pool(tx); 681 objset_t *mos = dp->dp_meta_objset; 682 uint64_t obj, ddobj, prevobj = 0; 683 boolean_t rmorigin; 684 685 ASSERT3U(ds->ds_phys->ds_num_children, <=, 1); 686 ASSERT(ds->ds_prev == NULL || 687 ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object); 688 ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg); 689 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 690 691 /* We need to log before removing it from the namespace. */ 692 spa_history_log_internal_ds(ds, "destroy", tx, ""); 693 694 rmorigin = (dsl_dir_is_clone(ds->ds_dir) && 695 DS_IS_DEFER_DESTROY(ds->ds_prev) && 696 ds->ds_prev->ds_phys->ds_num_children == 2 && 697 ds->ds_prev->ds_userrefs == 0); 698 699 /* Remove our reservation */ 700 if (ds->ds_reserved != 0) { 701 dsl_dataset_set_refreservation_sync_impl(ds, 702 (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), 703 0, tx); 704 ASSERT0(ds->ds_reserved); 705 } 706 707 dsl_scan_ds_destroyed(ds, tx); 708 709 obj = ds->ds_object; 710 711 if (ds->ds_phys->ds_prev_snap_obj != 0) { 712 /* This is a clone */ 713 ASSERT(ds->ds_prev != NULL); 714 ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj); 715 ASSERT0(ds->ds_phys->ds_next_snap_obj); 716 717 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 718 if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) { 719 dsl_dataset_remove_from_next_clones(ds->ds_prev, 720 obj, tx); 721 } 722 723 ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1); 724 ds->ds_prev->ds_phys->ds_num_children--; 725 } 726 727 zfeature_info_t *async_destroy = 728 &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]; 729 objset_t *os; 730 731 /* 732 * Destroy the deadlist. Unless it's a clone, the 733 * deadlist should be empty. (If it's a clone, it's 734 * safe to ignore the deadlist contents.) 735 */ 736 dsl_deadlist_close(&ds->ds_deadlist); 737 dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx); 738 dmu_buf_will_dirty(ds->ds_dbuf, tx); 739 ds->ds_phys->ds_deadlist_obj = 0; 740 741 VERIFY0(dmu_objset_from_ds(ds, &os)); 742 743 if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) { 744 old_synchronous_dataset_destroy(ds, tx); 745 } else { 746 /* 747 * Move the bptree into the pool's list of trees to 748 * clean up and update space accounting information. 749 */ 750 uint64_t used, comp, uncomp; 751 752 zil_destroy_sync(dmu_objset_zil(os), tx); 753 754 if (!spa_feature_is_active(dp->dp_spa, async_destroy)) { 755 dsl_scan_t *scn = dp->dp_scan; 756 757 spa_feature_incr(dp->dp_spa, async_destroy, tx); 758 dp->dp_bptree_obj = bptree_alloc(mos, tx); 759 VERIFY0(zap_add(mos, 760 DMU_POOL_DIRECTORY_OBJECT, 761 DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, 762 &dp->dp_bptree_obj, tx)); 763 ASSERT(!scn->scn_async_destroying); 764 scn->scn_async_destroying = B_TRUE; 765 } 766 767 used = ds->ds_dir->dd_phys->dd_used_bytes; 768 comp = ds->ds_dir->dd_phys->dd_compressed_bytes; 769 uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes; 770 771 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 772 ds->ds_phys->ds_unique_bytes == used); 773 774 bptree_add(mos, dp->dp_bptree_obj, 775 &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg, 776 used, comp, uncomp, tx); 777 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 778 -used, -comp, -uncomp, tx); 779 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, 780 used, comp, uncomp, tx); 781 } 782 783 if (ds->ds_prev != NULL) { 784 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 785 VERIFY0(zap_remove_int(mos, 786 ds->ds_prev->ds_dir->dd_phys->dd_clones, 787 ds->ds_object, tx)); 788 } 789 prevobj = ds->ds_prev->ds_object; 790 dsl_dataset_rele(ds->ds_prev, ds); 791 ds->ds_prev = NULL; 792 } 793 794 /* 795 * This must be done after the dsl_traverse(), because it will 796 * re-open the objset. 797 */ 798 if (ds->ds_objset) { 799 dmu_objset_evict(ds->ds_objset); 800 ds->ds_objset = NULL; 801 } 802 803 /* Erase the link in the dir */ 804 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 805 ds->ds_dir->dd_phys->dd_head_dataset_obj = 0; 806 ddobj = ds->ds_dir->dd_object; 807 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0); 808 VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx)); 809 810 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 811 812 ASSERT0(ds->ds_phys->ds_next_clones_obj); 813 ASSERT0(ds->ds_phys->ds_props_obj); 814 ASSERT0(ds->ds_phys->ds_userrefs_obj); 815 dsl_dir_rele(ds->ds_dir, ds); 816 ds->ds_dir = NULL; 817 VERIFY0(dmu_object_free(mos, obj, tx)); 818 819 dsl_dir_destroy_sync(ddobj, tx); 820 821 if (rmorigin) { 822 dsl_dataset_t *prev; 823 VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev)); 824 dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx); 825 dsl_dataset_rele(prev, FTAG); 826 } 827 } 828 829 static void 830 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx) 831 { 832 dsl_destroy_head_arg_t *ddha = arg; 833 dsl_pool_t *dp = dmu_tx_pool(tx); 834 dsl_dataset_t *ds; 835 836 VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); 837 dsl_destroy_head_sync_impl(ds, tx); 838 dsl_dataset_rele(ds, FTAG); 839 } 840 841 static void 842 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx) 843 { 844 dsl_destroy_head_arg_t *ddha = arg; 845 dsl_pool_t *dp = dmu_tx_pool(tx); 846 dsl_dataset_t *ds; 847 848 VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); 849 850 /* Mark it as inconsistent on-disk, in case we crash */ 851 dmu_buf_will_dirty(ds->ds_dbuf, tx); 852 ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT; 853 854 spa_history_log_internal_ds(ds, "destroy begin", tx, ""); 855 dsl_dataset_rele(ds, FTAG); 856 } 857 858 int 859 dsl_destroy_head(const char *name) 860 { 861 dsl_destroy_head_arg_t ddha; 862 int error; 863 spa_t *spa; 864 boolean_t isenabled; 865 866 #ifdef _KERNEL 867 zfs_destroy_unmount_origin(name); 868 #endif 869 870 error = spa_open(name, &spa, FTAG); 871 if (error != 0) 872 return (error); 873 isenabled = spa_feature_is_enabled(spa, 874 &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]); 875 spa_close(spa, FTAG); 876 877 ddha.ddha_name = name; 878 879 if (!isenabled) { 880 objset_t *os; 881 882 error = dsl_sync_task(name, dsl_destroy_head_check, 883 dsl_destroy_head_begin_sync, &ddha, 0); 884 if (error != 0) 885 return (error); 886 887 /* 888 * Head deletion is processed in one txg on old pools; 889 * remove the objects from open context so that the txg sync 890 * is not too long. 891 */ 892 error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); 893 if (error == 0) { 894 uint64_t prev_snap_txg = 895 dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg; 896 for (uint64_t obj = 0; error == 0; 897 error = dmu_object_next(os, &obj, FALSE, 898 prev_snap_txg)) 899 (void) dmu_free_object(os, obj); 900 /* sync out all frees */ 901 txg_wait_synced(dmu_objset_pool(os), 0); 902 dmu_objset_disown(os, FTAG); 903 } 904 } 905 906 return (dsl_sync_task(name, dsl_destroy_head_check, 907 dsl_destroy_head_sync, &ddha, 0)); 908 } 909 910 /* 911 * Note, this function is used as the callback for dmu_objset_find(). We 912 * always return 0 so that we will continue to find and process 913 * inconsistent datasets, even if we encounter an error trying to 914 * process one of them. 915 */ 916 /* ARGSUSED */ 917 int 918 dsl_destroy_inconsistent(const char *dsname, void *arg) 919 { 920 objset_t *os; 921 922 if (dmu_objset_hold(dsname, FTAG, &os) == 0) { 923 boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os)); 924 dmu_objset_rele(os, FTAG); 925 if (inconsistent) 926 (void) dsl_destroy_head(dsname); 927 } 928 return (0); 929 } 930