1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2018 by Delphix. All rights reserved. 24 * Copyright (c) 2013 Steven Hartland. All rights reserved. 25 * Copyright (c) 2013 by Joyent, Inc. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 */ 28 29 #include <sys/zfs_context.h> 30 #include <sys/dsl_userhold.h> 31 #include <sys/dsl_dataset.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dsl_destroy.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/dsl_pool.h> 36 #include <sys/dsl_dir.h> 37 #include <sys/dmu_traverse.h> 38 #include <sys/dsl_scan.h> 39 #include <sys/dmu_objset.h> 40 #include <sys/zap.h> 41 #include <sys/zfeature.h> 42 #include <sys/zfs_ioctl.h> 43 #include <sys/dsl_deleg.h> 44 #include <sys/dmu_impl.h> 45 #include <sys/zcp.h> 46 47 int 48 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) 49 { 50 if (!ds->ds_is_snapshot) 51 return (SET_ERROR(EINVAL)); 52 53 if (dsl_dataset_long_held(ds)) 54 return (SET_ERROR(EBUSY)); 55 56 /* 57 * Only allow deferred destroy on pools that support it. 58 * NOTE: deferred destroy is only supported on snapshots. 59 */ 60 if (defer) { 61 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 62 SPA_VERSION_USERREFS) 63 return (SET_ERROR(ENOTSUP)); 64 return (0); 65 } 66 67 /* 68 * If this snapshot has an elevated user reference count, 69 * we can't destroy it yet. 70 */ 71 if (ds->ds_userrefs > 0) 72 return (SET_ERROR(EBUSY)); 73 74 /* 75 * Can't delete a branch point. 76 */ 77 if (dsl_dataset_phys(ds)->ds_num_children > 1) 78 return (SET_ERROR(EEXIST)); 79 80 return (0); 81 } 82 83 int 84 dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) 85 { 86 dsl_destroy_snapshot_arg_t *ddsa = arg; 87 const char *dsname = ddsa->ddsa_name; 88 boolean_t defer = ddsa->ddsa_defer; 89 90 dsl_pool_t *dp = dmu_tx_pool(tx); 91 int error = 0; 92 dsl_dataset_t *ds; 93 94 error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 95 96 /* 97 * If the snapshot does not exist, silently ignore it, and 98 * dsl_destroy_snapshot_sync() will be a no-op 99 * (it's "already destroyed"). 100 */ 101 if (error == ENOENT) 102 return (0); 103 104 if (error == 0) { 105 error = dsl_destroy_snapshot_check_impl(ds, defer); 106 dsl_dataset_rele(ds, FTAG); 107 } 108 109 return (error); 110 } 111 112 struct process_old_arg { 113 dsl_dataset_t *ds; 114 dsl_dataset_t *ds_prev; 115 boolean_t after_branch_point; 116 zio_t *pio; 117 uint64_t used, comp, uncomp; 118 }; 119 120 static int 121 process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 122 { 123 struct process_old_arg *poa = arg; 124 dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; 125 126 ASSERT(!BP_IS_HOLE(bp)); 127 128 if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) { 129 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); 130 if (poa->ds_prev && !poa->after_branch_point && 131 bp->blk_birth > 132 dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) { 133 dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes += 134 bp_get_dsize_sync(dp->dp_spa, bp); 135 } 136 } else { 137 poa->used += bp_get_dsize_sync(dp->dp_spa, bp); 138 poa->comp += BP_GET_PSIZE(bp); 139 poa->uncomp += BP_GET_UCSIZE(bp); 140 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); 141 } 142 return (0); 143 } 144 145 static void 146 process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, 147 dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) 148 { 149 struct process_old_arg poa = { 0 }; 150 dsl_pool_t *dp = ds->ds_dir->dd_pool; 151 objset_t *mos = dp->dp_meta_objset; 152 uint64_t deadlist_obj; 153 154 ASSERT(ds->ds_deadlist.dl_oldfmt); 155 ASSERT(ds_next->ds_deadlist.dl_oldfmt); 156 157 poa.ds = ds; 158 poa.ds_prev = ds_prev; 159 poa.after_branch_point = after_branch_point; 160 poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 161 VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, 162 process_old_cb, &poa, tx)); 163 VERIFY0(zio_wait(poa.pio)); 164 ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes); 165 166 /* change snapused */ 167 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 168 -poa.used, -poa.comp, -poa.uncomp, tx); 169 170 /* swap next's deadlist to our deadlist */ 171 dsl_deadlist_close(&ds->ds_deadlist); 172 dsl_deadlist_close(&ds_next->ds_deadlist); 173 deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; 174 dsl_dataset_phys(ds)->ds_deadlist_obj = 175 dsl_dataset_phys(ds_next)->ds_deadlist_obj; 176 dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj; 177 dsl_deadlist_open(&ds->ds_deadlist, mos, 178 dsl_dataset_phys(ds)->ds_deadlist_obj); 179 dsl_deadlist_open(&ds_next->ds_deadlist, mos, 180 dsl_dataset_phys(ds_next)->ds_deadlist_obj); 181 } 182 183 static void 184 dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) 185 { 186 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 187 zap_cursor_t zc; 188 zap_attribute_t za; 189 190 /* 191 * If it is the old version, dd_clones doesn't exist so we can't 192 * find the clones, but dsl_deadlist_remove_key() is a no-op so it 193 * doesn't matter. 194 */ 195 if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0) 196 return; 197 198 for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones); 199 zap_cursor_retrieve(&zc, &za) == 0; 200 zap_cursor_advance(&zc)) { 201 dsl_dataset_t *clone; 202 203 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 204 za.za_first_integer, FTAG, &clone)); 205 if (clone->ds_dir->dd_origin_txg > mintxg) { 206 dsl_deadlist_remove_key(&clone->ds_deadlist, 207 mintxg, tx); 208 if (dsl_dataset_remap_deadlist_exists(clone)) { 209 dsl_deadlist_remove_key( 210 &clone->ds_remap_deadlist, mintxg, tx); 211 } 212 dsl_dataset_remove_clones_key(clone, mintxg, tx); 213 } 214 dsl_dataset_rele(clone, FTAG); 215 } 216 zap_cursor_fini(&zc); 217 } 218 219 static void 220 dsl_destroy_snapshot_handle_remaps(dsl_dataset_t *ds, dsl_dataset_t *ds_next, 221 dmu_tx_t *tx) 222 { 223 dsl_pool_t *dp = ds->ds_dir->dd_pool; 224 225 /* Move blocks to be obsoleted to pool's obsolete list. */ 226 if (dsl_dataset_remap_deadlist_exists(ds_next)) { 227 if (!bpobj_is_open(&dp->dp_obsolete_bpobj)) 228 dsl_pool_create_obsolete_bpobj(dp, tx); 229 230 dsl_deadlist_move_bpobj(&ds_next->ds_remap_deadlist, 231 &dp->dp_obsolete_bpobj, 232 dsl_dataset_phys(ds)->ds_prev_snap_txg, tx); 233 } 234 235 /* Merge our deadlist into next's and free it. */ 236 if (dsl_dataset_remap_deadlist_exists(ds)) { 237 uint64_t remap_deadlist_object = 238 dsl_dataset_get_remap_deadlist_object(ds); 239 ASSERT(remap_deadlist_object != 0); 240 241 mutex_enter(&ds_next->ds_remap_deadlist_lock); 242 if (!dsl_dataset_remap_deadlist_exists(ds_next)) 243 dsl_dataset_create_remap_deadlist(ds_next, tx); 244 mutex_exit(&ds_next->ds_remap_deadlist_lock); 245 246 dsl_deadlist_merge(&ds_next->ds_remap_deadlist, 247 remap_deadlist_object, tx); 248 dsl_dataset_destroy_remap_deadlist(ds, tx); 249 } 250 } 251 252 void 253 dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) 254 { 255 int err; 256 int after_branch_point = FALSE; 257 dsl_pool_t *dp = ds->ds_dir->dd_pool; 258 objset_t *mos = dp->dp_meta_objset; 259 dsl_dataset_t *ds_prev = NULL; 260 uint64_t obj; 261 262 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 263 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 264 ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); 265 rrw_exit(&ds->ds_bp_rwlock, FTAG); 266 ASSERT(zfs_refcount_is_zero(&ds->ds_longholds)); 267 268 if (defer && 269 (ds->ds_userrefs > 0 || 270 dsl_dataset_phys(ds)->ds_num_children > 1)) { 271 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 272 dmu_buf_will_dirty(ds->ds_dbuf, tx); 273 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY; 274 spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); 275 return; 276 } 277 278 ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); 279 280 /* We need to log before removing it from the namespace. */ 281 spa_history_log_internal_ds(ds, "destroy", tx, ""); 282 283 dsl_scan_ds_destroyed(ds, tx); 284 285 obj = ds->ds_object; 286 287 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 288 if (dsl_dataset_feature_is_active(ds, f)) 289 dsl_dataset_deactivate_feature(ds, f, tx); 290 } 291 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 292 ASSERT3P(ds->ds_prev, ==, NULL); 293 VERIFY0(dsl_dataset_hold_obj(dp, 294 dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev)); 295 after_branch_point = 296 (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj); 297 298 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 299 if (after_branch_point && 300 dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) { 301 dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); 302 if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) { 303 VERIFY0(zap_add_int(mos, 304 dsl_dataset_phys(ds_prev)-> 305 ds_next_clones_obj, 306 dsl_dataset_phys(ds)->ds_next_snap_obj, 307 tx)); 308 } 309 } 310 if (!after_branch_point) { 311 dsl_dataset_phys(ds_prev)->ds_next_snap_obj = 312 dsl_dataset_phys(ds)->ds_next_snap_obj; 313 } 314 } 315 316 dsl_dataset_t *ds_next; 317 uint64_t old_unique; 318 uint64_t used = 0, comp = 0, uncomp = 0; 319 320 VERIFY0(dsl_dataset_hold_obj(dp, 321 dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next)); 322 ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj); 323 324 old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes; 325 326 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 327 dsl_dataset_phys(ds_next)->ds_prev_snap_obj = 328 dsl_dataset_phys(ds)->ds_prev_snap_obj; 329 dsl_dataset_phys(ds_next)->ds_prev_snap_txg = 330 dsl_dataset_phys(ds)->ds_prev_snap_txg; 331 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, 332 ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0); 333 334 if (ds_next->ds_deadlist.dl_oldfmt) { 335 process_old_deadlist(ds, ds_prev, ds_next, 336 after_branch_point, tx); 337 } else { 338 /* Adjust prev's unique space. */ 339 if (ds_prev && !after_branch_point) { 340 dsl_deadlist_space_range(&ds_next->ds_deadlist, 341 dsl_dataset_phys(ds_prev)->ds_prev_snap_txg, 342 dsl_dataset_phys(ds)->ds_prev_snap_txg, 343 &used, &comp, &uncomp); 344 dsl_dataset_phys(ds_prev)->ds_unique_bytes += used; 345 } 346 347 /* Adjust snapused. */ 348 dsl_deadlist_space_range(&ds_next->ds_deadlist, 349 dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX, 350 &used, &comp, &uncomp); 351 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 352 -used, -comp, -uncomp, tx); 353 354 /* Move blocks to be freed to pool's free list. */ 355 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, 356 &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg, 357 tx); 358 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, 359 DD_USED_HEAD, used, comp, uncomp, tx); 360 361 /* Merge our deadlist into next's and free it. */ 362 dsl_deadlist_merge(&ds_next->ds_deadlist, 363 dsl_dataset_phys(ds)->ds_deadlist_obj, tx); 364 } 365 366 dsl_deadlist_close(&ds->ds_deadlist); 367 dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); 368 dmu_buf_will_dirty(ds->ds_dbuf, tx); 369 dsl_dataset_phys(ds)->ds_deadlist_obj = 0; 370 371 dsl_destroy_snapshot_handle_remaps(ds, ds_next, tx); 372 373 /* Collapse range in clone heads */ 374 dsl_dataset_remove_clones_key(ds, 375 dsl_dataset_phys(ds)->ds_creation_txg, tx); 376 377 if (ds_next->ds_is_snapshot) { 378 dsl_dataset_t *ds_nextnext; 379 380 /* 381 * Update next's unique to include blocks which 382 * were previously shared by only this snapshot 383 * and it. Those blocks will be born after the 384 * prev snap and before this snap, and will have 385 * died after the next snap and before the one 386 * after that (ie. be on the snap after next's 387 * deadlist). 388 */ 389 VERIFY0(dsl_dataset_hold_obj(dp, 390 dsl_dataset_phys(ds_next)->ds_next_snap_obj, 391 FTAG, &ds_nextnext)); 392 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, 393 dsl_dataset_phys(ds)->ds_prev_snap_txg, 394 dsl_dataset_phys(ds)->ds_creation_txg, 395 &used, &comp, &uncomp); 396 dsl_dataset_phys(ds_next)->ds_unique_bytes += used; 397 dsl_dataset_rele(ds_nextnext, FTAG); 398 ASSERT3P(ds_next->ds_prev, ==, NULL); 399 400 /* Collapse range in this head. */ 401 dsl_dataset_t *hds; 402 VERIFY0(dsl_dataset_hold_obj(dp, 403 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds)); 404 dsl_deadlist_remove_key(&hds->ds_deadlist, 405 dsl_dataset_phys(ds)->ds_creation_txg, tx); 406 if (dsl_dataset_remap_deadlist_exists(hds)) { 407 dsl_deadlist_remove_key(&hds->ds_remap_deadlist, 408 dsl_dataset_phys(ds)->ds_creation_txg, tx); 409 } 410 dsl_dataset_rele(hds, FTAG); 411 412 } else { 413 ASSERT3P(ds_next->ds_prev, ==, ds); 414 dsl_dataset_rele(ds_next->ds_prev, ds_next); 415 ds_next->ds_prev = NULL; 416 if (ds_prev) { 417 VERIFY0(dsl_dataset_hold_obj(dp, 418 dsl_dataset_phys(ds)->ds_prev_snap_obj, 419 ds_next, &ds_next->ds_prev)); 420 } 421 422 dsl_dataset_recalc_head_uniq(ds_next); 423 424 /* 425 * Reduce the amount of our unconsumed refreservation 426 * being charged to our parent by the amount of 427 * new unique data we have gained. 428 */ 429 if (old_unique < ds_next->ds_reserved) { 430 int64_t mrsdelta; 431 uint64_t new_unique = 432 dsl_dataset_phys(ds_next)->ds_unique_bytes; 433 434 ASSERT(old_unique <= new_unique); 435 mrsdelta = MIN(new_unique - old_unique, 436 ds_next->ds_reserved - old_unique); 437 dsl_dir_diduse_space(ds->ds_dir, 438 DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); 439 } 440 } 441 dsl_dataset_rele(ds_next, FTAG); 442 443 /* 444 * This must be done after the dsl_traverse(), because it will 445 * re-open the objset. 446 */ 447 if (ds->ds_objset) { 448 dmu_objset_evict(ds->ds_objset); 449 ds->ds_objset = NULL; 450 } 451 452 /* remove from snapshot namespace */ 453 dsl_dataset_t *ds_head; 454 ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0); 455 VERIFY0(dsl_dataset_hold_obj(dp, 456 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head)); 457 VERIFY0(dsl_dataset_get_snapname(ds)); 458 #ifdef ZFS_DEBUG 459 { 460 uint64_t val; 461 462 err = dsl_dataset_snap_lookup(ds_head, 463 ds->ds_snapname, &val); 464 ASSERT0(err); 465 ASSERT3U(val, ==, obj); 466 } 467 #endif 468 VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE)); 469 dsl_dataset_rele(ds_head, FTAG); 470 471 if (ds_prev != NULL) 472 dsl_dataset_rele(ds_prev, FTAG); 473 474 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 475 476 if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { 477 uint64_t count; 478 ASSERT0(zap_count(mos, 479 dsl_dataset_phys(ds)->ds_next_clones_obj, &count) && 480 count == 0); 481 VERIFY0(dmu_object_free(mos, 482 dsl_dataset_phys(ds)->ds_next_clones_obj, tx)); 483 } 484 if (dsl_dataset_phys(ds)->ds_props_obj != 0) 485 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj, 486 tx)); 487 if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0) 488 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj, 489 tx)); 490 dsl_dir_rele(ds->ds_dir, ds); 491 ds->ds_dir = NULL; 492 dmu_object_free_zapified(mos, obj, tx); 493 } 494 495 void 496 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx) 497 { 498 dsl_destroy_snapshot_arg_t *ddsa = arg; 499 const char *dsname = ddsa->ddsa_name; 500 boolean_t defer = ddsa->ddsa_defer; 501 502 dsl_pool_t *dp = dmu_tx_pool(tx); 503 dsl_dataset_t *ds; 504 505 int error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 506 if (error == ENOENT) 507 return; 508 ASSERT0(error); 509 dsl_destroy_snapshot_sync_impl(ds, defer, tx); 510 dsl_dataset_rele(ds, FTAG); 511 } 512 513 /* 514 * The semantics of this function are described in the comment above 515 * lzc_destroy_snaps(). To summarize: 516 * 517 * The snapshots must all be in the same pool. 518 * 519 * Snapshots that don't exist will be silently ignored (considered to be 520 * "already deleted"). 521 * 522 * On success, all snaps will be destroyed and this will return 0. 523 * On failure, no snaps will be destroyed, the errlist will be filled in, 524 * and this will return an errno. 525 */ 526 int 527 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, 528 nvlist_t *errlist) 529 { 530 if (nvlist_next_nvpair(snaps, NULL) == NULL) 531 return (0); 532 533 /* 534 * lzc_destroy_snaps() is documented to take an nvlist whose 535 * values "don't matter". We need to convert that nvlist to 536 * one that we know can be converted to LUA. We also don't 537 * care about any duplicate entries because the nvlist will 538 * be converted to a LUA table which should take care of this. 539 */ 540 nvlist_t *snaps_normalized; 541 VERIFY0(nvlist_alloc(&snaps_normalized, 0, KM_SLEEP)); 542 for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL); 543 pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { 544 fnvlist_add_boolean_value(snaps_normalized, 545 nvpair_name(pair), B_TRUE); 546 } 547 548 nvlist_t *arg; 549 VERIFY0(nvlist_alloc(&arg, 0, KM_SLEEP)); 550 fnvlist_add_nvlist(arg, "snaps", snaps_normalized); 551 fnvlist_free(snaps_normalized); 552 fnvlist_add_boolean_value(arg, "defer", defer); 553 554 nvlist_t *wrapper; 555 VERIFY0(nvlist_alloc(&wrapper, 0, KM_SLEEP)); 556 fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg); 557 fnvlist_free(arg); 558 559 const char *program = 560 "arg = ...\n" 561 "snaps = arg['snaps']\n" 562 "defer = arg['defer']\n" 563 "errors = { }\n" 564 "has_errors = false\n" 565 "for snap, v in pairs(snaps) do\n" 566 " errno = zfs.check.destroy{snap, defer=defer}\n" 567 " zfs.debug('snap: ' .. snap .. ' errno: ' .. errno)\n" 568 " if errno == ENOENT then\n" 569 " snaps[snap] = nil\n" 570 " elseif errno ~= 0 then\n" 571 " errors[snap] = errno\n" 572 " has_errors = true\n" 573 " end\n" 574 "end\n" 575 "if has_errors then\n" 576 " return errors\n" 577 "end\n" 578 "for snap, v in pairs(snaps) do\n" 579 " errno = zfs.sync.destroy{snap, defer=defer}\n" 580 " assert(errno == 0)\n" 581 "end\n" 582 "return { }\n"; 583 584 nvlist_t *result = fnvlist_alloc(); 585 int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)), 586 program, 587 B_TRUE, 588 0, 589 zfs_lua_max_memlimit, 590 nvlist_next_nvpair(wrapper, NULL), result); 591 if (error != 0) { 592 char *errorstr = NULL; 593 (void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr); 594 if (errorstr != NULL) { 595 zfs_dbgmsg(errorstr); 596 } 597 return (error); 598 } 599 fnvlist_free(wrapper); 600 601 /* 602 * lzc_destroy_snaps() is documented to fill the errlist with 603 * int32 values, so we need to covert the int64 values that are 604 * returned from LUA. 605 */ 606 int rv = 0; 607 nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN); 608 for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL); 609 pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) { 610 int32_t val = (int32_t)fnvpair_value_int64(pair); 611 if (rv == 0) 612 rv = val; 613 fnvlist_add_int32(errlist, nvpair_name(pair), val); 614 } 615 fnvlist_free(result); 616 return (rv); 617 } 618 619 int 620 dsl_destroy_snapshot(const char *name, boolean_t defer) 621 { 622 int error; 623 nvlist_t *nvl = fnvlist_alloc(); 624 nvlist_t *errlist = fnvlist_alloc(); 625 626 fnvlist_add_boolean(nvl, name); 627 error = dsl_destroy_snapshots_nvl(nvl, defer, errlist); 628 fnvlist_free(errlist); 629 fnvlist_free(nvl); 630 return (error); 631 } 632 633 struct killarg { 634 dsl_dataset_t *ds; 635 dmu_tx_t *tx; 636 }; 637 638 /* ARGSUSED */ 639 static int 640 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 641 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 642 { 643 struct killarg *ka = arg; 644 dmu_tx_t *tx = ka->tx; 645 646 if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) 647 return (0); 648 649 if (zb->zb_level == ZB_ZIL_LEVEL) { 650 ASSERT(zilog != NULL); 651 /* 652 * It's a block in the intent log. It has no 653 * accounting, so just free it. 654 */ 655 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); 656 } else { 657 ASSERT(zilog == NULL); 658 ASSERT3U(bp->blk_birth, >, 659 dsl_dataset_phys(ka->ds)->ds_prev_snap_txg); 660 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); 661 } 662 663 return (0); 664 } 665 666 static void 667 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) 668 { 669 struct killarg ka; 670 671 /* 672 * Free everything that we point to (that's born after 673 * the previous snapshot, if we are a clone) 674 * 675 * NB: this should be very quick, because we already 676 * freed all the objects in open context. 677 */ 678 ka.ds = ds; 679 ka.tx = tx; 680 VERIFY0(traverse_dataset(ds, 681 dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST | 682 TRAVERSE_NO_DECRYPT, kill_blkptr, &ka)); 683 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 684 dsl_dataset_phys(ds)->ds_unique_bytes == 0); 685 } 686 687 int 688 dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) 689 { 690 int error; 691 uint64_t count; 692 objset_t *mos; 693 694 ASSERT(!ds->ds_is_snapshot); 695 if (ds->ds_is_snapshot) 696 return (SET_ERROR(EINVAL)); 697 698 if (zfs_refcount_count(&ds->ds_longholds) != expected_holds) 699 return (SET_ERROR(EBUSY)); 700 701 mos = ds->ds_dir->dd_pool->dp_meta_objset; 702 703 /* 704 * Can't delete a head dataset if there are snapshots of it. 705 * (Except if the only snapshots are from the branch we cloned 706 * from.) 707 */ 708 if (ds->ds_prev != NULL && 709 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object) 710 return (SET_ERROR(EBUSY)); 711 712 /* 713 * Can't delete if there are children of this fs. 714 */ 715 error = zap_count(mos, 716 dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count); 717 if (error != 0) 718 return (error); 719 if (count != 0) 720 return (SET_ERROR(EEXIST)); 721 722 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && 723 dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && 724 ds->ds_prev->ds_userrefs == 0) { 725 /* We need to remove the origin snapshot as well. */ 726 if (!zfs_refcount_is_zero(&ds->ds_prev->ds_longholds)) 727 return (SET_ERROR(EBUSY)); 728 } 729 return (0); 730 } 731 732 int 733 dsl_destroy_head_check(void *arg, dmu_tx_t *tx) 734 { 735 dsl_destroy_head_arg_t *ddha = arg; 736 dsl_pool_t *dp = dmu_tx_pool(tx); 737 dsl_dataset_t *ds; 738 int error; 739 740 error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds); 741 if (error != 0) 742 return (error); 743 744 error = dsl_destroy_head_check_impl(ds, 0); 745 dsl_dataset_rele(ds, FTAG); 746 return (error); 747 } 748 749 static void 750 dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) 751 { 752 dsl_dir_t *dd; 753 dsl_pool_t *dp = dmu_tx_pool(tx); 754 objset_t *mos = dp->dp_meta_objset; 755 dd_used_t t; 756 757 ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock)); 758 759 VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); 760 761 ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj); 762 763 /* 764 * Decrement the filesystem count for all parent filesystems. 765 * 766 * When we receive an incremental stream into a filesystem that already 767 * exists, a temporary clone is created. We never count this temporary 768 * clone, whose name begins with a '%'. 769 */ 770 if (dd->dd_myname[0] != '%' && dd->dd_parent != NULL) 771 dsl_fs_ss_count_adjust(dd->dd_parent, -1, 772 DD_FIELD_FILESYSTEM_COUNT, tx); 773 774 /* 775 * Remove our reservation. The impl() routine avoids setting the 776 * actual property, which would require the (already destroyed) ds. 777 */ 778 dsl_dir_set_reservation_sync_impl(dd, 0, tx); 779 780 ASSERT0(dsl_dir_phys(dd)->dd_used_bytes); 781 ASSERT0(dsl_dir_phys(dd)->dd_reserved); 782 for (t = 0; t < DD_USED_NUM; t++) 783 ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]); 784 785 if (dd->dd_crypto_obj != 0) { 786 dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx); 787 (void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object); 788 } 789 790 VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx)); 791 VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx)); 792 if (dsl_dir_phys(dd)->dd_clones != 0) 793 VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_clones, tx)); 794 VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx)); 795 VERIFY0(zap_remove(mos, 796 dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj, 797 dd->dd_myname, tx)); 798 799 dsl_dir_rele(dd, FTAG); 800 dmu_object_free_zapified(mos, ddobj, tx); 801 } 802 803 void 804 dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) 805 { 806 dsl_pool_t *dp = dmu_tx_pool(tx); 807 objset_t *mos = dp->dp_meta_objset; 808 uint64_t obj, ddobj, prevobj = 0; 809 boolean_t rmorigin; 810 811 ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); 812 ASSERT(ds->ds_prev == NULL || 813 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object); 814 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 815 ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); 816 rrw_exit(&ds->ds_bp_rwlock, FTAG); 817 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 818 819 /* We need to log before removing it from the namespace. */ 820 spa_history_log_internal_ds(ds, "destroy", tx, ""); 821 822 rmorigin = (dsl_dir_is_clone(ds->ds_dir) && 823 DS_IS_DEFER_DESTROY(ds->ds_prev) && 824 dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && 825 ds->ds_prev->ds_userrefs == 0); 826 827 /* Remove our reservation. */ 828 if (ds->ds_reserved != 0) { 829 dsl_dataset_set_refreservation_sync_impl(ds, 830 (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), 831 0, tx); 832 ASSERT0(ds->ds_reserved); 833 } 834 835 obj = ds->ds_object; 836 837 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 838 if (dsl_dataset_feature_is_active(ds, f)) 839 dsl_dataset_deactivate_feature(ds, f, tx); 840 } 841 842 dsl_scan_ds_destroyed(ds, tx); 843 844 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 845 /* This is a clone */ 846 ASSERT(ds->ds_prev != NULL); 847 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=, 848 obj); 849 ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj); 850 851 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 852 if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) { 853 dsl_dataset_remove_from_next_clones(ds->ds_prev, 854 obj, tx); 855 } 856 857 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1); 858 dsl_dataset_phys(ds->ds_prev)->ds_num_children--; 859 } 860 861 /* 862 * Destroy the deadlist. Unless it's a clone, the 863 * deadlist should be empty since the dataset has no snapshots. 864 * (If it's a clone, it's safe to ignore the deadlist contents 865 * since they are still referenced by the origin snapshot.) 866 */ 867 dsl_deadlist_close(&ds->ds_deadlist); 868 dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); 869 dmu_buf_will_dirty(ds->ds_dbuf, tx); 870 dsl_dataset_phys(ds)->ds_deadlist_obj = 0; 871 872 if (dsl_dataset_remap_deadlist_exists(ds)) 873 dsl_dataset_destroy_remap_deadlist(ds, tx); 874 875 objset_t *os; 876 VERIFY0(dmu_objset_from_ds(ds, &os)); 877 878 if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { 879 old_synchronous_dataset_destroy(ds, tx); 880 } else { 881 /* 882 * Move the bptree into the pool's list of trees to 883 * clean up and update space accounting information. 884 */ 885 uint64_t used, comp, uncomp; 886 887 zil_destroy_sync(dmu_objset_zil(os), tx); 888 889 if (!spa_feature_is_active(dp->dp_spa, 890 SPA_FEATURE_ASYNC_DESTROY)) { 891 dsl_scan_t *scn = dp->dp_scan; 892 spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY, 893 tx); 894 dp->dp_bptree_obj = bptree_alloc(mos, tx); 895 VERIFY0(zap_add(mos, 896 DMU_POOL_DIRECTORY_OBJECT, 897 DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, 898 &dp->dp_bptree_obj, tx)); 899 ASSERT(!scn->scn_async_destroying); 900 scn->scn_async_destroying = B_TRUE; 901 } 902 903 used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes; 904 comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes; 905 uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes; 906 907 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 908 dsl_dataset_phys(ds)->ds_unique_bytes == used); 909 910 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 911 bptree_add(mos, dp->dp_bptree_obj, 912 &dsl_dataset_phys(ds)->ds_bp, 913 dsl_dataset_phys(ds)->ds_prev_snap_txg, 914 used, comp, uncomp, tx); 915 rrw_exit(&ds->ds_bp_rwlock, FTAG); 916 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 917 -used, -comp, -uncomp, tx); 918 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, 919 used, comp, uncomp, tx); 920 } 921 922 if (ds->ds_prev != NULL) { 923 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 924 VERIFY0(zap_remove_int(mos, 925 dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones, 926 ds->ds_object, tx)); 927 } 928 prevobj = ds->ds_prev->ds_object; 929 dsl_dataset_rele(ds->ds_prev, ds); 930 ds->ds_prev = NULL; 931 } 932 933 /* 934 * This must be done after the dsl_traverse(), because it will 935 * re-open the objset. 936 */ 937 if (ds->ds_objset) { 938 dmu_objset_evict(ds->ds_objset); 939 ds->ds_objset = NULL; 940 } 941 942 /* Erase the link in the dir */ 943 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 944 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0; 945 ddobj = ds->ds_dir->dd_object; 946 ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0); 947 VERIFY0(zap_destroy(mos, 948 dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx)); 949 950 if (ds->ds_bookmarks != 0) { 951 VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx)); 952 spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx); 953 } 954 955 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 956 957 ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj); 958 ASSERT0(dsl_dataset_phys(ds)->ds_props_obj); 959 ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj); 960 dsl_dir_rele(ds->ds_dir, ds); 961 ds->ds_dir = NULL; 962 dmu_object_free_zapified(mos, obj, tx); 963 964 dsl_dir_destroy_sync(ddobj, tx); 965 966 if (rmorigin) { 967 dsl_dataset_t *prev; 968 VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev)); 969 dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx); 970 dsl_dataset_rele(prev, FTAG); 971 } 972 } 973 974 void 975 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx) 976 { 977 dsl_destroy_head_arg_t *ddha = arg; 978 dsl_pool_t *dp = dmu_tx_pool(tx); 979 dsl_dataset_t *ds; 980 981 VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); 982 dsl_destroy_head_sync_impl(ds, tx); 983 dsl_dataset_rele(ds, FTAG); 984 } 985 986 static void 987 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx) 988 { 989 dsl_destroy_head_arg_t *ddha = arg; 990 dsl_pool_t *dp = dmu_tx_pool(tx); 991 dsl_dataset_t *ds; 992 993 VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); 994 995 /* Mark it as inconsistent on-disk, in case we crash */ 996 dmu_buf_will_dirty(ds->ds_dbuf, tx); 997 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; 998 999 spa_history_log_internal_ds(ds, "destroy begin", tx, ""); 1000 dsl_dataset_rele(ds, FTAG); 1001 } 1002 1003 int 1004 dsl_destroy_head(const char *name) 1005 { 1006 dsl_destroy_head_arg_t ddha; 1007 int error; 1008 spa_t *spa; 1009 boolean_t isenabled; 1010 1011 #ifdef _KERNEL 1012 zfs_destroy_unmount_origin(name); 1013 #endif 1014 1015 error = spa_open(name, &spa, FTAG); 1016 if (error != 0) 1017 return (error); 1018 isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY); 1019 spa_close(spa, FTAG); 1020 1021 ddha.ddha_name = name; 1022 1023 if (!isenabled) { 1024 objset_t *os; 1025 1026 error = dsl_sync_task(name, dsl_destroy_head_check, 1027 dsl_destroy_head_begin_sync, &ddha, 1028 0, ZFS_SPACE_CHECK_DESTROY); 1029 if (error != 0) 1030 return (error); 1031 1032 /* 1033 * Head deletion is processed in one txg on old pools; 1034 * remove the objects from open context so that the txg sync 1035 * is not too long. 1036 */ 1037 error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_FALSE, 1038 FTAG, &os); 1039 if (error == 0) { 1040 uint64_t prev_snap_txg = 1041 dsl_dataset_phys(dmu_objset_ds(os))-> 1042 ds_prev_snap_txg; 1043 for (uint64_t obj = 0; error == 0; 1044 error = dmu_object_next(os, &obj, FALSE, 1045 prev_snap_txg)) 1046 (void) dmu_free_long_object(os, obj); 1047 /* sync out all frees */ 1048 txg_wait_synced(dmu_objset_pool(os), 0); 1049 dmu_objset_disown(os, B_FALSE, FTAG); 1050 } 1051 } 1052 1053 return (dsl_sync_task(name, dsl_destroy_head_check, 1054 dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_DESTROY)); 1055 } 1056 1057 /* 1058 * Note, this function is used as the callback for dmu_objset_find(). We 1059 * always return 0 so that we will continue to find and process 1060 * inconsistent datasets, even if we encounter an error trying to 1061 * process one of them. 1062 */ 1063 /* ARGSUSED */ 1064 int 1065 dsl_destroy_inconsistent(const char *dsname, void *arg) 1066 { 1067 objset_t *os; 1068 1069 if (dmu_objset_hold(dsname, FTAG, &os) == 0) { 1070 boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os)); 1071 1072 /* 1073 * If the dataset is inconsistent because a resumable receive 1074 * has failed, then do not destroy it. 1075 */ 1076 if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os))) 1077 need_destroy = B_FALSE; 1078 1079 dmu_objset_rele(os, FTAG); 1080 if (need_destroy) 1081 (void) dsl_destroy_head(dsname); 1082 } 1083 return (0); 1084 } 1085