1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 24 * Copyright (c) 2013 Steven Hartland. All rights reserved. 25 * Copyright (c) 2013 by Joyent, Inc. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 */ 28 29 #include <sys/zfs_context.h> 30 #include <sys/dsl_userhold.h> 31 #include <sys/dsl_dataset.h> 32 #include <sys/dsl_synctask.h> 33 #include <sys/dsl_destroy.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/dsl_pool.h> 36 #include <sys/dsl_dir.h> 37 #include <sys/dmu_traverse.h> 38 #include <sys/dsl_scan.h> 39 #include <sys/dmu_objset.h> 40 #include <sys/zap.h> 41 #include <sys/zfeature.h> 42 #include <sys/zfs_ioctl.h> 43 #include <sys/dsl_deleg.h> 44 #include <sys/dmu_impl.h> 45 #include <sys/zcp.h> 46 47 int 48 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer) 49 { 50 if (!ds->ds_is_snapshot) 51 return (SET_ERROR(EINVAL)); 52 53 if (dsl_dataset_long_held(ds)) 54 return (SET_ERROR(EBUSY)); 55 56 /* 57 * Only allow deferred destroy on pools that support it. 58 * NOTE: deferred destroy is only supported on snapshots. 59 */ 60 if (defer) { 61 if (spa_version(ds->ds_dir->dd_pool->dp_spa) < 62 SPA_VERSION_USERREFS) 63 return (SET_ERROR(ENOTSUP)); 64 return (0); 65 } 66 67 /* 68 * If this snapshot has an elevated user reference count, 69 * we can't destroy it yet. 70 */ 71 if (ds->ds_userrefs > 0) 72 return (SET_ERROR(EBUSY)); 73 74 /* 75 * Can't delete a branch point. 76 */ 77 if (dsl_dataset_phys(ds)->ds_num_children > 1) 78 return (SET_ERROR(EEXIST)); 79 80 return (0); 81 } 82 83 int 84 dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx) 85 { 86 dsl_destroy_snapshot_arg_t *ddsa = arg; 87 const char *dsname = ddsa->ddsa_name; 88 boolean_t defer = ddsa->ddsa_defer; 89 90 dsl_pool_t *dp = dmu_tx_pool(tx); 91 int error = 0; 92 dsl_dataset_t *ds; 93 94 error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 95 96 /* 97 * If the snapshot does not exist, silently ignore it, and 98 * dsl_destroy_snapshot_sync() will be a no-op 99 * (it's "already destroyed"). 100 */ 101 if (error == ENOENT) 102 return (0); 103 104 if (error == 0) { 105 error = dsl_destroy_snapshot_check_impl(ds, defer); 106 dsl_dataset_rele(ds, FTAG); 107 } 108 109 return (error); 110 } 111 112 struct process_old_arg { 113 dsl_dataset_t *ds; 114 dsl_dataset_t *ds_prev; 115 boolean_t after_branch_point; 116 zio_t *pio; 117 uint64_t used, comp, uncomp; 118 }; 119 120 static int 121 process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx) 122 { 123 struct process_old_arg *poa = arg; 124 dsl_pool_t *dp = poa->ds->ds_dir->dd_pool; 125 126 ASSERT(!BP_IS_HOLE(bp)); 127 128 if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) { 129 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx); 130 if (poa->ds_prev && !poa->after_branch_point && 131 bp->blk_birth > 132 dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) { 133 dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes += 134 bp_get_dsize_sync(dp->dp_spa, bp); 135 } 136 } else { 137 poa->used += bp_get_dsize_sync(dp->dp_spa, bp); 138 poa->comp += BP_GET_PSIZE(bp); 139 poa->uncomp += BP_GET_UCSIZE(bp); 140 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp); 141 } 142 return (0); 143 } 144 145 static void 146 process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev, 147 dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx) 148 { 149 struct process_old_arg poa = { 0 }; 150 dsl_pool_t *dp = ds->ds_dir->dd_pool; 151 objset_t *mos = dp->dp_meta_objset; 152 uint64_t deadlist_obj; 153 154 ASSERT(ds->ds_deadlist.dl_oldfmt); 155 ASSERT(ds_next->ds_deadlist.dl_oldfmt); 156 157 poa.ds = ds; 158 poa.ds_prev = ds_prev; 159 poa.after_branch_point = after_branch_point; 160 poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED); 161 VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj, 162 process_old_cb, &poa, tx)); 163 VERIFY0(zio_wait(poa.pio)); 164 ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes); 165 166 /* change snapused */ 167 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 168 -poa.used, -poa.comp, -poa.uncomp, tx); 169 170 /* swap next's deadlist to our deadlist */ 171 dsl_deadlist_close(&ds->ds_deadlist); 172 dsl_deadlist_close(&ds_next->ds_deadlist); 173 deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj; 174 dsl_dataset_phys(ds)->ds_deadlist_obj = 175 dsl_dataset_phys(ds_next)->ds_deadlist_obj; 176 dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj; 177 dsl_deadlist_open(&ds->ds_deadlist, mos, 178 dsl_dataset_phys(ds)->ds_deadlist_obj); 179 dsl_deadlist_open(&ds_next->ds_deadlist, mos, 180 dsl_dataset_phys(ds_next)->ds_deadlist_obj); 181 } 182 183 static void 184 dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx) 185 { 186 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset; 187 zap_cursor_t zc; 188 zap_attribute_t za; 189 190 /* 191 * If it is the old version, dd_clones doesn't exist so we can't 192 * find the clones, but dsl_deadlist_remove_key() is a no-op so it 193 * doesn't matter. 194 */ 195 if (dsl_dir_phys(ds->ds_dir)->dd_clones == 0) 196 return; 197 198 for (zap_cursor_init(&zc, mos, dsl_dir_phys(ds->ds_dir)->dd_clones); 199 zap_cursor_retrieve(&zc, &za) == 0; 200 zap_cursor_advance(&zc)) { 201 dsl_dataset_t *clone; 202 203 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool, 204 za.za_first_integer, FTAG, &clone)); 205 if (clone->ds_dir->dd_origin_txg > mintxg) { 206 dsl_deadlist_remove_key(&clone->ds_deadlist, 207 mintxg, tx); 208 dsl_dataset_remove_clones_key(clone, mintxg, tx); 209 } 210 dsl_dataset_rele(clone, FTAG); 211 } 212 zap_cursor_fini(&zc); 213 } 214 215 void 216 dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx) 217 { 218 int err; 219 int after_branch_point = FALSE; 220 dsl_pool_t *dp = ds->ds_dir->dd_pool; 221 objset_t *mos = dp->dp_meta_objset; 222 dsl_dataset_t *ds_prev = NULL; 223 uint64_t obj; 224 225 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 226 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 227 ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); 228 rrw_exit(&ds->ds_bp_rwlock, FTAG); 229 ASSERT(refcount_is_zero(&ds->ds_longholds)); 230 231 if (defer && 232 (ds->ds_userrefs > 0 || 233 dsl_dataset_phys(ds)->ds_num_children > 1)) { 234 ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS); 235 dmu_buf_will_dirty(ds->ds_dbuf, tx); 236 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY; 237 spa_history_log_internal_ds(ds, "defer_destroy", tx, ""); 238 return; 239 } 240 241 ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); 242 243 /* We need to log before removing it from the namespace. */ 244 spa_history_log_internal_ds(ds, "destroy", tx, ""); 245 246 dsl_scan_ds_destroyed(ds, tx); 247 248 obj = ds->ds_object; 249 250 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 251 if (ds->ds_feature_inuse[f]) { 252 dsl_dataset_deactivate_feature(obj, f, tx); 253 ds->ds_feature_inuse[f] = B_FALSE; 254 } 255 } 256 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 257 ASSERT3P(ds->ds_prev, ==, NULL); 258 VERIFY0(dsl_dataset_hold_obj(dp, 259 dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev)); 260 after_branch_point = 261 (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj); 262 263 dmu_buf_will_dirty(ds_prev->ds_dbuf, tx); 264 if (after_branch_point && 265 dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) { 266 dsl_dataset_remove_from_next_clones(ds_prev, obj, tx); 267 if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) { 268 VERIFY0(zap_add_int(mos, 269 dsl_dataset_phys(ds_prev)-> 270 ds_next_clones_obj, 271 dsl_dataset_phys(ds)->ds_next_snap_obj, 272 tx)); 273 } 274 } 275 if (!after_branch_point) { 276 dsl_dataset_phys(ds_prev)->ds_next_snap_obj = 277 dsl_dataset_phys(ds)->ds_next_snap_obj; 278 } 279 } 280 281 dsl_dataset_t *ds_next; 282 uint64_t old_unique; 283 uint64_t used = 0, comp = 0, uncomp = 0; 284 285 VERIFY0(dsl_dataset_hold_obj(dp, 286 dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next)); 287 ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj); 288 289 old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes; 290 291 dmu_buf_will_dirty(ds_next->ds_dbuf, tx); 292 dsl_dataset_phys(ds_next)->ds_prev_snap_obj = 293 dsl_dataset_phys(ds)->ds_prev_snap_obj; 294 dsl_dataset_phys(ds_next)->ds_prev_snap_txg = 295 dsl_dataset_phys(ds)->ds_prev_snap_txg; 296 ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==, 297 ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0); 298 299 if (ds_next->ds_deadlist.dl_oldfmt) { 300 process_old_deadlist(ds, ds_prev, ds_next, 301 after_branch_point, tx); 302 } else { 303 /* Adjust prev's unique space. */ 304 if (ds_prev && !after_branch_point) { 305 dsl_deadlist_space_range(&ds_next->ds_deadlist, 306 dsl_dataset_phys(ds_prev)->ds_prev_snap_txg, 307 dsl_dataset_phys(ds)->ds_prev_snap_txg, 308 &used, &comp, &uncomp); 309 dsl_dataset_phys(ds_prev)->ds_unique_bytes += used; 310 } 311 312 /* Adjust snapused. */ 313 dsl_deadlist_space_range(&ds_next->ds_deadlist, 314 dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX, 315 &used, &comp, &uncomp); 316 dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP, 317 -used, -comp, -uncomp, tx); 318 319 /* Move blocks to be freed to pool's free list. */ 320 dsl_deadlist_move_bpobj(&ds_next->ds_deadlist, 321 &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg, 322 tx); 323 dsl_dir_diduse_space(tx->tx_pool->dp_free_dir, 324 DD_USED_HEAD, used, comp, uncomp, tx); 325 326 /* Merge our deadlist into next's and free it. */ 327 dsl_deadlist_merge(&ds_next->ds_deadlist, 328 dsl_dataset_phys(ds)->ds_deadlist_obj, tx); 329 } 330 dsl_deadlist_close(&ds->ds_deadlist); 331 dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); 332 dmu_buf_will_dirty(ds->ds_dbuf, tx); 333 dsl_dataset_phys(ds)->ds_deadlist_obj = 0; 334 335 /* Collapse range in clone heads */ 336 dsl_dataset_remove_clones_key(ds, 337 dsl_dataset_phys(ds)->ds_creation_txg, tx); 338 339 if (ds_next->ds_is_snapshot) { 340 dsl_dataset_t *ds_nextnext; 341 342 /* 343 * Update next's unique to include blocks which 344 * were previously shared by only this snapshot 345 * and it. Those blocks will be born after the 346 * prev snap and before this snap, and will have 347 * died after the next snap and before the one 348 * after that (ie. be on the snap after next's 349 * deadlist). 350 */ 351 VERIFY0(dsl_dataset_hold_obj(dp, 352 dsl_dataset_phys(ds_next)->ds_next_snap_obj, 353 FTAG, &ds_nextnext)); 354 dsl_deadlist_space_range(&ds_nextnext->ds_deadlist, 355 dsl_dataset_phys(ds)->ds_prev_snap_txg, 356 dsl_dataset_phys(ds)->ds_creation_txg, 357 &used, &comp, &uncomp); 358 dsl_dataset_phys(ds_next)->ds_unique_bytes += used; 359 dsl_dataset_rele(ds_nextnext, FTAG); 360 ASSERT3P(ds_next->ds_prev, ==, NULL); 361 362 /* Collapse range in this head. */ 363 dsl_dataset_t *hds; 364 VERIFY0(dsl_dataset_hold_obj(dp, 365 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds)); 366 dsl_deadlist_remove_key(&hds->ds_deadlist, 367 dsl_dataset_phys(ds)->ds_creation_txg, tx); 368 dsl_dataset_rele(hds, FTAG); 369 370 } else { 371 ASSERT3P(ds_next->ds_prev, ==, ds); 372 dsl_dataset_rele(ds_next->ds_prev, ds_next); 373 ds_next->ds_prev = NULL; 374 if (ds_prev) { 375 VERIFY0(dsl_dataset_hold_obj(dp, 376 dsl_dataset_phys(ds)->ds_prev_snap_obj, 377 ds_next, &ds_next->ds_prev)); 378 } 379 380 dsl_dataset_recalc_head_uniq(ds_next); 381 382 /* 383 * Reduce the amount of our unconsumed refreservation 384 * being charged to our parent by the amount of 385 * new unique data we have gained. 386 */ 387 if (old_unique < ds_next->ds_reserved) { 388 int64_t mrsdelta; 389 uint64_t new_unique = 390 dsl_dataset_phys(ds_next)->ds_unique_bytes; 391 392 ASSERT(old_unique <= new_unique); 393 mrsdelta = MIN(new_unique - old_unique, 394 ds_next->ds_reserved - old_unique); 395 dsl_dir_diduse_space(ds->ds_dir, 396 DD_USED_REFRSRV, -mrsdelta, 0, 0, tx); 397 } 398 } 399 dsl_dataset_rele(ds_next, FTAG); 400 401 /* 402 * This must be done after the dsl_traverse(), because it will 403 * re-open the objset. 404 */ 405 if (ds->ds_objset) { 406 dmu_objset_evict(ds->ds_objset); 407 ds->ds_objset = NULL; 408 } 409 410 /* remove from snapshot namespace */ 411 dsl_dataset_t *ds_head; 412 ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0); 413 VERIFY0(dsl_dataset_hold_obj(dp, 414 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head)); 415 VERIFY0(dsl_dataset_get_snapname(ds)); 416 #ifdef ZFS_DEBUG 417 { 418 uint64_t val; 419 420 err = dsl_dataset_snap_lookup(ds_head, 421 ds->ds_snapname, &val); 422 ASSERT0(err); 423 ASSERT3U(val, ==, obj); 424 } 425 #endif 426 VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE)); 427 dsl_dataset_rele(ds_head, FTAG); 428 429 if (ds_prev != NULL) 430 dsl_dataset_rele(ds_prev, FTAG); 431 432 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 433 434 if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) { 435 uint64_t count; 436 ASSERT0(zap_count(mos, 437 dsl_dataset_phys(ds)->ds_next_clones_obj, &count) && 438 count == 0); 439 VERIFY0(dmu_object_free(mos, 440 dsl_dataset_phys(ds)->ds_next_clones_obj, tx)); 441 } 442 if (dsl_dataset_phys(ds)->ds_props_obj != 0) 443 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj, 444 tx)); 445 if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0) 446 VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj, 447 tx)); 448 dsl_dir_rele(ds->ds_dir, ds); 449 ds->ds_dir = NULL; 450 dmu_object_free_zapified(mos, obj, tx); 451 } 452 453 void 454 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx) 455 { 456 dsl_destroy_snapshot_arg_t *ddsa = arg; 457 const char *dsname = ddsa->ddsa_name; 458 boolean_t defer = ddsa->ddsa_defer; 459 460 dsl_pool_t *dp = dmu_tx_pool(tx); 461 dsl_dataset_t *ds; 462 463 int error = dsl_dataset_hold(dp, dsname, FTAG, &ds); 464 if (error == ENOENT) 465 return; 466 ASSERT0(error); 467 dsl_destroy_snapshot_sync_impl(ds, defer, tx); 468 dsl_dataset_rele(ds, FTAG); 469 } 470 471 /* 472 * The semantics of this function are described in the comment above 473 * lzc_destroy_snaps(). To summarize: 474 * 475 * The snapshots must all be in the same pool. 476 * 477 * Snapshots that don't exist will be silently ignored (considered to be 478 * "already deleted"). 479 * 480 * On success, all snaps will be destroyed and this will return 0. 481 * On failure, no snaps will be destroyed, the errlist will be filled in, 482 * and this will return an errno. 483 */ 484 int 485 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer, 486 nvlist_t *errlist) 487 { 488 if (nvlist_next_nvpair(snaps, NULL) == NULL) 489 return (0); 490 491 nvlist_t *arg = fnvlist_alloc(); 492 nvlist_t *snaps_normalized = fnvlist_alloc(); 493 /* 494 * lzc_destroy_snaps() is documented to take an nvlist whose 495 * values "don't matter". We need to convert that nvlist to one 496 * that we know can be converted to LUA. 497 */ 498 for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL); 499 pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) { 500 fnvlist_add_boolean_value(snaps_normalized, 501 nvpair_name(pair), B_TRUE); 502 } 503 fnvlist_add_nvlist(arg, "snaps", snaps_normalized); 504 fnvlist_free(snaps_normalized); 505 fnvlist_add_boolean_value(arg, "defer", defer); 506 507 nvlist_t *wrapper = fnvlist_alloc(); 508 fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg); 509 fnvlist_free(arg); 510 511 const char *program = 512 "arg = ...\n" 513 "snaps = arg['snaps']\n" 514 "defer = arg['defer']\n" 515 "errors = { }\n" 516 "has_errors = false\n" 517 "for snap, v in pairs(snaps) do\n" 518 " errno = zfs.check.destroy{snap, defer=defer}\n" 519 " zfs.debug('snap: ' .. snap .. ' errno: ' .. errno)\n" 520 " if errno == ENOENT then\n" 521 " snaps[snap] = nil\n" 522 " elseif errno ~= 0 then\n" 523 " errors[snap] = errno\n" 524 " has_errors = true\n" 525 " end\n" 526 "end\n" 527 "if has_errors then\n" 528 " return errors\n" 529 "end\n" 530 "for snap, v in pairs(snaps) do\n" 531 " errno = zfs.sync.destroy{snap, defer=defer}\n" 532 " assert(errno == 0)\n" 533 "end\n" 534 "return { }\n"; 535 536 nvlist_t *result = fnvlist_alloc(); 537 int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)), 538 program, 539 0, 540 zfs_lua_max_memlimit, 541 fnvlist_lookup_nvpair(wrapper, ZCP_ARG_ARGLIST), result); 542 if (error != 0) { 543 char *errorstr = NULL; 544 (void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr); 545 if (errorstr != NULL) { 546 zfs_dbgmsg(errorstr); 547 } 548 return (error); 549 } 550 fnvlist_free(wrapper); 551 552 /* 553 * lzc_destroy_snaps() is documented to fill the errlist with 554 * int32 values, so we need to covert the int64 values that are 555 * returned from LUA. 556 */ 557 int rv = 0; 558 nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN); 559 for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL); 560 pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) { 561 int32_t val = (int32_t)fnvpair_value_int64(pair); 562 if (rv == 0) 563 rv = val; 564 fnvlist_add_int32(errlist, nvpair_name(pair), val); 565 } 566 fnvlist_free(result); 567 return (rv); 568 } 569 570 int 571 dsl_destroy_snapshot(const char *name, boolean_t defer) 572 { 573 int error; 574 nvlist_t *nvl = fnvlist_alloc(); 575 nvlist_t *errlist = fnvlist_alloc(); 576 577 fnvlist_add_boolean(nvl, name); 578 error = dsl_destroy_snapshots_nvl(nvl, defer, errlist); 579 fnvlist_free(errlist); 580 fnvlist_free(nvl); 581 return (error); 582 } 583 584 struct killarg { 585 dsl_dataset_t *ds; 586 dmu_tx_t *tx; 587 }; 588 589 /* ARGSUSED */ 590 static int 591 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, 592 const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg) 593 { 594 struct killarg *ka = arg; 595 dmu_tx_t *tx = ka->tx; 596 597 if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) 598 return (0); 599 600 if (zb->zb_level == ZB_ZIL_LEVEL) { 601 ASSERT(zilog != NULL); 602 /* 603 * It's a block in the intent log. It has no 604 * accounting, so just free it. 605 */ 606 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp); 607 } else { 608 ASSERT(zilog == NULL); 609 ASSERT3U(bp->blk_birth, >, 610 dsl_dataset_phys(ka->ds)->ds_prev_snap_txg); 611 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE); 612 } 613 614 return (0); 615 } 616 617 static void 618 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx) 619 { 620 struct killarg ka; 621 622 /* 623 * Free everything that we point to (that's born after 624 * the previous snapshot, if we are a clone) 625 * 626 * NB: this should be very quick, because we already 627 * freed all the objects in open context. 628 */ 629 ka.ds = ds; 630 ka.tx = tx; 631 VERIFY0(traverse_dataset(ds, 632 dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST, 633 kill_blkptr, &ka)); 634 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 635 dsl_dataset_phys(ds)->ds_unique_bytes == 0); 636 } 637 638 int 639 dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds) 640 { 641 int error; 642 uint64_t count; 643 objset_t *mos; 644 645 ASSERT(!ds->ds_is_snapshot); 646 if (ds->ds_is_snapshot) 647 return (SET_ERROR(EINVAL)); 648 649 if (refcount_count(&ds->ds_longholds) != expected_holds) 650 return (SET_ERROR(EBUSY)); 651 652 mos = ds->ds_dir->dd_pool->dp_meta_objset; 653 654 /* 655 * Can't delete a head dataset if there are snapshots of it. 656 * (Except if the only snapshots are from the branch we cloned 657 * from.) 658 */ 659 if (ds->ds_prev != NULL && 660 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object) 661 return (SET_ERROR(EBUSY)); 662 663 /* 664 * Can't delete if there are children of this fs. 665 */ 666 error = zap_count(mos, 667 dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count); 668 if (error != 0) 669 return (error); 670 if (count != 0) 671 return (SET_ERROR(EEXIST)); 672 673 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) && 674 dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && 675 ds->ds_prev->ds_userrefs == 0) { 676 /* We need to remove the origin snapshot as well. */ 677 if (!refcount_is_zero(&ds->ds_prev->ds_longholds)) 678 return (SET_ERROR(EBUSY)); 679 } 680 return (0); 681 } 682 683 int 684 dsl_destroy_head_check(void *arg, dmu_tx_t *tx) 685 { 686 dsl_destroy_head_arg_t *ddha = arg; 687 dsl_pool_t *dp = dmu_tx_pool(tx); 688 dsl_dataset_t *ds; 689 int error; 690 691 error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds); 692 if (error != 0) 693 return (error); 694 695 error = dsl_destroy_head_check_impl(ds, 0); 696 dsl_dataset_rele(ds, FTAG); 697 return (error); 698 } 699 700 static void 701 dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx) 702 { 703 dsl_dir_t *dd; 704 dsl_pool_t *dp = dmu_tx_pool(tx); 705 objset_t *mos = dp->dp_meta_objset; 706 dd_used_t t; 707 708 ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock)); 709 710 VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd)); 711 712 ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj); 713 714 /* 715 * Decrement the filesystem count for all parent filesystems. 716 * 717 * When we receive an incremental stream into a filesystem that already 718 * exists, a temporary clone is created. We never count this temporary 719 * clone, whose name begins with a '%'. 720 */ 721 if (dd->dd_myname[0] != '%' && dd->dd_parent != NULL) 722 dsl_fs_ss_count_adjust(dd->dd_parent, -1, 723 DD_FIELD_FILESYSTEM_COUNT, tx); 724 725 /* 726 * Remove our reservation. The impl() routine avoids setting the 727 * actual property, which would require the (already destroyed) ds. 728 */ 729 dsl_dir_set_reservation_sync_impl(dd, 0, tx); 730 731 ASSERT0(dsl_dir_phys(dd)->dd_used_bytes); 732 ASSERT0(dsl_dir_phys(dd)->dd_reserved); 733 for (t = 0; t < DD_USED_NUM; t++) 734 ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]); 735 736 VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx)); 737 VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx)); 738 VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx)); 739 VERIFY0(zap_remove(mos, 740 dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj, 741 dd->dd_myname, tx)); 742 743 dsl_dir_rele(dd, FTAG); 744 dmu_object_free_zapified(mos, ddobj, tx); 745 } 746 747 void 748 dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx) 749 { 750 dsl_pool_t *dp = dmu_tx_pool(tx); 751 objset_t *mos = dp->dp_meta_objset; 752 uint64_t obj, ddobj, prevobj = 0; 753 boolean_t rmorigin; 754 755 ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1); 756 ASSERT(ds->ds_prev == NULL || 757 dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object); 758 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 759 ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg); 760 rrw_exit(&ds->ds_bp_rwlock, FTAG); 761 ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock)); 762 763 /* We need to log before removing it from the namespace. */ 764 spa_history_log_internal_ds(ds, "destroy", tx, ""); 765 766 rmorigin = (dsl_dir_is_clone(ds->ds_dir) && 767 DS_IS_DEFER_DESTROY(ds->ds_prev) && 768 dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 && 769 ds->ds_prev->ds_userrefs == 0); 770 771 /* Remove our reservation. */ 772 if (ds->ds_reserved != 0) { 773 dsl_dataset_set_refreservation_sync_impl(ds, 774 (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED), 775 0, tx); 776 ASSERT0(ds->ds_reserved); 777 } 778 779 obj = ds->ds_object; 780 781 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { 782 if (ds->ds_feature_inuse[f]) { 783 dsl_dataset_deactivate_feature(obj, f, tx); 784 ds->ds_feature_inuse[f] = B_FALSE; 785 } 786 } 787 788 dsl_scan_ds_destroyed(ds, tx); 789 790 if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) { 791 /* This is a clone */ 792 ASSERT(ds->ds_prev != NULL); 793 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=, 794 obj); 795 ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj); 796 797 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx); 798 if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) { 799 dsl_dataset_remove_from_next_clones(ds->ds_prev, 800 obj, tx); 801 } 802 803 ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1); 804 dsl_dataset_phys(ds->ds_prev)->ds_num_children--; 805 } 806 807 /* 808 * Destroy the deadlist. Unless it's a clone, the 809 * deadlist should be empty. (If it's a clone, it's 810 * safe to ignore the deadlist contents.) 811 */ 812 dsl_deadlist_close(&ds->ds_deadlist); 813 dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx); 814 dmu_buf_will_dirty(ds->ds_dbuf, tx); 815 dsl_dataset_phys(ds)->ds_deadlist_obj = 0; 816 817 objset_t *os; 818 VERIFY0(dmu_objset_from_ds(ds, &os)); 819 820 if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY)) { 821 old_synchronous_dataset_destroy(ds, tx); 822 } else { 823 /* 824 * Move the bptree into the pool's list of trees to 825 * clean up and update space accounting information. 826 */ 827 uint64_t used, comp, uncomp; 828 829 zil_destroy_sync(dmu_objset_zil(os), tx); 830 831 if (!spa_feature_is_active(dp->dp_spa, 832 SPA_FEATURE_ASYNC_DESTROY)) { 833 dsl_scan_t *scn = dp->dp_scan; 834 spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY, 835 tx); 836 dp->dp_bptree_obj = bptree_alloc(mos, tx); 837 VERIFY0(zap_add(mos, 838 DMU_POOL_DIRECTORY_OBJECT, 839 DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1, 840 &dp->dp_bptree_obj, tx)); 841 ASSERT(!scn->scn_async_destroying); 842 scn->scn_async_destroying = B_TRUE; 843 } 844 845 used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes; 846 comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes; 847 uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes; 848 849 ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || 850 dsl_dataset_phys(ds)->ds_unique_bytes == used); 851 852 rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); 853 bptree_add(mos, dp->dp_bptree_obj, 854 &dsl_dataset_phys(ds)->ds_bp, 855 dsl_dataset_phys(ds)->ds_prev_snap_txg, 856 used, comp, uncomp, tx); 857 rrw_exit(&ds->ds_bp_rwlock, FTAG); 858 dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, 859 -used, -comp, -uncomp, tx); 860 dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD, 861 used, comp, uncomp, tx); 862 } 863 864 if (ds->ds_prev != NULL) { 865 if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) { 866 VERIFY0(zap_remove_int(mos, 867 dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones, 868 ds->ds_object, tx)); 869 } 870 prevobj = ds->ds_prev->ds_object; 871 dsl_dataset_rele(ds->ds_prev, ds); 872 ds->ds_prev = NULL; 873 } 874 875 /* 876 * This must be done after the dsl_traverse(), because it will 877 * re-open the objset. 878 */ 879 if (ds->ds_objset) { 880 dmu_objset_evict(ds->ds_objset); 881 ds->ds_objset = NULL; 882 } 883 884 /* Erase the link in the dir */ 885 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx); 886 dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0; 887 ddobj = ds->ds_dir->dd_object; 888 ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0); 889 VERIFY0(zap_destroy(mos, 890 dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx)); 891 892 if (ds->ds_bookmarks != 0) { 893 VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx)); 894 spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx); 895 } 896 897 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx); 898 899 ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj); 900 ASSERT0(dsl_dataset_phys(ds)->ds_props_obj); 901 ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj); 902 dsl_dir_rele(ds->ds_dir, ds); 903 ds->ds_dir = NULL; 904 dmu_object_free_zapified(mos, obj, tx); 905 906 dsl_dir_destroy_sync(ddobj, tx); 907 908 if (rmorigin) { 909 dsl_dataset_t *prev; 910 VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev)); 911 dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx); 912 dsl_dataset_rele(prev, FTAG); 913 } 914 } 915 916 void 917 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx) 918 { 919 dsl_destroy_head_arg_t *ddha = arg; 920 dsl_pool_t *dp = dmu_tx_pool(tx); 921 dsl_dataset_t *ds; 922 923 VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); 924 dsl_destroy_head_sync_impl(ds, tx); 925 dsl_dataset_rele(ds, FTAG); 926 } 927 928 static void 929 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx) 930 { 931 dsl_destroy_head_arg_t *ddha = arg; 932 dsl_pool_t *dp = dmu_tx_pool(tx); 933 dsl_dataset_t *ds; 934 935 VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds)); 936 937 /* Mark it as inconsistent on-disk, in case we crash */ 938 dmu_buf_will_dirty(ds->ds_dbuf, tx); 939 dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT; 940 941 spa_history_log_internal_ds(ds, "destroy begin", tx, ""); 942 dsl_dataset_rele(ds, FTAG); 943 } 944 945 int 946 dsl_destroy_head(const char *name) 947 { 948 dsl_destroy_head_arg_t ddha; 949 int error; 950 spa_t *spa; 951 boolean_t isenabled; 952 953 #ifdef _KERNEL 954 zfs_destroy_unmount_origin(name); 955 #endif 956 957 error = spa_open(name, &spa, FTAG); 958 if (error != 0) 959 return (error); 960 isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY); 961 spa_close(spa, FTAG); 962 963 ddha.ddha_name = name; 964 965 if (!isenabled) { 966 objset_t *os; 967 968 error = dsl_sync_task(name, dsl_destroy_head_check, 969 dsl_destroy_head_begin_sync, &ddha, 970 0, ZFS_SPACE_CHECK_NONE); 971 if (error != 0) 972 return (error); 973 974 /* 975 * Head deletion is processed in one txg on old pools; 976 * remove the objects from open context so that the txg sync 977 * is not too long. 978 */ 979 error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os); 980 if (error == 0) { 981 uint64_t prev_snap_txg = 982 dsl_dataset_phys(dmu_objset_ds(os))-> 983 ds_prev_snap_txg; 984 for (uint64_t obj = 0; error == 0; 985 error = dmu_object_next(os, &obj, FALSE, 986 prev_snap_txg)) 987 (void) dmu_free_long_object(os, obj); 988 /* sync out all frees */ 989 txg_wait_synced(dmu_objset_pool(os), 0); 990 dmu_objset_disown(os, FTAG); 991 } 992 } 993 994 return (dsl_sync_task(name, dsl_destroy_head_check, 995 dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_NONE)); 996 } 997 998 /* 999 * Note, this function is used as the callback for dmu_objset_find(). We 1000 * always return 0 so that we will continue to find and process 1001 * inconsistent datasets, even if we encounter an error trying to 1002 * process one of them. 1003 */ 1004 /* ARGSUSED */ 1005 int 1006 dsl_destroy_inconsistent(const char *dsname, void *arg) 1007 { 1008 objset_t *os; 1009 1010 if (dmu_objset_hold(dsname, FTAG, &os) == 0) { 1011 boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os)); 1012 1013 /* 1014 * If the dataset is inconsistent because a resumable receive 1015 * has failed, then do not destroy it. 1016 */ 1017 if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os))) 1018 need_destroy = B_FALSE; 1019 1020 dmu_objset_rele(os, FTAG); 1021 if (need_destroy) 1022 (void) dsl_destroy_head(dsname); 1023 } 1024 return (0); 1025 } 1026