/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2013 by Joyent, Inc. All rights reserved.
 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
#include <sys/zvol.h>
#include <sys/zcp.h>
#include <sys/dsl_deadlist.h>
#include <sys/zthr.h>
#include <sys/spa_impl.h>

extern int zfs_snapshot_history_enabled;

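/*
 * Check whether a snapshot can be destroyed now.  Deferred destroy only
 * requires pool support (SPA_VERSION_USERREFS); an immediate destroy is
 * refused while the snapshot has user holds or is a branch point
 * (i.e. has clones), and either form is refused while it is long-held.
 */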
int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
	if (!ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (dsl_dataset_long_held(ds))
		return (SET_ERROR(EBUSY));

	/*
	 * Only allow deferred destroy on pools that support it.
	 * NOTE: deferred destroy is only supported on snapshots.
	 */
	if (defer) {
		if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS)
			return (SET_ERROR(ENOTSUP));
		return (0);
	}

	/*
	 * If this snapshot has an elevated user reference count,
	 * we can't destroy it yet.
	 */
	if (ds->ds_userrefs > 0)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete a branch point.
	 */
	if (dsl_dataset_phys(ds)->ds_num_children > 1)
		return (SET_ERROR(EEXIST));

	return (0);
}

int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_snapshot_arg_t *ddsa = arg;
	const char *dsname = ddsa->ddsa_name;
	boolean_t defer = ddsa->ddsa_defer;

	dsl_pool_t *dp = dmu_tx_pool(tx);
	int error = 0;
	dsl_dataset_t *ds;

	error = dsl_dataset_hold(dp, dsname, FTAG, &ds);

	/*
	 * If the snapshot does not exist, silently ignore it, and
	 * dsl_destroy_snapshot_sync() will be a no-op
	 * (it's "already destroyed").
	 */
	if (error == ENOENT)
		return (0);

	if (error == 0) {
		error = dsl_destroy_snapshot_check_impl(ds, defer);
		dsl_dataset_rele(ds, FTAG);
	}

	return (error);
}

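/*
 * Context for process_old_cb(), which walks the next snapshot's
 * old-format deadlist: blocks born at or before this snapshot's
 * previous snapshot stay dead (they move to our deadlist), while
 * blocks born after it were uniquely ours and can be freed now.
 */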
struct process_old_arg {
	dsl_dataset_t *ds;
	dsl_dataset_t *ds_prev;
	boolean_t after_branch_point;
	zio_t *pio;
	uint64_t used, comp, uncomp;
};

static int
process_old_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx)
{
	struct process_old_arg *poa = arg;
	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;

	ASSERT(!BP_IS_HOLE(bp));

	if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, bp_freed, tx);
		if (poa->ds_prev && !poa->after_branch_point &&
		    bp->blk_birth >
		    dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
			dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
			    bp_get_dsize_sync(dp->dp_spa, bp);
		}
	} else {
		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
		poa->comp += BP_GET_PSIZE(bp);
		poa->uncomp += BP_GET_UCSIZE(bp);
		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
	}
	return (0);
}

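/*
 * Distribute the blocks on ds_next's old-format deadlist between ds's
 * deadlist and the free list, then swap the two datasets' deadlist
 * objects so the right one is freed when ds itself goes away.
 */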
static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
	struct process_old_arg poa = { 0 };
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	uint64_t deadlist_obj;

	ASSERT(ds->ds_deadlist.dl_oldfmt);
	ASSERT(ds_next->ds_deadlist.dl_oldfmt);

	poa.ds = ds;
	poa.ds_prev = ds_prev;
	poa.after_branch_point = after_branch_point;
	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
	VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
	    process_old_cb, &poa, tx));
	VERIFY0(zio_wait(poa.pio));
	ASSERT3U(poa.used, ==, dsl_dataset_phys(ds)->ds_unique_bytes);

	/* change snapused */
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
	    -poa.used, -poa.comp, -poa.uncomp, tx);

	/* swap next's deadlist to our deadlist */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_close(&ds_next->ds_deadlist);
	deadlist_obj = dsl_dataset_phys(ds)->ds_deadlist_obj;
	dsl_dataset_phys(ds)->ds_deadlist_obj =
	    dsl_dataset_phys(ds_next)->ds_deadlist_obj;
	dsl_dataset_phys(ds_next)->ds_deadlist_obj = deadlist_obj;
	dsl_deadlist_open(&ds->ds_deadlist, mos,
	    dsl_dataset_phys(ds)->ds_deadlist_obj);
	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
	    dsl_dataset_phys(ds_next)->ds_deadlist_obj);
}

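/*
 * An explicit stack entry used by dsl_dir_remove_clones_key() to walk
 * the clone tree iteratively instead of recursing, keeping kernel
 * stack usage bounded for deeply nested clones.
 */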
typedef struct remaining_clones_key {
	dsl_dataset_t *rck_clone;
	list_node_t rck_node;
} remaining_clones_key_t;

static remaining_clones_key_t *
rck_alloc(dsl_dataset_t *clone)
{
	remaining_clones_key_t *rck = kmem_alloc(sizeof (*rck), KM_SLEEP);
	rck->rck_clone = clone;
	return (rck);
}

static void
dsl_dir_remove_clones_key_impl(dsl_dir_t *dd, uint64_t mintxg, dmu_tx_t *tx,
    list_t *stack, const void *tag)
{
	objset_t *mos = dd->dd_pool->dp_meta_objset;

	/*
	 * If it is the old version, dd_clones doesn't exist so we can't
	 * find the clones, but dsl_deadlist_remove_key() is a no-op so it
	 * doesn't matter.
	 */
	if (dsl_dir_phys(dd)->dd_clones == 0)
		return;

	zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
	zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones);
	    zap_cursor_retrieve(zc, za) == 0;
	    zap_cursor_advance(zc)) {
		dsl_dataset_t *clone;

		VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
		    za->za_first_integer, tag, &clone));

		if (clone->ds_dir->dd_origin_txg > mintxg) {
			dsl_deadlist_remove_key(&clone->ds_deadlist,
			    mintxg, tx);

			if (dsl_dataset_remap_deadlist_exists(clone)) {
				dsl_deadlist_remove_key(
				    &clone->ds_remap_deadlist, mintxg, tx);
			}

			list_insert_head(stack, rck_alloc(clone));
		} else {
			dsl_dataset_rele(clone, tag);
		}
	}
	zap_cursor_fini(zc);

	kmem_free(za, sizeof (zap_attribute_t));
	kmem_free(zc, sizeof (zap_cursor_t));
}

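/*
 * Remove mintxg from the deadlists of every clone descended from
 * top_dd whose origin is later than mintxg, using the explicit stack
 * above rather than recursion.
 */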
void
dsl_dir_remove_clones_key(dsl_dir_t *top_dd, uint64_t mintxg, dmu_tx_t *tx)
{
	list_t stack;

	list_create(&stack, sizeof (remaining_clones_key_t),
	    offsetof(remaining_clones_key_t, rck_node));

	dsl_dir_remove_clones_key_impl(top_dd, mintxg, tx, &stack, FTAG);
	for (remaining_clones_key_t *rck = list_remove_head(&stack);
	    rck != NULL; rck = list_remove_head(&stack)) {
		dsl_dataset_t *clone = rck->rck_clone;
		dsl_dir_t *clone_dir = clone->ds_dir;

		kmem_free(rck, sizeof (*rck));

		dsl_dir_remove_clones_key_impl(clone_dir, mintxg, tx,
		    &stack, FTAG);
		dsl_dataset_rele(clone, FTAG);
	}

	list_destroy(&stack);
}

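/*
 * Fold the remap deadlists (created by device removal) into the
 * snapshot destroy: obsolete what ds_next no longer needs, then merge
 * ds's remap deadlist into ds_next's.
 */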
static void
dsl_destroy_snapshot_handle_remaps(dsl_dataset_t *ds, dsl_dataset_t *ds_next,
    dmu_tx_t *tx)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	/* Move blocks to be obsoleted to pool's obsolete list. */
	if (dsl_dataset_remap_deadlist_exists(ds_next)) {
		if (!bpobj_is_open(&dp->dp_obsolete_bpobj))
			dsl_pool_create_obsolete_bpobj(dp, tx);

		dsl_deadlist_move_bpobj(&ds_next->ds_remap_deadlist,
		    &dp->dp_obsolete_bpobj,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
	}

	/* Merge our deadlist into next's and free it. */
	if (dsl_dataset_remap_deadlist_exists(ds)) {
		uint64_t remap_deadlist_object =
		    dsl_dataset_get_remap_deadlist_object(ds);
		ASSERT(remap_deadlist_object != 0);

		mutex_enter(&ds_next->ds_remap_deadlist_lock);
		if (!dsl_dataset_remap_deadlist_exists(ds_next))
			dsl_dataset_create_remap_deadlist(ds_next, tx);
		mutex_exit(&ds_next->ds_remap_deadlist_lock);

		dsl_deadlist_merge(&ds_next->ds_remap_deadlist,
		    remap_deadlist_object, tx);
		dsl_dataset_destroy_remap_deadlist(ds, tx);
	}
}

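/*
 * Synchronous-context guts of snapshot destruction.  If this is a
 * deferred destroy and the snapshot still has holds or clones, just
 * mark it DS_FLAG_DEFER_DESTROY; otherwise unlink it from the
 * snapshot list, settle its deadlists, and free its MOS objects.
 */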
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	ASSERT(zfs_refcount_is_zero(&ds->ds_longholds));

	if (defer &&
	    (ds->ds_userrefs > 0 ||
	    dsl_dataset_phys(ds)->ds_num_children > 1)) {
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_DEFER_DESTROY;
		if (zfs_snapshot_history_enabled) {
			spa_history_log_internal_ds(ds, "defer_destroy", tx,
			    " ");
		}
		return;
	}

	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);

	if (zfs_snapshot_history_enabled) {
		/* We need to log before removing it from the namespace. */
		spa_history_log_internal_ds(ds, "destroy", tx, " ");
	}

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	boolean_t book_exists = dsl_bookmark_ds_destroyed(ds, tx);

	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (dsl_dataset_feature_is_active(ds, f))
			dsl_dataset_deactivate_feature(ds, f, tx);
	}
	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		ASSERT3P(ds->ds_prev, ==, NULL);
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dataset_phys(ds)->ds_prev_snap_obj, FTAG, &ds_prev));
		after_branch_point =
		    (dsl_dataset_phys(ds_prev)->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    dsl_dataset_phys(ds_prev)->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
			if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0) {
				VERIFY0(zap_add_int(mos,
				    dsl_dataset_phys(ds_prev)->
				    ds_next_clones_obj,
				    dsl_dataset_phys(ds)->ds_next_snap_obj,
				    tx));
			}
		}
		if (!after_branch_point) {
			dsl_dataset_phys(ds_prev)->ds_next_snap_obj =
			    dsl_dataset_phys(ds)->ds_next_snap_obj;
		}
	}

	dsl_dataset_t *ds_next;
	uint64_t old_unique;
	uint64_t used = 0, comp = 0, uncomp = 0;

	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dataset_phys(ds)->ds_next_snap_obj, FTAG, &ds_next));
	ASSERT3U(dsl_dataset_phys(ds_next)->ds_prev_snap_obj, ==, obj);

	old_unique = dsl_dataset_phys(ds_next)->ds_unique_bytes;

	dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
	dsl_dataset_phys(ds_next)->ds_prev_snap_obj =
	    dsl_dataset_phys(ds)->ds_prev_snap_obj;
	dsl_dataset_phys(ds_next)->ds_prev_snap_txg =
	    dsl_dataset_phys(ds)->ds_prev_snap_txg;
	ASSERT3U(dsl_dataset_phys(ds)->ds_prev_snap_txg, ==,
	    ds_prev ? dsl_dataset_phys(ds_prev)->ds_creation_txg : 0);

	if (ds_next->ds_deadlist.dl_oldfmt) {
		process_old_deadlist(ds, ds_prev, ds_next,
		    after_branch_point, tx);
	} else {
		/* Adjust prev's unique space. */
		if (ds_prev && !after_branch_point) {
			dsl_deadlist_space_range(&ds_next->ds_deadlist,
			    dsl_dataset_phys(ds_prev)->ds_prev_snap_txg,
			    dsl_dataset_phys(ds)->ds_prev_snap_txg,
			    &used, &comp, &uncomp);
			dsl_dataset_phys(ds_prev)->ds_unique_bytes += used;
		}

		/* Adjust snapused. */
		dsl_deadlist_space_range(&ds_next->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg, UINT64_MAX,
		    &used, &comp, &uncomp);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -comp, -uncomp, tx);

		/* Move blocks to be freed to pool's free list. */
		dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
		    &dp->dp_free_bpobj, dsl_dataset_phys(ds)->ds_prev_snap_txg,
		    tx);
		dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
		    DD_USED_HEAD, used, comp, uncomp, tx);

		/* Merge our deadlist into next's and free it. */
		dsl_deadlist_merge(&ds_next->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_deadlist_obj, tx);

		/*
		 * We are done with the deadlist tree (generated/used
		 * by dsl_deadlist_move_bpobj() and dsl_deadlist_merge()).
		 * Discard it to save memory.
		 */
		dsl_deadlist_discard_tree(&ds_next->ds_deadlist);
	}

	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

	dsl_destroy_snapshot_handle_remaps(ds, ds_next, tx);

	if (!book_exists) {
		/* Collapse range in clone heads */
		dsl_dir_remove_clones_key(ds->ds_dir,
		    dsl_dataset_phys(ds)->ds_creation_txg, tx);
	}

	if (ds_next->ds_is_snapshot) {
		dsl_dataset_t *ds_nextnext;

		/*
		 * Update next's unique to include blocks which
		 * were previously shared by only this snapshot
		 * and it.  Those blocks will be born after the
		 * prev snap and before this snap, and will have
		 * died after the next snap and before the one
		 * after that (i.e. be on the snap after next's
		 * deadlist).
		 */
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dataset_phys(ds_next)->ds_next_snap_obj,
		    FTAG, &ds_nextnext));
		dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
		    dsl_dataset_phys(ds)->ds_prev_snap_txg,
		    dsl_dataset_phys(ds)->ds_creation_txg,
		    &used, &comp, &uncomp);
		dsl_dataset_phys(ds_next)->ds_unique_bytes += used;
		dsl_dataset_rele(ds_nextnext, FTAG);
		ASSERT3P(ds_next->ds_prev, ==, NULL);

		/* Collapse range in this head. */
		dsl_dataset_t *hds;
		VERIFY0(dsl_dataset_hold_obj(dp,
		    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj,
		    FTAG, &hds));
		if (!book_exists) {
			dsl_deadlist_remove_key(&hds->ds_deadlist,
			    dsl_dataset_phys(ds)->ds_creation_txg, tx);
		}
		if (dsl_dataset_remap_deadlist_exists(hds)) {
			dsl_deadlist_remove_key(&hds->ds_remap_deadlist,
			    dsl_dataset_phys(ds)->ds_creation_txg, tx);
		}
		dsl_dataset_rele(hds, FTAG);

	} else {
		ASSERT3P(ds_next->ds_prev, ==, ds);
		dsl_dataset_rele(ds_next->ds_prev, ds_next);
		ds_next->ds_prev = NULL;
		if (ds_prev) {
			VERIFY0(dsl_dataset_hold_obj(dp,
			    dsl_dataset_phys(ds)->ds_prev_snap_obj,
			    ds_next, &ds_next->ds_prev));
		}

		dsl_dataset_recalc_head_uniq(ds_next);

		/*
		 * Reduce the amount of our unconsumed refreservation
		 * being charged to our parent by the amount of
		 * new unique data we have gained.
		 */
		if (old_unique < ds_next->ds_reserved) {
			int64_t mrsdelta;
			uint64_t new_unique =
			    dsl_dataset_phys(ds_next)->ds_unique_bytes;

			ASSERT(old_unique <= new_unique);
			mrsdelta = MIN(new_unique - old_unique,
			    ds_next->ds_reserved - old_unique);
			dsl_dir_diduse_space(ds->ds_dir,
			    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
		}
	}
	dsl_dataset_rele(ds_next, FTAG);

	/*
	 * This must be done after the dsl_traverse(), because it will
	 * re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* remove from snapshot namespace */
	dsl_dataset_t *ds_head;
	ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &ds_head));
	VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
	{
		uint64_t val;
		int err;

		err = dsl_dataset_snap_lookup(ds_head,
		    ds->ds_snapname, &val);
		ASSERT0(err);
		ASSERT3U(val, ==, obj);
	}
#endif
	VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx, B_TRUE));
	dsl_dataset_rele(ds_head, FTAG);

	if (ds_prev != NULL)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	if (dsl_dataset_phys(ds)->ds_next_clones_obj != 0) {
		uint64_t count __maybe_unused;
		ASSERT0(zap_count(mos,
		    dsl_dataset_phys(ds)->ds_next_clones_obj, &count) &&
		    count == 0);
		VERIFY0(dmu_object_free(mos,
		    dsl_dataset_phys(ds)->ds_next_clones_obj, tx));
	}
	if (dsl_dataset_phys(ds)->ds_props_obj != 0)
		VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_props_obj,
		    tx));
	if (dsl_dataset_phys(ds)->ds_userrefs_obj != 0)
		VERIFY0(zap_destroy(mos, dsl_dataset_phys(ds)->ds_userrefs_obj,
		    tx));
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);
}

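/*
 * Sync-task apply function.  As in dsl_destroy_snapshot_check(), a
 * missing snapshot means "already destroyed", so ENOENT is ignored.
 */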
void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_snapshot_arg_t *ddsa = arg;
	const char *dsname = ddsa->ddsa_name;
	boolean_t defer = ddsa->ddsa_defer;

	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	int error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
	if (error == ENOENT)
		return;
	ASSERT0(error);
	dsl_destroy_snapshot_sync_impl(ds, defer, tx);
	zvol_remove_minors(dp->dp_spa, dsname, B_TRUE);
	dsl_dataset_rele(ds, FTAG);
}

/*
 * The semantics of this function are described in the comment above
 * lzc_destroy_snaps().  To summarize:
 *
 * The snapshots must all be in the same pool.
 *
 * Snapshots that don't exist will be silently ignored (considered to be
 * "already deleted").
 *
 * On success, all snaps will be destroyed and this will return 0.
 * On failure, no snaps will be destroyed, the errlist will be filled in,
 * and this will return an errno.
 */
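/*
 * A minimal calling sketch (hypothetical dataset names, error handling
 * omitted), using the same fnvlist helpers this file already uses:
 *
 *	nvlist_t *snaps = fnvlist_alloc();
 *	nvlist_t *errlist = fnvlist_alloc();
 *	fnvlist_add_boolean(snaps, "pool/fs@a");
 *	fnvlist_add_boolean(snaps, "pool/fs@b");
 *	int err = dsl_destroy_snapshots_nvl(snaps, B_FALSE, errlist);
 */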
int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
    nvlist_t *errlist)
{
	if (nvlist_next_nvpair(snaps, NULL) == NULL)
		return (0);

	/*
	 * lzc_destroy_snaps() is documented to take an nvlist whose
	 * values "don't matter".  We need to convert that nvlist to
	 * one that we know can be converted to Lua.
	 */
	nvlist_t *snaps_normalized = fnvlist_alloc();
	for (nvpair_t *pair = nvlist_next_nvpair(snaps, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(snaps, pair)) {
		fnvlist_add_boolean_value(snaps_normalized,
		    nvpair_name(pair), B_TRUE);
	}

	nvlist_t *arg = fnvlist_alloc();
	fnvlist_add_nvlist(arg, "snaps", snaps_normalized);
	fnvlist_free(snaps_normalized);
	fnvlist_add_boolean_value(arg, "defer", defer);

	nvlist_t *wrapper = fnvlist_alloc();
	fnvlist_add_nvlist(wrapper, ZCP_ARG_ARGLIST, arg);
	fnvlist_free(arg);

	const char *program =
	    "arg = ...\n"
	    "snaps = arg['snaps']\n"
	    "defer = arg['defer']\n"
	    "errors = { }\n"
	    "has_errors = false\n"
	    "for snap, v in pairs(snaps) do\n"
	    "    errno = zfs.check.destroy{snap, defer=defer}\n"
	    "    zfs.debug('snap: ' .. snap .. ' errno: ' .. errno)\n"
	    "    if errno == ENOENT then\n"
	    "        snaps[snap] = nil\n"
	    "    elseif errno ~= 0 then\n"
	    "        errors[snap] = errno\n"
	    "        has_errors = true\n"
	    "    end\n"
	    "end\n"
	    "if has_errors then\n"
	    "    return errors\n"
	    "end\n"
	    "for snap, v in pairs(snaps) do\n"
	    "    errno = zfs.sync.destroy{snap, defer=defer}\n"
	    "    assert(errno == 0)\n"
	    "end\n"
	    "return { }\n";

	nvlist_t *result = fnvlist_alloc();
	int error = zcp_eval(nvpair_name(nvlist_next_nvpair(snaps, NULL)),
	    program,
	    B_TRUE,
	    0,
	    zfs_lua_max_memlimit,
	    fnvlist_lookup_nvpair(wrapper, ZCP_ARG_ARGLIST), result);
	if (error != 0) {
		const char *errorstr = NULL;
		(void) nvlist_lookup_string(result, ZCP_RET_ERROR, &errorstr);
		if (errorstr != NULL) {
			zfs_dbgmsg("%s", errorstr);
		}
		fnvlist_free(wrapper);
		fnvlist_free(result);
		return (error);
	}
	fnvlist_free(wrapper);

	/*
	 * lzc_destroy_snaps() is documented to fill the errlist with
	 * int32 values, so we need to convert the int64 values that are
	 * returned from Lua.
	 */
	int rv = 0;
	nvlist_t *errlist_raw = fnvlist_lookup_nvlist(result, ZCP_RET_RETURN);
	for (nvpair_t *pair = nvlist_next_nvpair(errlist_raw, NULL);
	    pair != NULL; pair = nvlist_next_nvpair(errlist_raw, pair)) {
		int32_t val = (int32_t)fnvpair_value_int64(pair);
		if (rv == 0)
			rv = val;
		fnvlist_add_int32(errlist, nvpair_name(pair), val);
	}
	fnvlist_free(result);
	return (rv);
}

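/*
 * Convenience wrapper to destroy a single snapshot by name via
 * dsl_destroy_snapshots_nvl(); the per-snapshot errlist is discarded.
 */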
int
dsl_destroy_snapshot(const char *name, boolean_t defer)
{
	int error;
	nvlist_t *nvl = fnvlist_alloc();
	nvlist_t *errlist = fnvlist_alloc();

	fnvlist_add_boolean(nvl, name);
	error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
	fnvlist_free(errlist);
	fnvlist_free(nvl);
	return (error);
}

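/*
 * Callback state for kill_blkptr(), the traverse_dataset() callback
 * used by the old synchronous destroy path to free every block born
 * after the previous snapshot.
 */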
struct killarg {
	dsl_dataset_t *ds;
	dmu_tx_t *tx;
};

static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
	(void) spa, (void) dnp;
	struct killarg *ka = arg;
	dmu_tx_t *tx = ka->tx;

	if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
	    BP_IS_EMBEDDED(bp))
		return (0);

	if (zb->zb_level == ZB_ZIL_LEVEL) {
		ASSERT(zilog != NULL);
		/*
		 * It's a block in the intent log.  It has no
		 * accounting, so just free it.
		 */
		dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
	} else {
		ASSERT(zilog == NULL);
		ASSERT3U(bp->blk_birth, >,
		    dsl_dataset_phys(ka->ds)->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
	}

	return (0);
}

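/*
 * Legacy destroy for pools without SPA_FEATURE_ASYNC_DESTROY: walk the
 * dataset in this txg and free its blocks inline.
 */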
static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	struct killarg ka;

	spa_history_log_internal_ds(ds, "destroy", tx,
	    "(synchronous, mintxg=%llu)",
	    (long long)dsl_dataset_phys(ds)->ds_prev_snap_txg);

	/*
	 * Free everything that we point to (that's born after
	 * the previous snapshot, if we are a clone)
	 *
	 * NB: this should be very quick, because we already
	 * freed all the objects in open context.
	 */
	ka.ds = ds;
	ka.tx = tx;
	VERIFY0(traverse_dataset(ds,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg, TRAVERSE_POST |
	    TRAVERSE_NO_DECRYPT, kill_blkptr, &ka));
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
	    dsl_dataset_phys(ds)->ds_unique_bytes == 0);
}

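/*
 * Check whether a head (non-snapshot) dataset can be destroyed: it must
 * have no snapshots of its own, no child filesystems, and exactly
 * expected_holds long holds.  If destroying it would also remove a
 * deferred-destroy origin snapshot, that origin must not be long-held.
 */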
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
	int error;
	uint64_t count;
	objset_t *mos;

	ASSERT(!ds->ds_is_snapshot);
	if (ds->ds_is_snapshot)
		return (SET_ERROR(EINVAL));

	if (zfs_refcount_count(&ds->ds_longholds) != expected_holds)
		return (SET_ERROR(EBUSY));

	ASSERT0(ds->ds_dir->dd_activity_waiters);

	mos = ds->ds_dir->dd_pool->dp_meta_objset;

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj == ds->ds_object)
		return (SET_ERROR(EBUSY));

	/*
	 * Can't delete if there are children of this fs.
	 */
	error = zap_count(mos,
	    dsl_dir_phys(ds->ds_dir)->dd_child_dir_zapobj, &count);
	if (error != 0)
		return (error);
	if (count != 0)
		return (SET_ERROR(EEXIST));

	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0) {
		/* We need to remove the origin snapshot as well. */
		if (!zfs_refcount_is_zero(&ds->ds_prev->ds_longholds))
			return (SET_ERROR(EBUSY));
	}
	return (0);
}

int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
	if (error != 0)
		return (error);

	error = dsl_destroy_head_check_impl(ds, 0);
	dsl_dataset_rele(ds, FTAG);
	return (error);
}

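/*
 * Free the dsl_dir once its head dataset is gone: drop its reservation
 * and accounting, destroy its crypto key and its child/props/clones/
 * delegation ZAPs, and unlink it from its parent.
 */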
static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	dd_used_t t;

	ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));

	VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));

	ASSERT0(dsl_dir_phys(dd)->dd_head_dataset_obj);

	/* Decrement the filesystem count for all parent filesystems. */
	if (dd->dd_parent != NULL)
		dsl_fs_ss_count_adjust(dd->dd_parent, -1,
		    DD_FIELD_FILESYSTEM_COUNT, tx);

	/*
	 * Remove our reservation. The impl() routine avoids setting the
	 * actual property, which would require the (already destroyed) ds.
	 */
	dsl_dir_set_reservation_sync_impl(dd, 0, tx);

	ASSERT0(dsl_dir_phys(dd)->dd_used_bytes);
	ASSERT0(dsl_dir_phys(dd)->dd_reserved);
	for (t = 0; t < DD_USED_NUM; t++)
		ASSERT0(dsl_dir_phys(dd)->dd_used_breakdown[t]);

	if (dd->dd_crypto_obj != 0) {
		dsl_crypto_key_destroy_sync(dd->dd_crypto_obj, tx);
		(void) spa_keystore_unload_wkey_impl(dp->dp_spa, dd->dd_object);
	}

	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_child_dir_zapobj, tx));
	VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_props_zapobj, tx));
	if (dsl_dir_phys(dd)->dd_clones != 0)
		VERIFY0(zap_destroy(mos, dsl_dir_phys(dd)->dd_clones, tx));
	VERIFY0(dsl_deleg_destroy(mos, dsl_dir_phys(dd)->dd_deleg_zapobj, tx));
	VERIFY0(zap_remove(mos,
	    dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
	    dd->dd_myname, tx));

	dsl_dir_rele(dd, FTAG);
	dmu_object_free_zapified(mos, ddobj, tx);
}

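/*
 * Sanity-check that a clone's livelist accounting matches the dsl_dir
 * totals before the livelist is handed to the async delete queue.
 */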
static void
dsl_clone_destroy_assert(dsl_dir_t *dd)
{
	uint64_t used, comp, uncomp;

	ASSERT(dsl_dir_is_clone(dd));
	dsl_deadlist_space(&dd->dd_livelist, &used, &comp, &uncomp);

	ASSERT3U(dsl_dir_phys(dd)->dd_used_bytes, ==, used);
	ASSERT3U(dsl_dir_phys(dd)->dd_compressed_bytes, ==, comp);
	/*
	 * Greater than or equal because we do not track embedded block
	 * pointers in the livelist.
	 */
	ASSERT3U(dsl_dir_phys(dd)->dd_uncompressed_bytes, >=, uncomp);

	ASSERT(list_is_empty(&dd->dd_pending_allocs.bpl_list));
	ASSERT(list_is_empty(&dd->dd_pending_frees.bpl_list));
}

/*
 * Start the delete process for a clone. Free its zil, verify the space usage
 * and queue the blkptrs for deletion by adding the livelist to the pool-wide
 * delete queue.
 */
static void
dsl_async_clone_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t zap_obj, to_delete, used, comp, uncomp;
	objset_t *os;
	dsl_dir_t *dd = ds->ds_dir;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	spa_t *spa = dmu_tx_pool(tx)->dp_spa;
	VERIFY0(dmu_objset_from_ds(ds, &os));

	uint64_t mintxg = 0;
	dsl_deadlist_entry_t *dle = dsl_deadlist_first(&dd->dd_livelist);
	if (dle != NULL)
		mintxg = dle->dle_mintxg;

	spa_history_log_internal_ds(ds, "destroy", tx,
	    "(livelist, mintxg=%llu)", (long long)mintxg);

	/* Check that the clone is in a correct state to be deleted */
	dsl_clone_destroy_assert(dd);

	/* Destroy the zil */
	zil_destroy_sync(dmu_objset_zil(os), tx);

	VERIFY0(zap_lookup(mos, dd->dd_object,
	    DD_FIELD_LIVELIST, sizeof (uint64_t), 1, &to_delete));
	/* Initialize deleted_clones entry to track livelists to cleanup */
	int error = zap_lookup(mos, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1, &zap_obj);
	if (error == ENOENT) {
		zap_obj = zap_create(mos, DMU_OTN_ZAP_METADATA,
		    DMU_OT_NONE, 0, tx);
		VERIFY0(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_DELETED_CLONES, sizeof (uint64_t), 1,
		    &(zap_obj), tx));
		spa->spa_livelists_to_delete = zap_obj;
	} else if (error != 0) {
		zfs_panic_recover("zfs: error %d was returned while looking "
		    "up DMU_POOL_DELETED_CLONES in the zap", error);
		return;
	}
	VERIFY0(zap_add_int(mos, zap_obj, to_delete, tx));

	/* Clone is no longer using space, now tracked by dp_free_dir */
	dsl_deadlist_space(&dd->dd_livelist, &used, &comp, &uncomp);
	dsl_dir_diduse_space(dd, DD_USED_HEAD,
	    -used, -comp, -dsl_dir_phys(dd)->dd_uncompressed_bytes,
	    tx);
	dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
	    used, comp, uncomp, tx);
	dsl_dir_remove_livelist(dd, tx, B_FALSE);
	zthr_wakeup(spa->spa_livelist_delete_zthr);
}

/*
 * Move the bptree into the pool's list of trees to clean up, update space
 * accounting information and destroy the zil.
 */
static void
dsl_async_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	uint64_t used, comp, uncomp;
	objset_t *os;

	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;

	spa_history_log_internal_ds(ds, "destroy", tx,
	    "(bptree, mintxg=%llu)",
	    (long long)dsl_dataset_phys(ds)->ds_prev_snap_txg);

	zil_destroy_sync(dmu_objset_zil(os), tx);

	if (!spa_feature_is_active(dp->dp_spa,
	    SPA_FEATURE_ASYNC_DESTROY)) {
		dsl_scan_t *scn = dp->dp_scan;
		spa_feature_incr(dp->dp_spa, SPA_FEATURE_ASYNC_DESTROY,
		    tx);
		dp->dp_bptree_obj = bptree_alloc(mos, tx);
		VERIFY0(zap_add(mos,
		    DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
		    &dp->dp_bptree_obj, tx));
		ASSERT(!scn->scn_async_destroying);
		scn->scn_async_destroying = B_TRUE;
	}

	used = dsl_dir_phys(ds->ds_dir)->dd_used_bytes;
	comp = dsl_dir_phys(ds->ds_dir)->dd_compressed_bytes;
	uncomp = dsl_dir_phys(ds->ds_dir)->dd_uncompressed_bytes;

	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
	    dsl_dataset_phys(ds)->ds_unique_bytes == used);

	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	bptree_add(mos, dp->dp_bptree_obj,
	    &dsl_dataset_phys(ds)->ds_bp,
	    dsl_dataset_phys(ds)->ds_prev_snap_txg,
	    used, comp, uncomp, tx);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
	    -used, -comp, -uncomp, tx);
	dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
	    used, comp, uncomp, tx);
}

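/*
 * Synchronous-context guts of head-dataset destruction.  Detach from
 * the origin (if a clone), dispose of the blocks via the livelist,
 * bptree, or legacy synchronous path, free bookmarks and MOS state,
 * and finally destroy the now-empty dsl_dir.
 */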
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	uint64_t obj, ddobj, prevobj = 0;
	boolean_t rmorigin;

	ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	dsl_dir_cancel_waiters(ds->ds_dir);

	rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
	    DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0);

	/* Remove our reservation. */
	if (ds->ds_reserved != 0) {
		dsl_dataset_set_refreservation_sync_impl(ds,
		    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
		    0, tx);
		ASSERT0(ds->ds_reserved);
	}

	obj = ds->ds_object;

	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
		if (dsl_dataset_feature_is_active(ds, f))
			dsl_dataset_deactivate_feature(ds, f, tx);
	}

	dsl_scan_ds_destroyed(ds, tx);

	if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
		/* This is a clone */
		ASSERT(ds->ds_prev != NULL);
		ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj, !=,
		    obj);
		ASSERT0(dsl_dataset_phys(ds)->ds_next_snap_obj);

		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
		if (dsl_dataset_phys(ds->ds_prev)->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds->ds_prev,
			    obj, tx);
		}

		ASSERT3U(dsl_dataset_phys(ds->ds_prev)->ds_num_children, >, 1);
		dsl_dataset_phys(ds->ds_prev)->ds_num_children--;
	}

	/*
	 * Destroy the deadlist. Unless it's a clone, the
	 * deadlist should be empty since the dataset has no snapshots.
	 * (If it's a clone, it's safe to ignore the deadlist contents
	 * since they are still referenced by the origin snapshot.)
	 */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, dsl_dataset_phys(ds)->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_deadlist_obj = 0;

	if (dsl_dataset_remap_deadlist_exists(ds))
		dsl_dataset_destroy_remap_deadlist(ds, tx);

	/*
	 * Each destroy is responsible for both destroying (enqueuing
	 * to be destroyed) the blkptrs comprising the dataset as well as
	 * those belonging to the zil.
	 */
	if (dsl_deadlist_is_open(&ds->ds_dir->dd_livelist)) {
		dsl_async_clone_destroy(ds, tx);
	} else if (spa_feature_is_enabled(dp->dp_spa,
	    SPA_FEATURE_ASYNC_DESTROY)) {
		dsl_async_dataset_destroy(ds, tx);
	} else {
		old_synchronous_dataset_destroy(ds, tx);
	}

	if (ds->ds_prev != NULL) {
		if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
			VERIFY0(zap_remove_int(mos,
			    dsl_dir_phys(ds->ds_prev->ds_dir)->dd_clones,
			    ds->ds_object, tx));
		}
		prevobj = ds->ds_prev->ds_object;
		dsl_dataset_rele(ds->ds_prev, ds);
		ds->ds_prev = NULL;
	}

	/*
	 * This must be done after the dsl_traverse(), because it will
	 * re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* Erase the link in the dir */
	dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
	dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj = 0;
	ddobj = ds->ds_dir->dd_object;
	ASSERT(dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0);
	VERIFY0(zap_destroy(mos,
	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));

	if (ds->ds_bookmarks_obj != 0) {
		void *cookie = NULL;
		dsl_bookmark_node_t *dbn;

		while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
		    NULL) {
			if (dbn->dbn_phys.zbm_redaction_obj != 0) {
				VERIFY0(dmu_object_free(mos,
				    dbn->dbn_phys.zbm_redaction_obj, tx));
				spa_feature_decr(dmu_objset_spa(mos),
				    SPA_FEATURE_REDACTION_BOOKMARKS, tx);
			}
			if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
				spa_feature_decr(dmu_objset_spa(mos),
				    SPA_FEATURE_BOOKMARK_WRITTEN, tx);
			}
			spa_strfree(dbn->dbn_name);
			mutex_destroy(&dbn->dbn_lock);
			kmem_free(dbn, sizeof (*dbn));
		}
		avl_destroy(&ds->ds_bookmarks);
		VERIFY0(zap_destroy(mos, ds->ds_bookmarks_obj, tx));
		spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
	}

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	ASSERT0(dsl_dataset_phys(ds)->ds_next_clones_obj);
	ASSERT0(dsl_dataset_phys(ds)->ds_props_obj);
	ASSERT0(dsl_dataset_phys(ds)->ds_userrefs_obj);
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	dmu_object_free_zapified(mos, obj, tx);

	dsl_dir_destroy_sync(ddobj, tx);

	if (rmorigin) {
		dsl_dataset_t *prev;
		VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
		dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
		dsl_dataset_rele(prev, FTAG);
	}
	/* Delete errlog. */
	if (spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_HEAD_ERRLOG))
		spa_delete_dataset_errlog(dp->dp_spa, ds->ds_object, tx);
}

void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
	dsl_destroy_head_sync_impl(ds, tx);
	zvol_remove_minors(dp->dp_spa, ddha->ddha_name, B_TRUE);
	dsl_dataset_rele(ds, FTAG);
}

static void
dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;

	VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));

	/* Mark it as inconsistent on-disk, in case we crash */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_log_internal_ds(ds, "destroy begin", tx, " ");
	dsl_dataset_rele(ds, FTAG);
}

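/*
 * Entry point for destroying a filesystem or volume.  On pools without
 * async destroy, first mark the dataset inconsistent and free its
 * objects from open context so the final sync task stays short.
 */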
int
dsl_destroy_head(const char *name)
{
	dsl_destroy_head_arg_t ddha;
	int error;
	spa_t *spa;
	boolean_t isenabled;

#ifdef _KERNEL
	zfs_destroy_unmount_origin(name);
#endif

	error = spa_open(name, &spa, FTAG);
	if (error != 0)
		return (error);
	isenabled = spa_feature_is_enabled(spa, SPA_FEATURE_ASYNC_DESTROY);
	spa_close(spa, FTAG);

	ddha.ddha_name = name;

	if (!isenabled) {
		objset_t *os;

		error = dsl_sync_task(name, dsl_destroy_head_check,
		    dsl_destroy_head_begin_sync, &ddha,
		    0, ZFS_SPACE_CHECK_DESTROY);
		if (error != 0)
			return (error);

		/*
		 * Head deletion is processed in one txg on old pools;
		 * remove the objects from open context so that the txg sync
		 * is not too long. This optimization can only work for
		 * encrypted datasets if the wrapping key is loaded.
		 */
		error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, B_TRUE,
		    FTAG, &os);
		if (error == 0) {
			uint64_t prev_snap_txg =
			    dsl_dataset_phys(dmu_objset_ds(os))->
			    ds_prev_snap_txg;
			for (uint64_t obj = 0; error == 0;
			    error = dmu_object_next(os, &obj, FALSE,
			    prev_snap_txg))
				(void) dmu_free_long_object(os, obj);
			/* sync out all frees */
			txg_wait_synced(dmu_objset_pool(os), 0);
			dmu_objset_disown(os, B_TRUE, FTAG);
		}
	}

	return (dsl_sync_task(name, dsl_destroy_head_check,
	    dsl_destroy_head_sync, &ddha, 0, ZFS_SPACE_CHECK_DESTROY));
}

/*
 * Note, this function is used as the callback for dmu_objset_find().  We
 * always return 0 so that we will continue to find and process
 * inconsistent datasets, even if we encounter an error trying to
 * process one of them.
 */
int
dsl_destroy_inconsistent(const char *dsname, void *arg)
{
	(void) arg;
	objset_t *os;

	if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
		boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));

		/*
		 * If the dataset is inconsistent because a resumable receive
		 * has failed, then do not destroy it.
		 */
		if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
			need_destroy = B_FALSE;

		dmu_objset_rele(os, FTAG);
		if (need_destroy)
			(void) dsl_destroy_head(dsname);
	}
	return (0);
}


#if defined(_KERNEL)
EXPORT_SYMBOL(dsl_destroy_head);
EXPORT_SYMBOL(dsl_destroy_head_sync_impl);
EXPORT_SYMBOL(dsl_dataset_user_hold_check_one);
EXPORT_SYMBOL(dsl_destroy_snapshot_sync_impl);
EXPORT_SYMBOL(dsl_destroy_inconsistent);
EXPORT_SYMBOL(dsl_dataset_user_release_tmp);
EXPORT_SYMBOL(dsl_destroy_head_check_impl);
#endif