xref: /titanic_51/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision cd37da7426f0c49c14ad9a8a07638ca971477566)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/dmu_objset.h>
29 #include <sys/dsl_dataset.h>
30 #include <sys/dsl_dir.h>
31 #include <sys/dsl_prop.h>
32 #include <sys/dsl_synctask.h>
33 #include <sys/dmu_traverse.h>
34 #include <sys/dmu_tx.h>
35 #include <sys/arc.h>
36 #include <sys/zio.h>
37 #include <sys/zap.h>
38 #include <sys/unique.h>
39 #include <sys/zfs_context.h>
40 #include <sys/zfs_ioctl.h>
41 #include <sys/spa.h>
42 #include <sys/sunddi.h>
43 
44 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
45 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
46 static dsl_checkfunc_t dsl_dataset_rollback_check;
47 static dsl_syncfunc_t dsl_dataset_rollback_sync;
48 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
49 
50 #define	DS_REF_MAX	(1ULL << 62)
51 
52 #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
53 
54 /*
55  * We use weighted reference counts to express the various forms of exclusion
56  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
57  * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
58  * This makes the exclusion logic simple: the total refcnt for all opens cannot
59  * exceed DS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
60  * weight (DS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
61  * just over half of the refcnt space, so there can't be more than one, but it
62  * can peacefully coexist with any number of STANDARD opens.
63  */
64 static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
65 	0,			/* DS_MODE_NONE - invalid		*/
66 	1,			/* DS_MODE_STANDARD - unlimited number	*/
67 	(DS_REF_MAX >> 1) + 1,	/* DS_MODE_PRIMARY - only one of these	*/
68 	DS_REF_MAX		/* DS_MODE_EXCLUSIVE - no other opens	*/
69 };
70 
71 /*
72  * Figure out how much of this delta should be propogated to the dsl_dir
73  * layer.  If there's a refreservation, that space has already been
74  * partially accounted for in our ancestors.
75  */
76 static int64_t
77 parent_delta(dsl_dataset_t *ds, int64_t delta)
78 {
79 	uint64_t old_bytes, new_bytes;
80 
81 	if (ds->ds_reserved == 0)
82 		return (delta);
83 
84 	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
85 	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
86 
87 	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
88 	return (new_bytes - old_bytes);
89 }
90 
91 void
92 dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
93 {
94 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
95 	int compressed = BP_GET_PSIZE(bp);
96 	int uncompressed = BP_GET_UCSIZE(bp);
97 	int64_t delta;
98 
99 	dprintf_bp(bp, "born, ds=%p\n", ds);
100 
101 	ASSERT(dmu_tx_is_syncing(tx));
102 	/* It could have been compressed away to nothing */
103 	if (BP_IS_HOLE(bp))
104 		return;
105 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
106 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
107 	if (ds == NULL) {
108 		/*
109 		 * Account for the meta-objset space in its placeholder
110 		 * dsl_dir.
111 		 */
112 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
113 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
114 		    used, compressed, uncompressed, tx);
115 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
116 		return;
117 	}
118 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
119 	mutex_enter(&ds->ds_lock);
120 	delta = parent_delta(ds, used);
121 	ds->ds_phys->ds_used_bytes += used;
122 	ds->ds_phys->ds_compressed_bytes += compressed;
123 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
124 	ds->ds_phys->ds_unique_bytes += used;
125 	mutex_exit(&ds->ds_lock);
126 	dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx);
127 }
128 
129 void
130 dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
131     dmu_tx_t *tx)
132 {
133 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
134 	int compressed = BP_GET_PSIZE(bp);
135 	int uncompressed = BP_GET_UCSIZE(bp);
136 
137 	ASSERT(dmu_tx_is_syncing(tx));
138 	/* No block pointer => nothing to free */
139 	if (BP_IS_HOLE(bp))
140 		return;
141 
142 	ASSERT(used > 0);
143 	if (ds == NULL) {
144 		int err;
145 		/*
146 		 * Account for the meta-objset space in its placeholder
147 		 * dataset.
148 		 */
149 		err = arc_free(pio, tx->tx_pool->dp_spa,
150 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
151 		ASSERT(err == 0);
152 
153 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
154 		    -used, -compressed, -uncompressed, tx);
155 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
156 		return;
157 	}
158 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
159 
160 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
161 
162 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
163 		int err;
164 		int64_t delta;
165 
166 		dprintf_bp(bp, "freeing: %s", "");
167 		err = arc_free(pio, tx->tx_pool->dp_spa,
168 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
169 		ASSERT(err == 0);
170 
171 		mutex_enter(&ds->ds_lock);
172 		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
173 		    !DS_UNIQUE_IS_ACCURATE(ds));
174 		delta = parent_delta(ds, -used);
175 		ds->ds_phys->ds_unique_bytes -= used;
176 		mutex_exit(&ds->ds_lock);
177 		dsl_dir_diduse_space(ds->ds_dir,
178 		    delta, -compressed, -uncompressed, tx);
179 	} else {
180 		dprintf_bp(bp, "putting on dead list: %s", "");
181 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
182 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
183 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
184 			ASSERT3U(ds->ds_prev->ds_object, ==,
185 			    ds->ds_phys->ds_prev_snap_obj);
186 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
187 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
188 			    ds->ds_object && bp->blk_birth >
189 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
190 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
191 				mutex_enter(&ds->ds_prev->ds_lock);
192 				ds->ds_prev->ds_phys->ds_unique_bytes +=
193 				    used;
194 				mutex_exit(&ds->ds_prev->ds_lock);
195 			}
196 		}
197 	}
198 	mutex_enter(&ds->ds_lock);
199 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
200 	ds->ds_phys->ds_used_bytes -= used;
201 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
202 	ds->ds_phys->ds_compressed_bytes -= compressed;
203 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
204 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
205 	mutex_exit(&ds->ds_lock);
206 }
207 
208 uint64_t
209 dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
210 {
211 	uint64_t trysnap = 0;
212 
213 	if (ds == NULL)
214 		return (0);
215 	/*
216 	 * The snapshot creation could fail, but that would cause an
217 	 * incorrect FALSE return, which would only result in an
218 	 * overestimation of the amount of space that an operation would
219 	 * consume, which is OK.
220 	 *
221 	 * There's also a small window where we could miss a pending
222 	 * snapshot, because we could set the sync task in the quiescing
223 	 * phase.  So this should only be used as a guess.
224 	 */
225 	if (ds->ds_trysnap_txg >
226 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
227 		trysnap = ds->ds_trysnap_txg;
228 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
229 }
230 
231 int
232 dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
233 {
234 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
235 }
236 
237 /* ARGSUSED */
238 static void
239 dsl_dataset_evict(dmu_buf_t *db, void *dsv)
240 {
241 	dsl_dataset_t *ds = dsv;
242 
243 	/* open_refcount == DS_REF_MAX when deleting */
244 	ASSERT(ds->ds_open_refcount == 0 ||
245 	    ds->ds_open_refcount == DS_REF_MAX);
246 
247 	dprintf_ds(ds, "evicting %s\n", "");
248 
249 	unique_remove(ds->ds_fsid_guid);
250 
251 	if (ds->ds_user_ptr != NULL)
252 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
253 
254 	if (ds->ds_prev) {
255 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
256 		ds->ds_prev = NULL;
257 	}
258 
259 	bplist_close(&ds->ds_deadlist);
260 	dsl_dir_close(ds->ds_dir, ds);
261 
262 	ASSERT(!list_link_active(&ds->ds_synced_link));
263 
264 	mutex_destroy(&ds->ds_lock);
265 	mutex_destroy(&ds->ds_opening_lock);
266 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
267 
268 	kmem_free(ds, sizeof (dsl_dataset_t));
269 }
270 
271 static int
272 dsl_dataset_get_snapname(dsl_dataset_t *ds)
273 {
274 	dsl_dataset_phys_t *headphys;
275 	int err;
276 	dmu_buf_t *headdbuf;
277 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
278 	objset_t *mos = dp->dp_meta_objset;
279 
280 	if (ds->ds_snapname[0])
281 		return (0);
282 	if (ds->ds_phys->ds_next_snap_obj == 0)
283 		return (0);
284 
285 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
286 	    FTAG, &headdbuf);
287 	if (err)
288 		return (err);
289 	headphys = headdbuf->db_data;
290 	err = zap_value_search(dp->dp_meta_objset,
291 	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
292 	dmu_buf_rele(headdbuf, FTAG);
293 	return (err);
294 }
295 
296 int
297 dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
298     int mode, void *tag, dsl_dataset_t **dsp)
299 {
300 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
301 	objset_t *mos = dp->dp_meta_objset;
302 	dmu_buf_t *dbuf;
303 	dsl_dataset_t *ds;
304 	int err;
305 
306 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
307 	    dsl_pool_sync_context(dp));
308 
309 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
310 	if (err)
311 		return (err);
312 	ds = dmu_buf_get_user(dbuf);
313 	if (ds == NULL) {
314 		dsl_dataset_t *winner;
315 
316 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
317 		ds->ds_dbuf = dbuf;
318 		ds->ds_object = dsobj;
319 		ds->ds_phys = dbuf->db_data;
320 
321 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
322 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
323 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
324 		    NULL);
325 
326 		err = bplist_open(&ds->ds_deadlist,
327 		    mos, ds->ds_phys->ds_deadlist_obj);
328 		if (err == 0) {
329 			err = dsl_dir_open_obj(dp,
330 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
331 		}
332 		if (err) {
333 			/*
334 			 * we don't really need to close the blist if we
335 			 * just opened it.
336 			 */
337 			mutex_destroy(&ds->ds_lock);
338 			mutex_destroy(&ds->ds_opening_lock);
339 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
340 			kmem_free(ds, sizeof (dsl_dataset_t));
341 			dmu_buf_rele(dbuf, tag);
342 			return (err);
343 		}
344 
345 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
346 			ds->ds_snapname[0] = '\0';
347 			if (ds->ds_phys->ds_prev_snap_obj) {
348 				err = dsl_dataset_open_obj(dp,
349 				    ds->ds_phys->ds_prev_snap_obj, NULL,
350 				    DS_MODE_NONE, ds, &ds->ds_prev);
351 			}
352 		} else {
353 			if (snapname) {
354 #ifdef ZFS_DEBUG
355 				dsl_dataset_phys_t *headphys;
356 				dmu_buf_t *headdbuf;
357 				err = dmu_bonus_hold(mos,
358 				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
359 				    FTAG, &headdbuf);
360 				if (err == 0) {
361 					headphys = headdbuf->db_data;
362 					uint64_t foundobj;
363 					err = zap_lookup(dp->dp_meta_objset,
364 					    headphys->ds_snapnames_zapobj,
365 					    snapname, sizeof (foundobj), 1,
366 					    &foundobj);
367 					ASSERT3U(foundobj, ==, dsobj);
368 					dmu_buf_rele(headdbuf, FTAG);
369 				}
370 #endif
371 				(void) strcat(ds->ds_snapname, snapname);
372 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
373 				err = dsl_dataset_get_snapname(ds);
374 			}
375 		}
376 
377 		if (!dsl_dataset_is_snapshot(ds)) {
378 			/*
379 			 * In sync context, we're called with either no lock
380 			 * or with the write lock.  If we're not syncing,
381 			 * we're always called with the read lock held.
382 			 */
383 			boolean_t need_lock =
384 			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
385 			    dsl_pool_sync_context(dp);
386 
387 			if (need_lock)
388 				rw_enter(&dp->dp_config_rwlock, RW_READER);
389 
390 			err = dsl_prop_get_ds_locked(ds->ds_dir,
391 			    "refreservation", sizeof (uint64_t), 1,
392 			    &ds->ds_reserved, NULL);
393 			if (err == 0) {
394 				err = dsl_prop_get_ds_locked(ds->ds_dir,
395 				    "refquota", sizeof (uint64_t), 1,
396 				    &ds->ds_quota, NULL);
397 			}
398 
399 			if (need_lock)
400 				rw_exit(&dp->dp_config_rwlock);
401 		} else {
402 			ds->ds_reserved = ds->ds_quota = 0;
403 		}
404 
405 		if (err == 0) {
406 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
407 			    dsl_dataset_evict);
408 		}
409 		if (err || winner) {
410 			bplist_close(&ds->ds_deadlist);
411 			if (ds->ds_prev) {
412 				dsl_dataset_close(ds->ds_prev,
413 				    DS_MODE_NONE, ds);
414 			}
415 			dsl_dir_close(ds->ds_dir, ds);
416 			mutex_destroy(&ds->ds_lock);
417 			mutex_destroy(&ds->ds_opening_lock);
418 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
419 			kmem_free(ds, sizeof (dsl_dataset_t));
420 			if (err) {
421 				dmu_buf_rele(dbuf, tag);
422 				return (err);
423 			}
424 			ds = winner;
425 		} else {
426 			ds->ds_fsid_guid =
427 			    unique_insert(ds->ds_phys->ds_fsid_guid);
428 		}
429 	}
430 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
431 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
432 
433 	mutex_enter(&ds->ds_lock);
434 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
435 	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
436 	    !DS_MODE_IS_INCONSISTENT(mode)) ||
437 	    (ds->ds_open_refcount + weight > DS_REF_MAX)) {
438 		mutex_exit(&ds->ds_lock);
439 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
440 		return (EBUSY);
441 	}
442 	ds->ds_open_refcount += weight;
443 	mutex_exit(&ds->ds_lock);
444 
445 	*dsp = ds;
446 	return (0);
447 }
448 
449 int
450 dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
451     void *tag, dsl_dataset_t **dsp)
452 {
453 	dsl_dir_t *dd;
454 	dsl_pool_t *dp;
455 	const char *tail;
456 	uint64_t obj;
457 	dsl_dataset_t *ds = NULL;
458 	int err = 0;
459 
460 	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
461 	if (err)
462 		return (err);
463 
464 	dp = dd->dd_pool;
465 	obj = dd->dd_phys->dd_head_dataset_obj;
466 	rw_enter(&dp->dp_config_rwlock, RW_READER);
467 	if (obj == 0) {
468 		/* A dataset with no associated objset */
469 		err = ENOENT;
470 		goto out;
471 	}
472 
473 	if (tail != NULL) {
474 		objset_t *mos = dp->dp_meta_objset;
475 
476 		err = dsl_dataset_open_obj(dp, obj, NULL,
477 		    DS_MODE_NONE, tag, &ds);
478 		if (err)
479 			goto out;
480 		obj = ds->ds_phys->ds_snapnames_zapobj;
481 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
482 		ds = NULL;
483 
484 		if (tail[0] != '@') {
485 			err = ENOENT;
486 			goto out;
487 		}
488 		tail++;
489 
490 		/* Look for a snapshot */
491 		if (!DS_MODE_IS_READONLY(mode)) {
492 			err = EROFS;
493 			goto out;
494 		}
495 		dprintf("looking for snapshot '%s'\n", tail);
496 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
497 		if (err)
498 			goto out;
499 	}
500 	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
501 
502 out:
503 	rw_exit(&dp->dp_config_rwlock);
504 	dsl_dir_close(dd, FTAG);
505 
506 	ASSERT3U((err == 0), ==, (ds != NULL));
507 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
508 
509 	*dsp = ds;
510 	return (err);
511 }
512 
513 int
514 dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
515 {
516 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
517 }
518 
519 void
520 dsl_dataset_name(dsl_dataset_t *ds, char *name)
521 {
522 	if (ds == NULL) {
523 		(void) strcpy(name, "mos");
524 	} else {
525 		dsl_dir_name(ds->ds_dir, name);
526 		VERIFY(0 == dsl_dataset_get_snapname(ds));
527 		if (ds->ds_snapname[0]) {
528 			(void) strcat(name, "@");
529 			if (!MUTEX_HELD(&ds->ds_lock)) {
530 				/*
531 				 * We use a "recursive" mutex so that we
532 				 * can call dprintf_ds() with ds_lock held.
533 				 */
534 				mutex_enter(&ds->ds_lock);
535 				(void) strcat(name, ds->ds_snapname);
536 				mutex_exit(&ds->ds_lock);
537 			} else {
538 				(void) strcat(name, ds->ds_snapname);
539 			}
540 		}
541 	}
542 }
543 
544 static int
545 dsl_dataset_namelen(dsl_dataset_t *ds)
546 {
547 	int result;
548 
549 	if (ds == NULL) {
550 		result = 3;	/* "mos" */
551 	} else {
552 		result = dsl_dir_namelen(ds->ds_dir);
553 		VERIFY(0 == dsl_dataset_get_snapname(ds));
554 		if (ds->ds_snapname[0]) {
555 			++result;	/* adding one for the @-sign */
556 			if (!MUTEX_HELD(&ds->ds_lock)) {
557 				/* see dsl_datset_name */
558 				mutex_enter(&ds->ds_lock);
559 				result += strlen(ds->ds_snapname);
560 				mutex_exit(&ds->ds_lock);
561 			} else {
562 				result += strlen(ds->ds_snapname);
563 			}
564 		}
565 	}
566 
567 	return (result);
568 }
569 
570 void
571 dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
572 {
573 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
574 	mutex_enter(&ds->ds_lock);
575 	ASSERT3U(ds->ds_open_refcount, >=, weight);
576 	ds->ds_open_refcount -= weight;
577 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
578 	    mode, ds->ds_open_refcount);
579 	mutex_exit(&ds->ds_lock);
580 
581 	dmu_buf_rele(ds->ds_dbuf, tag);
582 }
583 
584 void
585 dsl_dataset_downgrade(dsl_dataset_t *ds, int oldmode, int newmode)
586 {
587 	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
588 	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
589 	mutex_enter(&ds->ds_lock);
590 	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
591 	ASSERT3U(oldweight, >=, newweight);
592 	ds->ds_open_refcount -= oldweight;
593 	ds->ds_open_refcount += newweight;
594 	mutex_exit(&ds->ds_lock);
595 }
596 
597 boolean_t
598 dsl_dataset_tryupgrade(dsl_dataset_t *ds, int oldmode, int newmode)
599 {
600 	boolean_t rv;
601 	uint64_t oldweight = ds_refcnt_weight[DS_MODE_LEVEL(oldmode)];
602 	uint64_t newweight = ds_refcnt_weight[DS_MODE_LEVEL(newmode)];
603 	mutex_enter(&ds->ds_lock);
604 	ASSERT3U(ds->ds_open_refcount, >=, oldweight);
605 	ASSERT3U(newweight, >=, oldweight);
606 	if (ds->ds_open_refcount - oldweight + newweight > DS_REF_MAX) {
607 		rv = B_FALSE;
608 	} else {
609 		ds->ds_open_refcount -= oldweight;
610 		ds->ds_open_refcount += newweight;
611 		rv = B_TRUE;
612 	}
613 	mutex_exit(&ds->ds_lock);
614 	return (rv);
615 }
616 
617 void
618 dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
619 {
620 	objset_t *mos = dp->dp_meta_objset;
621 	dmu_buf_t *dbuf;
622 	dsl_dataset_phys_t *dsphys;
623 	dsl_dataset_t *ds;
624 	uint64_t dsobj;
625 	dsl_dir_t *dd;
626 
627 	dsl_dir_create_root(mos, ddobjp, tx);
628 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
629 
630 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
631 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
632 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
633 	dmu_buf_will_dirty(dbuf, tx);
634 	dsphys = dbuf->db_data;
635 	dsphys->ds_dir_obj = dd->dd_object;
636 	dsphys->ds_fsid_guid = unique_create();
637 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
638 	    sizeof (dsphys->ds_guid));
639 	dsphys->ds_snapnames_zapobj =
640 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
641 	dsphys->ds_creation_time = gethrestime_sec();
642 	dsphys->ds_creation_txg = tx->tx_txg;
643 	dsphys->ds_deadlist_obj =
644 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
645 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
646 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
647 	dmu_buf_rele(dbuf, FTAG);
648 
649 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
650 	dd->dd_phys->dd_head_dataset_obj = dsobj;
651 	dsl_dir_close(dd, FTAG);
652 
653 	VERIFY(0 ==
654 	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
655 	(void) dmu_objset_create_impl(dp->dp_spa, ds,
656 	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
657 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
658 }
659 
660 uint64_t
661 dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin, dmu_tx_t *tx)
662 {
663 	dsl_pool_t *dp = dd->dd_pool;
664 	dmu_buf_t *dbuf;
665 	dsl_dataset_phys_t *dsphys;
666 	uint64_t dsobj;
667 	objset_t *mos = dp->dp_meta_objset;
668 
669 	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
670 	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
671 	ASSERT(dmu_tx_is_syncing(tx));
672 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
673 
674 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
675 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
676 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
677 	dmu_buf_will_dirty(dbuf, tx);
678 	dsphys = dbuf->db_data;
679 	dsphys->ds_dir_obj = dd->dd_object;
680 	dsphys->ds_fsid_guid = unique_create();
681 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
682 	    sizeof (dsphys->ds_guid));
683 	dsphys->ds_snapnames_zapobj =
684 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
685 	dsphys->ds_creation_time = gethrestime_sec();
686 	dsphys->ds_creation_txg = tx->tx_txg;
687 	dsphys->ds_deadlist_obj =
688 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
689 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
690 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
691 
692 	if (origin) {
693 		dsphys->ds_prev_snap_obj = origin->ds_object;
694 		dsphys->ds_prev_snap_txg =
695 		    origin->ds_phys->ds_creation_txg;
696 		dsphys->ds_used_bytes =
697 		    origin->ds_phys->ds_used_bytes;
698 		dsphys->ds_compressed_bytes =
699 		    origin->ds_phys->ds_compressed_bytes;
700 		dsphys->ds_uncompressed_bytes =
701 		    origin->ds_phys->ds_uncompressed_bytes;
702 		dsphys->ds_bp = origin->ds_phys->ds_bp;
703 
704 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
705 		origin->ds_phys->ds_num_children++;
706 
707 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
708 		dd->dd_phys->dd_origin_obj = origin->ds_object;
709 	}
710 	dmu_buf_rele(dbuf, FTAG);
711 
712 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
713 	dd->dd_phys->dd_head_dataset_obj = dsobj;
714 
715 	return (dsobj);
716 }
717 
718 uint64_t
719 dsl_dataset_create_sync(dsl_dir_t *pdd,
720     const char *lastname, dsl_dataset_t *origin, cred_t *cr, dmu_tx_t *tx)
721 {
722 	dsl_pool_t *dp = pdd->dd_pool;
723 	uint64_t dsobj, ddobj;
724 	dsl_dir_t *dd;
725 
726 	ASSERT(lastname[0] != '@');
727 
728 	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
729 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
730 
731 	dsobj = dsl_dataset_create_sync_impl(dd, origin, tx);
732 
733 	dsl_deleg_set_create_perms(dd, tx, cr);
734 
735 	dsl_dir_close(dd, FTAG);
736 
737 	return (dsobj);
738 }
739 
740 struct destroyarg {
741 	dsl_sync_task_group_t *dstg;
742 	char *snapname;
743 	char *failed;
744 };
745 
746 static int
747 dsl_snapshot_destroy_one(char *name, void *arg)
748 {
749 	struct destroyarg *da = arg;
750 	dsl_dataset_t *ds;
751 	char *cp;
752 	int err;
753 
754 	(void) strcat(name, "@");
755 	(void) strcat(name, da->snapname);
756 	err = dsl_dataset_open(name,
757 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
758 	    da->dstg, &ds);
759 	cp = strchr(name, '@');
760 	*cp = '\0';
761 	if (err == ENOENT)
762 		return (0);
763 	if (err) {
764 		(void) strcpy(da->failed, name);
765 		return (err);
766 	}
767 
768 	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
769 	    dsl_dataset_destroy_sync, ds, da->dstg, 0);
770 	return (0);
771 }
772 
773 /*
774  * Destroy 'snapname' in all descendants of 'fsname'.
775  */
776 #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
777 int
778 dsl_snapshots_destroy(char *fsname, char *snapname)
779 {
780 	int err;
781 	struct destroyarg da;
782 	dsl_sync_task_t *dst;
783 	spa_t *spa;
784 
785 	err = spa_open(fsname, &spa, FTAG);
786 	if (err)
787 		return (err);
788 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
789 	da.snapname = snapname;
790 	da.failed = fsname;
791 
792 	err = dmu_objset_find(fsname,
793 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
794 
795 	if (err == 0)
796 		err = dsl_sync_task_group_wait(da.dstg);
797 
798 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
799 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
800 		dsl_dataset_t *ds = dst->dst_arg1;
801 		if (dst->dst_err) {
802 			dsl_dataset_name(ds, fsname);
803 			*strchr(fsname, '@') = '\0';
804 		}
805 		/*
806 		 * If it was successful, destroy_sync would have
807 		 * closed the ds
808 		 */
809 		if (err)
810 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg);
811 	}
812 
813 	dsl_sync_task_group_destroy(da.dstg);
814 	spa_close(spa, FTAG);
815 	return (err);
816 }
817 
818 /*
819  * ds must be opened EXCLUSIVE or PRIMARY.  on return (whether
820  * successful or not), ds will be closed and caller can no longer
821  * dereference it.
822  */
823 int
824 dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
825 {
826 	int err;
827 	dsl_sync_task_group_t *dstg;
828 	objset_t *os;
829 	dsl_dir_t *dd;
830 	uint64_t obj;
831 
832 	if (ds->ds_open_refcount != DS_REF_MAX) {
833 		if (dsl_dataset_tryupgrade(ds, DS_MODE_PRIMARY,
834 		    DS_MODE_EXCLUSIVE) == 0) {
835 			dsl_dataset_close(ds, DS_MODE_PRIMARY, tag);
836 			return (EBUSY);
837 		}
838 	}
839 
840 	if (dsl_dataset_is_snapshot(ds)) {
841 		/* Destroying a snapshot is simpler */
842 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
843 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
844 		    ds, tag, 0);
845 		goto out;
846 	}
847 
848 	dd = ds->ds_dir;
849 
850 	/*
851 	 * Check for errors and mark this ds as inconsistent, in
852 	 * case we crash while freeing the objects.
853 	 */
854 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
855 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
856 	if (err)
857 		goto out;
858 
859 	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
860 	if (err)
861 		goto out;
862 
863 	/*
864 	 * remove the objects in open context, so that we won't
865 	 * have too much to do in syncing context.
866 	 */
867 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
868 	    ds->ds_phys->ds_prev_snap_txg)) {
869 		dmu_tx_t *tx = dmu_tx_create(os);
870 		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
871 		dmu_tx_hold_bonus(tx, obj);
872 		err = dmu_tx_assign(tx, TXG_WAIT);
873 		if (err) {
874 			/*
875 			 * Perhaps there is not enough disk
876 			 * space.  Just deal with it from
877 			 * dsl_dataset_destroy_sync().
878 			 */
879 			dmu_tx_abort(tx);
880 			continue;
881 		}
882 		VERIFY(0 == dmu_object_free(os, obj, tx));
883 		dmu_tx_commit(tx);
884 	}
885 	/* Make sure it's not dirty before we finish destroying it. */
886 	txg_wait_synced(dd->dd_pool, 0);
887 
888 	dmu_objset_close(os);
889 	if (err != ESRCH)
890 		goto out;
891 
892 	if (ds->ds_user_ptr) {
893 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
894 		ds->ds_user_ptr = NULL;
895 	}
896 
897 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
898 	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
899 	rw_exit(&dd->dd_pool->dp_config_rwlock);
900 
901 	if (err)
902 		goto out;
903 
904 	/*
905 	 * Blow away the dsl_dir + head dataset.
906 	 */
907 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
908 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
909 	    dsl_dataset_destroy_sync, ds, tag, 0);
910 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
911 	    dsl_dir_destroy_sync, dd, FTAG, 0);
912 	err = dsl_sync_task_group_wait(dstg);
913 	dsl_sync_task_group_destroy(dstg);
914 	/* if it is successful, *destroy_sync will close the ds+dd */
915 	if (err)
916 		dsl_dir_close(dd, FTAG);
917 out:
918 	if (err)
919 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
920 	return (err);
921 }
922 
923 int
924 dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
925 {
926 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
927 
928 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
929 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
930 	    ds, &ost, 0));
931 }
932 
933 void *
934 dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
935     void *p, dsl_dataset_evict_func_t func)
936 {
937 	void *old;
938 
939 	mutex_enter(&ds->ds_lock);
940 	old = ds->ds_user_ptr;
941 	if (old == NULL) {
942 		ds->ds_user_ptr = p;
943 		ds->ds_user_evict_func = func;
944 	}
945 	mutex_exit(&ds->ds_lock);
946 	return (old);
947 }
948 
949 void *
950 dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
951 {
952 	return (ds->ds_user_ptr);
953 }
954 
955 
956 blkptr_t *
957 dsl_dataset_get_blkptr(dsl_dataset_t *ds)
958 {
959 	return (&ds->ds_phys->ds_bp);
960 }
961 
962 void
963 dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
964 {
965 	ASSERT(dmu_tx_is_syncing(tx));
966 	/* If it's the meta-objset, set dp_meta_rootbp */
967 	if (ds == NULL) {
968 		tx->tx_pool->dp_meta_rootbp = *bp;
969 	} else {
970 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
971 		ds->ds_phys->ds_bp = *bp;
972 	}
973 }
974 
975 spa_t *
976 dsl_dataset_get_spa(dsl_dataset_t *ds)
977 {
978 	return (ds->ds_dir->dd_pool->dp_spa);
979 }
980 
981 void
982 dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
983 {
984 	dsl_pool_t *dp;
985 
986 	if (ds == NULL) /* this is the meta-objset */
987 		return;
988 
989 	ASSERT(ds->ds_user_ptr != NULL);
990 
991 	if (ds->ds_phys->ds_next_snap_obj != 0)
992 		panic("dirtying snapshot!");
993 
994 	dp = ds->ds_dir->dd_pool;
995 
996 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
997 		/* up the hold count until we can be written out */
998 		dmu_buf_add_ref(ds->ds_dbuf, ds);
999 	}
1000 }
1001 
1002 /*
1003  * The unique space in the head dataset can be calculated by subtracting
1004  * the space used in the most recent snapshot, that is still being used
1005  * in this file system, from the space currently in use.  To figure out
1006  * the space in the most recent snapshot still in use, we need to take
1007  * the total space used in the snapshot and subtract out the space that
1008  * has been freed up since the snapshot was taken.
1009  */
1010 static void
1011 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
1012 {
1013 	uint64_t mrs_used;
1014 	uint64_t dlused, dlcomp, dluncomp;
1015 
1016 	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);
1017 
1018 	if (ds->ds_phys->ds_prev_snap_obj != 0)
1019 		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
1020 	else
1021 		mrs_used = 0;
1022 
1023 	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
1024 	    &dluncomp));
1025 
1026 	ASSERT3U(dlused, <=, mrs_used);
1027 	ds->ds_phys->ds_unique_bytes =
1028 	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
1029 
1030 	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
1031 	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
1032 	    SPA_VERSION_UNIQUE_ACCURATE)
1033 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1034 }
1035 
1036 static uint64_t
1037 dsl_dataset_unique(dsl_dataset_t *ds)
1038 {
1039 	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
1040 		dsl_dataset_recalc_head_uniq(ds);
1041 
1042 	return (ds->ds_phys->ds_unique_bytes);
1043 }
1044 
/*
 * State shared with kill_blkptr() while blocks are being freed.  The three
 * counters are accumulated by the callback; the zio and tx are passed
 * through to arc_free().
 */
struct killarg {
	int64_t *usedp;		/* running sum of bp_get_dasize() */
	int64_t *compressedp;	/* running sum of BP_GET_PSIZE() */
	int64_t *uncompressedp;	/* running sum of BP_GET_UCSIZE() */
	zio_t *zio;		/* zio handed to arc_free() */
	dmu_tx_t *tx;		/* supplies tx_txg for arc_free() */
};
1052 
1053 static int
1054 kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
1055 {
1056 	struct killarg *ka = arg;
1057 	blkptr_t *bp = &bc->bc_blkptr;
1058 
1059 	ASSERT3U(bc->bc_errno, ==, 0);
1060 
1061 	/*
1062 	 * Since this callback is not called concurrently, no lock is
1063 	 * needed on the accounting values.
1064 	 */
1065 	*ka->usedp += bp_get_dasize(spa, bp);
1066 	*ka->compressedp += BP_GET_PSIZE(bp);
1067 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
1068 	/* XXX check for EIO? */
1069 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
1070 	    ARC_NOWAIT);
1071 	return (0);
1072 }
1073 
1074 /* ARGSUSED */
1075 static int
1076 dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
1077 {
1078 	dsl_dataset_t *ds = arg1;
1079 	dmu_objset_type_t *ost = arg2;
1080 
1081 	/*
1082 	 * We can only roll back to emptyness if it is a ZPL objset.
1083 	 */
1084 	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
1085 		return (EINVAL);
1086 
1087 	/*
1088 	 * This must not be a snapshot.
1089 	 */
1090 	if (ds->ds_phys->ds_next_snap_obj != 0)
1091 		return (EINVAL);
1092 
1093 	/*
1094 	 * If we made changes this txg, traverse_dsl_dataset won't find
1095 	 * them.  Try again.
1096 	 */
1097 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1098 		return (EAGAIN);
1099 
1100 	return (0);
1101 }
1102 
/*
 * Sync function for the rollback sync task.
 *
 * arg1 is the dataset being rolled back and arg2 points to the objset
 * type used if the objset has to be recreated.  The dataset's ZIL and
 * cached objset are torn down, its deadlist is replaced with an empty
 * one, the blocks found by the traversal are freed, and the dataset's
 * contents are then either copied from the previous snapshot or, when
 * there is none, reset to a freshly created empty objset.
 */
/* ARGSUSED */
static void
dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	dmu_objset_type_t *ost = arg2;
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;

	/* ds_phys is modified below, so dirty its buffer in this tx. */
	dmu_buf_will_dirty(ds->ds_dbuf, tx);

	/*
	 * Before the roll back destroy the zil.
	 */
	if (ds->ds_user_ptr != NULL) {
		zil_rollback_destroy(
		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);

		/*
		 * We need to make sure that the objset_impl_t is reopened after
		 * we do the rollback, otherwise it will have the wrong
		 * objset_phys_t.  Normally this would happen when this
		 * DS_MODE_EXCLUSIVE dataset-open is closed, thus causing the
		 * dataset to be immediately evicted.  But when doing "zfs recv
		 * -F", we reopen the objset before that, so that there is no
		 * window where the dataset is closed and inconsistent.
		 */
		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
		ds->ds_user_ptr = NULL;
	}

	/* Zero out the deadlist. */
	bplist_close(&ds->ds_deadlist);
	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	{
		/* Free blkptrs that we gave birth to */
		zio_t *zio;
		int64_t used = 0, compressed = 0, uncompressed = 0;
		struct killarg ka;
		int64_t delta;

		/* Root zio under which kill_blkptr() issues its frees. */
		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
		    ZIO_FLAG_MUSTSUCCEED);
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		/* NOTE(review): traversal and zio_wait results are ignored. */
		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		(void) zio_wait(zio);

		/* only deduct space beyond any refreservation */
		delta = parent_delta(ds, -used);
		dsl_dir_diduse_space(ds->ds_dir,
		    delta, -compressed, -uncompressed, tx);
	}

	if (ds->ds_prev) {
		/* Change our contents to that of the prev snapshot */
		ASSERT3U(ds->ds_prev->ds_object, ==,
		    ds->ds_phys->ds_prev_snap_obj);
		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
		ds->ds_phys->ds_used_bytes =
		    ds->ds_prev->ds_phys->ds_used_bytes;
		ds->ds_phys->ds_compressed_bytes =
		    ds->ds_prev->ds_phys->ds_compressed_bytes;
		ds->ds_phys->ds_uncompressed_bytes =
		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
		ds->ds_phys->ds_unique_bytes = 0;

		/*
		 * Only when we are prev's direct successor (its next
		 * snapshot pointer names us) is prev's unique count reset.
		 */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
		}
	} else {
		/* Zero out our contents, recreate objset */
		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
		ds->ds_phys->ds_used_bytes = 0;
		ds->ds_phys->ds_compressed_bytes = 0;
		ds->ds_phys->ds_uncompressed_bytes = 0;
		ds->ds_phys->ds_flags = 0;
		ds->ds_phys->ds_unique_bytes = 0;
		(void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
		    &ds->ds_phys->ds_bp, *ost, tx);
	}

	/* Record the rollback in the pool history. */
	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", ds->ds_object);
}
1198 
1199 /* ARGSUSED */
1200 static int
1201 dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
1202 {
1203 	dsl_dataset_t *ds = arg1;
1204 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1205 	uint64_t count;
1206 	int err;
1207 
1208 	/*
1209 	 * Can't delete a head dataset if there are snapshots of it.
1210 	 * (Except if the only snapshots are from the branch we cloned
1211 	 * from.)
1212 	 */
1213 	if (ds->ds_prev != NULL &&
1214 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1215 		return (EINVAL);
1216 
1217 	/*
1218 	 * This is really a dsl_dir thing, but check it here so that
1219 	 * we'll be less likely to leave this dataset inconsistent &
1220 	 * nearly destroyed.
1221 	 */
1222 	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
1223 	if (err)
1224 		return (err);
1225 	if (count != 0)
1226 		return (EEXIST);
1227 
1228 	return (0);
1229 }
1230 
1231 /* ARGSUSED */
1232 static void
1233 dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
1234 {
1235 	dsl_dataset_t *ds = arg1;
1236 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1237 
1238 	/* Mark it as inconsistent on-disk, in case we crash */
1239 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1240 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
1241 
1242 	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
1243 	    cr, "dataset = %llu", ds->ds_object);
1244 }
1245 
1246 /* ARGSUSED */
1247 int
1248 dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
1249 {
1250 	dsl_dataset_t *ds = arg1;
1251 
1252 	/* Can't delete a branch point. */
1253 	if (ds->ds_phys->ds_num_children > 1)
1254 		return (EEXIST);
1255 
1256 	/*
1257 	 * Can't delete a head dataset if there are snapshots of it.
1258 	 * (Except if the only snapshots are from the branch we cloned
1259 	 * from.)
1260 	 */
1261 	if (ds->ds_prev != NULL &&
1262 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1263 		return (EINVAL);
1264 
1265 	/*
1266 	 * If we made changes this txg, traverse_dsl_dataset won't find
1267 	 * them.  Try again.
1268 	 */
1269 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1270 		return (EAGAIN);
1271 
1272 	/* XXX we should do some i/o error checking... */
1273 	return (0);
1274 }
1275 
/*
 * Sync function that destroys the dataset passed as arg1.
 *
 * The dataset is unlinked from its neighbours in the snapshot chain, the
 * blocks it alone references are freed and deducted from its dsl_dir,
 * its name is removed from the head dataset's snapshot namespace (or,
 * for a head dataset, the dsl_dir's head link is cleared), and finally
 * the exclusive hold identified by 'tag' is closed and the dataset
 * object is freed from the meta-objset.
 */
void
dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	int64_t used = 0, compressed = 0, uncompressed = 0;
	zio_t *zio;
	int err;
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
	ASSERT(ds->ds_prev == NULL ||
	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);

	/* Remove our reservation */
	if (ds->ds_reserved != 0) {
		uint64_t val = 0;
		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
		ASSERT3U(ds->ds_reserved, ==, 0);
	}

	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/* Saved now; ds is closed before the object is freed at the end. */
	obj = ds->ds_object;

	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		/*
		 * Use the cached previous snapshot if there is one,
		 * otherwise open it temporarily (closed near the end).
		 */
		if (ds->ds_prev) {
			ds_prev = ds->ds_prev;
		} else {
			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_prev));
		}
		/*
		 * If prev's next-snapshot pointer is not us, we hang off
		 * prev as a branch rather than being its direct successor.
		 */
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds->ds_phys->ds_next_snap_obj == 0) {
			/* This clone is toast. */
			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
			ds_prev->ds_phys->ds_num_children--;
		} else if (!after_branch_point) {
			/* Splice ourselves out: prev now points at our next. */
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	/* Root zio for the frees issued below; waited on further down. */
	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	if (ds->ds_phys->ds_next_snap_obj != 0) {
		/* We have a next dataset, so we are a snapshot. */
		blkptr_t bp;
		dsl_dataset_t *ds_next;
		uint64_t itor = 0;
		uint64_t old_unique;

		spa_scrub_restart(dp->dp_spa, tx->tx_txg);

		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_phys->ds_next_snap_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_next));
		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

		/* Remembered for the refreservation adjustment below. */
		old_unique = dsl_dataset_unique(ds_next);

		/* Relink next so that it follows our previous snapshot. */
		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
		ds_next->ds_phys->ds_prev_snap_obj =
		    ds->ds_phys->ds_prev_snap_obj;
		ds_next->ds_phys->ds_prev_snap_txg =
		    ds->ds_phys->ds_prev_snap_txg;
		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

		/*
		 * Transfer to our deadlist (which will become next's
		 * new deadlist) any entries from next's current
		 * deadlist which were born before prev, and free the
		 * other entries.
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
		    &bp) == 0) {
			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
				    &bp, tx));
				if (ds_prev && !after_branch_point &&
				    bp.blk_birth >
				    ds_prev->ds_phys->ds_prev_snap_txg) {
					ds_prev->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			} else {
				used += bp_get_dasize(dp->dp_spa, &bp);
				compressed += BP_GET_PSIZE(&bp);
				uncompressed += BP_GET_UCSIZE(&bp);
				/* XXX check return value? */
				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
				    &bp, NULL, NULL, ARC_NOWAIT);
			}
		}

		/* free next's deadlist */
		bplist_close(&ds_next->ds_deadlist);
		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);

		/* set next's deadlist to our deadlist */
		ds_next->ds_phys->ds_deadlist_obj =
		    ds->ds_phys->ds_deadlist_obj;
		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
		    ds_next->ds_phys->ds_deadlist_obj));
		ds->ds_phys->ds_deadlist_obj = 0;

		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
			/*
			 * Update next's unique to include blocks which
			 * were previously shared by only this snapshot
			 * and it.  Those blocks will be born after the
			 * prev snap and before this snap, and will have
			 * died after the next snap and before the one
			 * after that (ie. be on the snap after next's
			 * deadlist).
			 *
			 * XXX we're doing this long task with the
			 * config lock held
			 */
			dsl_dataset_t *ds_after_next;

			VERIFY(0 == dsl_dataset_open_obj(dp,
			    ds_next->ds_phys->ds_next_snap_obj, NULL,
			    DS_MODE_NONE, FTAG, &ds_after_next));
			itor = 0;
			while (bplist_iterate(&ds_after_next->ds_deadlist,
			    &itor, &bp) == 0) {
				if (bp.blk_birth >
				    ds->ds_phys->ds_prev_snap_txg &&
				    bp.blk_birth <=
				    ds->ds_phys->ds_creation_txg) {
					ds_next->ds_phys->ds_unique_bytes +=
					    bp_get_dasize(dp->dp_spa, &bp);
				}
			}

			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
			ASSERT3P(ds_next->ds_prev, ==, NULL);
		} else {
			/*
			 * Next has no next of its own.  Replace its cached
			 * ds_prev (currently us) with our previous
			 * snapshot, if any, held with next as the tag.
			 */
			ASSERT3P(ds_next->ds_prev, ==, ds);
			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
			    ds_next);
			if (ds_prev) {
				VERIFY(0 == dsl_dataset_open_obj(dp,
				    ds->ds_phys->ds_prev_snap_obj, NULL,
				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
			} else {
				ds_next->ds_prev = NULL;
			}

			dsl_dataset_recalc_head_uniq(ds_next);

			/*
			 * Reduce the amount of our unconsumed refreservation
			 * being charged to our parent by the amount of
			 * new unique data we have gained.
			 */
			if (old_unique < ds_next->ds_reserved) {
				int64_t mrsdelta;
				uint64_t new_unique =
				    ds_next->ds_phys->ds_unique_bytes;

				ASSERT(old_unique <= new_unique);
				mrsdelta = MIN(new_unique - old_unique,
				    ds_next->ds_reserved - old_unique);
				dsl_dir_diduse_space(ds->ds_dir, -mrsdelta,
				    0, 0, tx);
			}
		}
		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);

		/*
		 * NB: unique_bytes might not be accurate for the head objset.
		 * Before SPA_VERSION 9, we didn't update its value when we
		 * deleted the most recent snapshot.
		 */
		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
	} else {
		/*
		 * There's no next snapshot, so this is a head dataset.
		 * Destroy the deadlist.  Unless it's a clone, the
		 * deadlist should be empty.  (If it's a clone, it's
		 * safe to ignore the deadlist contents.)
		 */
		struct killarg ka;

		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
		bplist_close(&ds->ds_deadlist);
		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
		ds->ds_phys->ds_deadlist_obj = 0;

		/*
		 * Free everything that we point to (that's born after
		 * the previous snapshot, if we are a clone)
		 *
		 * XXX we're doing this long task with the config lock held
		 */
		ka.usedp = &used;
		ka.compressedp = &compressed;
		ka.uncompressedp = &uncompressed;
		ka.zio = zio;
		ka.tx = tx;
		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
		    ADVANCE_POST, kill_blkptr, &ka);
		ASSERT3U(err, ==, 0);
		ASSERT(spa_version(dp->dp_spa) <
		    SPA_VERSION_UNIQUE_ACCURATE ||
		    used == ds->ds_phys->ds_unique_bytes);
	}

	/* Wait for all the frees issued under the root zio. */
	err = zio_wait(zio);
	ASSERT3U(err, ==, 0);

	/* Deduct the freed space from our dsl_dir. */
	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);

	if (ds->ds_phys->ds_snapnames_zapobj) {
		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
		ASSERT(err == 0);
	}

	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
		/* Erase the link in the dataset */
		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
		/*
		 * dsl_dir_sync_destroy() called us, they'll destroy
		 * the dataset.
		 */
	} else {
		/* remove from snapshot namespace */
		dsl_dataset_t *ds_head;
		VERIFY(0 == dsl_dataset_open_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
		    DS_MODE_NONE, FTAG, &ds_head));
		VERIFY(0 == dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
		{
			/* The name must currently map to this object. */
			uint64_t val;
			err = zap_lookup(mos,
			    ds_head->ds_phys->ds_snapnames_zapobj,
			    ds->ds_snapname, 8, 1, &val);
			ASSERT3U(err, ==, 0);
			ASSERT3U(val, ==, obj);
		}
#endif
		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
		    ds->ds_snapname, tx);
		ASSERT(err == 0);
		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
	}

	/* Drop the temporary hold on prev taken above, if we took one. */
	if (ds_prev && ds->ds_prev != ds_prev)
		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
	    cr, "dataset = %llu", ds->ds_object);

	/* Release the caller's exclusive hold, then free the object. */
	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
	VERIFY(0 == dmu_object_free(mos, obj, tx));

}
1550 
1551 static int
1552 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
1553 {
1554 	uint64_t asize;
1555 
1556 	if (!dmu_tx_is_syncing(tx))
1557 		return (0);
1558 
1559 	/*
1560 	 * If there's an fs-only reservation, any blocks that might become
1561 	 * owned by the snapshot dataset must be accommodated by space
1562 	 * outside of the reservation.
1563 	 */
1564 	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
1565 	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
1566 		return (ENOSPC);
1567 
1568 	/*
1569 	 * Propogate any reserved space for this snapshot to other
1570 	 * snapshot checks in this sync group.
1571 	 */
1572 	if (asize > 0)
1573 		dsl_dir_willuse_space(ds->ds_dir, asize, tx);
1574 
1575 	return (0);
1576 }
1577 
1578 /* ARGSUSED */
1579 int
1580 dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
1581 {
1582 	dsl_dataset_t *ds = arg1;
1583 	const char *snapname = arg2;
1584 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1585 	int err;
1586 	uint64_t value;
1587 
1588 	/*
1589 	 * We don't allow multiple snapshots of the same txg.  If there
1590 	 * is already one, try again.
1591 	 */
1592 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
1593 		return (EAGAIN);
1594 
1595 	/*
1596 	 * Check for conflicting name snapshot name.
1597 	 */
1598 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
1599 	    snapname, 8, 1, &value);
1600 	if (err == 0)
1601 		return (EEXIST);
1602 	if (err != ENOENT)
1603 		return (err);
1604 
1605 	/*
1606 	 * Check that the dataset's name is not too long.  Name consists
1607 	 * of the dataset's length + 1 for the @-sign + snapshot name's length
1608 	 */
1609 	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
1610 		return (ENAMETOOLONG);
1611 
1612 	err = dsl_dataset_snapshot_reserve_space(ds, tx);
1613 	if (err)
1614 		return (err);
1615 
1616 	ds->ds_trysnap_txg = tx->tx_txg;
1617 	return (0);
1618 }
1619 
/*
 * Sync function that creates a snapshot named arg2 of the dataset arg1.
 *
 * A new dataset object is allocated in the meta-objset and filled in
 * from the head dataset's current state (previous-snapshot links,
 * deadlist, space totals, flags and block pointer).  The head is then
 * relinked so that the new object is its previous snapshot, given a
 * fresh empty deadlist, and the name is added to the head's snapshot
 * names zap.
 */
void
dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = arg1;
	const char *snapname = arg2;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	dmu_buf_t *dbuf;
	dsl_dataset_phys_t *dsphys;
	uint64_t dsobj;
	objset_t *mos = dp->dp_meta_objset;
	int err;

	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));

	/* Allocate the snapshot's dataset object and fill in its bonus. */
	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	dsphys = dbuf->db_data;
	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
	dsphys->ds_fsid_guid = unique_create();
	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
	    sizeof (dsphys->ds_guid));
	/* The snapshot sits between the head's old prev and the head. */
	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	dsphys->ds_next_snap_obj = ds->ds_object;
	dsphys->ds_num_children = 1;
	dsphys->ds_creation_time = gethrestime_sec();
	dsphys->ds_creation_txg = tx->tx_txg;
	/* The snapshot takes over the head's current deadlist. */
	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
	dsphys->ds_flags = ds->ds_phys->ds_flags;
	dsphys->ds_bp = ds->ds_phys->ds_bp;
	/* NOTE(review): dsphys is still read below after this release. */
	dmu_buf_rele(dbuf, FTAG);

	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
	if (ds->ds_prev) {
		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
		    ds->ds_object ||
		    ds->ds_prev->ds_phys->ds_num_children > 1);
		/*
		 * If the old prev pointed directly at the head, it now
		 * points at the new snapshot instead.
		 */
		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
			    ds->ds_prev->ds_phys->ds_creation_txg);
			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
		}
	}

	/*
	 * If we have a reference-reservation on this dataset, we will
	 * need to increase the amount of refreservation being charged
	 * since our unique space is going to zero.
	 */
	if (ds->ds_reserved) {
		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
		dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx);
	}

	/*
	 * Point the head at the new snapshot, zero its unique space and
	 * give it a new, empty deadlist.
	 */
	bplist_close(&ds->ds_deadlist);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
	ds->ds_phys->ds_prev_snap_obj = dsobj;
	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
	ds->ds_phys->ds_unique_bytes = 0;
	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
	ds->ds_phys->ds_deadlist_obj =
	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
	    ds->ds_phys->ds_deadlist_obj));

	/* Publish the snapshot's name in the head's snapshot names zap. */
	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
	    snapname, 8, 1, &dsobj, tx);
	ASSERT(err == 0);

	/* Swap the cached ds_prev hold over to the new snapshot. */
	if (ds->ds_prev)
		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
	VERIFY(0 == dsl_dataset_open_obj(dp,
	    ds->ds_phys->ds_prev_snap_obj, snapname,
	    DS_MODE_NONE, ds, &ds->ds_prev));

	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
	    "dataset = %llu", dsobj);
}
1708 
1709 void
1710 dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1711 {
1712 	ASSERT(dmu_tx_is_syncing(tx));
1713 	ASSERT(ds->ds_user_ptr != NULL);
1714 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1715 
1716 	/*
1717 	 * in case we had to change ds_fsid_guid when we opened it,
1718 	 * sync it out now.
1719 	 */
1720 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1721 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
1722 
1723 	dsl_dir_dirty(ds->ds_dir, tx);
1724 	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
1725 }
1726 
1727 void
1728 dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1729 {
1730 	uint64_t refd, avail, uobjs, aobjs;
1731 
1732 	dsl_dir_stats(ds->ds_dir, nv);
1733 
1734 	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
1735 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
1736 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
1737 
1738 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
1739 	    ds->ds_phys->ds_creation_time);
1740 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
1741 	    ds->ds_phys->ds_creation_txg);
1742 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
1743 	    ds->ds_quota);
1744 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
1745 	    ds->ds_reserved);
1746 
1747 	if (ds->ds_phys->ds_next_snap_obj) {
1748 		/*
1749 		 * This is a snapshot; override the dd's space used with
1750 		 * our unique space and compression ratio.
1751 		 */
1752 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
1753 		    ds->ds_phys->ds_unique_bytes);
1754 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
1755 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
1756 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
1757 		    ds->ds_phys->ds_compressed_bytes));
1758 	}
1759 }
1760 
1761 void
1762 dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1763 {
1764 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1765 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
1766 	stat->dds_guid = ds->ds_phys->ds_guid;
1767 	if (ds->ds_phys->ds_next_snap_obj) {
1768 		stat->dds_is_snapshot = B_TRUE;
1769 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1770 	}
1771 
1772 	/* clone origin is really a dsl_dir thing... */
1773 	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
1774 	if (ds->ds_dir->dd_phys->dd_origin_obj) {
1775 		dsl_dataset_t *ods;
1776 
1777 		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
1778 		    ds->ds_dir->dd_phys->dd_origin_obj,
1779 		    NULL, DS_MODE_NONE, FTAG, &ods));
1780 		dsl_dataset_name(ods, stat->dds_origin);
1781 		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
1782 	}
1783 	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
1784 }
1785 
1786 uint64_t
1787 dsl_dataset_fsid_guid(dsl_dataset_t *ds)
1788 {
1789 	return (ds->ds_fsid_guid);
1790 }
1791 
1792 void
1793 dsl_dataset_space(dsl_dataset_t *ds,
1794     uint64_t *refdbytesp, uint64_t *availbytesp,
1795     uint64_t *usedobjsp, uint64_t *availobjsp)
1796 {
1797 	*refdbytesp = ds->ds_phys->ds_used_bytes;
1798 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
1799 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
1800 		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
1801 	if (ds->ds_quota != 0) {
1802 		/*
1803 		 * Adjust available bytes according to refquota
1804 		 */
1805 		if (*refdbytesp < ds->ds_quota)
1806 			*availbytesp = MIN(*availbytesp,
1807 			    ds->ds_quota - *refdbytesp);
1808 		else
1809 			*availbytesp = 0;
1810 	}
1811 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
1812 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1813 }
1814 
1815 boolean_t
1816 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
1817 {
1818 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1819 
1820 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
1821 	    dsl_pool_sync_context(dp));
1822 	if (ds->ds_prev == NULL)
1823 		return (B_FALSE);
1824 	if (ds->ds_phys->ds_bp.blk_birth >
1825 	    ds->ds_prev->ds_phys->ds_creation_txg)
1826 		return (B_TRUE);
1827 	return (B_FALSE);
1828 }
1829 
1830 /* ARGSUSED */
1831 static int
1832 dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1833 {
1834 	dsl_dataset_t *ds = arg1;
1835 	char *newsnapname = arg2;
1836 	dsl_dir_t *dd = ds->ds_dir;
1837 	objset_t *mos = dd->dd_pool->dp_meta_objset;
1838 	dsl_dataset_t *hds;
1839 	uint64_t val;
1840 	int err;
1841 
1842 	err = dsl_dataset_open_obj(dd->dd_pool,
1843 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
1844 	if (err)
1845 		return (err);
1846 
1847 	/* new name better not be in use */
1848 	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
1849 	    newsnapname, 8, 1, &val);
1850 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1851 
1852 	if (err == 0)
1853 		err = EEXIST;
1854 	else if (err == ENOENT)
1855 		err = 0;
1856 
1857 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
1858 	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
1859 		err = ENAMETOOLONG;
1860 
1861 	return (err);
1862 }
1863 
1864 static void
1865 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
1866     cred_t *cr, dmu_tx_t *tx)
1867 {
1868 	dsl_dataset_t *ds = arg1;
1869 	const char *newsnapname = arg2;
1870 	dsl_dir_t *dd = ds->ds_dir;
1871 	objset_t *mos = dd->dd_pool->dp_meta_objset;
1872 	dsl_dataset_t *hds;
1873 	int err;
1874 
1875 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
1876 
1877 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
1878 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));
1879 
1880 	VERIFY(0 == dsl_dataset_get_snapname(ds));
1881 	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
1882 	    ds->ds_snapname, tx);
1883 	ASSERT3U(err, ==, 0);
1884 	mutex_enter(&ds->ds_lock);
1885 	(void) strcpy(ds->ds_snapname, newsnapname);
1886 	mutex_exit(&ds->ds_lock);
1887 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
1888 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
1889 	ASSERT3U(err, ==, 0);
1890 
1891 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
1892 	    cr, "dataset = %llu", ds->ds_object);
1893 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1894 }
1895 
/*
 * State for a recursive snapshot rename, shared between
 * dsl_recursive_rename() and its per-dataset callback
 * dsl_snapshot_rename_one().
 */
struct renamesnaparg {
	/* task group the rename tasks are added to; also the hold tag */
	dsl_sync_task_group_t *dstg;
	/* name of the dataset/snapshot that failed, if any */
	char failed[MAXPATHLEN];
	/* old snapshot name (the part after the '@') */
	char *oldsnap;
	/* new snapshot name (the part after the '@') */
	char *newsnap;
};
1902 
1903 static int
1904 dsl_snapshot_rename_one(char *name, void *arg)
1905 {
1906 	struct renamesnaparg *ra = arg;
1907 	dsl_dataset_t *ds = NULL;
1908 	char *cp;
1909 	int err;
1910 
1911 	cp = name + strlen(name);
1912 	*cp = '@';
1913 	(void) strcpy(cp + 1, ra->oldsnap);
1914 
1915 	/*
1916 	 * For recursive snapshot renames the parent won't be changing
1917 	 * so we just pass name for both the to/from argument.
1918 	 */
1919 	if (err = zfs_secpolicy_rename_perms(name, name, CRED())) {
1920 		(void) strcpy(ra->failed, name);
1921 		return (err);
1922 	}
1923 
1924 	err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD,
1925 	    ra->dstg, &ds);
1926 	if (err == ENOENT) {
1927 		*cp = '\0';
1928 		return (0);
1929 	}
1930 	if (err) {
1931 		(void) strcpy(ra->failed, name);
1932 		*cp = '\0';
1933 		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
1934 		return (err);
1935 	}
1936 
1937 #ifdef _KERNEL
1938 	/* for all filesystems undergoing rename, we'll need to unmount it */
1939 	(void) zfs_unmount_snap(name, NULL);
1940 #endif
1941 
1942 	*cp = '\0';
1943 
1944 	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
1945 	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
1946 
1947 	return (0);
1948 }
1949 
1950 static int
1951 dsl_recursive_rename(char *oldname, const char *newname)
1952 {
1953 	int err;
1954 	struct renamesnaparg *ra;
1955 	dsl_sync_task_t *dst;
1956 	spa_t *spa;
1957 	char *cp, *fsname = spa_strdup(oldname);
1958 	int len = strlen(oldname);
1959 
1960 	/* truncate the snapshot name to get the fsname */
1961 	cp = strchr(fsname, '@');
1962 	*cp = '\0';
1963 
1964 	err = spa_open(fsname, &spa, FTAG);
1965 	if (err) {
1966 		kmem_free(fsname, len + 1);
1967 		return (err);
1968 	}
1969 	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
1970 	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
1971 
1972 	ra->oldsnap = strchr(oldname, '@') + 1;
1973 	ra->newsnap = strchr(newname, '@') + 1;
1974 	*ra->failed = '\0';
1975 
1976 	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
1977 	    DS_FIND_CHILDREN);
1978 	kmem_free(fsname, len + 1);
1979 
1980 	if (err == 0) {
1981 		err = dsl_sync_task_group_wait(ra->dstg);
1982 	}
1983 
1984 	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
1985 	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
1986 		dsl_dataset_t *ds = dst->dst_arg1;
1987 		if (dst->dst_err) {
1988 			dsl_dir_name(ds->ds_dir, ra->failed);
1989 			(void) strcat(ra->failed, "@");
1990 			(void) strcat(ra->failed, ra->newsnap);
1991 		}
1992 		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
1993 	}
1994 
1995 	if (err)
1996 		(void) strcpy(oldname, ra->failed);
1997 
1998 	dsl_sync_task_group_destroy(ra->dstg);
1999 	kmem_free(ra, sizeof (struct renamesnaparg));
2000 	spa_close(spa, FTAG);
2001 	return (err);
2002 }
2003 
/*
 * Callback that checks whether 'oldname' would still fit after growing
 * by the number of characters pointed to by 'arg' (an int).
 *
 * Returns ENAMETOOLONG if the grown length reaches MAXNAMELEN, else 0.
 */
static int
dsl_valid_rename(char *oldname, void *arg)
{
	int *deltap = arg;
	int delta = *deltap;

	return ((strlen(oldname) + delta >= MAXNAMELEN) ? ENAMETOOLONG : 0);
}
2014 
2015 #pragma weak dmu_objset_rename = dsl_dataset_rename
2016 int
2017 dsl_dataset_rename(char *oldname, const char *newname,
2018     boolean_t recursive)
2019 {
2020 	dsl_dir_t *dd;
2021 	dsl_dataset_t *ds;
2022 	const char *tail;
2023 	int err;
2024 
2025 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
2026 	if (err)
2027 		return (err);
2028 	if (tail == NULL) {
2029 		int delta = strlen(newname) - strlen(oldname);
2030 
2031 		/* if we're growing, validate child size lengths */
2032 		if (delta > 0)
2033 			err = dmu_objset_find(oldname, dsl_valid_rename,
2034 			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
2035 
2036 		if (!err)
2037 			err = dsl_dir_rename(dd, newname);
2038 		dsl_dir_close(dd, FTAG);
2039 		return (err);
2040 	}
2041 	if (tail[0] != '@') {
2042 		/* the name ended in a nonexistant component */
2043 		dsl_dir_close(dd, FTAG);
2044 		return (ENOENT);
2045 	}
2046 
2047 	dsl_dir_close(dd, FTAG);
2048 
2049 	/* new name must be snapshot in same filesystem */
2050 	tail = strchr(newname, '@');
2051 	if (tail == NULL)
2052 		return (EINVAL);
2053 	tail++;
2054 	if (strncmp(oldname, newname, tail - newname) != 0)
2055 		return (EXDEV);
2056 
2057 	if (recursive) {
2058 		err = dsl_recursive_rename(oldname, newname);
2059 	} else {
2060 		err = dsl_dataset_open(oldname,
2061 		    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
2062 		if (err)
2063 			return (err);
2064 
2065 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2066 		    dsl_dataset_snapshot_rename_check,
2067 		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
2068 
2069 		dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
2070 	}
2071 
2072 	return (err);
2073 }
2074 
/*
 * Values computed by dsl_dataset_promote_check() and consumed by
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	/* space moved from the origin's dir to the promoted dir */
	uint64_t used;
	uint64_t comp;
	uint64_t uncomp;
	/* new ds_unique_bytes for the origin snapshot */
	uint64_t unique;
	/* new ds_next_snap_obj for the origin snapshot */
	uint64_t newnext_obj;
	/* snapnames zap object of the origin dir's head dataset */
	uint64_t snapnames_obj;
};
2079 
2080 /* ARGSUSED */
2081 static int
2082 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
2083 {
2084 	dsl_dataset_t *hds = arg1;
2085 	struct promotearg *pa = arg2;
2086 	dsl_dir_t *dd = hds->ds_dir;
2087 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
2088 	dsl_dir_t *odd = NULL;
2089 	dsl_dataset_t *ds = NULL;
2090 	dsl_dataset_t *origin_ds = NULL;
2091 	dsl_dataset_t *newnext_ds = NULL;
2092 	int err;
2093 	char *name = NULL;
2094 	uint64_t itor = 0;
2095 	blkptr_t bp;
2096 
2097 	bzero(pa, sizeof (*pa));
2098 
2099 	/* Check that it is a clone */
2100 	if (dd->dd_phys->dd_origin_obj == 0)
2101 		return (EINVAL);
2102 
2103 	/* Since this is so expensive, don't do the preliminary check */
2104 	if (!dmu_tx_is_syncing(tx))
2105 		return (0);
2106 
2107 	if (err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
2108 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds))
2109 		goto out;
2110 	odd = origin_ds->ds_dir;
2111 
2112 	{
2113 		dsl_dataset_t *phds;
2114 		if (err = dsl_dataset_open_obj(dd->dd_pool,
2115 		    odd->dd_phys->dd_head_dataset_obj,
2116 		    NULL, DS_MODE_NONE, FTAG, &phds))
2117 			goto out;
2118 		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
2119 		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
2120 	}
2121 
2122 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
2123 		err = EXDEV;
2124 		goto out;
2125 	}
2126 
2127 	/* find origin's new next ds */
2128 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
2129 	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
2130 	while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) {
2131 		dsl_dataset_t *prev;
2132 
2133 		if (err = dsl_dataset_open_obj(dd->dd_pool,
2134 		    newnext_ds->ds_phys->ds_prev_snap_obj,
2135 		    NULL, DS_MODE_NONE, FTAG, &prev))
2136 			goto out;
2137 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
2138 		newnext_ds = prev;
2139 	}
2140 	pa->newnext_obj = newnext_ds->ds_object;
2141 
2142 	/* compute origin's new unique space */
2143 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
2144 	    &itor, &bp)) == 0) {
2145 		if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg)
2146 			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
2147 	}
2148 	if (err != ENOENT)
2149 		goto out;
2150 
2151 	/* Walk the snapshots that we are moving */
2152 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2153 	ds = origin_ds;
2154 	/* CONSTCOND */
2155 	while (TRUE) {
2156 		uint64_t val, dlused, dlcomp, dluncomp;
2157 		dsl_dataset_t *prev;
2158 
2159 		/* Check that the snapshot name does not conflict */
2160 		dsl_dataset_name(ds, name);
2161 		err = zap_lookup(dd->dd_pool->dp_meta_objset,
2162 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2163 		    8, 1, &val);
2164 		if (err != ENOENT) {
2165 			if (err == 0)
2166 				err = EEXIST;
2167 			goto out;
2168 		}
2169 
2170 		/*
2171 		 * compute space to transfer.  Each snapshot gave birth to:
2172 		 * (my used) - (prev's used) + (deadlist's used)
2173 		 */
2174 		pa->used += ds->ds_phys->ds_used_bytes;
2175 		pa->comp += ds->ds_phys->ds_compressed_bytes;
2176 		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;
2177 
2178 		/* If we reach the first snapshot, we're done. */
2179 		if (ds->ds_phys->ds_prev_snap_obj == 0)
2180 			break;
2181 
2182 		if (err = bplist_space(&ds->ds_deadlist,
2183 		    &dlused, &dlcomp, &dluncomp))
2184 			goto out;
2185 		if (err = dsl_dataset_open_obj(dd->dd_pool,
2186 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
2187 		    FTAG, &prev))
2188 			goto out;
2189 		pa->used += dlused - prev->ds_phys->ds_used_bytes;
2190 		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
2191 		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
2192 
2193 		/*
2194 		 * We could be a clone of a clone.  If we reach our
2195 		 * parent's branch point, we're done.
2196 		 */
2197 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
2198 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
2199 			break;
2200 		}
2201 		if (ds != origin_ds)
2202 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
2203 		ds = prev;
2204 	}
2205 
2206 	/* Check that there is enough space here */
2207 	err = dsl_dir_transfer_possible(odd, dd, pa->used);
2208 
2209 out:
2210 	if (ds && ds != origin_ds)
2211 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
2212 	if (origin_ds)
2213 		dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
2214 	if (newnext_ds)
2215 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
2216 	if (name)
2217 		kmem_free(name, MAXPATHLEN);
2218 	return (err);
2219 }
2220 
2221 static void
2222 dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2223 {
2224 	dsl_dataset_t *hds = arg1;
2225 	struct promotearg *pa = arg2;
2226 	dsl_dir_t *dd = hds->ds_dir;
2227 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
2228 	dsl_dir_t *odd = NULL;
2229 	dsl_dataset_t *ds, *origin_ds;
2230 	char *name;
2231 
2232 	ASSERT(dd->dd_phys->dd_origin_obj != 0);
2233 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
2234 
2235 	VERIFY(0 == dsl_dataset_open_obj(dp, dd->dd_phys->dd_origin_obj,
2236 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &origin_ds));
2237 	/*
2238 	 * We need to explicitly open odd, since origin_ds's dd will be
2239 	 * changing.
2240 	 */
2241 	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
2242 	    NULL, FTAG, &odd));
2243 
2244 	/* move snapshots to this dir */
2245 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2246 	ds = origin_ds;
2247 	/* CONSTCOND */
2248 	while (TRUE) {
2249 		dsl_dataset_t *prev;
2250 
2251 		/* move snap name entry */
2252 		dsl_dataset_name(ds, name);
2253 		VERIFY(0 == zap_remove(dp->dp_meta_objset,
2254 		    pa->snapnames_obj, ds->ds_snapname, tx));
2255 		VERIFY(0 == zap_add(dp->dp_meta_objset,
2256 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2257 		    8, 1, &ds->ds_object, tx));
2258 
2259 		/* change containing dsl_dir */
2260 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
2261 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2262 		ds->ds_phys->ds_dir_obj = dd->dd_object;
2263 		ASSERT3P(ds->ds_dir, ==, odd);
2264 		dsl_dir_close(ds->ds_dir, ds);
2265 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2266 		    NULL, ds, &ds->ds_dir));
2267 
2268 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
2269 
2270 		if (ds->ds_phys->ds_prev_snap_obj == 0)
2271 			break;
2272 
2273 		VERIFY(0 == dsl_dataset_open_obj(dp,
2274 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
2275 		    FTAG, &prev));
2276 
2277 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
2278 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
2279 			break;
2280 		}
2281 		if (ds != origin_ds)
2282 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
2283 		ds = prev;
2284 	}
2285 	if (ds != origin_ds)
2286 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
2287 
2288 	/* change origin's next snap */
2289 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
2290 	origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
2291 
2292 	/* change origin */
2293 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
2294 	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
2295 	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
2296 	dmu_buf_will_dirty(odd->dd_dbuf, tx);
2297 	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
2298 
2299 	/* change space accounting */
2300 	dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx);
2301 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
2302 	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
2303 
2304 	/* log history record */
2305 	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
2306 	    cr, "dataset = %llu", ds->ds_object);
2307 
2308 	dsl_dir_close(odd, FTAG);
2309 	dsl_dataset_close(origin_ds, DS_MODE_EXCLUSIVE, FTAG);
2310 	kmem_free(name, MAXPATHLEN);
2311 }
2312 
2313 int
2314 dsl_dataset_promote(const char *name)
2315 {
2316 	dsl_dataset_t *ds;
2317 	int err;
2318 	dmu_object_info_t doi;
2319 	struct promotearg pa;
2320 
2321 	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
2322 	if (err)
2323 		return (err);
2324 
2325 	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
2326 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
2327 	if (err) {
2328 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
2329 		return (err);
2330 	}
2331 
2332 	/*
2333 	 * Add in 128x the snapnames zapobj size, since we will be moving
2334 	 * a bunch of snapnames to the promoted ds, and dirtying their
2335 	 * bonus buffers.
2336 	 */
2337 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2338 	    dsl_dataset_promote_check,
2339 	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
2340 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
2341 	return (err);
2342 }
2343 
2344 struct cloneswaparg {
2345 	dsl_dataset_t *cds; /* clone dataset */
2346 	dsl_dataset_t *ohds; /* origin's head dataset */
2347 	boolean_t force;
2348 	int64_t unused_refres_delta; /* change in unconsumed refreservation */
2349 };
2350 
2351 /* ARGSUSED */
2352 static int
2353 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
2354 {
2355 	struct cloneswaparg *csa = arg1;
2356 
2357 	/* they should both be heads */
2358 	if (dsl_dataset_is_snapshot(csa->cds) ||
2359 	    dsl_dataset_is_snapshot(csa->ohds))
2360 		return (EINVAL);
2361 
2362 	/* the branch point should be just before them */
2363 	if (csa->cds->ds_prev != csa->ohds->ds_prev)
2364 		return (EINVAL);
2365 
2366 	/* cds should be the clone */
2367 	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
2368 	    csa->ohds->ds_object)
2369 		return (EINVAL);
2370 
2371 	/* the clone should be a child of the origin */
2372 	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
2373 		return (EINVAL);
2374 
2375 	/* ohds shouldn't be modified unless 'force' */
2376 	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
2377 		return (ETXTBSY);
2378 
2379 	/* adjust amount of any unconsumed refreservation */
2380 	csa->unused_refres_delta =
2381 	    (int64_t)MIN(csa->ohds->ds_reserved,
2382 	    csa->ohds->ds_phys->ds_unique_bytes) -
2383 	    (int64_t)MIN(csa->ohds->ds_reserved,
2384 	    csa->cds->ds_phys->ds_unique_bytes);
2385 
2386 	if (csa->unused_refres_delta > 0 &&
2387 	    csa->unused_refres_delta >
2388 	    dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
2389 		return (ENOSPC);
2390 
2391 	return (0);
2392 }
2393 
2394 /* ARGSUSED */
2395 static void
2396 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2397 {
2398 	struct cloneswaparg *csa = arg1;
2399 	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
2400 	uint64_t itor = 0;
2401 	blkptr_t bp;
2402 	uint64_t unique = 0;
2403 	int err;
2404 
2405 	ASSERT(csa->cds->ds_reserved == 0);
2406 	ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota);
2407 
2408 	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
2409 	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
2410 	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
2411 
2412 	if (csa->cds->ds_user_ptr != NULL) {
2413 		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
2414 		csa->cds->ds_user_ptr = NULL;
2415 	}
2416 
2417 	if (csa->ohds->ds_user_ptr != NULL) {
2418 		csa->ohds->ds_user_evict_func(csa->ohds,
2419 		    csa->ohds->ds_user_ptr);
2420 		csa->ohds->ds_user_ptr = NULL;
2421 	}
2422 
2423 	/* compute unique space */
2424 	while ((err = bplist_iterate(&csa->cds->ds_deadlist,
2425 	    &itor, &bp)) == 0) {
2426 		if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg)
2427 			unique += bp_get_dasize(dp->dp_spa, &bp);
2428 	}
2429 	VERIFY(err == ENOENT);
2430 
2431 	/* reset origin's unique bytes */
2432 	csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique;
2433 
2434 	/* swap blkptrs */
2435 	{
2436 		blkptr_t tmp;
2437 		tmp = csa->ohds->ds_phys->ds_bp;
2438 		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
2439 		csa->cds->ds_phys->ds_bp = tmp;
2440 	}
2441 
2442 	/* set dd_*_bytes */
2443 	{
2444 		int64_t dused, dcomp, duncomp;
2445 		uint64_t cdl_used, cdl_comp, cdl_uncomp;
2446 		uint64_t odl_used, odl_comp, odl_uncomp;
2447 
2448 		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
2449 		    &cdl_comp, &cdl_uncomp));
2450 		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
2451 		    &odl_comp, &odl_uncomp));
2452 		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
2453 		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
2454 		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
2455 		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
2456 		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
2457 		    cdl_uncomp -
2458 		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
2459 
2460 		dsl_dir_diduse_space(csa->ohds->ds_dir,
2461 		    dused, dcomp, duncomp, tx);
2462 		dsl_dir_diduse_space(csa->cds->ds_dir,
2463 		    -dused, -dcomp, -duncomp, tx);
2464 	}
2465 
2466 #define	SWITCH64(x, y) \
2467 	{ \
2468 		uint64_t __tmp = (x); \
2469 		(x) = (y); \
2470 		(y) = __tmp; \
2471 	}
2472 
2473 	/* swap ds_*_bytes */
2474 	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
2475 	    csa->cds->ds_phys->ds_used_bytes);
2476 	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
2477 	    csa->cds->ds_phys->ds_compressed_bytes);
2478 	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
2479 	    csa->cds->ds_phys->ds_uncompressed_bytes);
2480 	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
2481 	    csa->cds->ds_phys->ds_unique_bytes);
2482 
2483 	/* apply any parent delta for change in unconsumed refreservation */
2484 	dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta,
2485 	    0, 0, tx);
2486 
2487 	/* swap deadlists */
2488 	bplist_close(&csa->cds->ds_deadlist);
2489 	bplist_close(&csa->ohds->ds_deadlist);
2490 	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
2491 	    csa->cds->ds_phys->ds_deadlist_obj);
2492 	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
2493 	    csa->cds->ds_phys->ds_deadlist_obj));
2494 	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
2495 	    csa->ohds->ds_phys->ds_deadlist_obj));
2496 }
2497 
2498 /*
2499  * Swap 'clone' with its origin head file system.
2500  */
2501 int
2502 dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
2503     boolean_t force)
2504 {
2505 	struct cloneswaparg csa;
2506 
2507 	ASSERT(clone->ds_open_refcount == DS_REF_MAX);
2508 	ASSERT(origin_head->ds_open_refcount == DS_REF_MAX);
2509 
2510 	csa.cds = clone;
2511 	csa.ohds = origin_head;
2512 	csa.force = force;
2513 	return (dsl_sync_task_do(clone->ds_dir->dd_pool,
2514 	    dsl_dataset_clone_swap_check,
2515 	    dsl_dataset_clone_swap_sync, &csa, NULL, 9));
2516 }
2517 
2518 /*
2519  * Given a pool name and a dataset object number in that pool,
2520  * return the name of that dataset.
2521  */
2522 int
2523 dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
2524 {
2525 	spa_t *spa;
2526 	dsl_pool_t *dp;
2527 	dsl_dataset_t *ds = NULL;
2528 	int error;
2529 
2530 	if ((error = spa_open(pname, &spa, FTAG)) != 0)
2531 		return (error);
2532 	dp = spa_get_dsl(spa);
2533 	rw_enter(&dp->dp_config_rwlock, RW_READER);
2534 	if ((error = dsl_dataset_open_obj(dp, obj,
2535 	    NULL, DS_MODE_NONE, FTAG, &ds)) != 0) {
2536 		rw_exit(&dp->dp_config_rwlock);
2537 		spa_close(spa, FTAG);
2538 		return (error);
2539 	}
2540 	dsl_dataset_name(ds, buf);
2541 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
2542 	rw_exit(&dp->dp_config_rwlock);
2543 	spa_close(spa, FTAG);
2544 
2545 	return (0);
2546 }
2547 
2548 int
2549 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
2550     uint64_t asize, uint64_t inflight, uint64_t *used)
2551 {
2552 	int error = 0;
2553 
2554 	ASSERT3S(asize, >, 0);
2555 
2556 	mutex_enter(&ds->ds_lock);
2557 	/*
2558 	 * Make a space adjustment for reserved bytes.
2559 	 */
2560 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
2561 		ASSERT3U(*used, >=,
2562 		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
2563 		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
2564 	}
2565 
2566 	if (!check_quota || ds->ds_quota == 0) {
2567 		mutex_exit(&ds->ds_lock);
2568 		return (0);
2569 	}
2570 	/*
2571 	 * If they are requesting more space, and our current estimate
2572 	 * is over quota, they get to try again unless the actual
2573 	 * on-disk is over quota and there are no pending changes (which
2574 	 * may free up space for us).
2575 	 */
2576 	if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
2577 		if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
2578 			error = ERESTART;
2579 		else
2580 			error = EDQUOT;
2581 	}
2582 	mutex_exit(&ds->ds_lock);
2583 
2584 	return (error);
2585 }
2586 
2587 /* ARGSUSED */
2588 static int
2589 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
2590 {
2591 	dsl_dataset_t *ds = arg1;
2592 	uint64_t *quotap = arg2;
2593 	uint64_t new_quota = *quotap;
2594 
2595 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
2596 		return (ENOTSUP);
2597 
2598 	if (new_quota == 0)
2599 		return (0);
2600 
2601 	if (new_quota < ds->ds_phys->ds_used_bytes ||
2602 	    new_quota < ds->ds_reserved)
2603 		return (ENOSPC);
2604 
2605 	return (0);
2606 }
2607 
2608 /* ARGSUSED */
2609 void
2610 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
2611 {
2612 	dsl_dataset_t *ds = arg1;
2613 	uint64_t *quotap = arg2;
2614 	uint64_t new_quota = *quotap;
2615 
2616 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
2617 
2618 	mutex_enter(&ds->ds_lock);
2619 	ds->ds_quota = new_quota;
2620 	mutex_exit(&ds->ds_lock);
2621 
2622 	dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
2623 
2624 	spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
2625 	    tx, cr, "%lld dataset = %llu ",
2626 	    (longlong_t)new_quota, ds->ds_dir->dd_phys->dd_head_dataset_obj);
2627 }
2628 
2629 int
2630 dsl_dataset_set_quota(const char *dsname, uint64_t quota)
2631 {
2632 	dsl_dataset_t *ds;
2633 	int err;
2634 
2635 	err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds);
2636 	if (err)
2637 		return (err);
2638 
2639 	if (quota != ds->ds_quota) {
2640 		/*
2641 		 * If someone removes a file, then tries to set the quota, we
2642 		 * want to make sure the file freeing takes effect.
2643 		 */
2644 		txg_wait_open(ds->ds_dir->dd_pool, 0);
2645 
2646 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2647 		    dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
2648 		    ds, &quota, 0);
2649 	}
2650 	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
2651 	return (err);
2652 }
2653 
2654 static int
2655 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
2656 {
2657 	dsl_dataset_t *ds = arg1;
2658 	uint64_t *reservationp = arg2;
2659 	uint64_t new_reservation = *reservationp;
2660 	int64_t delta;
2661 	uint64_t unique;
2662 
2663 	if (new_reservation > INT64_MAX)
2664 		return (EOVERFLOW);
2665 
2666 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
2667 	    SPA_VERSION_REFRESERVATION)
2668 		return (ENOTSUP);
2669 
2670 	if (dsl_dataset_is_snapshot(ds))
2671 		return (EINVAL);
2672 
2673 	/*
2674 	 * If we are doing the preliminary check in open context, the
2675 	 * space estimates may be inaccurate.
2676 	 */
2677 	if (!dmu_tx_is_syncing(tx))
2678 		return (0);
2679 
2680 	mutex_enter(&ds->ds_lock);
2681 	unique = dsl_dataset_unique(ds);
2682 	delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved);
2683 	mutex_exit(&ds->ds_lock);
2684 
2685 	if (delta > 0 &&
2686 	    delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2687 		return (ENOSPC);
2688 	if (delta > 0 && ds->ds_quota > 0 &&
2689 	    new_reservation > ds->ds_quota)
2690 		return (ENOSPC);
2691 
2692 	return (0);
2693 }
2694 
2695 /* ARGSUSED */
2696 static void
2697 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
2698     dmu_tx_t *tx)
2699 {
2700 	dsl_dataset_t *ds = arg1;
2701 	uint64_t *reservationp = arg2;
2702 	uint64_t new_reservation = *reservationp;
2703 	uint64_t unique;
2704 	int64_t delta;
2705 
2706 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
2707 
2708 	mutex_enter(&ds->ds_lock);
2709 	unique = dsl_dataset_unique(ds);
2710 	delta = MAX(0, (int64_t)(new_reservation - unique)) -
2711 	    MAX(0, (int64_t)(ds->ds_reserved - unique));
2712 	ds->ds_reserved = new_reservation;
2713 	mutex_exit(&ds->ds_lock);
2714 
2715 	dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
2716 	    new_reservation, cr, tx);
2717 
2718 	dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx);
2719 
2720 	spa_history_internal_log(LOG_DS_REFRESERV,
2721 	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
2722 	    (longlong_t)new_reservation,
2723 	    ds->ds_dir->dd_phys->dd_head_dataset_obj);
2724 }
2725 
2726 int
2727 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
2728 {
2729 	dsl_dataset_t *ds;
2730 	int err;
2731 
2732 	err = dsl_dataset_open(dsname, DS_MODE_STANDARD, FTAG, &ds);
2733 	if (err)
2734 		return (err);
2735 
2736 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2737 	    dsl_dataset_set_reservation_check,
2738 	    dsl_dataset_set_reservation_sync, ds, &reservation, 0);
2739 	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
2740 	return (err);
2741 }
2742