xref: /freebsd/sys/contrib/openzfs/module/zfs/dmu_objset.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1*eda14cbcSMatt Macy /*
2*eda14cbcSMatt Macy  * CDDL HEADER START
3*eda14cbcSMatt Macy  *
4*eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5*eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6*eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7*eda14cbcSMatt Macy  *
8*eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*eda14cbcSMatt Macy  * or http://www.opensolaris.org/os/licensing.
10*eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11*eda14cbcSMatt Macy  * and limitations under the License.
12*eda14cbcSMatt Macy  *
13*eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14*eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16*eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17*eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18*eda14cbcSMatt Macy  *
19*eda14cbcSMatt Macy  * CDDL HEADER END
20*eda14cbcSMatt Macy  */
21*eda14cbcSMatt Macy 
22*eda14cbcSMatt Macy /*
23*eda14cbcSMatt Macy  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24*eda14cbcSMatt Macy  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
25*eda14cbcSMatt Macy  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
26*eda14cbcSMatt Macy  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
27*eda14cbcSMatt Macy  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
28*eda14cbcSMatt Macy  * Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
29*eda14cbcSMatt Macy  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
30*eda14cbcSMatt Macy  * Copyright 2017 Nexenta Systems, Inc.
31*eda14cbcSMatt Macy  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
32*eda14cbcSMatt Macy  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
33*eda14cbcSMatt Macy  * Copyright (c) 2019, Klara Inc.
34*eda14cbcSMatt Macy  * Copyright (c) 2019, Allan Jude
35*eda14cbcSMatt Macy  */
36*eda14cbcSMatt Macy 
37*eda14cbcSMatt Macy /* Portions Copyright 2010 Robert Milkowski */
38*eda14cbcSMatt Macy 
39*eda14cbcSMatt Macy #include <sys/cred.h>
40*eda14cbcSMatt Macy #include <sys/zfs_context.h>
41*eda14cbcSMatt Macy #include <sys/dmu_objset.h>
42*eda14cbcSMatt Macy #include <sys/dsl_dir.h>
43*eda14cbcSMatt Macy #include <sys/dsl_dataset.h>
44*eda14cbcSMatt Macy #include <sys/dsl_prop.h>
45*eda14cbcSMatt Macy #include <sys/dsl_pool.h>
46*eda14cbcSMatt Macy #include <sys/dsl_synctask.h>
47*eda14cbcSMatt Macy #include <sys/dsl_deleg.h>
48*eda14cbcSMatt Macy #include <sys/dnode.h>
49*eda14cbcSMatt Macy #include <sys/dbuf.h>
50*eda14cbcSMatt Macy #include <sys/zvol.h>
51*eda14cbcSMatt Macy #include <sys/dmu_tx.h>
52*eda14cbcSMatt Macy #include <sys/zap.h>
53*eda14cbcSMatt Macy #include <sys/zil.h>
54*eda14cbcSMatt Macy #include <sys/dmu_impl.h>
55*eda14cbcSMatt Macy #include <sys/zfs_ioctl.h>
56*eda14cbcSMatt Macy #include <sys/sa.h>
57*eda14cbcSMatt Macy #include <sys/zfs_onexit.h>
58*eda14cbcSMatt Macy #include <sys/dsl_destroy.h>
59*eda14cbcSMatt Macy #include <sys/vdev.h>
60*eda14cbcSMatt Macy #include <sys/zfeature.h>
61*eda14cbcSMatt Macy #include <sys/policy.h>
62*eda14cbcSMatt Macy #include <sys/spa_impl.h>
63*eda14cbcSMatt Macy #include <sys/dmu_recv.h>
64*eda14cbcSMatt Macy #include <sys/zfs_project.h>
65*eda14cbcSMatt Macy #include "zfs_namecheck.h"
66*eda14cbcSMatt Macy 
67*eda14cbcSMatt Macy /*
68*eda14cbcSMatt Macy  * Needed to close a window in dnode_move() that allows the objset to be freed
69*eda14cbcSMatt Macy  * before it can be safely accessed.
70*eda14cbcSMatt Macy  */
71*eda14cbcSMatt Macy krwlock_t os_lock;
72*eda14cbcSMatt Macy 
73*eda14cbcSMatt Macy /*
74*eda14cbcSMatt Macy  * Tunable to overwrite the maximum number of threads for the parallelization
75*eda14cbcSMatt Macy  * of dmu_objset_find_dp, needed to speed up the import of pools with many
76*eda14cbcSMatt Macy  * datasets.
77*eda14cbcSMatt Macy  * Default is 4 times the number of leaf vdevs.
78*eda14cbcSMatt Macy  */
79*eda14cbcSMatt Macy int dmu_find_threads = 0;
80*eda14cbcSMatt Macy 
81*eda14cbcSMatt Macy /*
82*eda14cbcSMatt Macy  * Backfill lower metadnode objects after this many have been freed.
83*eda14cbcSMatt Macy  * Backfilling negatively impacts object creation rates, so only do it
84*eda14cbcSMatt Macy  * if there are enough holes to fill.
85*eda14cbcSMatt Macy  */
86*eda14cbcSMatt Macy int dmu_rescan_dnode_threshold = 1 << DN_MAX_INDBLKSHIFT;
87*eda14cbcSMatt Macy 
88*eda14cbcSMatt Macy static char *upgrade_tag = "upgrade_tag";
89*eda14cbcSMatt Macy 
90*eda14cbcSMatt Macy static void dmu_objset_find_dp_cb(void *arg);
91*eda14cbcSMatt Macy 
92*eda14cbcSMatt Macy static void dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb);
93*eda14cbcSMatt Macy static void dmu_objset_upgrade_stop(objset_t *os);
94*eda14cbcSMatt Macy 
95*eda14cbcSMatt Macy void
96*eda14cbcSMatt Macy dmu_objset_init(void)
97*eda14cbcSMatt Macy {
98*eda14cbcSMatt Macy 	rw_init(&os_lock, NULL, RW_DEFAULT, NULL);
99*eda14cbcSMatt Macy }
100*eda14cbcSMatt Macy 
101*eda14cbcSMatt Macy void
102*eda14cbcSMatt Macy dmu_objset_fini(void)
103*eda14cbcSMatt Macy {
104*eda14cbcSMatt Macy 	rw_destroy(&os_lock);
105*eda14cbcSMatt Macy }
106*eda14cbcSMatt Macy 
107*eda14cbcSMatt Macy spa_t *
108*eda14cbcSMatt Macy dmu_objset_spa(objset_t *os)
109*eda14cbcSMatt Macy {
110*eda14cbcSMatt Macy 	return (os->os_spa);
111*eda14cbcSMatt Macy }
112*eda14cbcSMatt Macy 
113*eda14cbcSMatt Macy zilog_t *
114*eda14cbcSMatt Macy dmu_objset_zil(objset_t *os)
115*eda14cbcSMatt Macy {
116*eda14cbcSMatt Macy 	return (os->os_zil);
117*eda14cbcSMatt Macy }
118*eda14cbcSMatt Macy 
119*eda14cbcSMatt Macy dsl_pool_t *
120*eda14cbcSMatt Macy dmu_objset_pool(objset_t *os)
121*eda14cbcSMatt Macy {
122*eda14cbcSMatt Macy 	dsl_dataset_t *ds;
123*eda14cbcSMatt Macy 
124*eda14cbcSMatt Macy 	if ((ds = os->os_dsl_dataset) != NULL && ds->ds_dir)
125*eda14cbcSMatt Macy 		return (ds->ds_dir->dd_pool);
126*eda14cbcSMatt Macy 	else
127*eda14cbcSMatt Macy 		return (spa_get_dsl(os->os_spa));
128*eda14cbcSMatt Macy }
129*eda14cbcSMatt Macy 
130*eda14cbcSMatt Macy dsl_dataset_t *
131*eda14cbcSMatt Macy dmu_objset_ds(objset_t *os)
132*eda14cbcSMatt Macy {
133*eda14cbcSMatt Macy 	return (os->os_dsl_dataset);
134*eda14cbcSMatt Macy }
135*eda14cbcSMatt Macy 
136*eda14cbcSMatt Macy dmu_objset_type_t
137*eda14cbcSMatt Macy dmu_objset_type(objset_t *os)
138*eda14cbcSMatt Macy {
139*eda14cbcSMatt Macy 	return (os->os_phys->os_type);
140*eda14cbcSMatt Macy }
141*eda14cbcSMatt Macy 
142*eda14cbcSMatt Macy void
143*eda14cbcSMatt Macy dmu_objset_name(objset_t *os, char *buf)
144*eda14cbcSMatt Macy {
145*eda14cbcSMatt Macy 	dsl_dataset_name(os->os_dsl_dataset, buf);
146*eda14cbcSMatt Macy }
147*eda14cbcSMatt Macy 
148*eda14cbcSMatt Macy uint64_t
149*eda14cbcSMatt Macy dmu_objset_id(objset_t *os)
150*eda14cbcSMatt Macy {
151*eda14cbcSMatt Macy 	dsl_dataset_t *ds = os->os_dsl_dataset;
152*eda14cbcSMatt Macy 
153*eda14cbcSMatt Macy 	return (ds ? ds->ds_object : 0);
154*eda14cbcSMatt Macy }
155*eda14cbcSMatt Macy 
156*eda14cbcSMatt Macy uint64_t
157*eda14cbcSMatt Macy dmu_objset_dnodesize(objset_t *os)
158*eda14cbcSMatt Macy {
159*eda14cbcSMatt Macy 	return (os->os_dnodesize);
160*eda14cbcSMatt Macy }
161*eda14cbcSMatt Macy 
162*eda14cbcSMatt Macy zfs_sync_type_t
163*eda14cbcSMatt Macy dmu_objset_syncprop(objset_t *os)
164*eda14cbcSMatt Macy {
165*eda14cbcSMatt Macy 	return (os->os_sync);
166*eda14cbcSMatt Macy }
167*eda14cbcSMatt Macy 
168*eda14cbcSMatt Macy zfs_logbias_op_t
169*eda14cbcSMatt Macy dmu_objset_logbias(objset_t *os)
170*eda14cbcSMatt Macy {
171*eda14cbcSMatt Macy 	return (os->os_logbias);
172*eda14cbcSMatt Macy }
173*eda14cbcSMatt Macy 
174*eda14cbcSMatt Macy static void
175*eda14cbcSMatt Macy checksum_changed_cb(void *arg, uint64_t newval)
176*eda14cbcSMatt Macy {
177*eda14cbcSMatt Macy 	objset_t *os = arg;
178*eda14cbcSMatt Macy 
179*eda14cbcSMatt Macy 	/*
180*eda14cbcSMatt Macy 	 * Inheritance should have been done by now.
181*eda14cbcSMatt Macy 	 */
182*eda14cbcSMatt Macy 	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
183*eda14cbcSMatt Macy 
184*eda14cbcSMatt Macy 	os->os_checksum = zio_checksum_select(newval, ZIO_CHECKSUM_ON_VALUE);
185*eda14cbcSMatt Macy }
186*eda14cbcSMatt Macy 
187*eda14cbcSMatt Macy static void
188*eda14cbcSMatt Macy compression_changed_cb(void *arg, uint64_t newval)
189*eda14cbcSMatt Macy {
190*eda14cbcSMatt Macy 	objset_t *os = arg;
191*eda14cbcSMatt Macy 
192*eda14cbcSMatt Macy 	/*
193*eda14cbcSMatt Macy 	 * Inheritance and range checking should have been done by now.
194*eda14cbcSMatt Macy 	 */
195*eda14cbcSMatt Macy 	ASSERT(newval != ZIO_COMPRESS_INHERIT);
196*eda14cbcSMatt Macy 
197*eda14cbcSMatt Macy 	os->os_compress = zio_compress_select(os->os_spa,
198*eda14cbcSMatt Macy 	    ZIO_COMPRESS_ALGO(newval), ZIO_COMPRESS_ON);
199*eda14cbcSMatt Macy 	os->os_complevel = zio_complevel_select(os->os_spa, os->os_compress,
200*eda14cbcSMatt Macy 	    ZIO_COMPRESS_LEVEL(newval), ZIO_COMPLEVEL_DEFAULT);
201*eda14cbcSMatt Macy }
202*eda14cbcSMatt Macy 
203*eda14cbcSMatt Macy static void
204*eda14cbcSMatt Macy copies_changed_cb(void *arg, uint64_t newval)
205*eda14cbcSMatt Macy {
206*eda14cbcSMatt Macy 	objset_t *os = arg;
207*eda14cbcSMatt Macy 
208*eda14cbcSMatt Macy 	/*
209*eda14cbcSMatt Macy 	 * Inheritance and range checking should have been done by now.
210*eda14cbcSMatt Macy 	 */
211*eda14cbcSMatt Macy 	ASSERT(newval > 0);
212*eda14cbcSMatt Macy 	ASSERT(newval <= spa_max_replication(os->os_spa));
213*eda14cbcSMatt Macy 
214*eda14cbcSMatt Macy 	os->os_copies = newval;
215*eda14cbcSMatt Macy }
216*eda14cbcSMatt Macy 
217*eda14cbcSMatt Macy static void
218*eda14cbcSMatt Macy dedup_changed_cb(void *arg, uint64_t newval)
219*eda14cbcSMatt Macy {
220*eda14cbcSMatt Macy 	objset_t *os = arg;
221*eda14cbcSMatt Macy 	spa_t *spa = os->os_spa;
222*eda14cbcSMatt Macy 	enum zio_checksum checksum;
223*eda14cbcSMatt Macy 
224*eda14cbcSMatt Macy 	/*
225*eda14cbcSMatt Macy 	 * Inheritance should have been done by now.
226*eda14cbcSMatt Macy 	 */
227*eda14cbcSMatt Macy 	ASSERT(newval != ZIO_CHECKSUM_INHERIT);
228*eda14cbcSMatt Macy 
229*eda14cbcSMatt Macy 	checksum = zio_checksum_dedup_select(spa, newval, ZIO_CHECKSUM_OFF);
230*eda14cbcSMatt Macy 
231*eda14cbcSMatt Macy 	os->os_dedup_checksum = checksum & ZIO_CHECKSUM_MASK;
232*eda14cbcSMatt Macy 	os->os_dedup_verify = !!(checksum & ZIO_CHECKSUM_VERIFY);
233*eda14cbcSMatt Macy }
234*eda14cbcSMatt Macy 
235*eda14cbcSMatt Macy static void
236*eda14cbcSMatt Macy primary_cache_changed_cb(void *arg, uint64_t newval)
237*eda14cbcSMatt Macy {
238*eda14cbcSMatt Macy 	objset_t *os = arg;
239*eda14cbcSMatt Macy 
240*eda14cbcSMatt Macy 	/*
241*eda14cbcSMatt Macy 	 * Inheritance and range checking should have been done by now.
242*eda14cbcSMatt Macy 	 */
243*eda14cbcSMatt Macy 	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
244*eda14cbcSMatt Macy 	    newval == ZFS_CACHE_METADATA);
245*eda14cbcSMatt Macy 
246*eda14cbcSMatt Macy 	os->os_primary_cache = newval;
247*eda14cbcSMatt Macy }
248*eda14cbcSMatt Macy 
249*eda14cbcSMatt Macy static void
250*eda14cbcSMatt Macy secondary_cache_changed_cb(void *arg, uint64_t newval)
251*eda14cbcSMatt Macy {
252*eda14cbcSMatt Macy 	objset_t *os = arg;
253*eda14cbcSMatt Macy 
254*eda14cbcSMatt Macy 	/*
255*eda14cbcSMatt Macy 	 * Inheritance and range checking should have been done by now.
256*eda14cbcSMatt Macy 	 */
257*eda14cbcSMatt Macy 	ASSERT(newval == ZFS_CACHE_ALL || newval == ZFS_CACHE_NONE ||
258*eda14cbcSMatt Macy 	    newval == ZFS_CACHE_METADATA);
259*eda14cbcSMatt Macy 
260*eda14cbcSMatt Macy 	os->os_secondary_cache = newval;
261*eda14cbcSMatt Macy }
262*eda14cbcSMatt Macy 
263*eda14cbcSMatt Macy static void
264*eda14cbcSMatt Macy sync_changed_cb(void *arg, uint64_t newval)
265*eda14cbcSMatt Macy {
266*eda14cbcSMatt Macy 	objset_t *os = arg;
267*eda14cbcSMatt Macy 
268*eda14cbcSMatt Macy 	/*
269*eda14cbcSMatt Macy 	 * Inheritance and range checking should have been done by now.
270*eda14cbcSMatt Macy 	 */
271*eda14cbcSMatt Macy 	ASSERT(newval == ZFS_SYNC_STANDARD || newval == ZFS_SYNC_ALWAYS ||
272*eda14cbcSMatt Macy 	    newval == ZFS_SYNC_DISABLED);
273*eda14cbcSMatt Macy 
274*eda14cbcSMatt Macy 	os->os_sync = newval;
275*eda14cbcSMatt Macy 	if (os->os_zil)
276*eda14cbcSMatt Macy 		zil_set_sync(os->os_zil, newval);
277*eda14cbcSMatt Macy }
278*eda14cbcSMatt Macy 
279*eda14cbcSMatt Macy static void
280*eda14cbcSMatt Macy redundant_metadata_changed_cb(void *arg, uint64_t newval)
281*eda14cbcSMatt Macy {
282*eda14cbcSMatt Macy 	objset_t *os = arg;
283*eda14cbcSMatt Macy 
284*eda14cbcSMatt Macy 	/*
285*eda14cbcSMatt Macy 	 * Inheritance and range checking should have been done by now.
286*eda14cbcSMatt Macy 	 */
287*eda14cbcSMatt Macy 	ASSERT(newval == ZFS_REDUNDANT_METADATA_ALL ||
288*eda14cbcSMatt Macy 	    newval == ZFS_REDUNDANT_METADATA_MOST);
289*eda14cbcSMatt Macy 
290*eda14cbcSMatt Macy 	os->os_redundant_metadata = newval;
291*eda14cbcSMatt Macy }
292*eda14cbcSMatt Macy 
293*eda14cbcSMatt Macy static void
294*eda14cbcSMatt Macy dnodesize_changed_cb(void *arg, uint64_t newval)
295*eda14cbcSMatt Macy {
296*eda14cbcSMatt Macy 	objset_t *os = arg;
297*eda14cbcSMatt Macy 
298*eda14cbcSMatt Macy 	switch (newval) {
299*eda14cbcSMatt Macy 	case ZFS_DNSIZE_LEGACY:
300*eda14cbcSMatt Macy 		os->os_dnodesize = DNODE_MIN_SIZE;
301*eda14cbcSMatt Macy 		break;
302*eda14cbcSMatt Macy 	case ZFS_DNSIZE_AUTO:
303*eda14cbcSMatt Macy 		/*
304*eda14cbcSMatt Macy 		 * Choose a dnode size that will work well for most
305*eda14cbcSMatt Macy 		 * workloads if the user specified "auto". Future code
306*eda14cbcSMatt Macy 		 * improvements could dynamically select a dnode size
307*eda14cbcSMatt Macy 		 * based on observed workload patterns.
308*eda14cbcSMatt Macy 		 */
309*eda14cbcSMatt Macy 		os->os_dnodesize = DNODE_MIN_SIZE * 2;
310*eda14cbcSMatt Macy 		break;
311*eda14cbcSMatt Macy 	case ZFS_DNSIZE_1K:
312*eda14cbcSMatt Macy 	case ZFS_DNSIZE_2K:
313*eda14cbcSMatt Macy 	case ZFS_DNSIZE_4K:
314*eda14cbcSMatt Macy 	case ZFS_DNSIZE_8K:
315*eda14cbcSMatt Macy 	case ZFS_DNSIZE_16K:
316*eda14cbcSMatt Macy 		os->os_dnodesize = newval;
317*eda14cbcSMatt Macy 		break;
318*eda14cbcSMatt Macy 	}
319*eda14cbcSMatt Macy }
320*eda14cbcSMatt Macy 
321*eda14cbcSMatt Macy static void
322*eda14cbcSMatt Macy smallblk_changed_cb(void *arg, uint64_t newval)
323*eda14cbcSMatt Macy {
324*eda14cbcSMatt Macy 	objset_t *os = arg;
325*eda14cbcSMatt Macy 
326*eda14cbcSMatt Macy 	/*
327*eda14cbcSMatt Macy 	 * Inheritance and range checking should have been done by now.
328*eda14cbcSMatt Macy 	 */
329*eda14cbcSMatt Macy 	ASSERT(newval <= SPA_OLD_MAXBLOCKSIZE);
330*eda14cbcSMatt Macy 	ASSERT(ISP2(newval));
331*eda14cbcSMatt Macy 
332*eda14cbcSMatt Macy 	os->os_zpl_special_smallblock = newval;
333*eda14cbcSMatt Macy }
334*eda14cbcSMatt Macy 
335*eda14cbcSMatt Macy static void
336*eda14cbcSMatt Macy logbias_changed_cb(void *arg, uint64_t newval)
337*eda14cbcSMatt Macy {
338*eda14cbcSMatt Macy 	objset_t *os = arg;
339*eda14cbcSMatt Macy 
340*eda14cbcSMatt Macy 	ASSERT(newval == ZFS_LOGBIAS_LATENCY ||
341*eda14cbcSMatt Macy 	    newval == ZFS_LOGBIAS_THROUGHPUT);
342*eda14cbcSMatt Macy 	os->os_logbias = newval;
343*eda14cbcSMatt Macy 	if (os->os_zil)
344*eda14cbcSMatt Macy 		zil_set_logbias(os->os_zil, newval);
345*eda14cbcSMatt Macy }
346*eda14cbcSMatt Macy 
347*eda14cbcSMatt Macy static void
348*eda14cbcSMatt Macy recordsize_changed_cb(void *arg, uint64_t newval)
349*eda14cbcSMatt Macy {
350*eda14cbcSMatt Macy 	objset_t *os = arg;
351*eda14cbcSMatt Macy 
352*eda14cbcSMatt Macy 	os->os_recordsize = newval;
353*eda14cbcSMatt Macy }
354*eda14cbcSMatt Macy 
355*eda14cbcSMatt Macy void
356*eda14cbcSMatt Macy dmu_objset_byteswap(void *buf, size_t size)
357*eda14cbcSMatt Macy {
358*eda14cbcSMatt Macy 	objset_phys_t *osp = buf;
359*eda14cbcSMatt Macy 
360*eda14cbcSMatt Macy 	ASSERT(size == OBJSET_PHYS_SIZE_V1 || size == OBJSET_PHYS_SIZE_V2 ||
361*eda14cbcSMatt Macy 	    size == sizeof (objset_phys_t));
362*eda14cbcSMatt Macy 	dnode_byteswap(&osp->os_meta_dnode);
363*eda14cbcSMatt Macy 	byteswap_uint64_array(&osp->os_zil_header, sizeof (zil_header_t));
364*eda14cbcSMatt Macy 	osp->os_type = BSWAP_64(osp->os_type);
365*eda14cbcSMatt Macy 	osp->os_flags = BSWAP_64(osp->os_flags);
366*eda14cbcSMatt Macy 	if (size >= OBJSET_PHYS_SIZE_V2) {
367*eda14cbcSMatt Macy 		dnode_byteswap(&osp->os_userused_dnode);
368*eda14cbcSMatt Macy 		dnode_byteswap(&osp->os_groupused_dnode);
369*eda14cbcSMatt Macy 		if (size >= sizeof (objset_phys_t))
370*eda14cbcSMatt Macy 			dnode_byteswap(&osp->os_projectused_dnode);
371*eda14cbcSMatt Macy 	}
372*eda14cbcSMatt Macy }
373*eda14cbcSMatt Macy 
374*eda14cbcSMatt Macy /*
375*eda14cbcSMatt Macy  * The hash is a CRC-based hash of the objset_t pointer and the object number.
376*eda14cbcSMatt Macy  */
377*eda14cbcSMatt Macy static uint64_t
378*eda14cbcSMatt Macy dnode_hash(const objset_t *os, uint64_t obj)
379*eda14cbcSMatt Macy {
380*eda14cbcSMatt Macy 	uintptr_t osv = (uintptr_t)os;
381*eda14cbcSMatt Macy 	uint64_t crc = -1ULL;
382*eda14cbcSMatt Macy 
383*eda14cbcSMatt Macy 	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
384*eda14cbcSMatt Macy 	/*
385*eda14cbcSMatt Macy 	 * The low 6 bits of the pointer don't have much entropy, because
386*eda14cbcSMatt Macy 	 * the objset_t is larger than 2^6 bytes long.
387*eda14cbcSMatt Macy 	 */
388*eda14cbcSMatt Macy 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF];
389*eda14cbcSMatt Macy 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
390*eda14cbcSMatt Macy 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
391*eda14cbcSMatt Macy 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 16)) & 0xFF];
392*eda14cbcSMatt Macy 
393*eda14cbcSMatt Macy 	crc ^= (osv>>14) ^ (obj>>24);
394*eda14cbcSMatt Macy 
395*eda14cbcSMatt Macy 	return (crc);
396*eda14cbcSMatt Macy }
397*eda14cbcSMatt Macy 
398*eda14cbcSMatt Macy static unsigned int
399*eda14cbcSMatt Macy dnode_multilist_index_func(multilist_t *ml, void *obj)
400*eda14cbcSMatt Macy {
401*eda14cbcSMatt Macy 	dnode_t *dn = obj;
402*eda14cbcSMatt Macy 	return (dnode_hash(dn->dn_objset, dn->dn_object) %
403*eda14cbcSMatt Macy 	    multilist_get_num_sublists(ml));
404*eda14cbcSMatt Macy }
405*eda14cbcSMatt Macy 
406*eda14cbcSMatt Macy /*
407*eda14cbcSMatt Macy  * Instantiates the objset_t in-memory structure corresponding to the
408*eda14cbcSMatt Macy  * objset_phys_t that's pointed to by the specified blkptr_t.
409*eda14cbcSMatt Macy  */
410*eda14cbcSMatt Macy int
411*eda14cbcSMatt Macy dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
412*eda14cbcSMatt Macy     objset_t **osp)
413*eda14cbcSMatt Macy {
414*eda14cbcSMatt Macy 	objset_t *os;
415*eda14cbcSMatt Macy 	int i, err;
416*eda14cbcSMatt Macy 
417*eda14cbcSMatt Macy 	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
418*eda14cbcSMatt Macy 	ASSERT(!BP_IS_REDACTED(bp));
419*eda14cbcSMatt Macy 
420*eda14cbcSMatt Macy 	/*
421*eda14cbcSMatt Macy 	 * We need the pool config lock to get properties.
422*eda14cbcSMatt Macy 	 */
423*eda14cbcSMatt Macy 	ASSERT(ds == NULL || dsl_pool_config_held(ds->ds_dir->dd_pool));
424*eda14cbcSMatt Macy 
425*eda14cbcSMatt Macy 	/*
426*eda14cbcSMatt Macy 	 * The $ORIGIN dataset (if it exists) doesn't have an associated
427*eda14cbcSMatt Macy 	 * objset, so there's no reason to open it. The $ORIGIN dataset
428*eda14cbcSMatt Macy 	 * will not exist on pools older than SPA_VERSION_ORIGIN.
429*eda14cbcSMatt Macy 	 */
430*eda14cbcSMatt Macy 	if (ds != NULL && spa_get_dsl(spa) != NULL &&
431*eda14cbcSMatt Macy 	    spa_get_dsl(spa)->dp_origin_snap != NULL) {
432*eda14cbcSMatt Macy 		ASSERT3P(ds->ds_dir, !=,
433*eda14cbcSMatt Macy 		    spa_get_dsl(spa)->dp_origin_snap->ds_dir);
434*eda14cbcSMatt Macy 	}
435*eda14cbcSMatt Macy 
436*eda14cbcSMatt Macy 	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
437*eda14cbcSMatt Macy 	os->os_dsl_dataset = ds;
438*eda14cbcSMatt Macy 	os->os_spa = spa;
439*eda14cbcSMatt Macy 	os->os_rootbp = bp;
440*eda14cbcSMatt Macy 	if (!BP_IS_HOLE(os->os_rootbp)) {
441*eda14cbcSMatt Macy 		arc_flags_t aflags = ARC_FLAG_WAIT;
442*eda14cbcSMatt Macy 		zbookmark_phys_t zb;
443*eda14cbcSMatt Macy 		int size;
444*eda14cbcSMatt Macy 		enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
445*eda14cbcSMatt Macy 		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
446*eda14cbcSMatt Macy 		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
447*eda14cbcSMatt Macy 
448*eda14cbcSMatt Macy 		if (DMU_OS_IS_L2CACHEABLE(os))
449*eda14cbcSMatt Macy 			aflags |= ARC_FLAG_L2CACHE;
450*eda14cbcSMatt Macy 
451*eda14cbcSMatt Macy 		if (ds != NULL && ds->ds_dir->dd_crypto_obj != 0) {
452*eda14cbcSMatt Macy 			ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_OFF);
453*eda14cbcSMatt Macy 			ASSERT(BP_IS_AUTHENTICATED(bp));
454*eda14cbcSMatt Macy 			zio_flags |= ZIO_FLAG_RAW;
455*eda14cbcSMatt Macy 		}
456*eda14cbcSMatt Macy 
457*eda14cbcSMatt Macy 		dprintf_bp(os->os_rootbp, "reading %s", "");
458*eda14cbcSMatt Macy 		err = arc_read(NULL, spa, os->os_rootbp,
459*eda14cbcSMatt Macy 		    arc_getbuf_func, &os->os_phys_buf,
460*eda14cbcSMatt Macy 		    ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
461*eda14cbcSMatt Macy 		if (err != 0) {
462*eda14cbcSMatt Macy 			kmem_free(os, sizeof (objset_t));
463*eda14cbcSMatt Macy 			/* convert checksum errors into IO errors */
464*eda14cbcSMatt Macy 			if (err == ECKSUM)
465*eda14cbcSMatt Macy 				err = SET_ERROR(EIO);
466*eda14cbcSMatt Macy 			return (err);
467*eda14cbcSMatt Macy 		}
468*eda14cbcSMatt Macy 
469*eda14cbcSMatt Macy 		if (spa_version(spa) < SPA_VERSION_USERSPACE)
470*eda14cbcSMatt Macy 			size = OBJSET_PHYS_SIZE_V1;
471*eda14cbcSMatt Macy 		else if (!spa_feature_is_enabled(spa,
472*eda14cbcSMatt Macy 		    SPA_FEATURE_PROJECT_QUOTA))
473*eda14cbcSMatt Macy 			size = OBJSET_PHYS_SIZE_V2;
474*eda14cbcSMatt Macy 		else
475*eda14cbcSMatt Macy 			size = sizeof (objset_phys_t);
476*eda14cbcSMatt Macy 
477*eda14cbcSMatt Macy 		/* Increase the blocksize if we are permitted. */
478*eda14cbcSMatt Macy 		if (arc_buf_size(os->os_phys_buf) < size) {
479*eda14cbcSMatt Macy 			arc_buf_t *buf = arc_alloc_buf(spa, &os->os_phys_buf,
480*eda14cbcSMatt Macy 			    ARC_BUFC_METADATA, size);
481*eda14cbcSMatt Macy 			bzero(buf->b_data, size);
482*eda14cbcSMatt Macy 			bcopy(os->os_phys_buf->b_data, buf->b_data,
483*eda14cbcSMatt Macy 			    arc_buf_size(os->os_phys_buf));
484*eda14cbcSMatt Macy 			arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
485*eda14cbcSMatt Macy 			os->os_phys_buf = buf;
486*eda14cbcSMatt Macy 		}
487*eda14cbcSMatt Macy 
488*eda14cbcSMatt Macy 		os->os_phys = os->os_phys_buf->b_data;
489*eda14cbcSMatt Macy 		os->os_flags = os->os_phys->os_flags;
490*eda14cbcSMatt Macy 	} else {
491*eda14cbcSMatt Macy 		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
492*eda14cbcSMatt Macy 		    sizeof (objset_phys_t) : OBJSET_PHYS_SIZE_V1;
493*eda14cbcSMatt Macy 		os->os_phys_buf = arc_alloc_buf(spa, &os->os_phys_buf,
494*eda14cbcSMatt Macy 		    ARC_BUFC_METADATA, size);
495*eda14cbcSMatt Macy 		os->os_phys = os->os_phys_buf->b_data;
496*eda14cbcSMatt Macy 		bzero(os->os_phys, size);
497*eda14cbcSMatt Macy 	}
498*eda14cbcSMatt Macy 	/*
499*eda14cbcSMatt Macy 	 * These properties will be filled in by the logic in zfs_get_zplprop()
500*eda14cbcSMatt Macy 	 * when they are queried for the first time.
501*eda14cbcSMatt Macy 	 */
502*eda14cbcSMatt Macy 	os->os_version = OBJSET_PROP_UNINITIALIZED;
503*eda14cbcSMatt Macy 	os->os_normalization = OBJSET_PROP_UNINITIALIZED;
504*eda14cbcSMatt Macy 	os->os_utf8only = OBJSET_PROP_UNINITIALIZED;
505*eda14cbcSMatt Macy 	os->os_casesensitivity = OBJSET_PROP_UNINITIALIZED;
506*eda14cbcSMatt Macy 
507*eda14cbcSMatt Macy 	/*
508*eda14cbcSMatt Macy 	 * Note: the changed_cb will be called once before the register
509*eda14cbcSMatt Macy 	 * func returns, thus changing the checksum/compression from the
510*eda14cbcSMatt Macy 	 * default (fletcher2/off).  Snapshots don't need to know about
511*eda14cbcSMatt Macy 	 * checksum/compression/copies.
512*eda14cbcSMatt Macy 	 */
513*eda14cbcSMatt Macy 	if (ds != NULL) {
514*eda14cbcSMatt Macy 		os->os_encrypted = (ds->ds_dir->dd_crypto_obj != 0);
515*eda14cbcSMatt Macy 
516*eda14cbcSMatt Macy 		err = dsl_prop_register(ds,
517*eda14cbcSMatt Macy 		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
518*eda14cbcSMatt Macy 		    primary_cache_changed_cb, os);
519*eda14cbcSMatt Macy 		if (err == 0) {
520*eda14cbcSMatt Macy 			err = dsl_prop_register(ds,
521*eda14cbcSMatt Macy 			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
522*eda14cbcSMatt Macy 			    secondary_cache_changed_cb, os);
523*eda14cbcSMatt Macy 		}
524*eda14cbcSMatt Macy 		if (!ds->ds_is_snapshot) {
525*eda14cbcSMatt Macy 			if (err == 0) {
526*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
527*eda14cbcSMatt Macy 				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
528*eda14cbcSMatt Macy 				    checksum_changed_cb, os);
529*eda14cbcSMatt Macy 			}
530*eda14cbcSMatt Macy 			if (err == 0) {
531*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
532*eda14cbcSMatt Macy 				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
533*eda14cbcSMatt Macy 				    compression_changed_cb, os);
534*eda14cbcSMatt Macy 			}
535*eda14cbcSMatt Macy 			if (err == 0) {
536*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
537*eda14cbcSMatt Macy 				    zfs_prop_to_name(ZFS_PROP_COPIES),
538*eda14cbcSMatt Macy 				    copies_changed_cb, os);
539*eda14cbcSMatt Macy 			}
540*eda14cbcSMatt Macy 			if (err == 0) {
541*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
542*eda14cbcSMatt Macy 				    zfs_prop_to_name(ZFS_PROP_DEDUP),
543*eda14cbcSMatt Macy 				    dedup_changed_cb, os);
544*eda14cbcSMatt Macy 			}
545*eda14cbcSMatt Macy 			if (err == 0) {
546*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
547*eda14cbcSMatt Macy 				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
548*eda14cbcSMatt Macy 				    logbias_changed_cb, os);
549*eda14cbcSMatt Macy 			}
550*eda14cbcSMatt Macy 			if (err == 0) {
551*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
552*eda14cbcSMatt Macy 				    zfs_prop_to_name(ZFS_PROP_SYNC),
553*eda14cbcSMatt Macy 				    sync_changed_cb, os);
554*eda14cbcSMatt Macy 			}
555*eda14cbcSMatt Macy 			if (err == 0) {
556*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
557*eda14cbcSMatt Macy 				    zfs_prop_to_name(
558*eda14cbcSMatt Macy 				    ZFS_PROP_REDUNDANT_METADATA),
559*eda14cbcSMatt Macy 				    redundant_metadata_changed_cb, os);
560*eda14cbcSMatt Macy 			}
561*eda14cbcSMatt Macy 			if (err == 0) {
562*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
563*eda14cbcSMatt Macy 				    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
564*eda14cbcSMatt Macy 				    recordsize_changed_cb, os);
565*eda14cbcSMatt Macy 			}
566*eda14cbcSMatt Macy 			if (err == 0) {
567*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
568*eda14cbcSMatt Macy 				    zfs_prop_to_name(ZFS_PROP_DNODESIZE),
569*eda14cbcSMatt Macy 				    dnodesize_changed_cb, os);
570*eda14cbcSMatt Macy 			}
571*eda14cbcSMatt Macy 			if (err == 0) {
572*eda14cbcSMatt Macy 				err = dsl_prop_register(ds,
573*eda14cbcSMatt Macy 				    zfs_prop_to_name(
574*eda14cbcSMatt Macy 				    ZFS_PROP_SPECIAL_SMALL_BLOCKS),
575*eda14cbcSMatt Macy 				    smallblk_changed_cb, os);
576*eda14cbcSMatt Macy 			}
577*eda14cbcSMatt Macy 		}
578*eda14cbcSMatt Macy 		if (err != 0) {
579*eda14cbcSMatt Macy 			arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
580*eda14cbcSMatt Macy 			kmem_free(os, sizeof (objset_t));
581*eda14cbcSMatt Macy 			return (err);
582*eda14cbcSMatt Macy 		}
583*eda14cbcSMatt Macy 	} else {
584*eda14cbcSMatt Macy 		/* It's the meta-objset. */
585*eda14cbcSMatt Macy 		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
586*eda14cbcSMatt Macy 		os->os_compress = ZIO_COMPRESS_ON;
587*eda14cbcSMatt Macy 		os->os_complevel = ZIO_COMPLEVEL_DEFAULT;
588*eda14cbcSMatt Macy 		os->os_encrypted = B_FALSE;
589*eda14cbcSMatt Macy 		os->os_copies = spa_max_replication(spa);
590*eda14cbcSMatt Macy 		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
591*eda14cbcSMatt Macy 		os->os_dedup_verify = B_FALSE;
592*eda14cbcSMatt Macy 		os->os_logbias = ZFS_LOGBIAS_LATENCY;
593*eda14cbcSMatt Macy 		os->os_sync = ZFS_SYNC_STANDARD;
594*eda14cbcSMatt Macy 		os->os_primary_cache = ZFS_CACHE_ALL;
595*eda14cbcSMatt Macy 		os->os_secondary_cache = ZFS_CACHE_ALL;
596*eda14cbcSMatt Macy 		os->os_dnodesize = DNODE_MIN_SIZE;
597*eda14cbcSMatt Macy 	}
598*eda14cbcSMatt Macy 
599*eda14cbcSMatt Macy 	if (ds == NULL || !ds->ds_is_snapshot)
600*eda14cbcSMatt Macy 		os->os_zil_header = os->os_phys->os_zil_header;
601*eda14cbcSMatt Macy 	os->os_zil = zil_alloc(os, &os->os_zil_header);
602*eda14cbcSMatt Macy 
603*eda14cbcSMatt Macy 	for (i = 0; i < TXG_SIZE; i++) {
604*eda14cbcSMatt Macy 		os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t),
605*eda14cbcSMatt Macy 		    offsetof(dnode_t, dn_dirty_link[i]),
606*eda14cbcSMatt Macy 		    dnode_multilist_index_func);
607*eda14cbcSMatt Macy 	}
608*eda14cbcSMatt Macy 	list_create(&os->os_dnodes, sizeof (dnode_t),
609*eda14cbcSMatt Macy 	    offsetof(dnode_t, dn_link));
610*eda14cbcSMatt Macy 	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
611*eda14cbcSMatt Macy 	    offsetof(dmu_buf_impl_t, db_link));
612*eda14cbcSMatt Macy 
613*eda14cbcSMatt Macy 	list_link_init(&os->os_evicting_node);
614*eda14cbcSMatt Macy 
615*eda14cbcSMatt Macy 	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
616*eda14cbcSMatt Macy 	mutex_init(&os->os_userused_lock, NULL, MUTEX_DEFAULT, NULL);
617*eda14cbcSMatt Macy 	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
618*eda14cbcSMatt Macy 	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
619*eda14cbcSMatt Macy 	os->os_obj_next_percpu_len = boot_ncpus;
620*eda14cbcSMatt Macy 	os->os_obj_next_percpu = kmem_zalloc(os->os_obj_next_percpu_len *
621*eda14cbcSMatt Macy 	    sizeof (os->os_obj_next_percpu[0]), KM_SLEEP);
622*eda14cbcSMatt Macy 
623*eda14cbcSMatt Macy 	dnode_special_open(os, &os->os_phys->os_meta_dnode,
624*eda14cbcSMatt Macy 	    DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
625*eda14cbcSMatt Macy 	if (OBJSET_BUF_HAS_USERUSED(os->os_phys_buf)) {
626*eda14cbcSMatt Macy 		dnode_special_open(os, &os->os_phys->os_userused_dnode,
627*eda14cbcSMatt Macy 		    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
628*eda14cbcSMatt Macy 		dnode_special_open(os, &os->os_phys->os_groupused_dnode,
629*eda14cbcSMatt Macy 		    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
630*eda14cbcSMatt Macy 		if (OBJSET_BUF_HAS_PROJECTUSED(os->os_phys_buf))
631*eda14cbcSMatt Macy 			dnode_special_open(os,
632*eda14cbcSMatt Macy 			    &os->os_phys->os_projectused_dnode,
633*eda14cbcSMatt Macy 			    DMU_PROJECTUSED_OBJECT, &os->os_projectused_dnode);
634*eda14cbcSMatt Macy 	}
635*eda14cbcSMatt Macy 
636*eda14cbcSMatt Macy 	mutex_init(&os->os_upgrade_lock, NULL, MUTEX_DEFAULT, NULL);
637*eda14cbcSMatt Macy 
638*eda14cbcSMatt Macy 	*osp = os;
639*eda14cbcSMatt Macy 	return (0);
640*eda14cbcSMatt Macy }
641*eda14cbcSMatt Macy 
642*eda14cbcSMatt Macy int
643*eda14cbcSMatt Macy dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
644*eda14cbcSMatt Macy {
645*eda14cbcSMatt Macy 	int err = 0;
646*eda14cbcSMatt Macy 
647*eda14cbcSMatt Macy 	/*
648*eda14cbcSMatt Macy 	 * We need the pool_config lock to manipulate the dsl_dataset_t.
649*eda14cbcSMatt Macy 	 * Even if the dataset is long-held, we need the pool_config lock
650*eda14cbcSMatt Macy 	 * to open the objset, as it needs to get properties.
651*eda14cbcSMatt Macy 	 */
652*eda14cbcSMatt Macy 	ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
653*eda14cbcSMatt Macy 
654*eda14cbcSMatt Macy 	mutex_enter(&ds->ds_opening_lock);
655*eda14cbcSMatt Macy 	if (ds->ds_objset == NULL) {
656*eda14cbcSMatt Macy 		objset_t *os;
657*eda14cbcSMatt Macy 		rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
658*eda14cbcSMatt Macy 		err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
659*eda14cbcSMatt Macy 		    ds, dsl_dataset_get_blkptr(ds), &os);
660*eda14cbcSMatt Macy 		rrw_exit(&ds->ds_bp_rwlock, FTAG);
661*eda14cbcSMatt Macy 
662*eda14cbcSMatt Macy 		if (err == 0) {
663*eda14cbcSMatt Macy 			mutex_enter(&ds->ds_lock);
664*eda14cbcSMatt Macy 			ASSERT(ds->ds_objset == NULL);
665*eda14cbcSMatt Macy 			ds->ds_objset = os;
666*eda14cbcSMatt Macy 			mutex_exit(&ds->ds_lock);
667*eda14cbcSMatt Macy 		}
668*eda14cbcSMatt Macy 	}
669*eda14cbcSMatt Macy 	*osp = ds->ds_objset;
670*eda14cbcSMatt Macy 	mutex_exit(&ds->ds_opening_lock);
671*eda14cbcSMatt Macy 	return (err);
672*eda14cbcSMatt Macy }
673*eda14cbcSMatt Macy 
674*eda14cbcSMatt Macy /*
675*eda14cbcSMatt Macy  * Holds the pool while the objset is held.  Therefore only one objset
676*eda14cbcSMatt Macy  * can be held at a time.
677*eda14cbcSMatt Macy  */
678*eda14cbcSMatt Macy int
679*eda14cbcSMatt Macy dmu_objset_hold_flags(const char *name, boolean_t decrypt, void *tag,
680*eda14cbcSMatt Macy     objset_t **osp)
681*eda14cbcSMatt Macy {
682*eda14cbcSMatt Macy 	dsl_pool_t *dp;
683*eda14cbcSMatt Macy 	dsl_dataset_t *ds;
684*eda14cbcSMatt Macy 	int err;
685*eda14cbcSMatt Macy 	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
686*eda14cbcSMatt Macy 
687*eda14cbcSMatt Macy 	err = dsl_pool_hold(name, tag, &dp);
688*eda14cbcSMatt Macy 	if (err != 0)
689*eda14cbcSMatt Macy 		return (err);
690*eda14cbcSMatt Macy 	err = dsl_dataset_hold_flags(dp, name, flags, tag, &ds);
691*eda14cbcSMatt Macy 	if (err != 0) {
692*eda14cbcSMatt Macy 		dsl_pool_rele(dp, tag);
693*eda14cbcSMatt Macy 		return (err);
694*eda14cbcSMatt Macy 	}
695*eda14cbcSMatt Macy 
696*eda14cbcSMatt Macy 	err = dmu_objset_from_ds(ds, osp);
697*eda14cbcSMatt Macy 	if (err != 0) {
698*eda14cbcSMatt Macy 		dsl_dataset_rele(ds, tag);
699*eda14cbcSMatt Macy 		dsl_pool_rele(dp, tag);
700*eda14cbcSMatt Macy 	}
701*eda14cbcSMatt Macy 
702*eda14cbcSMatt Macy 	return (err);
703*eda14cbcSMatt Macy }
704*eda14cbcSMatt Macy 
705*eda14cbcSMatt Macy int
706*eda14cbcSMatt Macy dmu_objset_hold(const char *name, void *tag, objset_t **osp)
707*eda14cbcSMatt Macy {
708*eda14cbcSMatt Macy 	return (dmu_objset_hold_flags(name, B_FALSE, tag, osp));
709*eda14cbcSMatt Macy }
710*eda14cbcSMatt Macy 
711*eda14cbcSMatt Macy static int
712*eda14cbcSMatt Macy dmu_objset_own_impl(dsl_dataset_t *ds, dmu_objset_type_t type,
713*eda14cbcSMatt Macy     boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
714*eda14cbcSMatt Macy {
715*eda14cbcSMatt Macy 	int err;
716*eda14cbcSMatt Macy 
717*eda14cbcSMatt Macy 	err = dmu_objset_from_ds(ds, osp);
718*eda14cbcSMatt Macy 	if (err != 0) {
719*eda14cbcSMatt Macy 		return (err);
720*eda14cbcSMatt Macy 	} else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
721*eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
722*eda14cbcSMatt Macy 	} else if (!readonly && dsl_dataset_is_snapshot(ds)) {
723*eda14cbcSMatt Macy 		return (SET_ERROR(EROFS));
724*eda14cbcSMatt Macy 	} else if (!readonly && decrypt &&
725*eda14cbcSMatt Macy 	    dsl_dir_incompatible_encryption_version(ds->ds_dir)) {
726*eda14cbcSMatt Macy 		return (SET_ERROR(EROFS));
727*eda14cbcSMatt Macy 	}
728*eda14cbcSMatt Macy 
729*eda14cbcSMatt Macy 	/* if we are decrypting, we can now check MACs in os->os_phys_buf */
730*eda14cbcSMatt Macy 	if (decrypt && arc_is_unauthenticated((*osp)->os_phys_buf)) {
731*eda14cbcSMatt Macy 		zbookmark_phys_t zb;
732*eda14cbcSMatt Macy 
733*eda14cbcSMatt Macy 		SET_BOOKMARK(&zb, ds->ds_object, ZB_ROOT_OBJECT,
734*eda14cbcSMatt Macy 		    ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
735*eda14cbcSMatt Macy 		err = arc_untransform((*osp)->os_phys_buf, (*osp)->os_spa,
736*eda14cbcSMatt Macy 		    &zb, B_FALSE);
737*eda14cbcSMatt Macy 		if (err != 0)
738*eda14cbcSMatt Macy 			return (err);
739*eda14cbcSMatt Macy 
740*eda14cbcSMatt Macy 		ASSERT0(arc_is_unauthenticated((*osp)->os_phys_buf));
741*eda14cbcSMatt Macy 	}
742*eda14cbcSMatt Macy 
743*eda14cbcSMatt Macy 	return (0);
744*eda14cbcSMatt Macy }
745*eda14cbcSMatt Macy 
746*eda14cbcSMatt Macy /*
747*eda14cbcSMatt Macy  * dsl_pool must not be held when this is called.
748*eda14cbcSMatt Macy  * Upon successful return, there will be a longhold on the dataset,
749*eda14cbcSMatt Macy  * and the dsl_pool will not be held.
750*eda14cbcSMatt Macy  */
751*eda14cbcSMatt Macy int
752*eda14cbcSMatt Macy dmu_objset_own(const char *name, dmu_objset_type_t type,
753*eda14cbcSMatt Macy     boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
754*eda14cbcSMatt Macy {
755*eda14cbcSMatt Macy 	dsl_pool_t *dp;
756*eda14cbcSMatt Macy 	dsl_dataset_t *ds;
757*eda14cbcSMatt Macy 	int err;
758*eda14cbcSMatt Macy 	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
759*eda14cbcSMatt Macy 
760*eda14cbcSMatt Macy 	err = dsl_pool_hold(name, FTAG, &dp);
761*eda14cbcSMatt Macy 	if (err != 0)
762*eda14cbcSMatt Macy 		return (err);
763*eda14cbcSMatt Macy 	err = dsl_dataset_own(dp, name, flags, tag, &ds);
764*eda14cbcSMatt Macy 	if (err != 0) {
765*eda14cbcSMatt Macy 		dsl_pool_rele(dp, FTAG);
766*eda14cbcSMatt Macy 		return (err);
767*eda14cbcSMatt Macy 	}
768*eda14cbcSMatt Macy 	err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
769*eda14cbcSMatt Macy 	if (err != 0) {
770*eda14cbcSMatt Macy 		dsl_dataset_disown(ds, flags, tag);
771*eda14cbcSMatt Macy 		dsl_pool_rele(dp, FTAG);
772*eda14cbcSMatt Macy 		return (err);
773*eda14cbcSMatt Macy 	}
774*eda14cbcSMatt Macy 
775*eda14cbcSMatt Macy 	/*
776*eda14cbcSMatt Macy 	 * User accounting requires the dataset to be decrypted and rw.
777*eda14cbcSMatt Macy 	 * We also don't begin user accounting during claiming to help
778*eda14cbcSMatt Macy 	 * speed up pool import times and to keep this txg reserved
779*eda14cbcSMatt Macy 	 * completely for recovery work.
780*eda14cbcSMatt Macy 	 */
781*eda14cbcSMatt Macy 	if ((dmu_objset_userobjspace_upgradable(*osp) ||
782*eda14cbcSMatt Macy 	    dmu_objset_projectquota_upgradable(*osp)) &&
783*eda14cbcSMatt Macy 	    !readonly && !dp->dp_spa->spa_claiming &&
784*eda14cbcSMatt Macy 	    (ds->ds_dir->dd_crypto_obj == 0 || decrypt))
785*eda14cbcSMatt Macy 		dmu_objset_id_quota_upgrade(*osp);
786*eda14cbcSMatt Macy 
787*eda14cbcSMatt Macy 	dsl_pool_rele(dp, FTAG);
788*eda14cbcSMatt Macy 	return (0);
789*eda14cbcSMatt Macy }
790*eda14cbcSMatt Macy 
791*eda14cbcSMatt Macy int
792*eda14cbcSMatt Macy dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
793*eda14cbcSMatt Macy     boolean_t readonly, boolean_t decrypt, void *tag, objset_t **osp)
794*eda14cbcSMatt Macy {
795*eda14cbcSMatt Macy 	dsl_dataset_t *ds;
796*eda14cbcSMatt Macy 	int err;
797*eda14cbcSMatt Macy 	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
798*eda14cbcSMatt Macy 
799*eda14cbcSMatt Macy 	err = dsl_dataset_own_obj(dp, obj, flags, tag, &ds);
800*eda14cbcSMatt Macy 	if (err != 0)
801*eda14cbcSMatt Macy 		return (err);
802*eda14cbcSMatt Macy 
803*eda14cbcSMatt Macy 	err = dmu_objset_own_impl(ds, type, readonly, decrypt, tag, osp);
804*eda14cbcSMatt Macy 	if (err != 0) {
805*eda14cbcSMatt Macy 		dsl_dataset_disown(ds, flags, tag);
806*eda14cbcSMatt Macy 		return (err);
807*eda14cbcSMatt Macy 	}
808*eda14cbcSMatt Macy 
809*eda14cbcSMatt Macy 	return (0);
810*eda14cbcSMatt Macy }
811*eda14cbcSMatt Macy 
812*eda14cbcSMatt Macy void
813*eda14cbcSMatt Macy dmu_objset_rele_flags(objset_t *os, boolean_t decrypt, void *tag)
814*eda14cbcSMatt Macy {
815*eda14cbcSMatt Macy 	ds_hold_flags_t flags = (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0;
816*eda14cbcSMatt Macy 
817*eda14cbcSMatt Macy 	dsl_pool_t *dp = dmu_objset_pool(os);
818*eda14cbcSMatt Macy 	dsl_dataset_rele_flags(os->os_dsl_dataset, flags, tag);
819*eda14cbcSMatt Macy 	dsl_pool_rele(dp, tag);
820*eda14cbcSMatt Macy }
821*eda14cbcSMatt Macy 
822*eda14cbcSMatt Macy void
823*eda14cbcSMatt Macy dmu_objset_rele(objset_t *os, void *tag)
824*eda14cbcSMatt Macy {
825*eda14cbcSMatt Macy 	dmu_objset_rele_flags(os, B_FALSE, tag);
826*eda14cbcSMatt Macy }
827*eda14cbcSMatt Macy 
828*eda14cbcSMatt Macy /*
829*eda14cbcSMatt Macy  * When we are called, os MUST refer to an objset associated with a dataset
830*eda14cbcSMatt Macy  * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
831*eda14cbcSMatt Macy  * == tag.  We will then release and reacquire ownership of the dataset while
832*eda14cbcSMatt Macy  * holding the pool config_rwlock to avoid intervening namespace or ownership
833*eda14cbcSMatt Macy  * changes may occur.
834*eda14cbcSMatt Macy  *
835*eda14cbcSMatt Macy  * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
836*eda14cbcSMatt Macy  * release the hold on its dataset and acquire a new one on the dataset of the
837*eda14cbcSMatt Macy  * same name so that it can be partially torn down and reconstructed.
838*eda14cbcSMatt Macy  */
839*eda14cbcSMatt Macy void
840*eda14cbcSMatt Macy dmu_objset_refresh_ownership(dsl_dataset_t *ds, dsl_dataset_t **newds,
841*eda14cbcSMatt Macy     boolean_t decrypt, void *tag)
842*eda14cbcSMatt Macy {
843*eda14cbcSMatt Macy 	dsl_pool_t *dp;
844*eda14cbcSMatt Macy 	char name[ZFS_MAX_DATASET_NAME_LEN];
845*eda14cbcSMatt Macy 
846*eda14cbcSMatt Macy 	VERIFY3P(ds, !=, NULL);
847*eda14cbcSMatt Macy 	VERIFY3P(ds->ds_owner, ==, tag);
848*eda14cbcSMatt Macy 	VERIFY(dsl_dataset_long_held(ds));
849*eda14cbcSMatt Macy 
850*eda14cbcSMatt Macy 	dsl_dataset_name(ds, name);
851*eda14cbcSMatt Macy 	dp = ds->ds_dir->dd_pool;
852*eda14cbcSMatt Macy 	dsl_pool_config_enter(dp, FTAG);
853*eda14cbcSMatt Macy 	dsl_dataset_disown(ds, decrypt, tag);
854*eda14cbcSMatt Macy 	VERIFY0(dsl_dataset_own(dp, name,
855*eda14cbcSMatt Macy 	    (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag, newds));
856*eda14cbcSMatt Macy 	dsl_pool_config_exit(dp, FTAG);
857*eda14cbcSMatt Macy }
858*eda14cbcSMatt Macy 
859*eda14cbcSMatt Macy void
860*eda14cbcSMatt Macy dmu_objset_disown(objset_t *os, boolean_t decrypt, void *tag)
861*eda14cbcSMatt Macy {
862*eda14cbcSMatt Macy 	/*
863*eda14cbcSMatt Macy 	 * Stop upgrading thread
864*eda14cbcSMatt Macy 	 */
865*eda14cbcSMatt Macy 	dmu_objset_upgrade_stop(os);
866*eda14cbcSMatt Macy 	dsl_dataset_disown(os->os_dsl_dataset,
867*eda14cbcSMatt Macy 	    (decrypt) ? DS_HOLD_FLAG_DECRYPT : 0, tag);
868*eda14cbcSMatt Macy }
869*eda14cbcSMatt Macy 
870*eda14cbcSMatt Macy void
871*eda14cbcSMatt Macy dmu_objset_evict_dbufs(objset_t *os)
872*eda14cbcSMatt Macy {
873*eda14cbcSMatt Macy 	dnode_t *dn_marker;
874*eda14cbcSMatt Macy 	dnode_t *dn;
875*eda14cbcSMatt Macy 
876*eda14cbcSMatt Macy 	dn_marker = kmem_alloc(sizeof (dnode_t), KM_SLEEP);
877*eda14cbcSMatt Macy 
878*eda14cbcSMatt Macy 	mutex_enter(&os->os_lock);
879*eda14cbcSMatt Macy 	dn = list_head(&os->os_dnodes);
880*eda14cbcSMatt Macy 	while (dn != NULL) {
881*eda14cbcSMatt Macy 		/*
882*eda14cbcSMatt Macy 		 * Skip dnodes without holds.  We have to do this dance
883*eda14cbcSMatt Macy 		 * because dnode_add_ref() only works if there is already a
884*eda14cbcSMatt Macy 		 * hold.  If the dnode has no holds, then it has no dbufs.
885*eda14cbcSMatt Macy 		 */
886*eda14cbcSMatt Macy 		if (dnode_add_ref(dn, FTAG)) {
887*eda14cbcSMatt Macy 			list_insert_after(&os->os_dnodes, dn, dn_marker);
888*eda14cbcSMatt Macy 			mutex_exit(&os->os_lock);
889*eda14cbcSMatt Macy 
890*eda14cbcSMatt Macy 			dnode_evict_dbufs(dn);
891*eda14cbcSMatt Macy 			dnode_rele(dn, FTAG);
892*eda14cbcSMatt Macy 
893*eda14cbcSMatt Macy 			mutex_enter(&os->os_lock);
894*eda14cbcSMatt Macy 			dn = list_next(&os->os_dnodes, dn_marker);
895*eda14cbcSMatt Macy 			list_remove(&os->os_dnodes, dn_marker);
896*eda14cbcSMatt Macy 		} else {
897*eda14cbcSMatt Macy 			dn = list_next(&os->os_dnodes, dn);
898*eda14cbcSMatt Macy 		}
899*eda14cbcSMatt Macy 	}
900*eda14cbcSMatt Macy 	mutex_exit(&os->os_lock);
901*eda14cbcSMatt Macy 
902*eda14cbcSMatt Macy 	kmem_free(dn_marker, sizeof (dnode_t));
903*eda14cbcSMatt Macy 
904*eda14cbcSMatt Macy 	if (DMU_USERUSED_DNODE(os) != NULL) {
905*eda14cbcSMatt Macy 		if (DMU_PROJECTUSED_DNODE(os) != NULL)
906*eda14cbcSMatt Macy 			dnode_evict_dbufs(DMU_PROJECTUSED_DNODE(os));
907*eda14cbcSMatt Macy 		dnode_evict_dbufs(DMU_GROUPUSED_DNODE(os));
908*eda14cbcSMatt Macy 		dnode_evict_dbufs(DMU_USERUSED_DNODE(os));
909*eda14cbcSMatt Macy 	}
910*eda14cbcSMatt Macy 	dnode_evict_dbufs(DMU_META_DNODE(os));
911*eda14cbcSMatt Macy }
912*eda14cbcSMatt Macy 
913*eda14cbcSMatt Macy /*
914*eda14cbcSMatt Macy  * Objset eviction processing is split into into two pieces.
915*eda14cbcSMatt Macy  * The first marks the objset as evicting, evicts any dbufs that
916*eda14cbcSMatt Macy  * have a refcount of zero, and then queues up the objset for the
917*eda14cbcSMatt Macy  * second phase of eviction.  Once os->os_dnodes has been cleared by
918*eda14cbcSMatt Macy  * dnode_buf_pageout()->dnode_destroy(), the second phase is executed.
919*eda14cbcSMatt Macy  * The second phase closes the special dnodes, dequeues the objset from
920*eda14cbcSMatt Macy  * the list of those undergoing eviction, and finally frees the objset.
921*eda14cbcSMatt Macy  *
922*eda14cbcSMatt Macy  * NOTE: Due to asynchronous eviction processing (invocation of
923*eda14cbcSMatt Macy  *       dnode_buf_pageout()), it is possible for the meta dnode for the
924*eda14cbcSMatt Macy  *       objset to have no holds even though os->os_dnodes is not empty.
925*eda14cbcSMatt Macy  */
926*eda14cbcSMatt Macy void
927*eda14cbcSMatt Macy dmu_objset_evict(objset_t *os)
928*eda14cbcSMatt Macy {
929*eda14cbcSMatt Macy 	dsl_dataset_t *ds = os->os_dsl_dataset;
930*eda14cbcSMatt Macy 
931*eda14cbcSMatt Macy 	for (int t = 0; t < TXG_SIZE; t++)
932*eda14cbcSMatt Macy 		ASSERT(!dmu_objset_is_dirty(os, t));
933*eda14cbcSMatt Macy 
934*eda14cbcSMatt Macy 	if (ds)
935*eda14cbcSMatt Macy 		dsl_prop_unregister_all(ds, os);
936*eda14cbcSMatt Macy 
937*eda14cbcSMatt Macy 	if (os->os_sa)
938*eda14cbcSMatt Macy 		sa_tear_down(os);
939*eda14cbcSMatt Macy 
940*eda14cbcSMatt Macy 	dmu_objset_evict_dbufs(os);
941*eda14cbcSMatt Macy 
942*eda14cbcSMatt Macy 	mutex_enter(&os->os_lock);
943*eda14cbcSMatt Macy 	spa_evicting_os_register(os->os_spa, os);
944*eda14cbcSMatt Macy 	if (list_is_empty(&os->os_dnodes)) {
945*eda14cbcSMatt Macy 		mutex_exit(&os->os_lock);
946*eda14cbcSMatt Macy 		dmu_objset_evict_done(os);
947*eda14cbcSMatt Macy 	} else {
948*eda14cbcSMatt Macy 		mutex_exit(&os->os_lock);
949*eda14cbcSMatt Macy 	}
950*eda14cbcSMatt Macy 
951*eda14cbcSMatt Macy 
952*eda14cbcSMatt Macy }
953*eda14cbcSMatt Macy 
954*eda14cbcSMatt Macy void
955*eda14cbcSMatt Macy dmu_objset_evict_done(objset_t *os)
956*eda14cbcSMatt Macy {
957*eda14cbcSMatt Macy 	ASSERT3P(list_head(&os->os_dnodes), ==, NULL);
958*eda14cbcSMatt Macy 
959*eda14cbcSMatt Macy 	dnode_special_close(&os->os_meta_dnode);
960*eda14cbcSMatt Macy 	if (DMU_USERUSED_DNODE(os)) {
961*eda14cbcSMatt Macy 		if (DMU_PROJECTUSED_DNODE(os))
962*eda14cbcSMatt Macy 			dnode_special_close(&os->os_projectused_dnode);
963*eda14cbcSMatt Macy 		dnode_special_close(&os->os_userused_dnode);
964*eda14cbcSMatt Macy 		dnode_special_close(&os->os_groupused_dnode);
965*eda14cbcSMatt Macy 	}
966*eda14cbcSMatt Macy 	zil_free(os->os_zil);
967*eda14cbcSMatt Macy 
968*eda14cbcSMatt Macy 	arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
969*eda14cbcSMatt Macy 
970*eda14cbcSMatt Macy 	/*
971*eda14cbcSMatt Macy 	 * This is a barrier to prevent the objset from going away in
972*eda14cbcSMatt Macy 	 * dnode_move() until we can safely ensure that the objset is still in
973*eda14cbcSMatt Macy 	 * use. We consider the objset valid before the barrier and invalid
974*eda14cbcSMatt Macy 	 * after the barrier.
975*eda14cbcSMatt Macy 	 */
976*eda14cbcSMatt Macy 	rw_enter(&os_lock, RW_READER);
977*eda14cbcSMatt Macy 	rw_exit(&os_lock);
978*eda14cbcSMatt Macy 
979*eda14cbcSMatt Macy 	kmem_free(os->os_obj_next_percpu,
980*eda14cbcSMatt Macy 	    os->os_obj_next_percpu_len * sizeof (os->os_obj_next_percpu[0]));
981*eda14cbcSMatt Macy 
982*eda14cbcSMatt Macy 	mutex_destroy(&os->os_lock);
983*eda14cbcSMatt Macy 	mutex_destroy(&os->os_userused_lock);
984*eda14cbcSMatt Macy 	mutex_destroy(&os->os_obj_lock);
985*eda14cbcSMatt Macy 	mutex_destroy(&os->os_user_ptr_lock);
986*eda14cbcSMatt Macy 	mutex_destroy(&os->os_upgrade_lock);
987*eda14cbcSMatt Macy 	for (int i = 0; i < TXG_SIZE; i++) {
988*eda14cbcSMatt Macy 		multilist_destroy(os->os_dirty_dnodes[i]);
989*eda14cbcSMatt Macy 	}
990*eda14cbcSMatt Macy 	spa_evicting_os_deregister(os->os_spa, os);
991*eda14cbcSMatt Macy 	kmem_free(os, sizeof (objset_t));
992*eda14cbcSMatt Macy }
993*eda14cbcSMatt Macy 
994*eda14cbcSMatt Macy inode_timespec_t
995*eda14cbcSMatt Macy dmu_objset_snap_cmtime(objset_t *os)
996*eda14cbcSMatt Macy {
997*eda14cbcSMatt Macy 	return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
998*eda14cbcSMatt Macy }
999*eda14cbcSMatt Macy 
1000*eda14cbcSMatt Macy objset_t *
1001*eda14cbcSMatt Macy dmu_objset_create_impl_dnstats(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
1002*eda14cbcSMatt Macy     dmu_objset_type_t type, int levels, int blksz, int ibs, dmu_tx_t *tx)
1003*eda14cbcSMatt Macy {
1004*eda14cbcSMatt Macy 	objset_t *os;
1005*eda14cbcSMatt Macy 	dnode_t *mdn;
1006*eda14cbcSMatt Macy 
1007*eda14cbcSMatt Macy 	ASSERT(dmu_tx_is_syncing(tx));
1008*eda14cbcSMatt Macy 
1009*eda14cbcSMatt Macy 	if (blksz == 0)
1010*eda14cbcSMatt Macy 		blksz = DNODE_BLOCK_SIZE;
1011*eda14cbcSMatt Macy 	if (ibs == 0)
1012*eda14cbcSMatt Macy 		ibs = DN_MAX_INDBLKSHIFT;
1013*eda14cbcSMatt Macy 
1014*eda14cbcSMatt Macy 	if (ds != NULL)
1015*eda14cbcSMatt Macy 		VERIFY0(dmu_objset_from_ds(ds, &os));
1016*eda14cbcSMatt Macy 	else
1017*eda14cbcSMatt Macy 		VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os));
1018*eda14cbcSMatt Macy 
1019*eda14cbcSMatt Macy 	mdn = DMU_META_DNODE(os);
1020*eda14cbcSMatt Macy 
1021*eda14cbcSMatt Macy 	dnode_allocate(mdn, DMU_OT_DNODE, blksz, ibs, DMU_OT_NONE, 0,
1022*eda14cbcSMatt Macy 	    DNODE_MIN_SLOTS, tx);
1023*eda14cbcSMatt Macy 
1024*eda14cbcSMatt Macy 	/*
1025*eda14cbcSMatt Macy 	 * We don't want to have to increase the meta-dnode's nlevels
1026*eda14cbcSMatt Macy 	 * later, because then we could do it in quiescing context while
1027*eda14cbcSMatt Macy 	 * we are also accessing it in open context.
1028*eda14cbcSMatt Macy 	 *
1029*eda14cbcSMatt Macy 	 * This precaution is not necessary for the MOS (ds == NULL),
1030*eda14cbcSMatt Macy 	 * because the MOS is only updated in syncing context.
1031*eda14cbcSMatt Macy 	 * This is most fortunate: the MOS is the only objset that
1032*eda14cbcSMatt Macy 	 * needs to be synced multiple times as spa_sync() iterates
1033*eda14cbcSMatt Macy 	 * to convergence, so minimizing its dn_nlevels matters.
1034*eda14cbcSMatt Macy 	 */
1035*eda14cbcSMatt Macy 	if (ds != NULL) {
1036*eda14cbcSMatt Macy 		if (levels == 0) {
1037*eda14cbcSMatt Macy 			levels = 1;
1038*eda14cbcSMatt Macy 
1039*eda14cbcSMatt Macy 			/*
1040*eda14cbcSMatt Macy 			 * Determine the number of levels necessary for the
1041*eda14cbcSMatt Macy 			 * meta-dnode to contain DN_MAX_OBJECT dnodes.  Note
1042*eda14cbcSMatt Macy 			 * that in order to ensure that we do not overflow
1043*eda14cbcSMatt Macy 			 * 64 bits, there has to be a nlevels that gives us a
1044*eda14cbcSMatt Macy 			 * number of blocks > DN_MAX_OBJECT but < 2^64.
1045*eda14cbcSMatt Macy 			 * Therefore, (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)
1046*eda14cbcSMatt Macy 			 * (10) must be less than (64 - log2(DN_MAX_OBJECT))
1047*eda14cbcSMatt Macy 			 * (16).
1048*eda14cbcSMatt Macy 			 */
1049*eda14cbcSMatt Macy 			while ((uint64_t)mdn->dn_nblkptr <<
1050*eda14cbcSMatt Macy 			    (mdn->dn_datablkshift - DNODE_SHIFT + (levels - 1) *
1051*eda14cbcSMatt Macy 			    (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
1052*eda14cbcSMatt Macy 			    DN_MAX_OBJECT)
1053*eda14cbcSMatt Macy 				levels++;
1054*eda14cbcSMatt Macy 		}
1055*eda14cbcSMatt Macy 
1056*eda14cbcSMatt Macy 		mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
1057*eda14cbcSMatt Macy 		    mdn->dn_nlevels = levels;
1058*eda14cbcSMatt Macy 	}
1059*eda14cbcSMatt Macy 
1060*eda14cbcSMatt Macy 	ASSERT(type != DMU_OST_NONE);
1061*eda14cbcSMatt Macy 	ASSERT(type != DMU_OST_ANY);
1062*eda14cbcSMatt Macy 	ASSERT(type < DMU_OST_NUMTYPES);
1063*eda14cbcSMatt Macy 	os->os_phys->os_type = type;
1064*eda14cbcSMatt Macy 
1065*eda14cbcSMatt Macy 	/*
1066*eda14cbcSMatt Macy 	 * Enable user accounting if it is enabled and this is not an
1067*eda14cbcSMatt Macy 	 * encrypted receive.
1068*eda14cbcSMatt Macy 	 */
1069*eda14cbcSMatt Macy 	if (dmu_objset_userused_enabled(os) &&
1070*eda14cbcSMatt Macy 	    (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
1071*eda14cbcSMatt Macy 		os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
1072*eda14cbcSMatt Macy 		if (dmu_objset_userobjused_enabled(os)) {
1073*eda14cbcSMatt Macy 			ds->ds_feature_activation[
1074*eda14cbcSMatt Macy 			    SPA_FEATURE_USEROBJ_ACCOUNTING] = (void *)B_TRUE;
1075*eda14cbcSMatt Macy 			os->os_phys->os_flags |=
1076*eda14cbcSMatt Macy 			    OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
1077*eda14cbcSMatt Macy 		}
1078*eda14cbcSMatt Macy 		if (dmu_objset_projectquota_enabled(os)) {
1079*eda14cbcSMatt Macy 			ds->ds_feature_activation[
1080*eda14cbcSMatt Macy 			    SPA_FEATURE_PROJECT_QUOTA] = (void *)B_TRUE;
1081*eda14cbcSMatt Macy 			os->os_phys->os_flags |=
1082*eda14cbcSMatt Macy 			    OBJSET_FLAG_PROJECTQUOTA_COMPLETE;
1083*eda14cbcSMatt Macy 		}
1084*eda14cbcSMatt Macy 		os->os_flags = os->os_phys->os_flags;
1085*eda14cbcSMatt Macy 	}
1086*eda14cbcSMatt Macy 
1087*eda14cbcSMatt Macy 	dsl_dataset_dirty(ds, tx);
1088*eda14cbcSMatt Macy 
1089*eda14cbcSMatt Macy 	return (os);
1090*eda14cbcSMatt Macy }
1091*eda14cbcSMatt Macy 
1092*eda14cbcSMatt Macy /* called from dsl for meta-objset */
1093*eda14cbcSMatt Macy objset_t *
1094*eda14cbcSMatt Macy dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
1095*eda14cbcSMatt Macy     dmu_objset_type_t type, dmu_tx_t *tx)
1096*eda14cbcSMatt Macy {
1097*eda14cbcSMatt Macy 	return (dmu_objset_create_impl_dnstats(spa, ds, bp, type, 0, 0, 0, tx));
1098*eda14cbcSMatt Macy }
1099*eda14cbcSMatt Macy 
/*
 * Argument bundle handed to the dmu_objset_create_check() /
 * dmu_objset_create_sync() sync task by dmu_objset_create().
 */
typedef struct dmu_objset_create_arg {
	/* full name of the dataset to create (must not contain '@') */
	const char *doca_name;
	/* credentials of the creator (CRED() at dmu_objset_create() time) */
	cred_t *doca_cred;
	/* creating process (curproc); used by the filesystem-limit check */
	proc_t *doca_proc;
	/* optional callback run in the sync task on the new objset */
	void (*doca_userfunc)(objset_t *os, void *arg,
	    cred_t *cr, dmu_tx_t *tx);
	/* opaque argument passed through to doca_userfunc */
	void *doca_userarg;
	/* objset type given to dmu_objset_create_impl() */
	dmu_objset_type_t doca_type;
	/* flags passed through to dsl_dataset_create_sync() */
	uint64_t doca_flags;
	/* encryption parameters; dmu_objset_create() never leaves this NULL */
	dsl_crypto_params_t *doca_dcp;
} dmu_objset_create_arg_t;
1111*eda14cbcSMatt Macy 
1112*eda14cbcSMatt Macy /*ARGSUSED*/
1113*eda14cbcSMatt Macy static int
1114*eda14cbcSMatt Macy dmu_objset_create_check(void *arg, dmu_tx_t *tx)
1115*eda14cbcSMatt Macy {
1116*eda14cbcSMatt Macy 	dmu_objset_create_arg_t *doca = arg;
1117*eda14cbcSMatt Macy 	dsl_pool_t *dp = dmu_tx_pool(tx);
1118*eda14cbcSMatt Macy 	dsl_dir_t *pdd;
1119*eda14cbcSMatt Macy 	dsl_dataset_t *parentds;
1120*eda14cbcSMatt Macy 	objset_t *parentos;
1121*eda14cbcSMatt Macy 	const char *tail;
1122*eda14cbcSMatt Macy 	int error;
1123*eda14cbcSMatt Macy 
1124*eda14cbcSMatt Macy 	if (strchr(doca->doca_name, '@') != NULL)
1125*eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
1126*eda14cbcSMatt Macy 
1127*eda14cbcSMatt Macy 	if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN)
1128*eda14cbcSMatt Macy 		return (SET_ERROR(ENAMETOOLONG));
1129*eda14cbcSMatt Macy 
1130*eda14cbcSMatt Macy 	if (dataset_nestcheck(doca->doca_name) != 0)
1131*eda14cbcSMatt Macy 		return (SET_ERROR(ENAMETOOLONG));
1132*eda14cbcSMatt Macy 
1133*eda14cbcSMatt Macy 	error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
1134*eda14cbcSMatt Macy 	if (error != 0)
1135*eda14cbcSMatt Macy 		return (error);
1136*eda14cbcSMatt Macy 	if (tail == NULL) {
1137*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1138*eda14cbcSMatt Macy 		return (SET_ERROR(EEXIST));
1139*eda14cbcSMatt Macy 	}
1140*eda14cbcSMatt Macy 
1141*eda14cbcSMatt Macy 	error = dmu_objset_create_crypt_check(pdd, doca->doca_dcp, NULL);
1142*eda14cbcSMatt Macy 	if (error != 0) {
1143*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1144*eda14cbcSMatt Macy 		return (error);
1145*eda14cbcSMatt Macy 	}
1146*eda14cbcSMatt Macy 
1147*eda14cbcSMatt Macy 	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
1148*eda14cbcSMatt Macy 	    doca->doca_cred, doca->doca_proc);
1149*eda14cbcSMatt Macy 	if (error != 0) {
1150*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1151*eda14cbcSMatt Macy 		return (error);
1152*eda14cbcSMatt Macy 	}
1153*eda14cbcSMatt Macy 
1154*eda14cbcSMatt Macy 	/* can't create below anything but filesystems (eg. no ZVOLs) */
1155*eda14cbcSMatt Macy 	error = dsl_dataset_hold_obj(pdd->dd_pool,
1156*eda14cbcSMatt Macy 	    dsl_dir_phys(pdd)->dd_head_dataset_obj, FTAG, &parentds);
1157*eda14cbcSMatt Macy 	if (error != 0) {
1158*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1159*eda14cbcSMatt Macy 		return (error);
1160*eda14cbcSMatt Macy 	}
1161*eda14cbcSMatt Macy 	error = dmu_objset_from_ds(parentds, &parentos);
1162*eda14cbcSMatt Macy 	if (error != 0) {
1163*eda14cbcSMatt Macy 		dsl_dataset_rele(parentds, FTAG);
1164*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1165*eda14cbcSMatt Macy 		return (error);
1166*eda14cbcSMatt Macy 	}
1167*eda14cbcSMatt Macy 	if (dmu_objset_type(parentos) != DMU_OST_ZFS) {
1168*eda14cbcSMatt Macy 		dsl_dataset_rele(parentds, FTAG);
1169*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1170*eda14cbcSMatt Macy 		return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
1171*eda14cbcSMatt Macy 	}
1172*eda14cbcSMatt Macy 	dsl_dataset_rele(parentds, FTAG);
1173*eda14cbcSMatt Macy 	dsl_dir_rele(pdd, FTAG);
1174*eda14cbcSMatt Macy 
1175*eda14cbcSMatt Macy 	return (error);
1176*eda14cbcSMatt Macy }
1177*eda14cbcSMatt Macy 
/*
 * Sync half of the dataset-creation sync task.
 *
 * Creates the dataset named by doca_name, creates its objset, runs the
 * caller's optional doca_userfunc on it, and, for encrypted objsets,
 * forces the resulting dirty data to be written before the DECRYPT hold
 * taken here is released.  Failures of the holds are fatal (VERIFY0).
 */
static void
dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
{
	dmu_objset_create_arg_t *doca = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	spa_t *spa = dp->dp_spa;
	dsl_dir_t *pdd;
	const char *tail;
	dsl_dataset_t *ds;
	uint64_t obj;
	blkptr_t *bp;
	objset_t *os;
	zio_t *rzio;

	/* pdd is the parent directory; tail is the new component's name. */
	VERIFY0(dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail));

	obj = dsl_dataset_create_sync(pdd, tail, NULL, doca->doca_flags,
	    doca->doca_cred, doca->doca_dcp, tx);

	/* Hold the new dataset with DECRYPT; released at the bottom. */
	VERIFY0(dsl_dataset_hold_obj_flags(pdd->dd_pool, obj,
	    DS_HOLD_FLAG_DECRYPT, FTAG, &ds));
	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	bp = dsl_dataset_get_blkptr(ds);
	os = dmu_objset_create_impl(spa, ds, bp, doca->doca_type, tx);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);

	if (doca->doca_userfunc != NULL) {
		doca->doca_userfunc(os, doca->doca_userarg,
		    doca->doca_cred, tx);
	}

	/*
	 * The doca_userfunc() may write out some data that needs to be
	 * encrypted if the dataset is encrypted (specifically the root
	 * directory).  This data must be written out before the encryption
	 * key mapping is removed by dsl_dataset_rele_flags().  Force the
	 * I/O to occur immediately by invoking the relevant sections of
	 * dsl_pool_sync().
	 */
	if (os->os_encrypted) {
		dsl_dataset_t *tmpds = NULL;
		boolean_t need_sync_done = B_FALSE;

		/*
		 * Mark the dataset as owned by this function for the
		 * duration of the forced sync; cleared again below.
		 * NOTE(review): the code that depends on ds_owner being
		 * set here is not visible in this function -- confirm.
		 */
		mutex_enter(&ds->ds_lock);
		ds->ds_owner = FTAG;
		mutex_exit(&ds->ds_lock);

		/*
		 * First pass: if ds is on this txg's dirty list, take it
		 * off and sync it now under our own root zio.
		 */
		rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
		tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
		    tx->tx_txg);
		if (tmpds != NULL) {
			dsl_dataset_sync(ds, rzio, tx);
			need_sync_done = B_TRUE;
		}
		VERIFY0(zio_wait(rzio));

		/* Apply user quota updates and wait for the sync taskq. */
		dmu_objset_do_userquota_updates(os, tx);
		taskq_wait(dp->dp_sync_taskq);
		/*
		 * If ds is on the dirty list again at this point, drop one
		 * key mapping reference for it.
		 * NOTE(review): presumably this balances a reference taken
		 * when the dataset was re-dirtied -- confirm.
		 */
		if (txg_list_member(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
			ASSERT3P(ds->ds_key_mapping, !=, NULL);
			key_mapping_rele(spa, ds->ds_key_mapping, ds);
		}

		/*
		 * Second pass: if ds was dirtied again, remove it from the
		 * list, release the dbuf hold tagged with ds, and sync it
		 * once more.
		 */
		rzio = zio_root(spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
		tmpds = txg_list_remove_this(&dp->dp_dirty_datasets, ds,
		    tx->tx_txg);
		if (tmpds != NULL) {
			dmu_buf_rele(ds->ds_dbuf, ds);
			dsl_dataset_sync(ds, rzio, tx);
		}
		VERIFY0(zio_wait(rzio));

		/*
		 * Only when the first pass actually synced ds: drop the
		 * key mapping reference and finish the sync for this txg.
		 */
		if (need_sync_done) {
			ASSERT3P(ds->ds_key_mapping, !=, NULL);
			key_mapping_rele(spa, ds->ds_key_mapping, ds);
			dsl_dataset_sync_done(ds, tx);
		}

		/* Undo the temporary ownership marking. */
		mutex_enter(&ds->ds_lock);
		ds->ds_owner = NULL;
		mutex_exit(&ds->ds_lock);
	}

	spa_history_log_internal_ds(ds, "create", tx, " ");

	dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
	dsl_dir_rele(pdd, FTAG);
}
1266*eda14cbcSMatt Macy 
1267*eda14cbcSMatt Macy int
1268*eda14cbcSMatt Macy dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
1269*eda14cbcSMatt Macy     dsl_crypto_params_t *dcp, dmu_objset_create_sync_func_t func, void *arg)
1270*eda14cbcSMatt Macy {
1271*eda14cbcSMatt Macy 	dmu_objset_create_arg_t doca;
1272*eda14cbcSMatt Macy 	dsl_crypto_params_t tmp_dcp = { 0 };
1273*eda14cbcSMatt Macy 
1274*eda14cbcSMatt Macy 	doca.doca_name = name;
1275*eda14cbcSMatt Macy 	doca.doca_cred = CRED();
1276*eda14cbcSMatt Macy 	doca.doca_proc = curproc;
1277*eda14cbcSMatt Macy 	doca.doca_flags = flags;
1278*eda14cbcSMatt Macy 	doca.doca_userfunc = func;
1279*eda14cbcSMatt Macy 	doca.doca_userarg = arg;
1280*eda14cbcSMatt Macy 	doca.doca_type = type;
1281*eda14cbcSMatt Macy 
1282*eda14cbcSMatt Macy 	/*
1283*eda14cbcSMatt Macy 	 * Some callers (mostly for testing) do not provide a dcp on their
1284*eda14cbcSMatt Macy 	 * own but various code inside the sync task will require it to be
1285*eda14cbcSMatt Macy 	 * allocated. Rather than adding NULL checks throughout this code
1286*eda14cbcSMatt Macy 	 * or adding dummy dcp's to all of the callers we simply create a
1287*eda14cbcSMatt Macy 	 * dummy one here and use that. This zero dcp will have the same
1288*eda14cbcSMatt Macy 	 * effect as asking for inheritance of all encryption params.
1289*eda14cbcSMatt Macy 	 */
1290*eda14cbcSMatt Macy 	doca.doca_dcp = (dcp != NULL) ? dcp : &tmp_dcp;
1291*eda14cbcSMatt Macy 
1292*eda14cbcSMatt Macy 	int rv = dsl_sync_task(name,
1293*eda14cbcSMatt Macy 	    dmu_objset_create_check, dmu_objset_create_sync, &doca,
1294*eda14cbcSMatt Macy 	    6, ZFS_SPACE_CHECK_NORMAL);
1295*eda14cbcSMatt Macy 
1296*eda14cbcSMatt Macy 	if (rv == 0)
1297*eda14cbcSMatt Macy 		zvol_create_minor(name);
1298*eda14cbcSMatt Macy 	return (rv);
1299*eda14cbcSMatt Macy }
1300*eda14cbcSMatt Macy 
1301*eda14cbcSMatt Macy typedef struct dmu_objset_clone_arg {
1302*eda14cbcSMatt Macy 	const char *doca_clone;
1303*eda14cbcSMatt Macy 	const char *doca_origin;
1304*eda14cbcSMatt Macy 	cred_t *doca_cred;
1305*eda14cbcSMatt Macy 	proc_t *doca_proc;
1306*eda14cbcSMatt Macy } dmu_objset_clone_arg_t;
1307*eda14cbcSMatt Macy 
1308*eda14cbcSMatt Macy /*ARGSUSED*/
1309*eda14cbcSMatt Macy static int
1310*eda14cbcSMatt Macy dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
1311*eda14cbcSMatt Macy {
1312*eda14cbcSMatt Macy 	dmu_objset_clone_arg_t *doca = arg;
1313*eda14cbcSMatt Macy 	dsl_dir_t *pdd;
1314*eda14cbcSMatt Macy 	const char *tail;
1315*eda14cbcSMatt Macy 	int error;
1316*eda14cbcSMatt Macy 	dsl_dataset_t *origin;
1317*eda14cbcSMatt Macy 	dsl_pool_t *dp = dmu_tx_pool(tx);
1318*eda14cbcSMatt Macy 
1319*eda14cbcSMatt Macy 	if (strchr(doca->doca_clone, '@') != NULL)
1320*eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
1321*eda14cbcSMatt Macy 
1322*eda14cbcSMatt Macy 	if (strlen(doca->doca_clone) >= ZFS_MAX_DATASET_NAME_LEN)
1323*eda14cbcSMatt Macy 		return (SET_ERROR(ENAMETOOLONG));
1324*eda14cbcSMatt Macy 
1325*eda14cbcSMatt Macy 	error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
1326*eda14cbcSMatt Macy 	if (error != 0)
1327*eda14cbcSMatt Macy 		return (error);
1328*eda14cbcSMatt Macy 	if (tail == NULL) {
1329*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1330*eda14cbcSMatt Macy 		return (SET_ERROR(EEXIST));
1331*eda14cbcSMatt Macy 	}
1332*eda14cbcSMatt Macy 
1333*eda14cbcSMatt Macy 	error = dsl_fs_ss_limit_check(pdd, 1, ZFS_PROP_FILESYSTEM_LIMIT, NULL,
1334*eda14cbcSMatt Macy 	    doca->doca_cred, doca->doca_proc);
1335*eda14cbcSMatt Macy 	if (error != 0) {
1336*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1337*eda14cbcSMatt Macy 		return (SET_ERROR(EDQUOT));
1338*eda14cbcSMatt Macy 	}
1339*eda14cbcSMatt Macy 
1340*eda14cbcSMatt Macy 	error = dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin);
1341*eda14cbcSMatt Macy 	if (error != 0) {
1342*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1343*eda14cbcSMatt Macy 		return (error);
1344*eda14cbcSMatt Macy 	}
1345*eda14cbcSMatt Macy 
1346*eda14cbcSMatt Macy 	/* You can only clone snapshots, not the head datasets. */
1347*eda14cbcSMatt Macy 	if (!origin->ds_is_snapshot) {
1348*eda14cbcSMatt Macy 		dsl_dataset_rele(origin, FTAG);
1349*eda14cbcSMatt Macy 		dsl_dir_rele(pdd, FTAG);
1350*eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
1351*eda14cbcSMatt Macy 	}
1352*eda14cbcSMatt Macy 
1353*eda14cbcSMatt Macy 	dsl_dataset_rele(origin, FTAG);
1354*eda14cbcSMatt Macy 	dsl_dir_rele(pdd, FTAG);
1355*eda14cbcSMatt Macy 
1356*eda14cbcSMatt Macy 	return (0);
1357*eda14cbcSMatt Macy }
1358*eda14cbcSMatt Macy 
1359*eda14cbcSMatt Macy static void
1360*eda14cbcSMatt Macy dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
1361*eda14cbcSMatt Macy {
1362*eda14cbcSMatt Macy 	dmu_objset_clone_arg_t *doca = arg;
1363*eda14cbcSMatt Macy 	dsl_pool_t *dp = dmu_tx_pool(tx);
1364*eda14cbcSMatt Macy 	dsl_dir_t *pdd;
1365*eda14cbcSMatt Macy 	const char *tail;
1366*eda14cbcSMatt Macy 	dsl_dataset_t *origin, *ds;
1367*eda14cbcSMatt Macy 	uint64_t obj;
1368*eda14cbcSMatt Macy 	char namebuf[ZFS_MAX_DATASET_NAME_LEN];
1369*eda14cbcSMatt Macy 
1370*eda14cbcSMatt Macy 	VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
1371*eda14cbcSMatt Macy 	VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
1372*eda14cbcSMatt Macy 
1373*eda14cbcSMatt Macy 	obj = dsl_dataset_create_sync(pdd, tail, origin, 0,
1374*eda14cbcSMatt Macy 	    doca->doca_cred, NULL, tx);
1375*eda14cbcSMatt Macy 
1376*eda14cbcSMatt Macy 	VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
1377*eda14cbcSMatt Macy 	dsl_dataset_name(origin, namebuf);
1378*eda14cbcSMatt Macy 	spa_history_log_internal_ds(ds, "clone", tx,
1379*eda14cbcSMatt Macy 	    "origin=%s (%llu)", namebuf, (u_longlong_t)origin->ds_object);
1380*eda14cbcSMatt Macy 	dsl_dataset_rele(ds, FTAG);
1381*eda14cbcSMatt Macy 	dsl_dataset_rele(origin, FTAG);
1382*eda14cbcSMatt Macy 	dsl_dir_rele(pdd, FTAG);
1383*eda14cbcSMatt Macy }
1384*eda14cbcSMatt Macy 
1385*eda14cbcSMatt Macy int
1386*eda14cbcSMatt Macy dmu_objset_clone(const char *clone, const char *origin)
1387*eda14cbcSMatt Macy {
1388*eda14cbcSMatt Macy 	dmu_objset_clone_arg_t doca;
1389*eda14cbcSMatt Macy 
1390*eda14cbcSMatt Macy 	doca.doca_clone = clone;
1391*eda14cbcSMatt Macy 	doca.doca_origin = origin;
1392*eda14cbcSMatt Macy 	doca.doca_cred = CRED();
1393*eda14cbcSMatt Macy 	doca.doca_proc = curproc;
1394*eda14cbcSMatt Macy 
1395*eda14cbcSMatt Macy 	int rv = dsl_sync_task(clone,
1396*eda14cbcSMatt Macy 	    dmu_objset_clone_check, dmu_objset_clone_sync, &doca,
1397*eda14cbcSMatt Macy 	    6, ZFS_SPACE_CHECK_NORMAL);
1398*eda14cbcSMatt Macy 
1399*eda14cbcSMatt Macy 	if (rv == 0)
1400*eda14cbcSMatt Macy 		zvol_create_minor(clone);
1401*eda14cbcSMatt Macy 
1402*eda14cbcSMatt Macy 	return (rv);
1403*eda14cbcSMatt Macy }
1404*eda14cbcSMatt Macy 
1405*eda14cbcSMatt Macy int
1406*eda14cbcSMatt Macy dmu_objset_snapshot_one(const char *fsname, const char *snapname)
1407*eda14cbcSMatt Macy {
1408*eda14cbcSMatt Macy 	int err;
1409*eda14cbcSMatt Macy 	char *longsnap = kmem_asprintf("%s@%s", fsname, snapname);
1410*eda14cbcSMatt Macy 	nvlist_t *snaps = fnvlist_alloc();
1411*eda14cbcSMatt Macy 
1412*eda14cbcSMatt Macy 	fnvlist_add_boolean(snaps, longsnap);
1413*eda14cbcSMatt Macy 	kmem_strfree(longsnap);
1414*eda14cbcSMatt Macy 	err = dsl_dataset_snapshot(snaps, NULL, NULL);
1415*eda14cbcSMatt Macy 	fnvlist_free(snaps);
1416*eda14cbcSMatt Macy 	return (err);
1417*eda14cbcSMatt Macy }
1418*eda14cbcSMatt Macy 
1419*eda14cbcSMatt Macy static void
1420*eda14cbcSMatt Macy dmu_objset_upgrade_task_cb(void *data)
1421*eda14cbcSMatt Macy {
1422*eda14cbcSMatt Macy 	objset_t *os = data;
1423*eda14cbcSMatt Macy 
1424*eda14cbcSMatt Macy 	mutex_enter(&os->os_upgrade_lock);
1425*eda14cbcSMatt Macy 	os->os_upgrade_status = EINTR;
1426*eda14cbcSMatt Macy 	if (!os->os_upgrade_exit) {
1427*eda14cbcSMatt Macy 		mutex_exit(&os->os_upgrade_lock);
1428*eda14cbcSMatt Macy 
1429*eda14cbcSMatt Macy 		os->os_upgrade_status = os->os_upgrade_cb(os);
1430*eda14cbcSMatt Macy 		mutex_enter(&os->os_upgrade_lock);
1431*eda14cbcSMatt Macy 	}
1432*eda14cbcSMatt Macy 	os->os_upgrade_exit = B_TRUE;
1433*eda14cbcSMatt Macy 	os->os_upgrade_id = 0;
1434*eda14cbcSMatt Macy 	mutex_exit(&os->os_upgrade_lock);
1435*eda14cbcSMatt Macy 	dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
1436*eda14cbcSMatt Macy }
1437*eda14cbcSMatt Macy 
1438*eda14cbcSMatt Macy static void
1439*eda14cbcSMatt Macy dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
1440*eda14cbcSMatt Macy {
1441*eda14cbcSMatt Macy 	if (os->os_upgrade_id != 0)
1442*eda14cbcSMatt Macy 		return;
1443*eda14cbcSMatt Macy 
1444*eda14cbcSMatt Macy 	ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
1445*eda14cbcSMatt Macy 	dsl_dataset_long_hold(dmu_objset_ds(os), upgrade_tag);
1446*eda14cbcSMatt Macy 
1447*eda14cbcSMatt Macy 	mutex_enter(&os->os_upgrade_lock);
1448*eda14cbcSMatt Macy 	if (os->os_upgrade_id == 0 && os->os_upgrade_status == 0) {
1449*eda14cbcSMatt Macy 		os->os_upgrade_exit = B_FALSE;
1450*eda14cbcSMatt Macy 		os->os_upgrade_cb = cb;
1451*eda14cbcSMatt Macy 		os->os_upgrade_id = taskq_dispatch(
1452*eda14cbcSMatt Macy 		    os->os_spa->spa_upgrade_taskq,
1453*eda14cbcSMatt Macy 		    dmu_objset_upgrade_task_cb, os, TQ_SLEEP);
1454*eda14cbcSMatt Macy 		if (os->os_upgrade_id == TASKQID_INVALID) {
1455*eda14cbcSMatt Macy 			dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
1456*eda14cbcSMatt Macy 			os->os_upgrade_status = ENOMEM;
1457*eda14cbcSMatt Macy 		}
1458*eda14cbcSMatt Macy 	}
1459*eda14cbcSMatt Macy 	mutex_exit(&os->os_upgrade_lock);
1460*eda14cbcSMatt Macy }
1461*eda14cbcSMatt Macy 
1462*eda14cbcSMatt Macy static void
1463*eda14cbcSMatt Macy dmu_objset_upgrade_stop(objset_t *os)
1464*eda14cbcSMatt Macy {
1465*eda14cbcSMatt Macy 	mutex_enter(&os->os_upgrade_lock);
1466*eda14cbcSMatt Macy 	os->os_upgrade_exit = B_TRUE;
1467*eda14cbcSMatt Macy 	if (os->os_upgrade_id != 0) {
1468*eda14cbcSMatt Macy 		taskqid_t id = os->os_upgrade_id;
1469*eda14cbcSMatt Macy 
1470*eda14cbcSMatt Macy 		os->os_upgrade_id = 0;
1471*eda14cbcSMatt Macy 		mutex_exit(&os->os_upgrade_lock);
1472*eda14cbcSMatt Macy 
1473*eda14cbcSMatt Macy 		if ((taskq_cancel_id(os->os_spa->spa_upgrade_taskq, id)) == 0) {
1474*eda14cbcSMatt Macy 			dsl_dataset_long_rele(dmu_objset_ds(os), upgrade_tag);
1475*eda14cbcSMatt Macy 		}
1476*eda14cbcSMatt Macy 		txg_wait_synced(os->os_spa->spa_dsl_pool, 0);
1477*eda14cbcSMatt Macy 	} else {
1478*eda14cbcSMatt Macy 		mutex_exit(&os->os_upgrade_lock);
1479*eda14cbcSMatt Macy 	}
1480*eda14cbcSMatt Macy }
1481*eda14cbcSMatt Macy 
1482*eda14cbcSMatt Macy static void
1483*eda14cbcSMatt Macy dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
1484*eda14cbcSMatt Macy {
1485*eda14cbcSMatt Macy 	dnode_t *dn;
1486*eda14cbcSMatt Macy 
1487*eda14cbcSMatt Macy 	while ((dn = multilist_sublist_head(list)) != NULL) {
1488*eda14cbcSMatt Macy 		ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
1489*eda14cbcSMatt Macy 		ASSERT(dn->dn_dbuf->db_data_pending);
1490*eda14cbcSMatt Macy 		/*
1491*eda14cbcSMatt Macy 		 * Initialize dn_zio outside dnode_sync() because the
1492*eda14cbcSMatt Macy 		 * meta-dnode needs to set it outside dnode_sync().
1493*eda14cbcSMatt Macy 		 */
1494*eda14cbcSMatt Macy 		dn->dn_zio = dn->dn_dbuf->db_data_pending->dr_zio;
1495*eda14cbcSMatt Macy 		ASSERT(dn->dn_zio);
1496*eda14cbcSMatt Macy 
1497*eda14cbcSMatt Macy 		ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
1498*eda14cbcSMatt Macy 		multilist_sublist_remove(list, dn);
1499*eda14cbcSMatt Macy 
1500*eda14cbcSMatt Macy 		/*
1501*eda14cbcSMatt Macy 		 * If we are not doing useraccounting (os_synced_dnodes == NULL)
1502*eda14cbcSMatt Macy 		 * we are done with this dnode for this txg. Unset dn_dirty_txg
1503*eda14cbcSMatt Macy 		 * if later txgs aren't dirtying it so that future holders do
1504*eda14cbcSMatt Macy 		 * not get a stale value. Otherwise, we will do this in
1505*eda14cbcSMatt Macy 		 * userquota_updates_task() when processing has completely
1506*eda14cbcSMatt Macy 		 * finished for this txg.
1507*eda14cbcSMatt Macy 		 */
1508*eda14cbcSMatt Macy 		multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
1509*eda14cbcSMatt Macy 		if (newlist != NULL) {
1510*eda14cbcSMatt Macy 			(void) dnode_add_ref(dn, newlist);
1511*eda14cbcSMatt Macy 			multilist_insert(newlist, dn);
1512*eda14cbcSMatt Macy 		} else {
1513*eda14cbcSMatt Macy 			mutex_enter(&dn->dn_mtx);
1514*eda14cbcSMatt Macy 			if (dn->dn_dirty_txg == tx->tx_txg)
1515*eda14cbcSMatt Macy 				dn->dn_dirty_txg = 0;
1516*eda14cbcSMatt Macy 			mutex_exit(&dn->dn_mtx);
1517*eda14cbcSMatt Macy 		}
1518*eda14cbcSMatt Macy 
1519*eda14cbcSMatt Macy 		dnode_sync(dn, tx);
1520*eda14cbcSMatt Macy 	}
1521*eda14cbcSMatt Macy }
1522*eda14cbcSMatt Macy 
1523*eda14cbcSMatt Macy /* ARGSUSED */
1524*eda14cbcSMatt Macy static void
1525*eda14cbcSMatt Macy dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
1526*eda14cbcSMatt Macy {
1527*eda14cbcSMatt Macy 	blkptr_t *bp = zio->io_bp;
1528*eda14cbcSMatt Macy 	objset_t *os = arg;
1529*eda14cbcSMatt Macy 	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
1530*eda14cbcSMatt Macy 	uint64_t fill = 0;
1531*eda14cbcSMatt Macy 
1532*eda14cbcSMatt Macy 	ASSERT(!BP_IS_EMBEDDED(bp));
1533*eda14cbcSMatt Macy 	ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
1534*eda14cbcSMatt Macy 	ASSERT0(BP_GET_LEVEL(bp));
1535*eda14cbcSMatt Macy 
1536*eda14cbcSMatt Macy 	/*
1537*eda14cbcSMatt Macy 	 * Update rootbp fill count: it should be the number of objects
1538*eda14cbcSMatt Macy 	 * allocated in the object set (not counting the "special"
1539*eda14cbcSMatt Macy 	 * objects that are stored in the objset_phys_t -- the meta
1540*eda14cbcSMatt Macy 	 * dnode and user/group/project accounting objects).
1541*eda14cbcSMatt Macy 	 */
1542*eda14cbcSMatt Macy 	for (int i = 0; i < dnp->dn_nblkptr; i++)
1543*eda14cbcSMatt Macy 		fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
1544*eda14cbcSMatt Macy 
1545*eda14cbcSMatt Macy 	BP_SET_FILL(bp, fill);
1546*eda14cbcSMatt Macy 
1547*eda14cbcSMatt Macy 	if (os->os_dsl_dataset != NULL)
1548*eda14cbcSMatt Macy 		rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG);
1549*eda14cbcSMatt Macy 	*os->os_rootbp = *bp;
1550*eda14cbcSMatt Macy 	if (os->os_dsl_dataset != NULL)
1551*eda14cbcSMatt Macy 		rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
1552*eda14cbcSMatt Macy }
1553*eda14cbcSMatt Macy 
1554*eda14cbcSMatt Macy /* ARGSUSED */
1555*eda14cbcSMatt Macy static void
1556*eda14cbcSMatt Macy dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
1557*eda14cbcSMatt Macy {
1558*eda14cbcSMatt Macy 	blkptr_t *bp = zio->io_bp;
1559*eda14cbcSMatt Macy 	blkptr_t *bp_orig = &zio->io_bp_orig;
1560*eda14cbcSMatt Macy 	objset_t *os = arg;
1561*eda14cbcSMatt Macy 
1562*eda14cbcSMatt Macy 	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
1563*eda14cbcSMatt Macy 		ASSERT(BP_EQUAL(bp, bp_orig));
1564*eda14cbcSMatt Macy 	} else {
1565*eda14cbcSMatt Macy 		dsl_dataset_t *ds = os->os_dsl_dataset;
1566*eda14cbcSMatt Macy 		dmu_tx_t *tx = os->os_synctx;
1567*eda14cbcSMatt Macy 
1568*eda14cbcSMatt Macy 		(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
1569*eda14cbcSMatt Macy 		dsl_dataset_block_born(ds, bp, tx);
1570*eda14cbcSMatt Macy 	}
1571*eda14cbcSMatt Macy 	kmem_free(bp, sizeof (*bp));
1572*eda14cbcSMatt Macy }
1573*eda14cbcSMatt Macy 
1574*eda14cbcSMatt Macy typedef struct sync_dnodes_arg {
1575*eda14cbcSMatt Macy 	multilist_t *sda_list;
1576*eda14cbcSMatt Macy 	int sda_sublist_idx;
1577*eda14cbcSMatt Macy 	multilist_t *sda_newlist;
1578*eda14cbcSMatt Macy 	dmu_tx_t *sda_tx;
1579*eda14cbcSMatt Macy } sync_dnodes_arg_t;
1580*eda14cbcSMatt Macy 
1581*eda14cbcSMatt Macy static void
1582*eda14cbcSMatt Macy sync_dnodes_task(void *arg)
1583*eda14cbcSMatt Macy {
1584*eda14cbcSMatt Macy 	sync_dnodes_arg_t *sda = arg;
1585*eda14cbcSMatt Macy 
1586*eda14cbcSMatt Macy 	multilist_sublist_t *ms =
1587*eda14cbcSMatt Macy 	    multilist_sublist_lock(sda->sda_list, sda->sda_sublist_idx);
1588*eda14cbcSMatt Macy 
1589*eda14cbcSMatt Macy 	dmu_objset_sync_dnodes(ms, sda->sda_tx);
1590*eda14cbcSMatt Macy 
1591*eda14cbcSMatt Macy 	multilist_sublist_unlock(ms);
1592*eda14cbcSMatt Macy 
1593*eda14cbcSMatt Macy 	kmem_free(sda, sizeof (*sda));
1594*eda14cbcSMatt Macy }
1595*eda14cbcSMatt Macy 
1596*eda14cbcSMatt Macy 
1597*eda14cbcSMatt Macy /* called from dsl */
1598*eda14cbcSMatt Macy void
1599*eda14cbcSMatt Macy dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
1600*eda14cbcSMatt Macy {
1601*eda14cbcSMatt Macy 	int txgoff;
1602*eda14cbcSMatt Macy 	zbookmark_phys_t zb;
1603*eda14cbcSMatt Macy 	zio_prop_t zp;
1604*eda14cbcSMatt Macy 	zio_t *zio;
1605*eda14cbcSMatt Macy 	list_t *list;
1606*eda14cbcSMatt Macy 	dbuf_dirty_record_t *dr;
1607*eda14cbcSMatt Macy 	int num_sublists;
1608*eda14cbcSMatt Macy 	multilist_t *ml;
1609*eda14cbcSMatt Macy 	blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
1610*eda14cbcSMatt Macy 	*blkptr_copy = *os->os_rootbp;
1611*eda14cbcSMatt Macy 
1612*eda14cbcSMatt Macy 	dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
1613*eda14cbcSMatt Macy 
1614*eda14cbcSMatt Macy 	ASSERT(dmu_tx_is_syncing(tx));
1615*eda14cbcSMatt Macy 	/* XXX the write_done callback should really give us the tx... */
1616*eda14cbcSMatt Macy 	os->os_synctx = tx;
1617*eda14cbcSMatt Macy 
1618*eda14cbcSMatt Macy 	if (os->os_dsl_dataset == NULL) {
1619*eda14cbcSMatt Macy 		/*
1620*eda14cbcSMatt Macy 		 * This is the MOS.  If we have upgraded,
1621*eda14cbcSMatt Macy 		 * spa_max_replication() could change, so reset
1622*eda14cbcSMatt Macy 		 * os_copies here.
1623*eda14cbcSMatt Macy 		 */
1624*eda14cbcSMatt Macy 		os->os_copies = spa_max_replication(os->os_spa);
1625*eda14cbcSMatt Macy 	}
1626*eda14cbcSMatt Macy 
1627*eda14cbcSMatt Macy 	/*
1628*eda14cbcSMatt Macy 	 * Create the root block IO
1629*eda14cbcSMatt Macy 	 */
1630*eda14cbcSMatt Macy 	SET_BOOKMARK(&zb, os->os_dsl_dataset ?
1631*eda14cbcSMatt Macy 	    os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
1632*eda14cbcSMatt Macy 	    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1633*eda14cbcSMatt Macy 	arc_release(os->os_phys_buf, &os->os_phys_buf);
1634*eda14cbcSMatt Macy 
1635*eda14cbcSMatt Macy 	dmu_write_policy(os, NULL, 0, 0, &zp);
1636*eda14cbcSMatt Macy 
1637*eda14cbcSMatt Macy 	/*
1638*eda14cbcSMatt Macy 	 * If we are either claiming the ZIL or doing a raw receive, write
1639*eda14cbcSMatt Macy 	 * out the os_phys_buf raw. Neither of these actions will effect the
1640*eda14cbcSMatt Macy 	 * MAC at this point.
1641*eda14cbcSMatt Macy 	 */
1642*eda14cbcSMatt Macy 	if (os->os_raw_receive ||
1643*eda14cbcSMatt Macy 	    os->os_next_write_raw[tx->tx_txg & TXG_MASK]) {
1644*eda14cbcSMatt Macy 		ASSERT(os->os_encrypted);
1645*eda14cbcSMatt Macy 		arc_convert_to_raw(os->os_phys_buf,
1646*eda14cbcSMatt Macy 		    os->os_dsl_dataset->ds_object, ZFS_HOST_BYTEORDER,
1647*eda14cbcSMatt Macy 		    DMU_OT_OBJSET, NULL, NULL, NULL);
1648*eda14cbcSMatt Macy 	}
1649*eda14cbcSMatt Macy 
1650*eda14cbcSMatt Macy 	zio = arc_write(pio, os->os_spa, tx->tx_txg,
1651*eda14cbcSMatt Macy 	    blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
1652*eda14cbcSMatt Macy 	    &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
1653*eda14cbcSMatt Macy 	    os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
1654*eda14cbcSMatt Macy 
1655*eda14cbcSMatt Macy 	/*
1656*eda14cbcSMatt Macy 	 * Sync special dnodes - the parent IO for the sync is the root block
1657*eda14cbcSMatt Macy 	 */
1658*eda14cbcSMatt Macy 	DMU_META_DNODE(os)->dn_zio = zio;
1659*eda14cbcSMatt Macy 	dnode_sync(DMU_META_DNODE(os), tx);
1660*eda14cbcSMatt Macy 
1661*eda14cbcSMatt Macy 	os->os_phys->os_flags = os->os_flags;
1662*eda14cbcSMatt Macy 
1663*eda14cbcSMatt Macy 	if (DMU_USERUSED_DNODE(os) &&
1664*eda14cbcSMatt Macy 	    DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
1665*eda14cbcSMatt Macy 		DMU_USERUSED_DNODE(os)->dn_zio = zio;
1666*eda14cbcSMatt Macy 		dnode_sync(DMU_USERUSED_DNODE(os), tx);
1667*eda14cbcSMatt Macy 		DMU_GROUPUSED_DNODE(os)->dn_zio = zio;
1668*eda14cbcSMatt Macy 		dnode_sync(DMU_GROUPUSED_DNODE(os), tx);
1669*eda14cbcSMatt Macy 	}
1670*eda14cbcSMatt Macy 
1671*eda14cbcSMatt Macy 	if (DMU_PROJECTUSED_DNODE(os) &&
1672*eda14cbcSMatt Macy 	    DMU_PROJECTUSED_DNODE(os)->dn_type != DMU_OT_NONE) {
1673*eda14cbcSMatt Macy 		DMU_PROJECTUSED_DNODE(os)->dn_zio = zio;
1674*eda14cbcSMatt Macy 		dnode_sync(DMU_PROJECTUSED_DNODE(os), tx);
1675*eda14cbcSMatt Macy 	}
1676*eda14cbcSMatt Macy 
1677*eda14cbcSMatt Macy 	txgoff = tx->tx_txg & TXG_MASK;
1678*eda14cbcSMatt Macy 
1679*eda14cbcSMatt Macy 	if (dmu_objset_userused_enabled(os) &&
1680*eda14cbcSMatt Macy 	    (!os->os_encrypted || !dmu_objset_is_receiving(os))) {
1681*eda14cbcSMatt Macy 		/*
1682*eda14cbcSMatt Macy 		 * We must create the list here because it uses the
1683*eda14cbcSMatt Macy 		 * dn_dirty_link[] of this txg.  But it may already
1684*eda14cbcSMatt Macy 		 * exist because we call dsl_dataset_sync() twice per txg.
1685*eda14cbcSMatt Macy 		 */
1686*eda14cbcSMatt Macy 		if (os->os_synced_dnodes == NULL) {
1687*eda14cbcSMatt Macy 			os->os_synced_dnodes =
1688*eda14cbcSMatt Macy 			    multilist_create(sizeof (dnode_t),
1689*eda14cbcSMatt Macy 			    offsetof(dnode_t, dn_dirty_link[txgoff]),
1690*eda14cbcSMatt Macy 			    dnode_multilist_index_func);
1691*eda14cbcSMatt Macy 		} else {
1692*eda14cbcSMatt Macy 			ASSERT3U(os->os_synced_dnodes->ml_offset, ==,
1693*eda14cbcSMatt Macy 			    offsetof(dnode_t, dn_dirty_link[txgoff]));
1694*eda14cbcSMatt Macy 		}
1695*eda14cbcSMatt Macy 	}
1696*eda14cbcSMatt Macy 
1697*eda14cbcSMatt Macy 	ml = os->os_dirty_dnodes[txgoff];
1698*eda14cbcSMatt Macy 	num_sublists = multilist_get_num_sublists(ml);
1699*eda14cbcSMatt Macy 	for (int i = 0; i < num_sublists; i++) {
1700*eda14cbcSMatt Macy 		if (multilist_sublist_is_empty_idx(ml, i))
1701*eda14cbcSMatt Macy 			continue;
1702*eda14cbcSMatt Macy 		sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP);
1703*eda14cbcSMatt Macy 		sda->sda_list = ml;
1704*eda14cbcSMatt Macy 		sda->sda_sublist_idx = i;
1705*eda14cbcSMatt Macy 		sda->sda_tx = tx;
1706*eda14cbcSMatt Macy 		(void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
1707*eda14cbcSMatt Macy 		    sync_dnodes_task, sda, 0);
1708*eda14cbcSMatt Macy 		/* callback frees sda */
1709*eda14cbcSMatt Macy 	}
1710*eda14cbcSMatt Macy 	taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
1711*eda14cbcSMatt Macy 
1712*eda14cbcSMatt Macy 	list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
1713*eda14cbcSMatt Macy 	while ((dr = list_head(list)) != NULL) {
1714*eda14cbcSMatt Macy 		ASSERT0(dr->dr_dbuf->db_level);
1715*eda14cbcSMatt Macy 		list_remove(list, dr);
1716*eda14cbcSMatt Macy 		zio_nowait(dr->dr_zio);
1717*eda14cbcSMatt Macy 	}
1718*eda14cbcSMatt Macy 
1719*eda14cbcSMatt Macy 	/* Enable dnode backfill if enough objects have been freed. */
1720*eda14cbcSMatt Macy 	if (os->os_freed_dnodes >= dmu_rescan_dnode_threshold) {
1721*eda14cbcSMatt Macy 		os->os_rescan_dnodes = B_TRUE;
1722*eda14cbcSMatt Macy 		os->os_freed_dnodes = 0;
1723*eda14cbcSMatt Macy 	}
1724*eda14cbcSMatt Macy 
1725*eda14cbcSMatt Macy 	/*
1726*eda14cbcSMatt Macy 	 * Free intent log blocks up to this tx.
1727*eda14cbcSMatt Macy 	 */
1728*eda14cbcSMatt Macy 	zil_sync(os->os_zil, tx);
1729*eda14cbcSMatt Macy 	os->os_phys->os_zil_header = os->os_zil_header;
1730*eda14cbcSMatt Macy 	zio_nowait(zio);
1731*eda14cbcSMatt Macy }
1732*eda14cbcSMatt Macy 
1733*eda14cbcSMatt Macy boolean_t
1734*eda14cbcSMatt Macy dmu_objset_is_dirty(objset_t *os, uint64_t txg)
1735*eda14cbcSMatt Macy {
1736*eda14cbcSMatt Macy 	return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK]));
1737*eda14cbcSMatt Macy }
1738*eda14cbcSMatt Macy 
1739*eda14cbcSMatt Macy static file_info_cb_t *file_cbs[DMU_OST_NUMTYPES];
1740*eda14cbcSMatt Macy 
1741*eda14cbcSMatt Macy void
1742*eda14cbcSMatt Macy dmu_objset_register_type(dmu_objset_type_t ost, file_info_cb_t *cb)
1743*eda14cbcSMatt Macy {
1744*eda14cbcSMatt Macy 	file_cbs[ost] = cb;
1745*eda14cbcSMatt Macy }
1746*eda14cbcSMatt Macy 
1747*eda14cbcSMatt Macy int
1748*eda14cbcSMatt Macy dmu_get_file_info(objset_t *os, dmu_object_type_t bonustype, const void *data,
1749*eda14cbcSMatt Macy     zfs_file_info_t *zfi)
1750*eda14cbcSMatt Macy {
1751*eda14cbcSMatt Macy 	file_info_cb_t *cb = file_cbs[os->os_phys->os_type];
1752*eda14cbcSMatt Macy 	if (cb == NULL)
1753*eda14cbcSMatt Macy 		return (EINVAL);
1754*eda14cbcSMatt Macy 	return (cb(bonustype, data, zfi));
1755*eda14cbcSMatt Macy }
1756*eda14cbcSMatt Macy 
1757*eda14cbcSMatt Macy boolean_t
1758*eda14cbcSMatt Macy dmu_objset_userused_enabled(objset_t *os)
1759*eda14cbcSMatt Macy {
1760*eda14cbcSMatt Macy 	return (spa_version(os->os_spa) >= SPA_VERSION_USERSPACE &&
1761*eda14cbcSMatt Macy 	    file_cbs[os->os_phys->os_type] != NULL &&
1762*eda14cbcSMatt Macy 	    DMU_USERUSED_DNODE(os) != NULL);
1763*eda14cbcSMatt Macy }
1764*eda14cbcSMatt Macy 
1765*eda14cbcSMatt Macy boolean_t
1766*eda14cbcSMatt Macy dmu_objset_userobjused_enabled(objset_t *os)
1767*eda14cbcSMatt Macy {
1768*eda14cbcSMatt Macy 	return (dmu_objset_userused_enabled(os) &&
1769*eda14cbcSMatt Macy 	    spa_feature_is_enabled(os->os_spa, SPA_FEATURE_USEROBJ_ACCOUNTING));
1770*eda14cbcSMatt Macy }
1771*eda14cbcSMatt Macy 
1772*eda14cbcSMatt Macy boolean_t
1773*eda14cbcSMatt Macy dmu_objset_projectquota_enabled(objset_t *os)
1774*eda14cbcSMatt Macy {
1775*eda14cbcSMatt Macy 	return (file_cbs[os->os_phys->os_type] != NULL &&
1776*eda14cbcSMatt Macy 	    DMU_PROJECTUSED_DNODE(os) != NULL &&
1777*eda14cbcSMatt Macy 	    spa_feature_is_enabled(os->os_spa, SPA_FEATURE_PROJECT_QUOTA));
1778*eda14cbcSMatt Macy }
1779*eda14cbcSMatt Macy 
1780*eda14cbcSMatt Macy typedef struct userquota_node {
1781*eda14cbcSMatt Macy 	/* must be in the first filed, see userquota_update_cache() */
1782*eda14cbcSMatt Macy 	char		uqn_id[20 + DMU_OBJACCT_PREFIX_LEN];
1783*eda14cbcSMatt Macy 	int64_t		uqn_delta;
1784*eda14cbcSMatt Macy 	avl_node_t	uqn_node;
1785*eda14cbcSMatt Macy } userquota_node_t;
1786*eda14cbcSMatt Macy 
1787*eda14cbcSMatt Macy typedef struct userquota_cache {
1788*eda14cbcSMatt Macy 	avl_tree_t uqc_user_deltas;
1789*eda14cbcSMatt Macy 	avl_tree_t uqc_group_deltas;
1790*eda14cbcSMatt Macy 	avl_tree_t uqc_project_deltas;
1791*eda14cbcSMatt Macy } userquota_cache_t;
1792*eda14cbcSMatt Macy 
1793*eda14cbcSMatt Macy static int
1794*eda14cbcSMatt Macy userquota_compare(const void *l, const void *r)
1795*eda14cbcSMatt Macy {
1796*eda14cbcSMatt Macy 	const userquota_node_t *luqn = l;
1797*eda14cbcSMatt Macy 	const userquota_node_t *ruqn = r;
1798*eda14cbcSMatt Macy 	int rv;
1799*eda14cbcSMatt Macy 
1800*eda14cbcSMatt Macy 	/*
1801*eda14cbcSMatt Macy 	 * NB: can only access uqn_id because userquota_update_cache() doesn't
1802*eda14cbcSMatt Macy 	 * pass in an entire userquota_node_t.
1803*eda14cbcSMatt Macy 	 */
1804*eda14cbcSMatt Macy 	rv = strcmp(luqn->uqn_id, ruqn->uqn_id);
1805*eda14cbcSMatt Macy 
1806*eda14cbcSMatt Macy 	return (TREE_ISIGN(rv));
1807*eda14cbcSMatt Macy }
1808*eda14cbcSMatt Macy 
1809*eda14cbcSMatt Macy static void
1810*eda14cbcSMatt Macy do_userquota_cacheflush(objset_t *os, userquota_cache_t *cache, dmu_tx_t *tx)
1811*eda14cbcSMatt Macy {
1812*eda14cbcSMatt Macy 	void *cookie;
1813*eda14cbcSMatt Macy 	userquota_node_t *uqn;
1814*eda14cbcSMatt Macy 
1815*eda14cbcSMatt Macy 	ASSERT(dmu_tx_is_syncing(tx));
1816*eda14cbcSMatt Macy 
1817*eda14cbcSMatt Macy 	cookie = NULL;
1818*eda14cbcSMatt Macy 	while ((uqn = avl_destroy_nodes(&cache->uqc_user_deltas,
1819*eda14cbcSMatt Macy 	    &cookie)) != NULL) {
1820*eda14cbcSMatt Macy 		/*
1821*eda14cbcSMatt Macy 		 * os_userused_lock protects against concurrent calls to
1822*eda14cbcSMatt Macy 		 * zap_increment_int().  It's needed because zap_increment_int()
1823*eda14cbcSMatt Macy 		 * is not thread-safe (i.e. not atomic).
1824*eda14cbcSMatt Macy 		 */
1825*eda14cbcSMatt Macy 		mutex_enter(&os->os_userused_lock);
1826*eda14cbcSMatt Macy 		VERIFY0(zap_increment(os, DMU_USERUSED_OBJECT,
1827*eda14cbcSMatt Macy 		    uqn->uqn_id, uqn->uqn_delta, tx));
1828*eda14cbcSMatt Macy 		mutex_exit(&os->os_userused_lock);
1829*eda14cbcSMatt Macy 		kmem_free(uqn, sizeof (*uqn));
1830*eda14cbcSMatt Macy 	}
1831*eda14cbcSMatt Macy 	avl_destroy(&cache->uqc_user_deltas);
1832*eda14cbcSMatt Macy 
1833*eda14cbcSMatt Macy 	cookie = NULL;
1834*eda14cbcSMatt Macy 	while ((uqn = avl_destroy_nodes(&cache->uqc_group_deltas,
1835*eda14cbcSMatt Macy 	    &cookie)) != NULL) {
1836*eda14cbcSMatt Macy 		mutex_enter(&os->os_userused_lock);
1837*eda14cbcSMatt Macy 		VERIFY0(zap_increment(os, DMU_GROUPUSED_OBJECT,
1838*eda14cbcSMatt Macy 		    uqn->uqn_id, uqn->uqn_delta, tx));
1839*eda14cbcSMatt Macy 		mutex_exit(&os->os_userused_lock);
1840*eda14cbcSMatt Macy 		kmem_free(uqn, sizeof (*uqn));
1841*eda14cbcSMatt Macy 	}
1842*eda14cbcSMatt Macy 	avl_destroy(&cache->uqc_group_deltas);
1843*eda14cbcSMatt Macy 
1844*eda14cbcSMatt Macy 	if (dmu_objset_projectquota_enabled(os)) {
1845*eda14cbcSMatt Macy 		cookie = NULL;
1846*eda14cbcSMatt Macy 		while ((uqn = avl_destroy_nodes(&cache->uqc_project_deltas,
1847*eda14cbcSMatt Macy 		    &cookie)) != NULL) {
1848*eda14cbcSMatt Macy 			mutex_enter(&os->os_userused_lock);
1849*eda14cbcSMatt Macy 			VERIFY0(zap_increment(os, DMU_PROJECTUSED_OBJECT,
1850*eda14cbcSMatt Macy 			    uqn->uqn_id, uqn->uqn_delta, tx));
1851*eda14cbcSMatt Macy 			mutex_exit(&os->os_userused_lock);
1852*eda14cbcSMatt Macy 			kmem_free(uqn, sizeof (*uqn));
1853*eda14cbcSMatt Macy 		}
1854*eda14cbcSMatt Macy 		avl_destroy(&cache->uqc_project_deltas);
1855*eda14cbcSMatt Macy 	}
1856*eda14cbcSMatt Macy }
1857*eda14cbcSMatt Macy 
1858*eda14cbcSMatt Macy static void
1859*eda14cbcSMatt Macy userquota_update_cache(avl_tree_t *avl, const char *id, int64_t delta)
1860*eda14cbcSMatt Macy {
1861*eda14cbcSMatt Macy 	userquota_node_t *uqn;
1862*eda14cbcSMatt Macy 	avl_index_t idx;
1863*eda14cbcSMatt Macy 
1864*eda14cbcSMatt Macy 	ASSERT(strlen(id) < sizeof (uqn->uqn_id));
1865*eda14cbcSMatt Macy 	/*
1866*eda14cbcSMatt Macy 	 * Use id directly for searching because uqn_id is the first field of
1867*eda14cbcSMatt Macy 	 * userquota_node_t and fields after uqn_id won't be accessed in
1868*eda14cbcSMatt Macy 	 * avl_find().
1869*eda14cbcSMatt Macy 	 */
1870*eda14cbcSMatt Macy 	uqn = avl_find(avl, (const void *)id, &idx);
1871*eda14cbcSMatt Macy 	if (uqn == NULL) {
1872*eda14cbcSMatt Macy 		uqn = kmem_zalloc(sizeof (*uqn), KM_SLEEP);
1873*eda14cbcSMatt Macy 		strlcpy(uqn->uqn_id, id, sizeof (uqn->uqn_id));
1874*eda14cbcSMatt Macy 		avl_insert(avl, uqn, idx);
1875*eda14cbcSMatt Macy 	}
1876*eda14cbcSMatt Macy 	uqn->uqn_delta += delta;
1877*eda14cbcSMatt Macy }
1878*eda14cbcSMatt Macy 
1879*eda14cbcSMatt Macy static void
1880*eda14cbcSMatt Macy do_userquota_update(objset_t *os, userquota_cache_t *cache, uint64_t used,
1881*eda14cbcSMatt Macy     uint64_t flags, uint64_t user, uint64_t group, uint64_t project,
1882*eda14cbcSMatt Macy     boolean_t subtract)
1883*eda14cbcSMatt Macy {
1884*eda14cbcSMatt Macy 	if (flags & DNODE_FLAG_USERUSED_ACCOUNTED) {
1885*eda14cbcSMatt Macy 		int64_t delta = DNODE_MIN_SIZE + used;
1886*eda14cbcSMatt Macy 		char name[20];
1887*eda14cbcSMatt Macy 
1888*eda14cbcSMatt Macy 		if (subtract)
1889*eda14cbcSMatt Macy 			delta = -delta;
1890*eda14cbcSMatt Macy 
1891*eda14cbcSMatt Macy 		(void) snprintf(name, sizeof (name), "%llx", (longlong_t)user);
1892*eda14cbcSMatt Macy 		userquota_update_cache(&cache->uqc_user_deltas, name, delta);
1893*eda14cbcSMatt Macy 
1894*eda14cbcSMatt Macy 		(void) snprintf(name, sizeof (name), "%llx", (longlong_t)group);
1895*eda14cbcSMatt Macy 		userquota_update_cache(&cache->uqc_group_deltas, name, delta);
1896*eda14cbcSMatt Macy 
1897*eda14cbcSMatt Macy 		if (dmu_objset_projectquota_enabled(os)) {
1898*eda14cbcSMatt Macy 			(void) snprintf(name, sizeof (name), "%llx",
1899*eda14cbcSMatt Macy 			    (longlong_t)project);
1900*eda14cbcSMatt Macy 			userquota_update_cache(&cache->uqc_project_deltas,
1901*eda14cbcSMatt Macy 			    name, delta);
1902*eda14cbcSMatt Macy 		}
1903*eda14cbcSMatt Macy 	}
1904*eda14cbcSMatt Macy }
1905*eda14cbcSMatt Macy 
1906*eda14cbcSMatt Macy static void
1907*eda14cbcSMatt Macy do_userobjquota_update(objset_t *os, userquota_cache_t *cache, uint64_t flags,
1908*eda14cbcSMatt Macy     uint64_t user, uint64_t group, uint64_t project, boolean_t subtract)
1909*eda14cbcSMatt Macy {
1910*eda14cbcSMatt Macy 	if (flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) {
1911*eda14cbcSMatt Macy 		char name[20 + DMU_OBJACCT_PREFIX_LEN];
1912*eda14cbcSMatt Macy 		int delta = subtract ? -1 : 1;
1913*eda14cbcSMatt Macy 
1914*eda14cbcSMatt Macy 		(void) snprintf(name, sizeof (name), DMU_OBJACCT_PREFIX "%llx",
1915*eda14cbcSMatt Macy 		    (longlong_t)user);
1916*eda14cbcSMatt Macy 		userquota_update_cache(&cache->uqc_user_deltas, name, delta);
1917*eda14cbcSMatt Macy 
1918*eda14cbcSMatt Macy 		(void) snprintf(name, sizeof (name), DMU_OBJACCT_PREFIX "%llx",
1919*eda14cbcSMatt Macy 		    (longlong_t)group);
1920*eda14cbcSMatt Macy 		userquota_update_cache(&cache->uqc_group_deltas, name, delta);
1921*eda14cbcSMatt Macy 
1922*eda14cbcSMatt Macy 		if (dmu_objset_projectquota_enabled(os)) {
1923*eda14cbcSMatt Macy 			(void) snprintf(name, sizeof (name),
1924*eda14cbcSMatt Macy 			    DMU_OBJACCT_PREFIX "%llx", (longlong_t)project);
1925*eda14cbcSMatt Macy 			userquota_update_cache(&cache->uqc_project_deltas,
1926*eda14cbcSMatt Macy 			    name, delta);
1927*eda14cbcSMatt Macy 		}
1928*eda14cbcSMatt Macy 	}
1929*eda14cbcSMatt Macy }
1930*eda14cbcSMatt Macy 
1931*eda14cbcSMatt Macy typedef struct userquota_updates_arg {
1932*eda14cbcSMatt Macy 	objset_t *uua_os;
1933*eda14cbcSMatt Macy 	int uua_sublist_idx;
1934*eda14cbcSMatt Macy 	dmu_tx_t *uua_tx;
1935*eda14cbcSMatt Macy } userquota_updates_arg_t;
1936*eda14cbcSMatt Macy 
1937*eda14cbcSMatt Macy static void
1938*eda14cbcSMatt Macy userquota_updates_task(void *arg)
1939*eda14cbcSMatt Macy {
1940*eda14cbcSMatt Macy 	userquota_updates_arg_t *uua = arg;
1941*eda14cbcSMatt Macy 	objset_t *os = uua->uua_os;
1942*eda14cbcSMatt Macy 	dmu_tx_t *tx = uua->uua_tx;
1943*eda14cbcSMatt Macy 	dnode_t *dn;
1944*eda14cbcSMatt Macy 	userquota_cache_t cache = { { 0 } };
1945*eda14cbcSMatt Macy 
1946*eda14cbcSMatt Macy 	multilist_sublist_t *list =
1947*eda14cbcSMatt Macy 	    multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx);
1948*eda14cbcSMatt Macy 
1949*eda14cbcSMatt Macy 	ASSERT(multilist_sublist_head(list) == NULL ||
1950*eda14cbcSMatt Macy 	    dmu_objset_userused_enabled(os));
1951*eda14cbcSMatt Macy 	avl_create(&cache.uqc_user_deltas, userquota_compare,
1952*eda14cbcSMatt Macy 	    sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
1953*eda14cbcSMatt Macy 	avl_create(&cache.uqc_group_deltas, userquota_compare,
1954*eda14cbcSMatt Macy 	    sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
1955*eda14cbcSMatt Macy 	if (dmu_objset_projectquota_enabled(os))
1956*eda14cbcSMatt Macy 		avl_create(&cache.uqc_project_deltas, userquota_compare,
1957*eda14cbcSMatt Macy 		    sizeof (userquota_node_t), offsetof(userquota_node_t,
1958*eda14cbcSMatt Macy 		    uqn_node));
1959*eda14cbcSMatt Macy 
1960*eda14cbcSMatt Macy 	while ((dn = multilist_sublist_head(list)) != NULL) {
1961*eda14cbcSMatt Macy 		int flags;
1962*eda14cbcSMatt Macy 		ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
1963*eda14cbcSMatt Macy 		ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
1964*eda14cbcSMatt Macy 		    dn->dn_phys->dn_flags &
1965*eda14cbcSMatt Macy 		    DNODE_FLAG_USERUSED_ACCOUNTED);
1966*eda14cbcSMatt Macy 
1967*eda14cbcSMatt Macy 		flags = dn->dn_id_flags;
1968*eda14cbcSMatt Macy 		ASSERT(flags);
1969*eda14cbcSMatt Macy 		if (flags & DN_ID_OLD_EXIST)  {
1970*eda14cbcSMatt Macy 			do_userquota_update(os, &cache, dn->dn_oldused,
1971*eda14cbcSMatt Macy 			    dn->dn_oldflags, dn->dn_olduid, dn->dn_oldgid,
1972*eda14cbcSMatt Macy 			    dn->dn_oldprojid, B_TRUE);
1973*eda14cbcSMatt Macy 			do_userobjquota_update(os, &cache, dn->dn_oldflags,
1974*eda14cbcSMatt Macy 			    dn->dn_olduid, dn->dn_oldgid,
1975*eda14cbcSMatt Macy 			    dn->dn_oldprojid, B_TRUE);
1976*eda14cbcSMatt Macy 		}
1977*eda14cbcSMatt Macy 		if (flags & DN_ID_NEW_EXIST) {
1978*eda14cbcSMatt Macy 			do_userquota_update(os, &cache,
1979*eda14cbcSMatt Macy 			    DN_USED_BYTES(dn->dn_phys), dn->dn_phys->dn_flags,
1980*eda14cbcSMatt Macy 			    dn->dn_newuid, dn->dn_newgid,
1981*eda14cbcSMatt Macy 			    dn->dn_newprojid, B_FALSE);
1982*eda14cbcSMatt Macy 			do_userobjquota_update(os, &cache,
1983*eda14cbcSMatt Macy 			    dn->dn_phys->dn_flags, dn->dn_newuid, dn->dn_newgid,
1984*eda14cbcSMatt Macy 			    dn->dn_newprojid, B_FALSE);
1985*eda14cbcSMatt Macy 		}
1986*eda14cbcSMatt Macy 
1987*eda14cbcSMatt Macy 		mutex_enter(&dn->dn_mtx);
1988*eda14cbcSMatt Macy 		dn->dn_oldused = 0;
1989*eda14cbcSMatt Macy 		dn->dn_oldflags = 0;
1990*eda14cbcSMatt Macy 		if (dn->dn_id_flags & DN_ID_NEW_EXIST) {
1991*eda14cbcSMatt Macy 			dn->dn_olduid = dn->dn_newuid;
1992*eda14cbcSMatt Macy 			dn->dn_oldgid = dn->dn_newgid;
1993*eda14cbcSMatt Macy 			dn->dn_oldprojid = dn->dn_newprojid;
1994*eda14cbcSMatt Macy 			dn->dn_id_flags |= DN_ID_OLD_EXIST;
1995*eda14cbcSMatt Macy 			if (dn->dn_bonuslen == 0)
1996*eda14cbcSMatt Macy 				dn->dn_id_flags |= DN_ID_CHKED_SPILL;
1997*eda14cbcSMatt Macy 			else
1998*eda14cbcSMatt Macy 				dn->dn_id_flags |= DN_ID_CHKED_BONUS;
1999*eda14cbcSMatt Macy 		}
2000*eda14cbcSMatt Macy 		dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
2001*eda14cbcSMatt Macy 		if (dn->dn_dirty_txg == spa_syncing_txg(os->os_spa))
2002*eda14cbcSMatt Macy 			dn->dn_dirty_txg = 0;
2003*eda14cbcSMatt Macy 		mutex_exit(&dn->dn_mtx);
2004*eda14cbcSMatt Macy 
2005*eda14cbcSMatt Macy 		multilist_sublist_remove(list, dn);
2006*eda14cbcSMatt Macy 		dnode_rele(dn, os->os_synced_dnodes);
2007*eda14cbcSMatt Macy 	}
2008*eda14cbcSMatt Macy 	do_userquota_cacheflush(os, &cache, tx);
2009*eda14cbcSMatt Macy 	multilist_sublist_unlock(list);
2010*eda14cbcSMatt Macy 	kmem_free(uua, sizeof (*uua));
2011*eda14cbcSMatt Macy }
2012*eda14cbcSMatt Macy 
2013*eda14cbcSMatt Macy void
2014*eda14cbcSMatt Macy dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
2015*eda14cbcSMatt Macy {
2016*eda14cbcSMatt Macy 	int num_sublists;
2017*eda14cbcSMatt Macy 
2018*eda14cbcSMatt Macy 	if (!dmu_objset_userused_enabled(os))
2019*eda14cbcSMatt Macy 		return;
2020*eda14cbcSMatt Macy 
2021*eda14cbcSMatt Macy 	/*
2022*eda14cbcSMatt Macy 	 * If this is a raw receive just return and handle accounting
2023*eda14cbcSMatt Macy 	 * later when we have the keys loaded. We also don't do user
2024*eda14cbcSMatt Macy 	 * accounting during claiming since the datasets are not owned
2025*eda14cbcSMatt Macy 	 * for the duration of claiming and this txg should only be
2026*eda14cbcSMatt Macy 	 * used for recovery.
2027*eda14cbcSMatt Macy 	 */
2028*eda14cbcSMatt Macy 	if (os->os_encrypted && dmu_objset_is_receiving(os))
2029*eda14cbcSMatt Macy 		return;
2030*eda14cbcSMatt Macy 
2031*eda14cbcSMatt Macy 	if (tx->tx_txg <= os->os_spa->spa_claim_max_txg)
2032*eda14cbcSMatt Macy 		return;
2033*eda14cbcSMatt Macy 
2034*eda14cbcSMatt Macy 	/* Allocate the user/group/project used objects if necessary. */
2035*eda14cbcSMatt Macy 	if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
2036*eda14cbcSMatt Macy 		VERIFY0(zap_create_claim(os,
2037*eda14cbcSMatt Macy 		    DMU_USERUSED_OBJECT,
2038*eda14cbcSMatt Macy 		    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
2039*eda14cbcSMatt Macy 		VERIFY0(zap_create_claim(os,
2040*eda14cbcSMatt Macy 		    DMU_GROUPUSED_OBJECT,
2041*eda14cbcSMatt Macy 		    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
2042*eda14cbcSMatt Macy 	}
2043*eda14cbcSMatt Macy 
2044*eda14cbcSMatt Macy 	if (dmu_objset_projectquota_enabled(os) &&
2045*eda14cbcSMatt Macy 	    DMU_PROJECTUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
2046*eda14cbcSMatt Macy 		VERIFY0(zap_create_claim(os, DMU_PROJECTUSED_OBJECT,
2047*eda14cbcSMatt Macy 		    DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
2048*eda14cbcSMatt Macy 	}
2049*eda14cbcSMatt Macy 
2050*eda14cbcSMatt Macy 	num_sublists = multilist_get_num_sublists(os->os_synced_dnodes);
2051*eda14cbcSMatt Macy 	for (int i = 0; i < num_sublists; i++) {
2052*eda14cbcSMatt Macy 		if (multilist_sublist_is_empty_idx(os->os_synced_dnodes, i))
2053*eda14cbcSMatt Macy 			continue;
2054*eda14cbcSMatt Macy 		userquota_updates_arg_t *uua =
2055*eda14cbcSMatt Macy 		    kmem_alloc(sizeof (*uua), KM_SLEEP);
2056*eda14cbcSMatt Macy 		uua->uua_os = os;
2057*eda14cbcSMatt Macy 		uua->uua_sublist_idx = i;
2058*eda14cbcSMatt Macy 		uua->uua_tx = tx;
2059*eda14cbcSMatt Macy 		/* note: caller does taskq_wait() */
2060*eda14cbcSMatt Macy 		(void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
2061*eda14cbcSMatt Macy 		    userquota_updates_task, uua, 0);
2062*eda14cbcSMatt Macy 		/* callback frees uua */
2063*eda14cbcSMatt Macy 	}
2064*eda14cbcSMatt Macy }
2065*eda14cbcSMatt Macy 
2066*eda14cbcSMatt Macy /*
2067*eda14cbcSMatt Macy  * Returns a pointer to data to find uid/gid from
2068*eda14cbcSMatt Macy  *
2069*eda14cbcSMatt Macy  * If a dirty record for transaction group that is syncing can't
2070*eda14cbcSMatt Macy  * be found then NULL is returned.  In the NULL case it is assumed
2071*eda14cbcSMatt Macy  * the uid/gid aren't changing.
2072*eda14cbcSMatt Macy  */
2073*eda14cbcSMatt Macy static void *
2074*eda14cbcSMatt Macy dmu_objset_userquota_find_data(dmu_buf_impl_t *db, dmu_tx_t *tx)
2075*eda14cbcSMatt Macy {
2076*eda14cbcSMatt Macy 	dbuf_dirty_record_t *dr;
2077*eda14cbcSMatt Macy 	void *data;
2078*eda14cbcSMatt Macy 
2079*eda14cbcSMatt Macy 	if (db->db_dirtycnt == 0)
2080*eda14cbcSMatt Macy 		return (db->db.db_data);  /* Nothing is changing */
2081*eda14cbcSMatt Macy 
2082*eda14cbcSMatt Macy 	dr = dbuf_find_dirty_eq(db, tx->tx_txg);
2083*eda14cbcSMatt Macy 
2084*eda14cbcSMatt Macy 	if (dr == NULL) {
2085*eda14cbcSMatt Macy 		data = NULL;
2086*eda14cbcSMatt Macy 	} else {
2087*eda14cbcSMatt Macy 		dnode_t *dn;
2088*eda14cbcSMatt Macy 
2089*eda14cbcSMatt Macy 		DB_DNODE_ENTER(dr->dr_dbuf);
2090*eda14cbcSMatt Macy 		dn = DB_DNODE(dr->dr_dbuf);
2091*eda14cbcSMatt Macy 
2092*eda14cbcSMatt Macy 		if (dn->dn_bonuslen == 0 &&
2093*eda14cbcSMatt Macy 		    dr->dr_dbuf->db_blkid == DMU_SPILL_BLKID)
2094*eda14cbcSMatt Macy 			data = dr->dt.dl.dr_data->b_data;
2095*eda14cbcSMatt Macy 		else
2096*eda14cbcSMatt Macy 			data = dr->dt.dl.dr_data;
2097*eda14cbcSMatt Macy 
2098*eda14cbcSMatt Macy 		DB_DNODE_EXIT(dr->dr_dbuf);
2099*eda14cbcSMatt Macy 	}
2100*eda14cbcSMatt Macy 
2101*eda14cbcSMatt Macy 	return (data);
2102*eda14cbcSMatt Macy }
2103*eda14cbcSMatt Macy 
2104*eda14cbcSMatt Macy void
2105*eda14cbcSMatt Macy dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx)
2106*eda14cbcSMatt Macy {
2107*eda14cbcSMatt Macy 	objset_t *os = dn->dn_objset;
2108*eda14cbcSMatt Macy 	void *data = NULL;
2109*eda14cbcSMatt Macy 	dmu_buf_impl_t *db = NULL;
2110*eda14cbcSMatt Macy 	int flags = dn->dn_id_flags;
2111*eda14cbcSMatt Macy 	int error;
2112*eda14cbcSMatt Macy 	boolean_t have_spill = B_FALSE;
2113*eda14cbcSMatt Macy 
2114*eda14cbcSMatt Macy 	if (!dmu_objset_userused_enabled(dn->dn_objset))
2115*eda14cbcSMatt Macy 		return;
2116*eda14cbcSMatt Macy 
2117*eda14cbcSMatt Macy 	/*
2118*eda14cbcSMatt Macy 	 * Raw receives introduce a problem with user accounting. Raw
2119*eda14cbcSMatt Macy 	 * receives cannot update the user accounting info because the
2120*eda14cbcSMatt Macy 	 * user ids and the sizes are encrypted. To guarantee that we
2121*eda14cbcSMatt Macy 	 * never end up with bad user accounting, we simply disable it
2122*eda14cbcSMatt Macy 	 * during raw receives. We also disable this for normal receives
2123*eda14cbcSMatt Macy 	 * so that an incremental raw receive may be done on top of an
2124*eda14cbcSMatt Macy 	 * existing non-raw receive.
2125*eda14cbcSMatt Macy 	 */
2126*eda14cbcSMatt Macy 	if (os->os_encrypted && dmu_objset_is_receiving(os))
2127*eda14cbcSMatt Macy 		return;
2128*eda14cbcSMatt Macy 
2129*eda14cbcSMatt Macy 	if (before && (flags & (DN_ID_CHKED_BONUS|DN_ID_OLD_EXIST|
2130*eda14cbcSMatt Macy 	    DN_ID_CHKED_SPILL)))
2131*eda14cbcSMatt Macy 		return;
2132*eda14cbcSMatt Macy 
2133*eda14cbcSMatt Macy 	if (before && dn->dn_bonuslen != 0)
2134*eda14cbcSMatt Macy 		data = DN_BONUS(dn->dn_phys);
2135*eda14cbcSMatt Macy 	else if (!before && dn->dn_bonuslen != 0) {
2136*eda14cbcSMatt Macy 		if (dn->dn_bonus) {
2137*eda14cbcSMatt Macy 			db = dn->dn_bonus;
2138*eda14cbcSMatt Macy 			mutex_enter(&db->db_mtx);
2139*eda14cbcSMatt Macy 			data = dmu_objset_userquota_find_data(db, tx);
2140*eda14cbcSMatt Macy 		} else {
2141*eda14cbcSMatt Macy 			data = DN_BONUS(dn->dn_phys);
2142*eda14cbcSMatt Macy 		}
2143*eda14cbcSMatt Macy 	} else if (dn->dn_bonuslen == 0 && dn->dn_bonustype == DMU_OT_SA) {
2144*eda14cbcSMatt Macy 			int rf = 0;
2145*eda14cbcSMatt Macy 
2146*eda14cbcSMatt Macy 			if (RW_WRITE_HELD(&dn->dn_struct_rwlock))
2147*eda14cbcSMatt Macy 				rf |= DB_RF_HAVESTRUCT;
2148*eda14cbcSMatt Macy 			error = dmu_spill_hold_by_dnode(dn,
2149*eda14cbcSMatt Macy 			    rf | DB_RF_MUST_SUCCEED,
2150*eda14cbcSMatt Macy 			    FTAG, (dmu_buf_t **)&db);
2151*eda14cbcSMatt Macy 			ASSERT(error == 0);
2152*eda14cbcSMatt Macy 			mutex_enter(&db->db_mtx);
2153*eda14cbcSMatt Macy 			data = (before) ? db->db.db_data :
2154*eda14cbcSMatt Macy 			    dmu_objset_userquota_find_data(db, tx);
2155*eda14cbcSMatt Macy 			have_spill = B_TRUE;
2156*eda14cbcSMatt Macy 	} else {
2157*eda14cbcSMatt Macy 		mutex_enter(&dn->dn_mtx);
2158*eda14cbcSMatt Macy 		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
2159*eda14cbcSMatt Macy 		mutex_exit(&dn->dn_mtx);
2160*eda14cbcSMatt Macy 		return;
2161*eda14cbcSMatt Macy 	}
2162*eda14cbcSMatt Macy 
2163*eda14cbcSMatt Macy 	/*
2164*eda14cbcSMatt Macy 	 * Must always call the callback in case the object
2165*eda14cbcSMatt Macy 	 * type has changed and that type isn't an object type to track
2166*eda14cbcSMatt Macy 	 */
2167*eda14cbcSMatt Macy 	zfs_file_info_t zfi;
2168*eda14cbcSMatt Macy 	error = file_cbs[os->os_phys->os_type](dn->dn_bonustype, data, &zfi);
2169*eda14cbcSMatt Macy 
2170*eda14cbcSMatt Macy 	if (before) {
2171*eda14cbcSMatt Macy 		ASSERT(data);
2172*eda14cbcSMatt Macy 		dn->dn_olduid = zfi.zfi_user;
2173*eda14cbcSMatt Macy 		dn->dn_oldgid = zfi.zfi_group;
2174*eda14cbcSMatt Macy 		dn->dn_oldprojid = zfi.zfi_project;
2175*eda14cbcSMatt Macy 	} else if (data) {
2176*eda14cbcSMatt Macy 		dn->dn_newuid = zfi.zfi_user;
2177*eda14cbcSMatt Macy 		dn->dn_newgid = zfi.zfi_group;
2178*eda14cbcSMatt Macy 		dn->dn_newprojid = zfi.zfi_project;
2179*eda14cbcSMatt Macy 	}
2180*eda14cbcSMatt Macy 
2181*eda14cbcSMatt Macy 	/*
2182*eda14cbcSMatt Macy 	 * Preserve existing uid/gid when the callback can't determine
2183*eda14cbcSMatt Macy 	 * what the new uid/gid are and the callback returned EEXIST.
2184*eda14cbcSMatt Macy 	 * The EEXIST error tells us to just use the existing uid/gid.
2185*eda14cbcSMatt Macy 	 * If we don't know what the old values are then just assign
2186*eda14cbcSMatt Macy 	 * them to 0, since that is a new file  being created.
2187*eda14cbcSMatt Macy 	 */
2188*eda14cbcSMatt Macy 	if (!before && data == NULL && error == EEXIST) {
2189*eda14cbcSMatt Macy 		if (flags & DN_ID_OLD_EXIST) {
2190*eda14cbcSMatt Macy 			dn->dn_newuid = dn->dn_olduid;
2191*eda14cbcSMatt Macy 			dn->dn_newgid = dn->dn_oldgid;
2192*eda14cbcSMatt Macy 			dn->dn_newprojid = dn->dn_oldprojid;
2193*eda14cbcSMatt Macy 		} else {
2194*eda14cbcSMatt Macy 			dn->dn_newuid = 0;
2195*eda14cbcSMatt Macy 			dn->dn_newgid = 0;
2196*eda14cbcSMatt Macy 			dn->dn_newprojid = ZFS_DEFAULT_PROJID;
2197*eda14cbcSMatt Macy 		}
2198*eda14cbcSMatt Macy 		error = 0;
2199*eda14cbcSMatt Macy 	}
2200*eda14cbcSMatt Macy 
2201*eda14cbcSMatt Macy 	if (db)
2202*eda14cbcSMatt Macy 		mutex_exit(&db->db_mtx);
2203*eda14cbcSMatt Macy 
2204*eda14cbcSMatt Macy 	mutex_enter(&dn->dn_mtx);
2205*eda14cbcSMatt Macy 	if (error == 0 && before)
2206*eda14cbcSMatt Macy 		dn->dn_id_flags |= DN_ID_OLD_EXIST;
2207*eda14cbcSMatt Macy 	if (error == 0 && !before)
2208*eda14cbcSMatt Macy 		dn->dn_id_flags |= DN_ID_NEW_EXIST;
2209*eda14cbcSMatt Macy 
2210*eda14cbcSMatt Macy 	if (have_spill) {
2211*eda14cbcSMatt Macy 		dn->dn_id_flags |= DN_ID_CHKED_SPILL;
2212*eda14cbcSMatt Macy 	} else {
2213*eda14cbcSMatt Macy 		dn->dn_id_flags |= DN_ID_CHKED_BONUS;
2214*eda14cbcSMatt Macy 	}
2215*eda14cbcSMatt Macy 	mutex_exit(&dn->dn_mtx);
2216*eda14cbcSMatt Macy 	if (have_spill)
2217*eda14cbcSMatt Macy 		dmu_buf_rele((dmu_buf_t *)db, FTAG);
2218*eda14cbcSMatt Macy }
2219*eda14cbcSMatt Macy 
2220*eda14cbcSMatt Macy boolean_t
2221*eda14cbcSMatt Macy dmu_objset_userspace_present(objset_t *os)
2222*eda14cbcSMatt Macy {
2223*eda14cbcSMatt Macy 	return (os->os_phys->os_flags &
2224*eda14cbcSMatt Macy 	    OBJSET_FLAG_USERACCOUNTING_COMPLETE);
2225*eda14cbcSMatt Macy }
2226*eda14cbcSMatt Macy 
2227*eda14cbcSMatt Macy boolean_t
2228*eda14cbcSMatt Macy dmu_objset_userobjspace_present(objset_t *os)
2229*eda14cbcSMatt Macy {
2230*eda14cbcSMatt Macy 	return (os->os_phys->os_flags &
2231*eda14cbcSMatt Macy 	    OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE);
2232*eda14cbcSMatt Macy }
2233*eda14cbcSMatt Macy 
2234*eda14cbcSMatt Macy boolean_t
2235*eda14cbcSMatt Macy dmu_objset_projectquota_present(objset_t *os)
2236*eda14cbcSMatt Macy {
2237*eda14cbcSMatt Macy 	return (os->os_phys->os_flags &
2238*eda14cbcSMatt Macy 	    OBJSET_FLAG_PROJECTQUOTA_COMPLETE);
2239*eda14cbcSMatt Macy }
2240*eda14cbcSMatt Macy 
2241*eda14cbcSMatt Macy static int
2242*eda14cbcSMatt Macy dmu_objset_space_upgrade(objset_t *os)
2243*eda14cbcSMatt Macy {
2244*eda14cbcSMatt Macy 	uint64_t obj;
2245*eda14cbcSMatt Macy 	int err = 0;
2246*eda14cbcSMatt Macy 
2247*eda14cbcSMatt Macy 	/*
2248*eda14cbcSMatt Macy 	 * We simply need to mark every object dirty, so that it will be
2249*eda14cbcSMatt Macy 	 * synced out and now accounted.  If this is called
2250*eda14cbcSMatt Macy 	 * concurrently, or if we already did some work before crashing,
2251*eda14cbcSMatt Macy 	 * that's fine, since we track each object's accounted state
2252*eda14cbcSMatt Macy 	 * independently.
2253*eda14cbcSMatt Macy 	 */
2254*eda14cbcSMatt Macy 
2255*eda14cbcSMatt Macy 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
2256*eda14cbcSMatt Macy 		dmu_tx_t *tx;
2257*eda14cbcSMatt Macy 		dmu_buf_t *db;
2258*eda14cbcSMatt Macy 		int objerr;
2259*eda14cbcSMatt Macy 
2260*eda14cbcSMatt Macy 		mutex_enter(&os->os_upgrade_lock);
2261*eda14cbcSMatt Macy 		if (os->os_upgrade_exit)
2262*eda14cbcSMatt Macy 			err = SET_ERROR(EINTR);
2263*eda14cbcSMatt Macy 		mutex_exit(&os->os_upgrade_lock);
2264*eda14cbcSMatt Macy 		if (err != 0)
2265*eda14cbcSMatt Macy 			return (err);
2266*eda14cbcSMatt Macy 
2267*eda14cbcSMatt Macy 		if (issig(JUSTLOOKING) && issig(FORREAL))
2268*eda14cbcSMatt Macy 			return (SET_ERROR(EINTR));
2269*eda14cbcSMatt Macy 
2270*eda14cbcSMatt Macy 		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
2271*eda14cbcSMatt Macy 		if (objerr != 0)
2272*eda14cbcSMatt Macy 			continue;
2273*eda14cbcSMatt Macy 		tx = dmu_tx_create(os);
2274*eda14cbcSMatt Macy 		dmu_tx_hold_bonus(tx, obj);
2275*eda14cbcSMatt Macy 		objerr = dmu_tx_assign(tx, TXG_WAIT);
2276*eda14cbcSMatt Macy 		if (objerr != 0) {
2277*eda14cbcSMatt Macy 			dmu_buf_rele(db, FTAG);
2278*eda14cbcSMatt Macy 			dmu_tx_abort(tx);
2279*eda14cbcSMatt Macy 			continue;
2280*eda14cbcSMatt Macy 		}
2281*eda14cbcSMatt Macy 		dmu_buf_will_dirty(db, tx);
2282*eda14cbcSMatt Macy 		dmu_buf_rele(db, FTAG);
2283*eda14cbcSMatt Macy 		dmu_tx_commit(tx);
2284*eda14cbcSMatt Macy 	}
2285*eda14cbcSMatt Macy 	return (0);
2286*eda14cbcSMatt Macy }
2287*eda14cbcSMatt Macy 
2288*eda14cbcSMatt Macy int
2289*eda14cbcSMatt Macy dmu_objset_userspace_upgrade(objset_t *os)
2290*eda14cbcSMatt Macy {
2291*eda14cbcSMatt Macy 	int err = 0;
2292*eda14cbcSMatt Macy 
2293*eda14cbcSMatt Macy 	if (dmu_objset_userspace_present(os))
2294*eda14cbcSMatt Macy 		return (0);
2295*eda14cbcSMatt Macy 	if (dmu_objset_is_snapshot(os))
2296*eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
2297*eda14cbcSMatt Macy 	if (!dmu_objset_userused_enabled(os))
2298*eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
2299*eda14cbcSMatt Macy 
2300*eda14cbcSMatt Macy 	err = dmu_objset_space_upgrade(os);
2301*eda14cbcSMatt Macy 	if (err)
2302*eda14cbcSMatt Macy 		return (err);
2303*eda14cbcSMatt Macy 
2304*eda14cbcSMatt Macy 	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
2305*eda14cbcSMatt Macy 	txg_wait_synced(dmu_objset_pool(os), 0);
2306*eda14cbcSMatt Macy 	return (0);
2307*eda14cbcSMatt Macy }
2308*eda14cbcSMatt Macy 
2309*eda14cbcSMatt Macy static int
2310*eda14cbcSMatt Macy dmu_objset_id_quota_upgrade_cb(objset_t *os)
2311*eda14cbcSMatt Macy {
2312*eda14cbcSMatt Macy 	int err = 0;
2313*eda14cbcSMatt Macy 
2314*eda14cbcSMatt Macy 	if (dmu_objset_userobjspace_present(os) &&
2315*eda14cbcSMatt Macy 	    dmu_objset_projectquota_present(os))
2316*eda14cbcSMatt Macy 		return (0);
2317*eda14cbcSMatt Macy 	if (dmu_objset_is_snapshot(os))
2318*eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
2319*eda14cbcSMatt Macy 	if (!dmu_objset_userobjused_enabled(os))
2320*eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
2321*eda14cbcSMatt Macy 	if (!dmu_objset_projectquota_enabled(os) &&
2322*eda14cbcSMatt Macy 	    dmu_objset_userobjspace_present(os))
2323*eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
2324*eda14cbcSMatt Macy 
2325*eda14cbcSMatt Macy 	dmu_objset_ds(os)->ds_feature_activation[
2326*eda14cbcSMatt Macy 	    SPA_FEATURE_USEROBJ_ACCOUNTING] = (void *)B_TRUE;
2327*eda14cbcSMatt Macy 	if (dmu_objset_projectquota_enabled(os))
2328*eda14cbcSMatt Macy 		dmu_objset_ds(os)->ds_feature_activation[
2329*eda14cbcSMatt Macy 		    SPA_FEATURE_PROJECT_QUOTA] = (void *)B_TRUE;
2330*eda14cbcSMatt Macy 
2331*eda14cbcSMatt Macy 	err = dmu_objset_space_upgrade(os);
2332*eda14cbcSMatt Macy 	if (err)
2333*eda14cbcSMatt Macy 		return (err);
2334*eda14cbcSMatt Macy 
2335*eda14cbcSMatt Macy 	os->os_flags |= OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
2336*eda14cbcSMatt Macy 	if (dmu_objset_projectquota_enabled(os))
2337*eda14cbcSMatt Macy 		os->os_flags |= OBJSET_FLAG_PROJECTQUOTA_COMPLETE;
2338*eda14cbcSMatt Macy 
2339*eda14cbcSMatt Macy 	txg_wait_synced(dmu_objset_pool(os), 0);
2340*eda14cbcSMatt Macy 	return (0);
2341*eda14cbcSMatt Macy }
2342*eda14cbcSMatt Macy 
2343*eda14cbcSMatt Macy void
2344*eda14cbcSMatt Macy dmu_objset_id_quota_upgrade(objset_t *os)
2345*eda14cbcSMatt Macy {
2346*eda14cbcSMatt Macy 	dmu_objset_upgrade(os, dmu_objset_id_quota_upgrade_cb);
2347*eda14cbcSMatt Macy }
2348*eda14cbcSMatt Macy 
2349*eda14cbcSMatt Macy boolean_t
2350*eda14cbcSMatt Macy dmu_objset_userobjspace_upgradable(objset_t *os)
2351*eda14cbcSMatt Macy {
2352*eda14cbcSMatt Macy 	return (dmu_objset_type(os) == DMU_OST_ZFS &&
2353*eda14cbcSMatt Macy 	    !dmu_objset_is_snapshot(os) &&
2354*eda14cbcSMatt Macy 	    dmu_objset_userobjused_enabled(os) &&
2355*eda14cbcSMatt Macy 	    !dmu_objset_userobjspace_present(os) &&
2356*eda14cbcSMatt Macy 	    spa_writeable(dmu_objset_spa(os)));
2357*eda14cbcSMatt Macy }
2358*eda14cbcSMatt Macy 
2359*eda14cbcSMatt Macy boolean_t
2360*eda14cbcSMatt Macy dmu_objset_projectquota_upgradable(objset_t *os)
2361*eda14cbcSMatt Macy {
2362*eda14cbcSMatt Macy 	return (dmu_objset_type(os) == DMU_OST_ZFS &&
2363*eda14cbcSMatt Macy 	    !dmu_objset_is_snapshot(os) &&
2364*eda14cbcSMatt Macy 	    dmu_objset_projectquota_enabled(os) &&
2365*eda14cbcSMatt Macy 	    !dmu_objset_projectquota_present(os) &&
2366*eda14cbcSMatt Macy 	    spa_writeable(dmu_objset_spa(os)));
2367*eda14cbcSMatt Macy }
2368*eda14cbcSMatt Macy 
2369*eda14cbcSMatt Macy void
2370*eda14cbcSMatt Macy dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
2371*eda14cbcSMatt Macy     uint64_t *usedobjsp, uint64_t *availobjsp)
2372*eda14cbcSMatt Macy {
2373*eda14cbcSMatt Macy 	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
2374*eda14cbcSMatt Macy 	    usedobjsp, availobjsp);
2375*eda14cbcSMatt Macy }
2376*eda14cbcSMatt Macy 
2377*eda14cbcSMatt Macy uint64_t
2378*eda14cbcSMatt Macy dmu_objset_fsid_guid(objset_t *os)
2379*eda14cbcSMatt Macy {
2380*eda14cbcSMatt Macy 	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
2381*eda14cbcSMatt Macy }
2382*eda14cbcSMatt Macy 
2383*eda14cbcSMatt Macy void
2384*eda14cbcSMatt Macy dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat)
2385*eda14cbcSMatt Macy {
2386*eda14cbcSMatt Macy 	stat->dds_type = os->os_phys->os_type;
2387*eda14cbcSMatt Macy 	if (os->os_dsl_dataset)
2388*eda14cbcSMatt Macy 		dsl_dataset_fast_stat(os->os_dsl_dataset, stat);
2389*eda14cbcSMatt Macy }
2390*eda14cbcSMatt Macy 
2391*eda14cbcSMatt Macy void
2392*eda14cbcSMatt Macy dmu_objset_stats(objset_t *os, nvlist_t *nv)
2393*eda14cbcSMatt Macy {
2394*eda14cbcSMatt Macy 	ASSERT(os->os_dsl_dataset ||
2395*eda14cbcSMatt Macy 	    os->os_phys->os_type == DMU_OST_META);
2396*eda14cbcSMatt Macy 
2397*eda14cbcSMatt Macy 	if (os->os_dsl_dataset != NULL)
2398*eda14cbcSMatt Macy 		dsl_dataset_stats(os->os_dsl_dataset, nv);
2399*eda14cbcSMatt Macy 
2400*eda14cbcSMatt Macy 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_TYPE,
2401*eda14cbcSMatt Macy 	    os->os_phys->os_type);
2402*eda14cbcSMatt Macy 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERACCOUNTING,
2403*eda14cbcSMatt Macy 	    dmu_objset_userspace_present(os));
2404*eda14cbcSMatt Macy }
2405*eda14cbcSMatt Macy 
2406*eda14cbcSMatt Macy int
2407*eda14cbcSMatt Macy dmu_objset_is_snapshot(objset_t *os)
2408*eda14cbcSMatt Macy {
2409*eda14cbcSMatt Macy 	if (os->os_dsl_dataset != NULL)
2410*eda14cbcSMatt Macy 		return (os->os_dsl_dataset->ds_is_snapshot);
2411*eda14cbcSMatt Macy 	else
2412*eda14cbcSMatt Macy 		return (B_FALSE);
2413*eda14cbcSMatt Macy }
2414*eda14cbcSMatt Macy 
2415*eda14cbcSMatt Macy int
2416*eda14cbcSMatt Macy dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
2417*eda14cbcSMatt Macy     boolean_t *conflict)
2418*eda14cbcSMatt Macy {
2419*eda14cbcSMatt Macy 	dsl_dataset_t *ds = os->os_dsl_dataset;
2420*eda14cbcSMatt Macy 	uint64_t ignored;
2421*eda14cbcSMatt Macy 
2422*eda14cbcSMatt Macy 	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
2423*eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
2424*eda14cbcSMatt Macy 
2425*eda14cbcSMatt Macy 	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
2426*eda14cbcSMatt Macy 	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
2427*eda14cbcSMatt Macy 	    MT_NORMALIZE, real, maxlen, conflict));
2428*eda14cbcSMatt Macy }
2429*eda14cbcSMatt Macy 
2430*eda14cbcSMatt Macy int
2431*eda14cbcSMatt Macy dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
2432*eda14cbcSMatt Macy     uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
2433*eda14cbcSMatt Macy {
2434*eda14cbcSMatt Macy 	dsl_dataset_t *ds = os->os_dsl_dataset;
2435*eda14cbcSMatt Macy 	zap_cursor_t cursor;
2436*eda14cbcSMatt Macy 	zap_attribute_t attr;
2437*eda14cbcSMatt Macy 
2438*eda14cbcSMatt Macy 	ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));
2439*eda14cbcSMatt Macy 
2440*eda14cbcSMatt Macy 	if (dsl_dataset_phys(ds)->ds_snapnames_zapobj == 0)
2441*eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
2442*eda14cbcSMatt Macy 
2443*eda14cbcSMatt Macy 	zap_cursor_init_serialized(&cursor,
2444*eda14cbcSMatt Macy 	    ds->ds_dir->dd_pool->dp_meta_objset,
2445*eda14cbcSMatt Macy 	    dsl_dataset_phys(ds)->ds_snapnames_zapobj, *offp);
2446*eda14cbcSMatt Macy 
2447*eda14cbcSMatt Macy 	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
2448*eda14cbcSMatt Macy 		zap_cursor_fini(&cursor);
2449*eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
2450*eda14cbcSMatt Macy 	}
2451*eda14cbcSMatt Macy 
2452*eda14cbcSMatt Macy 	if (strlen(attr.za_name) + 1 > namelen) {
2453*eda14cbcSMatt Macy 		zap_cursor_fini(&cursor);
2454*eda14cbcSMatt Macy 		return (SET_ERROR(ENAMETOOLONG));
2455*eda14cbcSMatt Macy 	}
2456*eda14cbcSMatt Macy 
2457*eda14cbcSMatt Macy 	(void) strlcpy(name, attr.za_name, namelen);
2458*eda14cbcSMatt Macy 	if (idp)
2459*eda14cbcSMatt Macy 		*idp = attr.za_first_integer;
2460*eda14cbcSMatt Macy 	if (case_conflict)
2461*eda14cbcSMatt Macy 		*case_conflict = attr.za_normalization_conflict;
2462*eda14cbcSMatt Macy 	zap_cursor_advance(&cursor);
2463*eda14cbcSMatt Macy 	*offp = zap_cursor_serialize(&cursor);
2464*eda14cbcSMatt Macy 	zap_cursor_fini(&cursor);
2465*eda14cbcSMatt Macy 
2466*eda14cbcSMatt Macy 	return (0);
2467*eda14cbcSMatt Macy }
2468*eda14cbcSMatt Macy 
2469*eda14cbcSMatt Macy int
2470*eda14cbcSMatt Macy dmu_snapshot_lookup(objset_t *os, const char *name, uint64_t *value)
2471*eda14cbcSMatt Macy {
2472*eda14cbcSMatt Macy 	return (dsl_dataset_snap_lookup(os->os_dsl_dataset, name, value));
2473*eda14cbcSMatt Macy }
2474*eda14cbcSMatt Macy 
2475*eda14cbcSMatt Macy int
2476*eda14cbcSMatt Macy dmu_dir_list_next(objset_t *os, int namelen, char *name,
2477*eda14cbcSMatt Macy     uint64_t *idp, uint64_t *offp)
2478*eda14cbcSMatt Macy {
2479*eda14cbcSMatt Macy 	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
2480*eda14cbcSMatt Macy 	zap_cursor_t cursor;
2481*eda14cbcSMatt Macy 	zap_attribute_t attr;
2482*eda14cbcSMatt Macy 
2483*eda14cbcSMatt Macy 	/* there is no next dir on a snapshot! */
2484*eda14cbcSMatt Macy 	if (os->os_dsl_dataset->ds_object !=
2485*eda14cbcSMatt Macy 	    dsl_dir_phys(dd)->dd_head_dataset_obj)
2486*eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
2487*eda14cbcSMatt Macy 
2488*eda14cbcSMatt Macy 	zap_cursor_init_serialized(&cursor,
2489*eda14cbcSMatt Macy 	    dd->dd_pool->dp_meta_objset,
2490*eda14cbcSMatt Macy 	    dsl_dir_phys(dd)->dd_child_dir_zapobj, *offp);
2491*eda14cbcSMatt Macy 
2492*eda14cbcSMatt Macy 	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
2493*eda14cbcSMatt Macy 		zap_cursor_fini(&cursor);
2494*eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
2495*eda14cbcSMatt Macy 	}
2496*eda14cbcSMatt Macy 
2497*eda14cbcSMatt Macy 	if (strlen(attr.za_name) + 1 > namelen) {
2498*eda14cbcSMatt Macy 		zap_cursor_fini(&cursor);
2499*eda14cbcSMatt Macy 		return (SET_ERROR(ENAMETOOLONG));
2500*eda14cbcSMatt Macy 	}
2501*eda14cbcSMatt Macy 
2502*eda14cbcSMatt Macy 	(void) strlcpy(name, attr.za_name, namelen);
2503*eda14cbcSMatt Macy 	if (idp)
2504*eda14cbcSMatt Macy 		*idp = attr.za_first_integer;
2505*eda14cbcSMatt Macy 	zap_cursor_advance(&cursor);
2506*eda14cbcSMatt Macy 	*offp = zap_cursor_serialize(&cursor);
2507*eda14cbcSMatt Macy 	zap_cursor_fini(&cursor);
2508*eda14cbcSMatt Macy 
2509*eda14cbcSMatt Macy 	return (0);
2510*eda14cbcSMatt Macy }
2511*eda14cbcSMatt Macy 
2512*eda14cbcSMatt Macy typedef struct dmu_objset_find_ctx {
2513*eda14cbcSMatt Macy 	taskq_t		*dc_tq;
2514*eda14cbcSMatt Macy 	dsl_pool_t	*dc_dp;
2515*eda14cbcSMatt Macy 	uint64_t	dc_ddobj;
2516*eda14cbcSMatt Macy 	char		*dc_ddname; /* last component of ddobj's name */
2517*eda14cbcSMatt Macy 	int		(*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
2518*eda14cbcSMatt Macy 	void		*dc_arg;
2519*eda14cbcSMatt Macy 	int		dc_flags;
2520*eda14cbcSMatt Macy 	kmutex_t	*dc_error_lock;
2521*eda14cbcSMatt Macy 	int		*dc_error;
2522*eda14cbcSMatt Macy } dmu_objset_find_ctx_t;
2523*eda14cbcSMatt Macy 
2524*eda14cbcSMatt Macy static void
2525*eda14cbcSMatt Macy dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
2526*eda14cbcSMatt Macy {
2527*eda14cbcSMatt Macy 	dsl_pool_t *dp = dcp->dc_dp;
2528*eda14cbcSMatt Macy 	dsl_dir_t *dd;
2529*eda14cbcSMatt Macy 	dsl_dataset_t *ds;
2530*eda14cbcSMatt Macy 	zap_cursor_t zc;
2531*eda14cbcSMatt Macy 	zap_attribute_t *attr;
2532*eda14cbcSMatt Macy 	uint64_t thisobj;
2533*eda14cbcSMatt Macy 	int err = 0;
2534*eda14cbcSMatt Macy 
2535*eda14cbcSMatt Macy 	/* don't process if there already was an error */
2536*eda14cbcSMatt Macy 	if (*dcp->dc_error != 0)
2537*eda14cbcSMatt Macy 		goto out;
2538*eda14cbcSMatt Macy 
2539*eda14cbcSMatt Macy 	/*
2540*eda14cbcSMatt Macy 	 * Note: passing the name (dc_ddname) here is optional, but it
2541*eda14cbcSMatt Macy 	 * improves performance because we don't need to call
2542*eda14cbcSMatt Macy 	 * zap_value_search() to determine the name.
2543*eda14cbcSMatt Macy 	 */
2544*eda14cbcSMatt Macy 	err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, dcp->dc_ddname, FTAG, &dd);
2545*eda14cbcSMatt Macy 	if (err != 0)
2546*eda14cbcSMatt Macy 		goto out;
2547*eda14cbcSMatt Macy 
2548*eda14cbcSMatt Macy 	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
2549*eda14cbcSMatt Macy 	if (dd->dd_myname[0] == '$') {
2550*eda14cbcSMatt Macy 		dsl_dir_rele(dd, FTAG);
2551*eda14cbcSMatt Macy 		goto out;
2552*eda14cbcSMatt Macy 	}
2553*eda14cbcSMatt Macy 
2554*eda14cbcSMatt Macy 	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
2555*eda14cbcSMatt Macy 	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
2556*eda14cbcSMatt Macy 
2557*eda14cbcSMatt Macy 	/*
2558*eda14cbcSMatt Macy 	 * Iterate over all children.
2559*eda14cbcSMatt Macy 	 */
2560*eda14cbcSMatt Macy 	if (dcp->dc_flags & DS_FIND_CHILDREN) {
2561*eda14cbcSMatt Macy 		for (zap_cursor_init(&zc, dp->dp_meta_objset,
2562*eda14cbcSMatt Macy 		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
2563*eda14cbcSMatt Macy 		    zap_cursor_retrieve(&zc, attr) == 0;
2564*eda14cbcSMatt Macy 		    (void) zap_cursor_advance(&zc)) {
2565*eda14cbcSMatt Macy 			ASSERT3U(attr->za_integer_length, ==,
2566*eda14cbcSMatt Macy 			    sizeof (uint64_t));
2567*eda14cbcSMatt Macy 			ASSERT3U(attr->za_num_integers, ==, 1);
2568*eda14cbcSMatt Macy 
2569*eda14cbcSMatt Macy 			dmu_objset_find_ctx_t *child_dcp =
2570*eda14cbcSMatt Macy 			    kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
2571*eda14cbcSMatt Macy 			*child_dcp = *dcp;
2572*eda14cbcSMatt Macy 			child_dcp->dc_ddobj = attr->za_first_integer;
2573*eda14cbcSMatt Macy 			child_dcp->dc_ddname = spa_strdup(attr->za_name);
2574*eda14cbcSMatt Macy 			if (dcp->dc_tq != NULL)
2575*eda14cbcSMatt Macy 				(void) taskq_dispatch(dcp->dc_tq,
2576*eda14cbcSMatt Macy 				    dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP);
2577*eda14cbcSMatt Macy 			else
2578*eda14cbcSMatt Macy 				dmu_objset_find_dp_impl(child_dcp);
2579*eda14cbcSMatt Macy 		}
2580*eda14cbcSMatt Macy 		zap_cursor_fini(&zc);
2581*eda14cbcSMatt Macy 	}
2582*eda14cbcSMatt Macy 
2583*eda14cbcSMatt Macy 	/*
2584*eda14cbcSMatt Macy 	 * Iterate over all snapshots.
2585*eda14cbcSMatt Macy 	 */
2586*eda14cbcSMatt Macy 	if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
2587*eda14cbcSMatt Macy 		dsl_dataset_t *ds;
2588*eda14cbcSMatt Macy 		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
2589*eda14cbcSMatt Macy 
2590*eda14cbcSMatt Macy 		if (err == 0) {
2591*eda14cbcSMatt Macy 			uint64_t snapobj;
2592*eda14cbcSMatt Macy 
2593*eda14cbcSMatt Macy 			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
2594*eda14cbcSMatt Macy 			dsl_dataset_rele(ds, FTAG);
2595*eda14cbcSMatt Macy 
2596*eda14cbcSMatt Macy 			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
2597*eda14cbcSMatt Macy 			    zap_cursor_retrieve(&zc, attr) == 0;
2598*eda14cbcSMatt Macy 			    (void) zap_cursor_advance(&zc)) {
2599*eda14cbcSMatt Macy 				ASSERT3U(attr->za_integer_length, ==,
2600*eda14cbcSMatt Macy 				    sizeof (uint64_t));
2601*eda14cbcSMatt Macy 				ASSERT3U(attr->za_num_integers, ==, 1);
2602*eda14cbcSMatt Macy 
2603*eda14cbcSMatt Macy 				err = dsl_dataset_hold_obj(dp,
2604*eda14cbcSMatt Macy 				    attr->za_first_integer, FTAG, &ds);
2605*eda14cbcSMatt Macy 				if (err != 0)
2606*eda14cbcSMatt Macy 					break;
2607*eda14cbcSMatt Macy 				err = dcp->dc_func(dp, ds, dcp->dc_arg);
2608*eda14cbcSMatt Macy 				dsl_dataset_rele(ds, FTAG);
2609*eda14cbcSMatt Macy 				if (err != 0)
2610*eda14cbcSMatt Macy 					break;
2611*eda14cbcSMatt Macy 			}
2612*eda14cbcSMatt Macy 			zap_cursor_fini(&zc);
2613*eda14cbcSMatt Macy 		}
2614*eda14cbcSMatt Macy 	}
2615*eda14cbcSMatt Macy 
2616*eda14cbcSMatt Macy 	kmem_free(attr, sizeof (zap_attribute_t));
2617*eda14cbcSMatt Macy 
2618*eda14cbcSMatt Macy 	if (err != 0) {
2619*eda14cbcSMatt Macy 		dsl_dir_rele(dd, FTAG);
2620*eda14cbcSMatt Macy 		goto out;
2621*eda14cbcSMatt Macy 	}
2622*eda14cbcSMatt Macy 
2623*eda14cbcSMatt Macy 	/*
2624*eda14cbcSMatt Macy 	 * Apply to self.
2625*eda14cbcSMatt Macy 	 */
2626*eda14cbcSMatt Macy 	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
2627*eda14cbcSMatt Macy 
2628*eda14cbcSMatt Macy 	/*
2629*eda14cbcSMatt Macy 	 * Note: we hold the dir while calling dsl_dataset_hold_obj() so
2630*eda14cbcSMatt Macy 	 * that the dir will remain cached, and we won't have to re-instantiate
2631*eda14cbcSMatt Macy 	 * it (which could be expensive due to finding its name via
2632*eda14cbcSMatt Macy 	 * zap_value_search()).
2633*eda14cbcSMatt Macy 	 */
2634*eda14cbcSMatt Macy 	dsl_dir_rele(dd, FTAG);
2635*eda14cbcSMatt Macy 	if (err != 0)
2636*eda14cbcSMatt Macy 		goto out;
2637*eda14cbcSMatt Macy 	err = dcp->dc_func(dp, ds, dcp->dc_arg);
2638*eda14cbcSMatt Macy 	dsl_dataset_rele(ds, FTAG);
2639*eda14cbcSMatt Macy 
2640*eda14cbcSMatt Macy out:
2641*eda14cbcSMatt Macy 	if (err != 0) {
2642*eda14cbcSMatt Macy 		mutex_enter(dcp->dc_error_lock);
2643*eda14cbcSMatt Macy 		/* only keep first error */
2644*eda14cbcSMatt Macy 		if (*dcp->dc_error == 0)
2645*eda14cbcSMatt Macy 			*dcp->dc_error = err;
2646*eda14cbcSMatt Macy 		mutex_exit(dcp->dc_error_lock);
2647*eda14cbcSMatt Macy 	}
2648*eda14cbcSMatt Macy 
2649*eda14cbcSMatt Macy 	if (dcp->dc_ddname != NULL)
2650*eda14cbcSMatt Macy 		spa_strfree(dcp->dc_ddname);
2651*eda14cbcSMatt Macy 	kmem_free(dcp, sizeof (*dcp));
2652*eda14cbcSMatt Macy }
2653*eda14cbcSMatt Macy 
2654*eda14cbcSMatt Macy static void
2655*eda14cbcSMatt Macy dmu_objset_find_dp_cb(void *arg)
2656*eda14cbcSMatt Macy {
2657*eda14cbcSMatt Macy 	dmu_objset_find_ctx_t *dcp = arg;
2658*eda14cbcSMatt Macy 	dsl_pool_t *dp = dcp->dc_dp;
2659*eda14cbcSMatt Macy 
2660*eda14cbcSMatt Macy 	/*
2661*eda14cbcSMatt Macy 	 * We need to get a pool_config_lock here, as there are several
2662*eda14cbcSMatt Macy 	 * assert(pool_config_held) down the stack. Getting a lock via
2663*eda14cbcSMatt Macy 	 * dsl_pool_config_enter is risky, as it might be stalled by a
2664*eda14cbcSMatt Macy 	 * pending writer. This would deadlock, as the write lock can
2665*eda14cbcSMatt Macy 	 * only be granted when our parent thread gives up the lock.
2666*eda14cbcSMatt Macy 	 * The _prio interface gives us priority over a pending writer.
2667*eda14cbcSMatt Macy 	 */
2668*eda14cbcSMatt Macy 	dsl_pool_config_enter_prio(dp, FTAG);
2669*eda14cbcSMatt Macy 
2670*eda14cbcSMatt Macy 	dmu_objset_find_dp_impl(dcp);
2671*eda14cbcSMatt Macy 
2672*eda14cbcSMatt Macy 	dsl_pool_config_exit(dp, FTAG);
2673*eda14cbcSMatt Macy }
2674*eda14cbcSMatt Macy 
2675*eda14cbcSMatt Macy /*
2676*eda14cbcSMatt Macy  * Find objsets under and including ddobj, call func(ds) on each.
2677*eda14cbcSMatt Macy  * The order for the enumeration is completely undefined.
2678*eda14cbcSMatt Macy  * func is called with dsl_pool_config held.
2679*eda14cbcSMatt Macy  */
2680*eda14cbcSMatt Macy int
2681*eda14cbcSMatt Macy dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
2682*eda14cbcSMatt Macy     int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
2683*eda14cbcSMatt Macy {
2684*eda14cbcSMatt Macy 	int error = 0;
2685*eda14cbcSMatt Macy 	taskq_t *tq = NULL;
2686*eda14cbcSMatt Macy 	int ntasks;
2687*eda14cbcSMatt Macy 	dmu_objset_find_ctx_t *dcp;
2688*eda14cbcSMatt Macy 	kmutex_t err_lock;
2689*eda14cbcSMatt Macy 
2690*eda14cbcSMatt Macy 	mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
2691*eda14cbcSMatt Macy 	dcp = kmem_alloc(sizeof (*dcp), KM_SLEEP);
2692*eda14cbcSMatt Macy 	dcp->dc_tq = NULL;
2693*eda14cbcSMatt Macy 	dcp->dc_dp = dp;
2694*eda14cbcSMatt Macy 	dcp->dc_ddobj = ddobj;
2695*eda14cbcSMatt Macy 	dcp->dc_ddname = NULL;
2696*eda14cbcSMatt Macy 	dcp->dc_func = func;
2697*eda14cbcSMatt Macy 	dcp->dc_arg = arg;
2698*eda14cbcSMatt Macy 	dcp->dc_flags = flags;
2699*eda14cbcSMatt Macy 	dcp->dc_error_lock = &err_lock;
2700*eda14cbcSMatt Macy 	dcp->dc_error = &error;
2701*eda14cbcSMatt Macy 
2702*eda14cbcSMatt Macy 	if ((flags & DS_FIND_SERIALIZE) || dsl_pool_config_held_writer(dp)) {
2703*eda14cbcSMatt Macy 		/*
2704*eda14cbcSMatt Macy 		 * In case a write lock is held we can't make use of
2705*eda14cbcSMatt Macy 		 * parallelism, as down the stack of the worker threads
2706*eda14cbcSMatt Macy 		 * the lock is asserted via dsl_pool_config_held.
2707*eda14cbcSMatt Macy 		 * In case of a read lock this is solved by getting a read
2708*eda14cbcSMatt Macy 		 * lock in each worker thread, which isn't possible in case
2709*eda14cbcSMatt Macy 		 * of a writer lock. So we fall back to the synchronous path
2710*eda14cbcSMatt Macy 		 * here.
2711*eda14cbcSMatt Macy 		 * In the future it might be possible to get some magic into
2712*eda14cbcSMatt Macy 		 * dsl_pool_config_held in a way that it returns true for
2713*eda14cbcSMatt Macy 		 * the worker threads so that a single lock held from this
2714*eda14cbcSMatt Macy 		 * thread suffices. For now, stay single threaded.
2715*eda14cbcSMatt Macy 		 */
2716*eda14cbcSMatt Macy 		dmu_objset_find_dp_impl(dcp);
2717*eda14cbcSMatt Macy 		mutex_destroy(&err_lock);
2718*eda14cbcSMatt Macy 
2719*eda14cbcSMatt Macy 		return (error);
2720*eda14cbcSMatt Macy 	}
2721*eda14cbcSMatt Macy 
2722*eda14cbcSMatt Macy 	ntasks = dmu_find_threads;
2723*eda14cbcSMatt Macy 	if (ntasks == 0)
2724*eda14cbcSMatt Macy 		ntasks = vdev_count_leaves(dp->dp_spa) * 4;
2725*eda14cbcSMatt Macy 	tq = taskq_create("dmu_objset_find", ntasks, maxclsyspri, ntasks,
2726*eda14cbcSMatt Macy 	    INT_MAX, 0);
2727*eda14cbcSMatt Macy 	if (tq == NULL) {
2728*eda14cbcSMatt Macy 		kmem_free(dcp, sizeof (*dcp));
2729*eda14cbcSMatt Macy 		mutex_destroy(&err_lock);
2730*eda14cbcSMatt Macy 
2731*eda14cbcSMatt Macy 		return (SET_ERROR(ENOMEM));
2732*eda14cbcSMatt Macy 	}
2733*eda14cbcSMatt Macy 	dcp->dc_tq = tq;
2734*eda14cbcSMatt Macy 
2735*eda14cbcSMatt Macy 	/* dcp will be freed by task */
2736*eda14cbcSMatt Macy 	(void) taskq_dispatch(tq, dmu_objset_find_dp_cb, dcp, TQ_SLEEP);
2737*eda14cbcSMatt Macy 
2738*eda14cbcSMatt Macy 	/*
2739*eda14cbcSMatt Macy 	 * PORTING: this code relies on the property of taskq_wait to wait
2740*eda14cbcSMatt Macy 	 * until no more tasks are queued and no more tasks are active. As
2741*eda14cbcSMatt Macy 	 * we always queue new tasks from within other tasks, task_wait
2742*eda14cbcSMatt Macy 	 * reliably waits for the full recursion to finish, even though we
2743*eda14cbcSMatt Macy 	 * enqueue new tasks after taskq_wait has been called.
2744*eda14cbcSMatt Macy 	 * On platforms other than illumos, taskq_wait may not have this
2745*eda14cbcSMatt Macy 	 * property.
2746*eda14cbcSMatt Macy 	 */
2747*eda14cbcSMatt Macy 	taskq_wait(tq);
2748*eda14cbcSMatt Macy 	taskq_destroy(tq);
2749*eda14cbcSMatt Macy 	mutex_destroy(&err_lock);
2750*eda14cbcSMatt Macy 
2751*eda14cbcSMatt Macy 	return (error);
2752*eda14cbcSMatt Macy }
2753*eda14cbcSMatt Macy 
2754*eda14cbcSMatt Macy /*
2755*eda14cbcSMatt Macy  * Find all objsets under name, and for each, call 'func(child_name, arg)'.
2756*eda14cbcSMatt Macy  * The dp_config_rwlock must not be held when this is called, and it
2757*eda14cbcSMatt Macy  * will not be held when the callback is called.
2758*eda14cbcSMatt Macy  * Therefore this function should only be used when the pool is not changing
2759*eda14cbcSMatt Macy  * (e.g. in syncing context), or the callback can deal with the possible races.
2760*eda14cbcSMatt Macy  */
2761*eda14cbcSMatt Macy static int
2762*eda14cbcSMatt Macy dmu_objset_find_impl(spa_t *spa, const char *name,
2763*eda14cbcSMatt Macy     int func(const char *, void *), void *arg, int flags)
2764*eda14cbcSMatt Macy {
2765*eda14cbcSMatt Macy 	dsl_dir_t *dd;
2766*eda14cbcSMatt Macy 	dsl_pool_t *dp = spa_get_dsl(spa);
2767*eda14cbcSMatt Macy 	dsl_dataset_t *ds;
2768*eda14cbcSMatt Macy 	zap_cursor_t zc;
2769*eda14cbcSMatt Macy 	zap_attribute_t *attr;
2770*eda14cbcSMatt Macy 	char *child;
2771*eda14cbcSMatt Macy 	uint64_t thisobj;
2772*eda14cbcSMatt Macy 	int err;
2773*eda14cbcSMatt Macy 
2774*eda14cbcSMatt Macy 	dsl_pool_config_enter(dp, FTAG);
2775*eda14cbcSMatt Macy 
2776*eda14cbcSMatt Macy 	err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
2777*eda14cbcSMatt Macy 	if (err != 0) {
2778*eda14cbcSMatt Macy 		dsl_pool_config_exit(dp, FTAG);
2779*eda14cbcSMatt Macy 		return (err);
2780*eda14cbcSMatt Macy 	}
2781*eda14cbcSMatt Macy 
2782*eda14cbcSMatt Macy 	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
2783*eda14cbcSMatt Macy 	if (dd->dd_myname[0] == '$') {
2784*eda14cbcSMatt Macy 		dsl_dir_rele(dd, FTAG);
2785*eda14cbcSMatt Macy 		dsl_pool_config_exit(dp, FTAG);
2786*eda14cbcSMatt Macy 		return (0);
2787*eda14cbcSMatt Macy 	}
2788*eda14cbcSMatt Macy 
2789*eda14cbcSMatt Macy 	thisobj = dsl_dir_phys(dd)->dd_head_dataset_obj;
2790*eda14cbcSMatt Macy 	attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
2791*eda14cbcSMatt Macy 
2792*eda14cbcSMatt Macy 	/*
2793*eda14cbcSMatt Macy 	 * Iterate over all children.
2794*eda14cbcSMatt Macy 	 */
2795*eda14cbcSMatt Macy 	if (flags & DS_FIND_CHILDREN) {
2796*eda14cbcSMatt Macy 		for (zap_cursor_init(&zc, dp->dp_meta_objset,
2797*eda14cbcSMatt Macy 		    dsl_dir_phys(dd)->dd_child_dir_zapobj);
2798*eda14cbcSMatt Macy 		    zap_cursor_retrieve(&zc, attr) == 0;
2799*eda14cbcSMatt Macy 		    (void) zap_cursor_advance(&zc)) {
2800*eda14cbcSMatt Macy 			ASSERT3U(attr->za_integer_length, ==,
2801*eda14cbcSMatt Macy 			    sizeof (uint64_t));
2802*eda14cbcSMatt Macy 			ASSERT3U(attr->za_num_integers, ==, 1);
2803*eda14cbcSMatt Macy 
2804*eda14cbcSMatt Macy 			child = kmem_asprintf("%s/%s", name, attr->za_name);
2805*eda14cbcSMatt Macy 			dsl_pool_config_exit(dp, FTAG);
2806*eda14cbcSMatt Macy 			err = dmu_objset_find_impl(spa, child,
2807*eda14cbcSMatt Macy 			    func, arg, flags);
2808*eda14cbcSMatt Macy 			dsl_pool_config_enter(dp, FTAG);
2809*eda14cbcSMatt Macy 			kmem_strfree(child);
2810*eda14cbcSMatt Macy 			if (err != 0)
2811*eda14cbcSMatt Macy 				break;
2812*eda14cbcSMatt Macy 		}
2813*eda14cbcSMatt Macy 		zap_cursor_fini(&zc);
2814*eda14cbcSMatt Macy 
2815*eda14cbcSMatt Macy 		if (err != 0) {
2816*eda14cbcSMatt Macy 			dsl_dir_rele(dd, FTAG);
2817*eda14cbcSMatt Macy 			dsl_pool_config_exit(dp, FTAG);
2818*eda14cbcSMatt Macy 			kmem_free(attr, sizeof (zap_attribute_t));
2819*eda14cbcSMatt Macy 			return (err);
2820*eda14cbcSMatt Macy 		}
2821*eda14cbcSMatt Macy 	}
2822*eda14cbcSMatt Macy 
2823*eda14cbcSMatt Macy 	/*
2824*eda14cbcSMatt Macy 	 * Iterate over all snapshots.
2825*eda14cbcSMatt Macy 	 */
2826*eda14cbcSMatt Macy 	if (flags & DS_FIND_SNAPSHOTS) {
2827*eda14cbcSMatt Macy 		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
2828*eda14cbcSMatt Macy 
2829*eda14cbcSMatt Macy 		if (err == 0) {
2830*eda14cbcSMatt Macy 			uint64_t snapobj;
2831*eda14cbcSMatt Macy 
2832*eda14cbcSMatt Macy 			snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
2833*eda14cbcSMatt Macy 			dsl_dataset_rele(ds, FTAG);
2834*eda14cbcSMatt Macy 
2835*eda14cbcSMatt Macy 			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
2836*eda14cbcSMatt Macy 			    zap_cursor_retrieve(&zc, attr) == 0;
2837*eda14cbcSMatt Macy 			    (void) zap_cursor_advance(&zc)) {
2838*eda14cbcSMatt Macy 				ASSERT3U(attr->za_integer_length, ==,
2839*eda14cbcSMatt Macy 				    sizeof (uint64_t));
2840*eda14cbcSMatt Macy 				ASSERT3U(attr->za_num_integers, ==, 1);
2841*eda14cbcSMatt Macy 
2842*eda14cbcSMatt Macy 				child = kmem_asprintf("%s@%s",
2843*eda14cbcSMatt Macy 				    name, attr->za_name);
2844*eda14cbcSMatt Macy 				dsl_pool_config_exit(dp, FTAG);
2845*eda14cbcSMatt Macy 				err = func(child, arg);
2846*eda14cbcSMatt Macy 				dsl_pool_config_enter(dp, FTAG);
2847*eda14cbcSMatt Macy 				kmem_strfree(child);
2848*eda14cbcSMatt Macy 				if (err != 0)
2849*eda14cbcSMatt Macy 					break;
2850*eda14cbcSMatt Macy 			}
2851*eda14cbcSMatt Macy 			zap_cursor_fini(&zc);
2852*eda14cbcSMatt Macy 		}
2853*eda14cbcSMatt Macy 	}
2854*eda14cbcSMatt Macy 
2855*eda14cbcSMatt Macy 	dsl_dir_rele(dd, FTAG);
2856*eda14cbcSMatt Macy 	kmem_free(attr, sizeof (zap_attribute_t));
2857*eda14cbcSMatt Macy 	dsl_pool_config_exit(dp, FTAG);
2858*eda14cbcSMatt Macy 
2859*eda14cbcSMatt Macy 	if (err != 0)
2860*eda14cbcSMatt Macy 		return (err);
2861*eda14cbcSMatt Macy 
2862*eda14cbcSMatt Macy 	/* Apply to self. */
2863*eda14cbcSMatt Macy 	return (func(name, arg));
2864*eda14cbcSMatt Macy }
2865*eda14cbcSMatt Macy 
2866*eda14cbcSMatt Macy /*
2867*eda14cbcSMatt Macy  * See comment above dmu_objset_find_impl().
2868*eda14cbcSMatt Macy  */
2869*eda14cbcSMatt Macy int
2870*eda14cbcSMatt Macy dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
2871*eda14cbcSMatt Macy     int flags)
2872*eda14cbcSMatt Macy {
2873*eda14cbcSMatt Macy 	spa_t *spa;
2874*eda14cbcSMatt Macy 	int error;
2875*eda14cbcSMatt Macy 
2876*eda14cbcSMatt Macy 	error = spa_open(name, &spa, FTAG);
2877*eda14cbcSMatt Macy 	if (error != 0)
2878*eda14cbcSMatt Macy 		return (error);
2879*eda14cbcSMatt Macy 	error = dmu_objset_find_impl(spa, name, func, arg, flags);
2880*eda14cbcSMatt Macy 	spa_close(spa, FTAG);
2881*eda14cbcSMatt Macy 	return (error);
2882*eda14cbcSMatt Macy }
2883*eda14cbcSMatt Macy 
2884*eda14cbcSMatt Macy boolean_t
2885*eda14cbcSMatt Macy dmu_objset_incompatible_encryption_version(objset_t *os)
2886*eda14cbcSMatt Macy {
2887*eda14cbcSMatt Macy 	return (dsl_dir_incompatible_encryption_version(
2888*eda14cbcSMatt Macy 	    os->os_dsl_dataset->ds_dir));
2889*eda14cbcSMatt Macy }
2890*eda14cbcSMatt Macy 
2891*eda14cbcSMatt Macy void
2892*eda14cbcSMatt Macy dmu_objset_set_user(objset_t *os, void *user_ptr)
2893*eda14cbcSMatt Macy {
2894*eda14cbcSMatt Macy 	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
2895*eda14cbcSMatt Macy 	os->os_user_ptr = user_ptr;
2896*eda14cbcSMatt Macy }
2897*eda14cbcSMatt Macy 
2898*eda14cbcSMatt Macy void *
2899*eda14cbcSMatt Macy dmu_objset_get_user(objset_t *os)
2900*eda14cbcSMatt Macy {
2901*eda14cbcSMatt Macy 	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
2902*eda14cbcSMatt Macy 	return (os->os_user_ptr);
2903*eda14cbcSMatt Macy }
2904*eda14cbcSMatt Macy 
2905*eda14cbcSMatt Macy /*
2906*eda14cbcSMatt Macy  * Determine name of filesystem, given name of snapshot.
2907*eda14cbcSMatt Macy  * buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes
2908*eda14cbcSMatt Macy  */
2909*eda14cbcSMatt Macy int
2910*eda14cbcSMatt Macy dmu_fsname(const char *snapname, char *buf)
2911*eda14cbcSMatt Macy {
2912*eda14cbcSMatt Macy 	char *atp = strchr(snapname, '@');
2913*eda14cbcSMatt Macy 	if (atp == NULL)
2914*eda14cbcSMatt Macy 		return (SET_ERROR(EINVAL));
2915*eda14cbcSMatt Macy 	if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN)
2916*eda14cbcSMatt Macy 		return (SET_ERROR(ENAMETOOLONG));
2917*eda14cbcSMatt Macy 	(void) strlcpy(buf, snapname, atp - snapname + 1);
2918*eda14cbcSMatt Macy 	return (0);
2919*eda14cbcSMatt Macy }
2920*eda14cbcSMatt Macy 
2921*eda14cbcSMatt Macy /*
2922*eda14cbcSMatt Macy  * Call when we think we're going to write/free space in open context
2923*eda14cbcSMatt Macy  * to track the amount of dirty data in the open txg, which is also the
2924*eda14cbcSMatt Macy  * amount of memory that can not be evicted until this txg syncs.
2925*eda14cbcSMatt Macy  *
2926*eda14cbcSMatt Macy  * Note that there are two conditions where this can be called from
2927*eda14cbcSMatt Macy  * syncing context:
2928*eda14cbcSMatt Macy  *
2929*eda14cbcSMatt Macy  * [1] When we just created the dataset, in which case we go on with
2930*eda14cbcSMatt Macy  *     updating any accounting of dirty data as usual.
2931*eda14cbcSMatt Macy  * [2] When we are dirtying MOS data, in which case we only update the
2932*eda14cbcSMatt Macy  *     pool's accounting of dirty data.
2933*eda14cbcSMatt Macy  */
2934*eda14cbcSMatt Macy void
2935*eda14cbcSMatt Macy dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx)
2936*eda14cbcSMatt Macy {
2937*eda14cbcSMatt Macy 	dsl_dataset_t *ds = os->os_dsl_dataset;
2938*eda14cbcSMatt Macy 	int64_t aspace = spa_get_worst_case_asize(os->os_spa, space);
2939*eda14cbcSMatt Macy 
2940*eda14cbcSMatt Macy 	if (ds != NULL) {
2941*eda14cbcSMatt Macy 		dsl_dir_willuse_space(ds->ds_dir, aspace, tx);
2942*eda14cbcSMatt Macy 	}
2943*eda14cbcSMatt Macy 
2944*eda14cbcSMatt Macy 	dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx);
2945*eda14cbcSMatt Macy }
2946*eda14cbcSMatt Macy 
/*
 * Symbols from this file exported via EXPORT_SYMBOL(); only compiled
 * when building for the kernel (_KERNEL).
 */
#if defined(_KERNEL)
EXPORT_SYMBOL(dmu_objset_zil);
EXPORT_SYMBOL(dmu_objset_pool);
EXPORT_SYMBOL(dmu_objset_ds);
EXPORT_SYMBOL(dmu_objset_type);
EXPORT_SYMBOL(dmu_objset_name);
EXPORT_SYMBOL(dmu_objset_hold);
EXPORT_SYMBOL(dmu_objset_hold_flags);
EXPORT_SYMBOL(dmu_objset_own);
EXPORT_SYMBOL(dmu_objset_rele);
EXPORT_SYMBOL(dmu_objset_rele_flags);
EXPORT_SYMBOL(dmu_objset_disown);
EXPORT_SYMBOL(dmu_objset_from_ds);
EXPORT_SYMBOL(dmu_objset_create);
EXPORT_SYMBOL(dmu_objset_clone);
EXPORT_SYMBOL(dmu_objset_stats);
EXPORT_SYMBOL(dmu_objset_fast_stat);
EXPORT_SYMBOL(dmu_objset_spa);
EXPORT_SYMBOL(dmu_objset_space);
EXPORT_SYMBOL(dmu_objset_fsid_guid);
EXPORT_SYMBOL(dmu_objset_find);
EXPORT_SYMBOL(dmu_objset_byteswap);
EXPORT_SYMBOL(dmu_objset_evict_dbufs);
EXPORT_SYMBOL(dmu_objset_snap_cmtime);
EXPORT_SYMBOL(dmu_objset_dnodesize);

EXPORT_SYMBOL(dmu_objset_sync);
EXPORT_SYMBOL(dmu_objset_is_dirty);
EXPORT_SYMBOL(dmu_objset_create_impl_dnstats);
EXPORT_SYMBOL(dmu_objset_create_impl);
EXPORT_SYMBOL(dmu_objset_open_impl);
EXPORT_SYMBOL(dmu_objset_evict);
EXPORT_SYMBOL(dmu_objset_register_type);
EXPORT_SYMBOL(dmu_objset_do_userquota_updates);
EXPORT_SYMBOL(dmu_objset_userquota_get_ids);
EXPORT_SYMBOL(dmu_objset_userused_enabled);
EXPORT_SYMBOL(dmu_objset_userspace_upgrade);
EXPORT_SYMBOL(dmu_objset_userspace_present);
EXPORT_SYMBOL(dmu_objset_userobjused_enabled);
EXPORT_SYMBOL(dmu_objset_userobjspace_upgradable);
EXPORT_SYMBOL(dmu_objset_userobjspace_present);
EXPORT_SYMBOL(dmu_objset_projectquota_enabled);
EXPORT_SYMBOL(dmu_objset_projectquota_present);
EXPORT_SYMBOL(dmu_objset_projectquota_upgradable);
EXPORT_SYMBOL(dmu_objset_id_quota_upgrade);
#endif
2993