xref: /freebsd/usr.sbin/makefs/zfs.c (revision 240afd8c1fcc8c5f29dbd4ff0c915795d414405d)
1*240afd8cSMark Johnston /*-
2*240afd8cSMark Johnston  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3*240afd8cSMark Johnston  *
4*240afd8cSMark Johnston  * Copyright (c) 2022 The FreeBSD Foundation
5*240afd8cSMark Johnston  *
6*240afd8cSMark Johnston  * This software was developed by Mark Johnston under sponsorship from
7*240afd8cSMark Johnston  * the FreeBSD Foundation.
8*240afd8cSMark Johnston  *
9*240afd8cSMark Johnston  * Redistribution and use in source and binary forms, with or without
10*240afd8cSMark Johnston  * modification, are permitted provided that the following conditions are
11*240afd8cSMark Johnston  * met:
12*240afd8cSMark Johnston  * 1. Redistributions of source code must retain the above copyright
13*240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer.
14*240afd8cSMark Johnston  * 2. Redistributions in binary form must reproduce the above copyright
15*240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer in
16*240afd8cSMark Johnston  *    the documentation and/or other materials provided with the distribution.
17*240afd8cSMark Johnston  *
18*240afd8cSMark Johnston  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19*240afd8cSMark Johnston  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20*240afd8cSMark Johnston  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21*240afd8cSMark Johnston  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22*240afd8cSMark Johnston  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23*240afd8cSMark Johnston  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24*240afd8cSMark Johnston  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25*240afd8cSMark Johnston  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26*240afd8cSMark Johnston  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27*240afd8cSMark Johnston  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28*240afd8cSMark Johnston  * SUCH DAMAGE.
29*240afd8cSMark Johnston  */
30*240afd8cSMark Johnston 
31*240afd8cSMark Johnston #include <sys/param.h>
32*240afd8cSMark Johnston #include <sys/errno.h>
33*240afd8cSMark Johnston #include <sys/queue.h>
34*240afd8cSMark Johnston 
35*240afd8cSMark Johnston #include <assert.h>
36*240afd8cSMark Johnston #include <fcntl.h>
37*240afd8cSMark Johnston #include <stdbool.h>
38*240afd8cSMark Johnston #include <stddef.h>
39*240afd8cSMark Johnston #include <stdlib.h>
40*240afd8cSMark Johnston #include <string.h>
41*240afd8cSMark Johnston #include <unistd.h>
42*240afd8cSMark Johnston 
43*240afd8cSMark Johnston #include <util.h>
44*240afd8cSMark Johnston 
45*240afd8cSMark Johnston #include "makefs.h"
46*240afd8cSMark Johnston #include "zfs.h"
47*240afd8cSMark Johnston 
48*240afd8cSMark Johnston #define	VDEV_LABEL_SPACE	\
49*240afd8cSMark Johnston 	((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE))
50*240afd8cSMark Johnston _Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, "");
51*240afd8cSMark Johnston 
52*240afd8cSMark Johnston #define	MINMSSIZE		((off_t)1 << 24) /* 16MB */
53*240afd8cSMark Johnston #define	DFLTMSSIZE		((off_t)1 << 29) /* 512MB */
54*240afd8cSMark Johnston #define	MAXMSSIZE		((off_t)1 << 34) /* 16GB */
55*240afd8cSMark Johnston 
56*240afd8cSMark Johnston #define	INDIR_LEVELS		6
57*240afd8cSMark Johnston /* Indirect blocks are always 128KB. */
58*240afd8cSMark Johnston #define	BLKPTR_PER_INDIR	(MAXBLOCKSIZE / sizeof(blkptr_t))
59*240afd8cSMark Johnston 
60*240afd8cSMark Johnston struct dnode_cursor {
61*240afd8cSMark Johnston 	char		inddir[INDIR_LEVELS][MAXBLOCKSIZE];
62*240afd8cSMark Johnston 	off_t		indloc;
63*240afd8cSMark Johnston 	off_t		indspace;
64*240afd8cSMark Johnston 	dnode_phys_t	*dnode;
65*240afd8cSMark Johnston 	off_t		dataoff;
66*240afd8cSMark Johnston 	off_t		datablksz;
67*240afd8cSMark Johnston };
68*240afd8cSMark Johnston 
69*240afd8cSMark Johnston void
70*240afd8cSMark Johnston zfs_prep_opts(fsinfo_t *fsopts)
71*240afd8cSMark Johnston {
72*240afd8cSMark Johnston 	zfs_opt_t *zfs = ecalloc(1, sizeof(*zfs));
73*240afd8cSMark Johnston 
74*240afd8cSMark Johnston 	const option_t zfs_options[] = {
75*240afd8cSMark Johnston 		{ '\0', "bootfs", &zfs->bootfs, OPT_STRPTR,
76*240afd8cSMark Johnston 		  0, 0, "Bootable dataset" },
77*240afd8cSMark Johnston 		{ '\0', "mssize", &zfs->mssize, OPT_INT64,
78*240afd8cSMark Johnston 		  MINMSSIZE, MAXMSSIZE, "Metaslab size" },
79*240afd8cSMark Johnston 		{ '\0', "poolname", &zfs->poolname, OPT_STRPTR,
80*240afd8cSMark Johnston 		  0, 0, "ZFS pool name" },
81*240afd8cSMark Johnston 		{ '\0', "rootpath", &zfs->rootpath, OPT_STRPTR,
82*240afd8cSMark Johnston 		  0, 0, "Prefix for all dataset mount points" },
83*240afd8cSMark Johnston 		{ '\0', "ashift", &zfs->ashift, OPT_INT32,
84*240afd8cSMark Johnston 		  MINBLOCKSHIFT, MAXBLOCKSHIFT, "ZFS pool ashift" },
85*240afd8cSMark Johnston 		{ '\0', "nowarn", &zfs->nowarn, OPT_BOOL,
86*240afd8cSMark Johnston 		  0, 0, "Suppress warning about experimental ZFS support" },
87*240afd8cSMark Johnston 		{ .name = NULL }
88*240afd8cSMark Johnston 	};
89*240afd8cSMark Johnston 
90*240afd8cSMark Johnston 	STAILQ_INIT(&zfs->datasetdescs);
91*240afd8cSMark Johnston 
92*240afd8cSMark Johnston 	fsopts->fs_specific = zfs;
93*240afd8cSMark Johnston 	fsopts->fs_options = copy_opts(zfs_options);
94*240afd8cSMark Johnston }
95*240afd8cSMark Johnston 
96*240afd8cSMark Johnston int
97*240afd8cSMark Johnston zfs_parse_opts(const char *option, fsinfo_t *fsopts)
98*240afd8cSMark Johnston {
99*240afd8cSMark Johnston 	zfs_opt_t *zfs;
100*240afd8cSMark Johnston 	struct dataset_desc *dsdesc;
101*240afd8cSMark Johnston 	char buf[BUFSIZ], *opt, *val;
102*240afd8cSMark Johnston 	int rv;
103*240afd8cSMark Johnston 
104*240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
105*240afd8cSMark Johnston 
106*240afd8cSMark Johnston 	opt = val = estrdup(option);
107*240afd8cSMark Johnston 	opt = strsep(&val, "=");
108*240afd8cSMark Johnston 	if (strcmp(opt, "fs") == 0) {
109*240afd8cSMark Johnston 		if (val == NULL)
110*240afd8cSMark Johnston 			errx(1, "invalid filesystem parameters `%s'", option);
111*240afd8cSMark Johnston 
112*240afd8cSMark Johnston 		/*
113*240afd8cSMark Johnston 		 * Dataset descriptions will be parsed later, in dsl_init().
114*240afd8cSMark Johnston 		 * Just stash them away for now.
115*240afd8cSMark Johnston 		 */
116*240afd8cSMark Johnston 		dsdesc = ecalloc(1, sizeof(*dsdesc));
117*240afd8cSMark Johnston 		dsdesc->params = estrdup(val);
118*240afd8cSMark Johnston 		free(opt);
119*240afd8cSMark Johnston 		STAILQ_INSERT_TAIL(&zfs->datasetdescs, dsdesc, next);
120*240afd8cSMark Johnston 		return (1);
121*240afd8cSMark Johnston 	}
122*240afd8cSMark Johnston 	free(opt);
123*240afd8cSMark Johnston 
124*240afd8cSMark Johnston 	rv = set_option(fsopts->fs_options, option, buf, sizeof(buf));
125*240afd8cSMark Johnston 	return (rv == -1 ? 0 : 1);
126*240afd8cSMark Johnston }
127*240afd8cSMark Johnston 
128*240afd8cSMark Johnston static void
129*240afd8cSMark Johnston zfs_size_vdev(fsinfo_t *fsopts)
130*240afd8cSMark Johnston {
131*240afd8cSMark Johnston 	zfs_opt_t *zfs;
132*240afd8cSMark Johnston 	off_t asize, mssize, vdevsize, vdevsize1;
133*240afd8cSMark Johnston 
134*240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
135*240afd8cSMark Johnston 
136*240afd8cSMark Johnston 	assert(fsopts->maxsize != 0);
137*240afd8cSMark Johnston 	assert(zfs->ashift != 0);
138*240afd8cSMark Johnston 
139*240afd8cSMark Johnston 	/*
140*240afd8cSMark Johnston 	 * Figure out how big the vdev should be.
141*240afd8cSMark Johnston 	 */
142*240afd8cSMark Johnston 	vdevsize = rounddown2(fsopts->maxsize, 1 << zfs->ashift);
143*240afd8cSMark Johnston 	if (vdevsize < MINDEVSIZE)
144*240afd8cSMark Johnston 		errx(1, "maximum image size is too small");
145*240afd8cSMark Johnston 	if (vdevsize < fsopts->minsize || vdevsize > fsopts->maxsize) {
146*240afd8cSMark Johnston 		errx(1, "image size bounds must be multiples of %d",
147*240afd8cSMark Johnston 		    1 << zfs->ashift);
148*240afd8cSMark Johnston 	}
149*240afd8cSMark Johnston 	asize = vdevsize - VDEV_LABEL_SPACE;
150*240afd8cSMark Johnston 
151*240afd8cSMark Johnston 	/*
152*240afd8cSMark Johnston 	 * Size metaslabs according to the following heuristic:
153*240afd8cSMark Johnston 	 * - provide at least 8 metaslabs,
154*240afd8cSMark Johnston 	 * - without using a metaslab size larger than 512MB.
155*240afd8cSMark Johnston 	 * This approximates what OpenZFS does without being complicated.  In
156*240afd8cSMark Johnston 	 * practice we expect pools to be expanded upon first use, and OpenZFS
157*240afd8cSMark Johnston 	 * does not resize metaslabs in that case, so there is no right answer
158*240afd8cSMark Johnston 	 * here.  In general we want to provide large metaslabs even if the
159*240afd8cSMark Johnston 	 * image size is small, and 512MB is a reasonable size for pools up to
160*240afd8cSMark Johnston 	 * several hundred gigabytes.
161*240afd8cSMark Johnston 	 *
162*240afd8cSMark Johnston 	 * The user may override this heuristic using the "-o mssize" option.
163*240afd8cSMark Johnston 	 */
164*240afd8cSMark Johnston 	mssize = zfs->mssize;
165*240afd8cSMark Johnston 	if (mssize == 0) {
166*240afd8cSMark Johnston 		mssize = MAX(MIN(asize / 8, DFLTMSSIZE), MINMSSIZE);
167*240afd8cSMark Johnston 		if (!powerof2(mssize))
168*240afd8cSMark Johnston 			mssize = 1l << (flsll(mssize) - 1);
169*240afd8cSMark Johnston 	}
170*240afd8cSMark Johnston 	if (!powerof2(mssize))
171*240afd8cSMark Johnston 		errx(1, "metaslab size must be a power of 2");
172*240afd8cSMark Johnston 
173*240afd8cSMark Johnston 	/*
174*240afd8cSMark Johnston 	 * If we have some slop left over, try to cover it by resizing the vdev,
175*240afd8cSMark Johnston 	 * subject to the maxsize and minsize parameters.
176*240afd8cSMark Johnston 	 */
177*240afd8cSMark Johnston 	if (asize % mssize != 0) {
178*240afd8cSMark Johnston 		vdevsize1 = rounddown2(asize, mssize) + VDEV_LABEL_SPACE;
179*240afd8cSMark Johnston 		if (vdevsize1 < fsopts->minsize)
180*240afd8cSMark Johnston 			vdevsize1 = roundup2(asize, mssize) + VDEV_LABEL_SPACE;
181*240afd8cSMark Johnston 		if (vdevsize1 <= fsopts->maxsize)
182*240afd8cSMark Johnston 			vdevsize = vdevsize1;
183*240afd8cSMark Johnston 	}
184*240afd8cSMark Johnston 	asize = vdevsize - VDEV_LABEL_SPACE;
185*240afd8cSMark Johnston 
186*240afd8cSMark Johnston 	zfs->asize = asize;
187*240afd8cSMark Johnston 	zfs->vdevsize = vdevsize;
188*240afd8cSMark Johnston 	zfs->mssize = mssize;
189*240afd8cSMark Johnston 	zfs->msshift = flsll(mssize) - 1;
190*240afd8cSMark Johnston 	zfs->mscount = asize / mssize;
191*240afd8cSMark Johnston }
192*240afd8cSMark Johnston 
193*240afd8cSMark Johnston /*
194*240afd8cSMark Johnston  * Validate options and set some default values.
195*240afd8cSMark Johnston  */
196*240afd8cSMark Johnston static void
197*240afd8cSMark Johnston zfs_check_opts(fsinfo_t *fsopts)
198*240afd8cSMark Johnston {
199*240afd8cSMark Johnston 	zfs_opt_t *zfs;
200*240afd8cSMark Johnston 
201*240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
202*240afd8cSMark Johnston 
203*240afd8cSMark Johnston 	if (fsopts->offset != 0)
204*240afd8cSMark Johnston 		errx(1, "unhandled offset option");
205*240afd8cSMark Johnston 	if (fsopts->maxsize == 0)
206*240afd8cSMark Johnston 		errx(1, "an image size must be specified");
207*240afd8cSMark Johnston 
208*240afd8cSMark Johnston 	if (zfs->poolname == NULL)
209*240afd8cSMark Johnston 		errx(1, "a pool name must be specified");
210*240afd8cSMark Johnston 
211*240afd8cSMark Johnston 	if (zfs->rootpath == NULL)
212*240afd8cSMark Johnston 		easprintf(&zfs->rootpath, "/%s", zfs->poolname);
213*240afd8cSMark Johnston 	if (zfs->rootpath[0] != '/')
214*240afd8cSMark Johnston 		errx(1, "mountpoint `%s' must be absolute", zfs->rootpath);
215*240afd8cSMark Johnston 
216*240afd8cSMark Johnston 	if (zfs->ashift == 0)
217*240afd8cSMark Johnston 		zfs->ashift = 12;
218*240afd8cSMark Johnston 
219*240afd8cSMark Johnston 	zfs_size_vdev(fsopts);
220*240afd8cSMark Johnston }
221*240afd8cSMark Johnston 
222*240afd8cSMark Johnston void
223*240afd8cSMark Johnston zfs_cleanup_opts(fsinfo_t *fsopts)
224*240afd8cSMark Johnston {
225*240afd8cSMark Johnston 	struct dataset_desc *d, *tmp;
226*240afd8cSMark Johnston 	zfs_opt_t *zfs;
227*240afd8cSMark Johnston 
228*240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
229*240afd8cSMark Johnston 	free(zfs->rootpath);
230*240afd8cSMark Johnston 	free(zfs->bootfs);
231*240afd8cSMark Johnston 	free(__DECONST(void *, zfs->poolname));
232*240afd8cSMark Johnston 	STAILQ_FOREACH_SAFE(d, &zfs->datasetdescs, next, tmp) {
233*240afd8cSMark Johnston 		free(d->params);
234*240afd8cSMark Johnston 		free(d);
235*240afd8cSMark Johnston 	}
236*240afd8cSMark Johnston 	free(zfs);
237*240afd8cSMark Johnston 	free(fsopts->fs_options);
238*240afd8cSMark Johnston }
239*240afd8cSMark Johnston 
240*240afd8cSMark Johnston static size_t
241*240afd8cSMark Johnston nvlist_size(const nvlist_t *nvl)
242*240afd8cSMark Johnston {
243*240afd8cSMark Johnston 	return (sizeof(nvl->nv_header) + nvl->nv_size);
244*240afd8cSMark Johnston }
245*240afd8cSMark Johnston 
246*240afd8cSMark Johnston static void
247*240afd8cSMark Johnston nvlist_copy(const nvlist_t *nvl, char *buf, size_t sz)
248*240afd8cSMark Johnston {
249*240afd8cSMark Johnston 	assert(sz >= nvlist_size(nvl));
250*240afd8cSMark Johnston 
251*240afd8cSMark Johnston 	memcpy(buf, &nvl->nv_header, sizeof(nvl->nv_header));
252*240afd8cSMark Johnston 	memcpy(buf + sizeof(nvl->nv_header), nvl->nv_data, nvl->nv_size);
253*240afd8cSMark Johnston }
254*240afd8cSMark Johnston 
255*240afd8cSMark Johnston static nvlist_t *
256*240afd8cSMark Johnston pool_config_nvcreate(zfs_opt_t *zfs)
257*240afd8cSMark Johnston {
258*240afd8cSMark Johnston 	nvlist_t *featuresnv, *poolnv;
259*240afd8cSMark Johnston 
260*240afd8cSMark Johnston 	poolnv = nvlist_create(NV_UNIQUE_NAME);
261*240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_TXG, TXG);
262*240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VERSION, SPA_VERSION);
263*240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_EXPORTED);
264*240afd8cSMark Johnston 	nvlist_add_string(poolnv, ZPOOL_CONFIG_POOL_NAME, zfs->poolname);
265*240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_GUID, zfs->poolguid);
266*240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_TOP_GUID, zfs->vdevguid);
267*240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_GUID, zfs->vdevguid);
268*240afd8cSMark Johnston 	nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VDEV_CHILDREN, 1);
269*240afd8cSMark Johnston 
270*240afd8cSMark Johnston 	featuresnv = nvlist_create(NV_UNIQUE_NAME);
271*240afd8cSMark Johnston 	nvlist_add_nvlist(poolnv, ZPOOL_CONFIG_FEATURES_FOR_READ, featuresnv);
272*240afd8cSMark Johnston 	nvlist_destroy(featuresnv);
273*240afd8cSMark Johnston 
274*240afd8cSMark Johnston 	return (poolnv);
275*240afd8cSMark Johnston }
276*240afd8cSMark Johnston 
277*240afd8cSMark Johnston static nvlist_t *
278*240afd8cSMark Johnston pool_disk_vdev_config_nvcreate(zfs_opt_t *zfs)
279*240afd8cSMark Johnston {
280*240afd8cSMark Johnston 	nvlist_t *diskvdevnv;
281*240afd8cSMark Johnston 
282*240afd8cSMark Johnston 	assert(zfs->objarrid != 0);
283*240afd8cSMark Johnston 
284*240afd8cSMark Johnston 	diskvdevnv = nvlist_create(NV_UNIQUE_NAME);
285*240afd8cSMark Johnston 	nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK);
286*240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASHIFT, zfs->ashift);
287*240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASIZE, zfs->asize);
288*240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_GUID, zfs->vdevguid);
289*240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ID, 0);
290*240afd8cSMark Johnston 	nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_PATH, "/dev/null");
291*240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_WHOLE_DISK, 1);
292*240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG);
293*240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_ARRAY,
294*240afd8cSMark Johnston 	    zfs->objarrid);
295*240afd8cSMark Johnston 	nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_SHIFT,
296*240afd8cSMark Johnston 	    zfs->msshift);
297*240afd8cSMark Johnston 
298*240afd8cSMark Johnston 	return (diskvdevnv);
299*240afd8cSMark Johnston }
300*240afd8cSMark Johnston 
301*240afd8cSMark Johnston static nvlist_t *
302*240afd8cSMark Johnston pool_root_vdev_config_nvcreate(zfs_opt_t *zfs)
303*240afd8cSMark Johnston {
304*240afd8cSMark Johnston 	nvlist_t *diskvdevnv, *rootvdevnv;
305*240afd8cSMark Johnston 
306*240afd8cSMark Johnston 	diskvdevnv = pool_disk_vdev_config_nvcreate(zfs);
307*240afd8cSMark Johnston 	rootvdevnv = nvlist_create(NV_UNIQUE_NAME);
308*240afd8cSMark Johnston 
309*240afd8cSMark Johnston 	nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_ID, 0);
310*240afd8cSMark Johnston 	nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_GUID, zfs->poolguid);
311*240afd8cSMark Johnston 	nvlist_add_string(rootvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
312*240afd8cSMark Johnston 	nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG);
313*240afd8cSMark Johnston 	nvlist_add_nvlist_array(rootvdevnv, ZPOOL_CONFIG_CHILDREN, &diskvdevnv,
314*240afd8cSMark Johnston 	    1);
315*240afd8cSMark Johnston 	nvlist_destroy(diskvdevnv);
316*240afd8cSMark Johnston 
317*240afd8cSMark Johnston 	return (rootvdevnv);
318*240afd8cSMark Johnston }
319*240afd8cSMark Johnston 
320*240afd8cSMark Johnston /*
321*240afd8cSMark Johnston  * Create the pool's "config" object, which contains an nvlist describing pool
322*240afd8cSMark Johnston  * parameters and the vdev topology.  It is similar but not identical to the
323*240afd8cSMark Johnston  * nvlist stored in vdev labels.  The main difference is that vdev labels do not
324*240afd8cSMark Johnston  * describe the full vdev tree and in particular do not contain the "root"
325*240afd8cSMark Johnston  * meta-vdev.
326*240afd8cSMark Johnston  */
327*240afd8cSMark Johnston static void
328*240afd8cSMark Johnston pool_init_objdir_config(zfs_opt_t *zfs, zfs_zap_t *objdir)
329*240afd8cSMark Johnston {
330*240afd8cSMark Johnston 	dnode_phys_t *dnode;
331*240afd8cSMark Johnston 	nvlist_t *poolconfig, *vdevconfig;
332*240afd8cSMark Johnston 	void *configbuf;
333*240afd8cSMark Johnston 	uint64_t dnid;
334*240afd8cSMark Johnston 	off_t configloc, configblksz;
335*240afd8cSMark Johnston 	int error;
336*240afd8cSMark Johnston 
337*240afd8cSMark Johnston 	dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_PACKED_NVLIST,
338*240afd8cSMark Johnston 	    DMU_OT_PACKED_NVLIST_SIZE, sizeof(uint64_t), &dnid);
339*240afd8cSMark Johnston 
340*240afd8cSMark Johnston 	poolconfig = pool_config_nvcreate(zfs);
341*240afd8cSMark Johnston 
342*240afd8cSMark Johnston 	vdevconfig = pool_root_vdev_config_nvcreate(zfs);
343*240afd8cSMark Johnston 	nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig);
344*240afd8cSMark Johnston 	nvlist_destroy(vdevconfig);
345*240afd8cSMark Johnston 
346*240afd8cSMark Johnston 	error = nvlist_export(poolconfig);
347*240afd8cSMark Johnston 	if (error != 0)
348*240afd8cSMark Johnston 		errc(1, error, "nvlist_export");
349*240afd8cSMark Johnston 
350*240afd8cSMark Johnston 	configblksz = nvlist_size(poolconfig);
351*240afd8cSMark Johnston 	configloc = objset_space_alloc(zfs, zfs->mos, &configblksz);
352*240afd8cSMark Johnston 	configbuf = ecalloc(1, configblksz);
353*240afd8cSMark Johnston 	nvlist_copy(poolconfig, configbuf, configblksz);
354*240afd8cSMark Johnston 
355*240afd8cSMark Johnston 	vdev_pwrite_dnode_data(zfs, dnode, configbuf, configblksz, configloc);
356*240afd8cSMark Johnston 
357*240afd8cSMark Johnston 	dnode->dn_datablkszsec = configblksz >> MINBLOCKSHIFT;
358*240afd8cSMark Johnston 	dnode->dn_flags = DNODE_FLAG_USED_BYTES;
359*240afd8cSMark Johnston 	*(uint64_t *)DN_BONUS(dnode) = nvlist_size(poolconfig);
360*240afd8cSMark Johnston 
361*240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_CONFIG, dnid);
362*240afd8cSMark Johnston 
363*240afd8cSMark Johnston 	nvlist_destroy(poolconfig);
364*240afd8cSMark Johnston 	free(configbuf);
365*240afd8cSMark Johnston }
366*240afd8cSMark Johnston 
367*240afd8cSMark Johnston /*
368*240afd8cSMark Johnston  * Add objects block pointer list objects, used for deferred frees.  We don't do
369*240afd8cSMark Johnston  * anything with them, but they need to be present or OpenZFS will refuse to
370*240afd8cSMark Johnston  * import the pool.
371*240afd8cSMark Johnston  */
372*240afd8cSMark Johnston static void
373*240afd8cSMark Johnston pool_init_objdir_bplists(zfs_opt_t *zfs __unused, zfs_zap_t *objdir)
374*240afd8cSMark Johnston {
375*240afd8cSMark Johnston 	uint64_t dnid;
376*240afd8cSMark Johnston 
377*240afd8cSMark Johnston 	(void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR,
378*240afd8cSMark Johnston 	    BPOBJ_SIZE_V2, &dnid);
379*240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_FREE_BPOBJ, dnid);
380*240afd8cSMark Johnston 
381*240afd8cSMark Johnston 	(void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR,
382*240afd8cSMark Johnston 	    BPOBJ_SIZE_V2, &dnid);
383*240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_SYNC_BPLIST, dnid);
384*240afd8cSMark Johnston }
385*240afd8cSMark Johnston 
386*240afd8cSMark Johnston /*
387*240afd8cSMark Johnston  * Add required feature metadata objects.  We don't know anything about ZFS
388*240afd8cSMark Johnston  * features, so the objects are just empty ZAPs.
389*240afd8cSMark Johnston  */
390*240afd8cSMark Johnston static void
391*240afd8cSMark Johnston pool_init_objdir_feature_maps(zfs_opt_t *zfs, zfs_zap_t *objdir)
392*240afd8cSMark Johnston {
393*240afd8cSMark Johnston 	dnode_phys_t *dnode;
394*240afd8cSMark Johnston 	uint64_t dnid;
395*240afd8cSMark Johnston 
396*240afd8cSMark Johnston 	dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
397*240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_READ, dnid);
398*240afd8cSMark Johnston 	zap_write(zfs, zap_alloc(zfs->mos, dnode));
399*240afd8cSMark Johnston 
400*240afd8cSMark Johnston 	dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
401*240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_WRITE, dnid);
402*240afd8cSMark Johnston 	zap_write(zfs, zap_alloc(zfs->mos, dnode));
403*240afd8cSMark Johnston 
404*240afd8cSMark Johnston 	dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
405*240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_FEATURE_DESCRIPTIONS, dnid);
406*240afd8cSMark Johnston 	zap_write(zfs, zap_alloc(zfs->mos, dnode));
407*240afd8cSMark Johnston }
408*240afd8cSMark Johnston 
409*240afd8cSMark Johnston static void
410*240afd8cSMark Johnston pool_init_objdir_dsl(zfs_opt_t *zfs, zfs_zap_t *objdir)
411*240afd8cSMark Johnston {
412*240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_ROOT_DATASET,
413*240afd8cSMark Johnston 	    dsl_dir_id(zfs->rootdsldir));
414*240afd8cSMark Johnston }
415*240afd8cSMark Johnston 
416*240afd8cSMark Johnston static void
417*240afd8cSMark Johnston pool_init_objdir_poolprops(zfs_opt_t *zfs, zfs_zap_t *objdir)
418*240afd8cSMark Johnston {
419*240afd8cSMark Johnston 	dnode_phys_t *dnode;
420*240afd8cSMark Johnston 	uint64_t id;
421*240afd8cSMark Johnston 
422*240afd8cSMark Johnston 	dnode = objset_dnode_alloc(zfs->mos, DMU_OT_POOL_PROPS, &id);
423*240afd8cSMark Johnston 	zap_add_uint64(objdir, DMU_POOL_PROPS, id);
424*240afd8cSMark Johnston 
425*240afd8cSMark Johnston 	zfs->poolprops = zap_alloc(zfs->mos, dnode);
426*240afd8cSMark Johnston }
427*240afd8cSMark Johnston 
428*240afd8cSMark Johnston /*
429*240afd8cSMark Johnston  * Initialize the MOS object directory, the root of virtually all of the pool's
430*240afd8cSMark Johnston  * data and metadata.
431*240afd8cSMark Johnston  */
432*240afd8cSMark Johnston static void
433*240afd8cSMark Johnston pool_init_objdir(zfs_opt_t *zfs)
434*240afd8cSMark Johnston {
435*240afd8cSMark Johnston 	zfs_zap_t *zap;
436*240afd8cSMark Johnston 	dnode_phys_t *objdir;
437*240afd8cSMark Johnston 
438*240afd8cSMark Johnston 	objdir = objset_dnode_lookup(zfs->mos, DMU_POOL_DIRECTORY_OBJECT);
439*240afd8cSMark Johnston 
440*240afd8cSMark Johnston 	zap = zap_alloc(zfs->mos, objdir);
441*240afd8cSMark Johnston 	pool_init_objdir_config(zfs, zap);
442*240afd8cSMark Johnston 	pool_init_objdir_bplists(zfs, zap);
443*240afd8cSMark Johnston 	pool_init_objdir_feature_maps(zfs, zap);
444*240afd8cSMark Johnston 	pool_init_objdir_dsl(zfs, zap);
445*240afd8cSMark Johnston 	pool_init_objdir_poolprops(zfs, zap);
446*240afd8cSMark Johnston 	zap_write(zfs, zap);
447*240afd8cSMark Johnston }
448*240afd8cSMark Johnston 
449*240afd8cSMark Johnston /*
450*240afd8cSMark Johnston  * Initialize the meta-object set (MOS) and immediately write out several
451*240afd8cSMark Johnston  * special objects whose contents are already finalized, including the object
452*240afd8cSMark Johnston  * directory.
453*240afd8cSMark Johnston  *
454*240afd8cSMark Johnston  * Once the MOS is finalized, it'll look roughly like this:
455*240afd8cSMark Johnston  *
456*240afd8cSMark Johnston  *	object directory (ZAP)
457*240afd8cSMark Johnston  *	|-> vdev config object (nvlist)
458*240afd8cSMark Johnston  *	|-> features for read
459*240afd8cSMark Johnston  *	|-> features for write
460*240afd8cSMark Johnston  *	|-> feature descriptions
461*240afd8cSMark Johnston  *	|-> sync bplist
462*240afd8cSMark Johnston  *	|-> free bplist
463*240afd8cSMark Johnston  *	|-> pool properties
464*240afd8cSMark Johnston  *	L-> root DSL directory
465*240afd8cSMark Johnston  *	    |-> DSL child directory (ZAP)
466*240afd8cSMark Johnston  *	    |   |-> $MOS (DSL dir)
467*240afd8cSMark Johnston  *	    |   |   |-> child map
468*240afd8cSMark Johnston  *	    |   |   L-> props (ZAP)
469*240afd8cSMark Johnston  *	    |   |-> $FREE (DSL dir)
470*240afd8cSMark Johnston  *	    |   |   |-> child map
471*240afd8cSMark Johnston  *	    |   |   L-> props (ZAP)
472*240afd8cSMark Johnston  *	    |   |-> $ORIGIN (DSL dir)
473*240afd8cSMark Johnston  *	    |   |   |-> child map
474*240afd8cSMark Johnston  *	    |   |   |-> dataset
475*240afd8cSMark Johnston  *	    |   |   |   L-> deadlist
476*240afd8cSMark Johnston  *	    |   |   |-> snapshot
477*240afd8cSMark Johnston  *	    |   |   |   |-> deadlist
478*240afd8cSMark Johnston  *	    |   |   |   L-> snapshot names
479*240afd8cSMark Johnston  *	    |   |   |-> props (ZAP)
480*240afd8cSMark Johnston  *	    |   |   L-> clones (ZAP)
481*240afd8cSMark Johnston  *	    |   |-> dataset 1 (DSL dir)
482*240afd8cSMark Johnston  *	    |   |   |-> DSL dataset
483*240afd8cSMark Johnston  *	    |   |   |   |-> snapshot names
484*240afd8cSMark Johnston  *	    |   |   |   L-> deadlist
485*240afd8cSMark Johnston  *	    |   |   |-> child map
486*240afd8cSMark Johnston  *	    |   |   |   L-> ...
487*240afd8cSMark Johnston  *	    |   |   L-> props
488*240afd8cSMark Johnston  *	    |   |-> dataset 2
489*240afd8cSMark Johnston  *	    |   |   L-> ...
490*240afd8cSMark Johnston  *	    |   |-> ...
491*240afd8cSMark Johnston  *	    |   L-> dataset n
492*240afd8cSMark Johnston  *	    |-> DSL root dataset
493*240afd8cSMark Johnston  *	    |   |-> snapshot names
494*240afd8cSMark Johnston  *	    |   L-> deadlist
495*240afd8cSMark Johnston  *	    L-> props (ZAP)
496*240afd8cSMark Johnston  *	space map object array
497*240afd8cSMark Johnston  *	|-> space map 1
498*240afd8cSMark Johnston  *	|-> space map 2
499*240afd8cSMark Johnston  *	|-> ...
500*240afd8cSMark Johnston  *	L-> space map n (zfs->mscount)
501*240afd8cSMark Johnston  *
502*240afd8cSMark Johnston  * The space map object array is pointed to by the "msarray" property in the
503*240afd8cSMark Johnston  * pool configuration.
504*240afd8cSMark Johnston  */
505*240afd8cSMark Johnston static void
506*240afd8cSMark Johnston pool_init(zfs_opt_t *zfs)
507*240afd8cSMark Johnston {
508*240afd8cSMark Johnston 	uint64_t dnid;
509*240afd8cSMark Johnston 
510*240afd8cSMark Johnston 	zfs->poolguid = ((uint64_t)random() << 32) | random();
511*240afd8cSMark Johnston 	zfs->vdevguid = ((uint64_t)random() << 32) | random();
512*240afd8cSMark Johnston 
513*240afd8cSMark Johnston 	zfs->mos = objset_alloc(zfs, DMU_OST_META);
514*240afd8cSMark Johnston 
515*240afd8cSMark Johnston 	(void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_DIRECTORY, &dnid);
516*240afd8cSMark Johnston 	assert(dnid == DMU_POOL_DIRECTORY_OBJECT);
517*240afd8cSMark Johnston 
518*240afd8cSMark Johnston 	(void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_ARRAY, &zfs->objarrid);
519*240afd8cSMark Johnston 
520*240afd8cSMark Johnston 	dsl_init(zfs);
521*240afd8cSMark Johnston 
522*240afd8cSMark Johnston 	pool_init_objdir(zfs);
523*240afd8cSMark Johnston }
524*240afd8cSMark Johnston 
525*240afd8cSMark Johnston static void
526*240afd8cSMark Johnston pool_labels_write(zfs_opt_t *zfs)
527*240afd8cSMark Johnston {
528*240afd8cSMark Johnston 	uberblock_t *ub;
529*240afd8cSMark Johnston 	vdev_label_t *label;
530*240afd8cSMark Johnston 	nvlist_t *poolconfig, *vdevconfig;
531*240afd8cSMark Johnston 	int error;
532*240afd8cSMark Johnston 
533*240afd8cSMark Johnston 	label = ecalloc(1, sizeof(*label));
534*240afd8cSMark Johnston 
535*240afd8cSMark Johnston 	/*
536*240afd8cSMark Johnston 	 * Assemble the vdev configuration and store it in the label.
537*240afd8cSMark Johnston 	 */
538*240afd8cSMark Johnston 	poolconfig = pool_config_nvcreate(zfs);
539*240afd8cSMark Johnston 	vdevconfig = pool_disk_vdev_config_nvcreate(zfs);
540*240afd8cSMark Johnston 	nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig);
541*240afd8cSMark Johnston 	nvlist_destroy(vdevconfig);
542*240afd8cSMark Johnston 
543*240afd8cSMark Johnston 	error = nvlist_export(poolconfig);
544*240afd8cSMark Johnston 	if (error != 0)
545*240afd8cSMark Johnston 		errc(1, error, "nvlist_export");
546*240afd8cSMark Johnston 	nvlist_copy(poolconfig, label->vl_vdev_phys.vp_nvlist,
547*240afd8cSMark Johnston 	    sizeof(label->vl_vdev_phys.vp_nvlist));
548*240afd8cSMark Johnston 	nvlist_destroy(poolconfig);
549*240afd8cSMark Johnston 
550*240afd8cSMark Johnston 	/*
551*240afd8cSMark Johnston 	 * Fill out the uberblock.  Just make each one the same.  The embedded
552*240afd8cSMark Johnston 	 * checksum is calculated in vdev_label_write().
553*240afd8cSMark Johnston 	 */
554*240afd8cSMark Johnston 	for (size_t uoff = 0; uoff < sizeof(label->vl_uberblock);
555*240afd8cSMark Johnston 	    uoff += (1 << zfs->ashift)) {
556*240afd8cSMark Johnston 		ub = (uberblock_t *)(&label->vl_uberblock[0] + uoff);
557*240afd8cSMark Johnston 		ub->ub_magic = UBERBLOCK_MAGIC;
558*240afd8cSMark Johnston 		ub->ub_version = SPA_VERSION;
559*240afd8cSMark Johnston 		ub->ub_txg = TXG;
560*240afd8cSMark Johnston 		ub->ub_guid_sum = zfs->poolguid + zfs->vdevguid;
561*240afd8cSMark Johnston 		ub->ub_timestamp = 0;
562*240afd8cSMark Johnston 
563*240afd8cSMark Johnston 		ub->ub_software_version = SPA_VERSION;
564*240afd8cSMark Johnston 		ub->ub_mmp_magic = MMP_MAGIC;
565*240afd8cSMark Johnston 		ub->ub_mmp_delay = 0;
566*240afd8cSMark Johnston 		ub->ub_mmp_config = 0;
567*240afd8cSMark Johnston 		ub->ub_checkpoint_txg = 0;
568*240afd8cSMark Johnston 		objset_root_blkptr_copy(zfs->mos, &ub->ub_rootbp);
569*240afd8cSMark Johnston 	}
570*240afd8cSMark Johnston 
571*240afd8cSMark Johnston 	/*
572*240afd8cSMark Johnston 	 * Write out four copies of the label: two at the beginning of the vdev
573*240afd8cSMark Johnston 	 * and two at the end.
574*240afd8cSMark Johnston 	 */
575*240afd8cSMark Johnston 	for (int i = 0; i < VDEV_LABELS; i++)
576*240afd8cSMark Johnston 		vdev_label_write(zfs, i, label);
577*240afd8cSMark Johnston 
578*240afd8cSMark Johnston 	free(label);
579*240afd8cSMark Johnston }
580*240afd8cSMark Johnston 
581*240afd8cSMark Johnston static void
582*240afd8cSMark Johnston pool_fini(zfs_opt_t *zfs)
583*240afd8cSMark Johnston {
584*240afd8cSMark Johnston 	zap_write(zfs, zfs->poolprops);
585*240afd8cSMark Johnston 	dsl_write(zfs);
586*240afd8cSMark Johnston 	objset_write(zfs, zfs->mos);
587*240afd8cSMark Johnston 	pool_labels_write(zfs);
588*240afd8cSMark Johnston }
589*240afd8cSMark Johnston 
590*240afd8cSMark Johnston struct dnode_cursor *
591*240afd8cSMark Johnston dnode_cursor_init(zfs_opt_t *zfs, zfs_objset_t *os, dnode_phys_t *dnode,
592*240afd8cSMark Johnston     off_t size, off_t blksz)
593*240afd8cSMark Johnston {
594*240afd8cSMark Johnston 	struct dnode_cursor *c;
595*240afd8cSMark Johnston 	uint64_t nbppindir, indlevel, ndatablks, nindblks;
596*240afd8cSMark Johnston 
597*240afd8cSMark Johnston 	assert(dnode->dn_nblkptr == 1);
598*240afd8cSMark Johnston 	assert(blksz <= MAXBLOCKSIZE);
599*240afd8cSMark Johnston 
600*240afd8cSMark Johnston 	if (blksz == 0) {
601*240afd8cSMark Johnston 		/* Must be between 1<<ashift and 128KB. */
602*240afd8cSMark Johnston 		blksz = MIN(MAXBLOCKSIZE, MAX(1 << zfs->ashift,
603*240afd8cSMark Johnston 		    powerof2(size) ? size : (1ul << flsll(size))));
604*240afd8cSMark Johnston 	}
605*240afd8cSMark Johnston 	assert(powerof2(blksz));
606*240afd8cSMark Johnston 
607*240afd8cSMark Johnston 	/*
608*240afd8cSMark Johnston 	 * Do we need indirect blocks?  Figure out how many levels are needed
609*240afd8cSMark Johnston 	 * (indlevel == 1 means no indirect blocks) and how much space is needed
610*240afd8cSMark Johnston 	 * (it has to be allocated up-front to break the dependency cycle
611*240afd8cSMark Johnston 	 * described in objset_write()).
612*240afd8cSMark Johnston 	 */
613*240afd8cSMark Johnston 	ndatablks = size == 0 ? 0 : howmany(size, blksz);
614*240afd8cSMark Johnston 	nindblks = 0;
615*240afd8cSMark Johnston 	for (indlevel = 1, nbppindir = 1; ndatablks > nbppindir; indlevel++) {
616*240afd8cSMark Johnston 		nbppindir *= BLKPTR_PER_INDIR;
617*240afd8cSMark Johnston 		nindblks += howmany(ndatablks, indlevel * nbppindir);
618*240afd8cSMark Johnston 	}
619*240afd8cSMark Johnston 	assert(indlevel < INDIR_LEVELS);
620*240afd8cSMark Johnston 
621*240afd8cSMark Johnston 	dnode->dn_nlevels = (uint8_t)indlevel;
622*240afd8cSMark Johnston 	dnode->dn_maxblkid = ndatablks > 0 ? ndatablks - 1 : 0;
623*240afd8cSMark Johnston 	dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT;
624*240afd8cSMark Johnston 
625*240afd8cSMark Johnston 	c = ecalloc(1, sizeof(*c));
626*240afd8cSMark Johnston 	if (nindblks > 0) {
627*240afd8cSMark Johnston 		c->indspace = nindblks * MAXBLOCKSIZE;
628*240afd8cSMark Johnston 		c->indloc = objset_space_alloc(zfs, os, &c->indspace);
629*240afd8cSMark Johnston 	}
630*240afd8cSMark Johnston 	c->dnode = dnode;
631*240afd8cSMark Johnston 	c->dataoff = 0;
632*240afd8cSMark Johnston 	c->datablksz = blksz;
633*240afd8cSMark Johnston 
634*240afd8cSMark Johnston 	return (c);
635*240afd8cSMark Johnston }
636*240afd8cSMark Johnston 
637*240afd8cSMark Johnston static void
638*240afd8cSMark Johnston _dnode_cursor_flush(zfs_opt_t *zfs, struct dnode_cursor *c, int levels)
639*240afd8cSMark Johnston {
640*240afd8cSMark Johnston 	blkptr_t *bp, *pbp;
641*240afd8cSMark Johnston 	void *buf;
642*240afd8cSMark Johnston 	uint64_t fill;
643*240afd8cSMark Johnston 	off_t blkid, blksz, loc;
644*240afd8cSMark Johnston 
645*240afd8cSMark Johnston 	assert(levels > 0);
646*240afd8cSMark Johnston 	assert(levels <= c->dnode->dn_nlevels - 1);
647*240afd8cSMark Johnston 
648*240afd8cSMark Johnston 	blksz = MAXBLOCKSIZE;
649*240afd8cSMark Johnston 	blkid = (c->dataoff / c->datablksz) / BLKPTR_PER_INDIR;
650*240afd8cSMark Johnston 	for (int level = 1; level <= levels; level++) {
651*240afd8cSMark Johnston 		buf = c->inddir[level - 1];
652*240afd8cSMark Johnston 
653*240afd8cSMark Johnston 		if (level == c->dnode->dn_nlevels - 1) {
654*240afd8cSMark Johnston 			pbp = &c->dnode->dn_blkptr[0];
655*240afd8cSMark Johnston 		} else {
656*240afd8cSMark Johnston 			uint64_t iblkid;
657*240afd8cSMark Johnston 
658*240afd8cSMark Johnston 			iblkid = blkid & (BLKPTR_PER_INDIR - 1);
659*240afd8cSMark Johnston 			pbp = (blkptr_t *)
660*240afd8cSMark Johnston 			    &c->inddir[level][iblkid * sizeof(blkptr_t)];
661*240afd8cSMark Johnston 		}
662*240afd8cSMark Johnston 
663*240afd8cSMark Johnston 		/*
664*240afd8cSMark Johnston 		 * Space for indirect blocks is allocated up-front; see the
665*240afd8cSMark Johnston 		 * comment in objset_write().
666*240afd8cSMark Johnston 		 */
667*240afd8cSMark Johnston 		loc = c->indloc;
668*240afd8cSMark Johnston 		c->indloc += blksz;
669*240afd8cSMark Johnston 		assert(c->indspace >= blksz);
670*240afd8cSMark Johnston 		c->indspace -= blksz;
671*240afd8cSMark Johnston 
672*240afd8cSMark Johnston 		bp = buf;
673*240afd8cSMark Johnston 		fill = 0;
674*240afd8cSMark Johnston 		for (size_t i = 0; i < BLKPTR_PER_INDIR; i++)
675*240afd8cSMark Johnston 			fill += BP_GET_FILL(&bp[i]);
676*240afd8cSMark Johnston 
677*240afd8cSMark Johnston 		vdev_pwrite_dnode_indir(zfs, c->dnode, level, fill, buf, blksz,
678*240afd8cSMark Johnston 		    loc, pbp);
679*240afd8cSMark Johnston 		memset(buf, 0, MAXBLOCKSIZE);
680*240afd8cSMark Johnston 
681*240afd8cSMark Johnston 		blkid /= BLKPTR_PER_INDIR;
682*240afd8cSMark Johnston 	}
683*240afd8cSMark Johnston }
684*240afd8cSMark Johnston 
685*240afd8cSMark Johnston blkptr_t *
686*240afd8cSMark Johnston dnode_cursor_next(zfs_opt_t *zfs, struct dnode_cursor *c, off_t off)
687*240afd8cSMark Johnston {
688*240afd8cSMark Johnston 	off_t blkid, l1id;
689*240afd8cSMark Johnston 	int levels;
690*240afd8cSMark Johnston 
691*240afd8cSMark Johnston 	if (c->dnode->dn_nlevels == 1) {
692*240afd8cSMark Johnston 		assert(off < MAXBLOCKSIZE);
693*240afd8cSMark Johnston 		return (&c->dnode->dn_blkptr[0]);
694*240afd8cSMark Johnston 	}
695*240afd8cSMark Johnston 
696*240afd8cSMark Johnston 	assert(off % c->datablksz == 0);
697*240afd8cSMark Johnston 
698*240afd8cSMark Johnston 	/* Do we need to flush any full indirect blocks? */
699*240afd8cSMark Johnston 	if (off > 0) {
700*240afd8cSMark Johnston 		blkid = off / c->datablksz;
701*240afd8cSMark Johnston 		for (levels = 0; levels < c->dnode->dn_nlevels - 1; levels++) {
702*240afd8cSMark Johnston 			if (blkid % BLKPTR_PER_INDIR != 0)
703*240afd8cSMark Johnston 				break;
704*240afd8cSMark Johnston 			blkid /= BLKPTR_PER_INDIR;
705*240afd8cSMark Johnston 		}
706*240afd8cSMark Johnston 		if (levels > 0)
707*240afd8cSMark Johnston 			_dnode_cursor_flush(zfs, c, levels);
708*240afd8cSMark Johnston 	}
709*240afd8cSMark Johnston 
710*240afd8cSMark Johnston 	c->dataoff = off;
711*240afd8cSMark Johnston 	l1id = (off / c->datablksz) & (BLKPTR_PER_INDIR - 1);
712*240afd8cSMark Johnston 	return ((blkptr_t *)&c->inddir[0][l1id * sizeof(blkptr_t)]);
713*240afd8cSMark Johnston }
714*240afd8cSMark Johnston 
715*240afd8cSMark Johnston void
716*240afd8cSMark Johnston dnode_cursor_finish(zfs_opt_t *zfs, struct dnode_cursor *c)
717*240afd8cSMark Johnston {
718*240afd8cSMark Johnston 	int levels;
719*240afd8cSMark Johnston 
720*240afd8cSMark Johnston 	levels = c->dnode->dn_nlevels - 1;
721*240afd8cSMark Johnston 	if (levels > 0)
722*240afd8cSMark Johnston 		_dnode_cursor_flush(zfs, c, levels);
723*240afd8cSMark Johnston 	assert(c->indspace == 0);
724*240afd8cSMark Johnston 	free(c);
725*240afd8cSMark Johnston }
726*240afd8cSMark Johnston 
727*240afd8cSMark Johnston void
728*240afd8cSMark Johnston zfs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts)
729*240afd8cSMark Johnston {
730*240afd8cSMark Johnston 	zfs_opt_t *zfs;
731*240afd8cSMark Johnston 	int dirfd;
732*240afd8cSMark Johnston 
733*240afd8cSMark Johnston 	zfs = fsopts->fs_specific;
734*240afd8cSMark Johnston 
735*240afd8cSMark Johnston 	/*
736*240afd8cSMark Johnston 	 * Use a fixed seed to provide reproducible pseudo-random numbers for
737*240afd8cSMark Johnston 	 * on-disk structures when needed (e.g., GUIDs, ZAP hash salts).
738*240afd8cSMark Johnston 	 */
739*240afd8cSMark Johnston 	srandom(1729);
740*240afd8cSMark Johnston 
741*240afd8cSMark Johnston 	zfs_check_opts(fsopts);
742*240afd8cSMark Johnston 
743*240afd8cSMark Johnston 	if (!zfs->nowarn) {
744*240afd8cSMark Johnston 		fprintf(stderr,
745*240afd8cSMark Johnston 		    "ZFS support is currently considered experimental. "
746*240afd8cSMark Johnston 		    "Do not use it for anything critical.\n");
747*240afd8cSMark Johnston 	}
748*240afd8cSMark Johnston 
749*240afd8cSMark Johnston 	dirfd = open(dir, O_DIRECTORY | O_RDONLY);
750*240afd8cSMark Johnston 	if (dirfd < 0)
751*240afd8cSMark Johnston 		err(1, "open(%s)", dir);
752*240afd8cSMark Johnston 
753*240afd8cSMark Johnston 	vdev_init(zfs, image);
754*240afd8cSMark Johnston 	pool_init(zfs);
755*240afd8cSMark Johnston 	fs_build(zfs, dirfd, root);
756*240afd8cSMark Johnston 	pool_fini(zfs);
757*240afd8cSMark Johnston 	vdev_fini(zfs);
758*240afd8cSMark Johnston }
759