1240afd8cSMark Johnston /*-
24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
3240afd8cSMark Johnston *
4240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation
5240afd8cSMark Johnston *
6240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from
7240afd8cSMark Johnston * the FreeBSD Foundation.
8240afd8cSMark Johnston *
9240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without
10240afd8cSMark Johnston * modification, are permitted provided that the following conditions are
11240afd8cSMark Johnston * met:
12240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright
13240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer.
14240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright
15240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in
16240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution.
17240afd8cSMark Johnston *
18240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28240afd8cSMark Johnston * SUCH DAMAGE.
29240afd8cSMark Johnston */
30240afd8cSMark Johnston
31240afd8cSMark Johnston #include <sys/param.h>
32240afd8cSMark Johnston #include <sys/errno.h>
33240afd8cSMark Johnston #include <sys/queue.h>
34240afd8cSMark Johnston
35240afd8cSMark Johnston #include <assert.h>
36a9e7a44cSMark Johnston #include <ctype.h>
37240afd8cSMark Johnston #include <fcntl.h>
38187084ddSMark Johnston #include <stdalign.h>
39240afd8cSMark Johnston #include <stdbool.h>
40240afd8cSMark Johnston #include <stddef.h>
41240afd8cSMark Johnston #include <stdlib.h>
42240afd8cSMark Johnston #include <string.h>
43240afd8cSMark Johnston #include <unistd.h>
44240afd8cSMark Johnston
45240afd8cSMark Johnston #include <util.h>
46240afd8cSMark Johnston
47240afd8cSMark Johnston #include "makefs.h"
48240afd8cSMark Johnston #include "zfs.h"
49240afd8cSMark Johnston
50240afd8cSMark Johnston #define VDEV_LABEL_SPACE \
51240afd8cSMark Johnston ((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE))
52240afd8cSMark Johnston _Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, "");
53240afd8cSMark Johnston
54240afd8cSMark Johnston #define MINMSSIZE ((off_t)1 << 24) /* 16MB */
55240afd8cSMark Johnston #define DFLTMSSIZE ((off_t)1 << 29) /* 512MB */
56240afd8cSMark Johnston #define MAXMSSIZE ((off_t)1 << 34) /* 16GB */
57240afd8cSMark Johnston
58240afd8cSMark Johnston #define INDIR_LEVELS 6
59240afd8cSMark Johnston /* Indirect blocks are always 128KB. */
60240afd8cSMark Johnston #define BLKPTR_PER_INDIR (MAXBLOCKSIZE / sizeof(blkptr_t))
61240afd8cSMark Johnston
62240afd8cSMark Johnston struct dnode_cursor {
63240afd8cSMark Johnston char inddir[INDIR_LEVELS][MAXBLOCKSIZE];
64240afd8cSMark Johnston off_t indloc;
65240afd8cSMark Johnston off_t indspace;
66240afd8cSMark Johnston dnode_phys_t *dnode;
67240afd8cSMark Johnston off_t dataoff;
68240afd8cSMark Johnston off_t datablksz;
69240afd8cSMark Johnston };
70240afd8cSMark Johnston
71240afd8cSMark Johnston void
zfs_prep_opts(fsinfo_t * fsopts)72240afd8cSMark Johnston zfs_prep_opts(fsinfo_t *fsopts)
73240afd8cSMark Johnston {
74c4d26f02SMark Johnston zfs_opt_t *zfs;
75187084ddSMark Johnston size_t align;
76187084ddSMark Johnston
77187084ddSMark Johnston align = alignof(uint64_t);
78c4d26f02SMark Johnston zfs = aligned_alloc(align, roundup2(sizeof(*zfs), align));
79187084ddSMark Johnston if (zfs == NULL)
80187084ddSMark Johnston err(1, "aligned_alloc");
81187084ddSMark Johnston memset(zfs, 0, sizeof(*zfs));
82240afd8cSMark Johnston
83240afd8cSMark Johnston const option_t zfs_options[] = {
84240afd8cSMark Johnston { '\0', "bootfs", &zfs->bootfs, OPT_STRPTR,
85240afd8cSMark Johnston 0, 0, "Bootable dataset" },
86240afd8cSMark Johnston { '\0', "mssize", &zfs->mssize, OPT_INT64,
87240afd8cSMark Johnston MINMSSIZE, MAXMSSIZE, "Metaslab size" },
88240afd8cSMark Johnston { '\0', "poolname", &zfs->poolname, OPT_STRPTR,
89240afd8cSMark Johnston 0, 0, "ZFS pool name" },
90240afd8cSMark Johnston { '\0', "rootpath", &zfs->rootpath, OPT_STRPTR,
91240afd8cSMark Johnston 0, 0, "Prefix for all dataset mount points" },
92240afd8cSMark Johnston { '\0', "ashift", &zfs->ashift, OPT_INT32,
93240afd8cSMark Johnston MINBLOCKSHIFT, MAXBLOCKSHIFT, "ZFS pool ashift" },
94*4e15366cSMark Johnston { '\0', "verify-txgs", &zfs->verify_txgs, OPT_BOOL,
95*4e15366cSMark Johnston 0, 0, "Make OpenZFS verify data upon import" },
96240afd8cSMark Johnston { '\0', "nowarn", &zfs->nowarn, OPT_BOOL,
97d9fe7182SMark Johnston 0, 0, "Provided for backwards compatibility, ignored" },
98240afd8cSMark Johnston { .name = NULL }
99240afd8cSMark Johnston };
100240afd8cSMark Johnston
101240afd8cSMark Johnston STAILQ_INIT(&zfs->datasetdescs);
102240afd8cSMark Johnston
103240afd8cSMark Johnston fsopts->fs_specific = zfs;
104240afd8cSMark Johnston fsopts->fs_options = copy_opts(zfs_options);
105240afd8cSMark Johnston }
106240afd8cSMark Johnston
107240afd8cSMark Johnston int
zfs_parse_opts(const char * option,fsinfo_t * fsopts)108240afd8cSMark Johnston zfs_parse_opts(const char *option, fsinfo_t *fsopts)
109240afd8cSMark Johnston {
110240afd8cSMark Johnston zfs_opt_t *zfs;
111240afd8cSMark Johnston struct dataset_desc *dsdesc;
112240afd8cSMark Johnston char buf[BUFSIZ], *opt, *val;
113240afd8cSMark Johnston int rv;
114240afd8cSMark Johnston
115240afd8cSMark Johnston zfs = fsopts->fs_specific;
116240afd8cSMark Johnston
117240afd8cSMark Johnston opt = val = estrdup(option);
118240afd8cSMark Johnston opt = strsep(&val, "=");
119240afd8cSMark Johnston if (strcmp(opt, "fs") == 0) {
120240afd8cSMark Johnston if (val == NULL)
121240afd8cSMark Johnston errx(1, "invalid filesystem parameters `%s'", option);
122240afd8cSMark Johnston
123240afd8cSMark Johnston /*
124240afd8cSMark Johnston * Dataset descriptions will be parsed later, in dsl_init().
125240afd8cSMark Johnston * Just stash them away for now.
126240afd8cSMark Johnston */
127240afd8cSMark Johnston dsdesc = ecalloc(1, sizeof(*dsdesc));
128240afd8cSMark Johnston dsdesc->params = estrdup(val);
129240afd8cSMark Johnston free(opt);
130240afd8cSMark Johnston STAILQ_INSERT_TAIL(&zfs->datasetdescs, dsdesc, next);
131240afd8cSMark Johnston return (1);
132240afd8cSMark Johnston }
133240afd8cSMark Johnston free(opt);
134240afd8cSMark Johnston
135240afd8cSMark Johnston rv = set_option(fsopts->fs_options, option, buf, sizeof(buf));
136240afd8cSMark Johnston return (rv == -1 ? 0 : 1);
137240afd8cSMark Johnston }
138240afd8cSMark Johnston
139240afd8cSMark Johnston static void
zfs_size_vdev(fsinfo_t * fsopts)140240afd8cSMark Johnston zfs_size_vdev(fsinfo_t *fsopts)
141240afd8cSMark Johnston {
142240afd8cSMark Johnston zfs_opt_t *zfs;
143240afd8cSMark Johnston off_t asize, mssize, vdevsize, vdevsize1;
144240afd8cSMark Johnston
145240afd8cSMark Johnston zfs = fsopts->fs_specific;
146240afd8cSMark Johnston
147240afd8cSMark Johnston assert(fsopts->maxsize != 0);
148240afd8cSMark Johnston assert(zfs->ashift != 0);
149240afd8cSMark Johnston
150240afd8cSMark Johnston /*
151240afd8cSMark Johnston * Figure out how big the vdev should be.
152240afd8cSMark Johnston */
153240afd8cSMark Johnston vdevsize = rounddown2(fsopts->maxsize, 1 << zfs->ashift);
154240afd8cSMark Johnston if (vdevsize < MINDEVSIZE)
155240afd8cSMark Johnston errx(1, "maximum image size is too small");
156240afd8cSMark Johnston if (vdevsize < fsopts->minsize || vdevsize > fsopts->maxsize) {
157240afd8cSMark Johnston errx(1, "image size bounds must be multiples of %d",
158240afd8cSMark Johnston 1 << zfs->ashift);
159240afd8cSMark Johnston }
160240afd8cSMark Johnston asize = vdevsize - VDEV_LABEL_SPACE;
161240afd8cSMark Johnston
162240afd8cSMark Johnston /*
163240afd8cSMark Johnston * Size metaslabs according to the following heuristic:
164240afd8cSMark Johnston * - provide at least 8 metaslabs,
165240afd8cSMark Johnston * - without using a metaslab size larger than 512MB.
166240afd8cSMark Johnston * This approximates what OpenZFS does without being complicated. In
167240afd8cSMark Johnston * practice we expect pools to be expanded upon first use, and OpenZFS
168240afd8cSMark Johnston * does not resize metaslabs in that case, so there is no right answer
169240afd8cSMark Johnston * here. In general we want to provide large metaslabs even if the
170240afd8cSMark Johnston * image size is small, and 512MB is a reasonable size for pools up to
171240afd8cSMark Johnston * several hundred gigabytes.
172240afd8cSMark Johnston *
173240afd8cSMark Johnston * The user may override this heuristic using the "-o mssize" option.
174240afd8cSMark Johnston */
175240afd8cSMark Johnston mssize = zfs->mssize;
176240afd8cSMark Johnston if (mssize == 0) {
177240afd8cSMark Johnston mssize = MAX(MIN(asize / 8, DFLTMSSIZE), MINMSSIZE);
178240afd8cSMark Johnston if (!powerof2(mssize))
179240afd8cSMark Johnston mssize = 1l << (flsll(mssize) - 1);
180240afd8cSMark Johnston }
181240afd8cSMark Johnston if (!powerof2(mssize))
182240afd8cSMark Johnston errx(1, "metaslab size must be a power of 2");
183240afd8cSMark Johnston
184240afd8cSMark Johnston /*
185240afd8cSMark Johnston * If we have some slop left over, try to cover it by resizing the vdev,
186240afd8cSMark Johnston * subject to the maxsize and minsize parameters.
187240afd8cSMark Johnston */
188240afd8cSMark Johnston if (asize % mssize != 0) {
189240afd8cSMark Johnston vdevsize1 = rounddown2(asize, mssize) + VDEV_LABEL_SPACE;
190240afd8cSMark Johnston if (vdevsize1 < fsopts->minsize)
191240afd8cSMark Johnston vdevsize1 = roundup2(asize, mssize) + VDEV_LABEL_SPACE;
192240afd8cSMark Johnston if (vdevsize1 <= fsopts->maxsize)
193240afd8cSMark Johnston vdevsize = vdevsize1;
194240afd8cSMark Johnston }
195240afd8cSMark Johnston asize = vdevsize - VDEV_LABEL_SPACE;
196240afd8cSMark Johnston
197240afd8cSMark Johnston zfs->asize = asize;
198240afd8cSMark Johnston zfs->vdevsize = vdevsize;
199240afd8cSMark Johnston zfs->mssize = mssize;
200240afd8cSMark Johnston zfs->msshift = flsll(mssize) - 1;
201240afd8cSMark Johnston zfs->mscount = asize / mssize;
202240afd8cSMark Johnston }
203240afd8cSMark Johnston
204240afd8cSMark Johnston /*
205240afd8cSMark Johnston * Validate options and set some default values.
206240afd8cSMark Johnston */
207240afd8cSMark Johnston static void
zfs_check_opts(fsinfo_t * fsopts)208240afd8cSMark Johnston zfs_check_opts(fsinfo_t *fsopts)
209240afd8cSMark Johnston {
210240afd8cSMark Johnston zfs_opt_t *zfs;
211240afd8cSMark Johnston
212240afd8cSMark Johnston zfs = fsopts->fs_specific;
213240afd8cSMark Johnston
214240afd8cSMark Johnston if (fsopts->offset != 0)
215240afd8cSMark Johnston errx(1, "unhandled offset option");
216240afd8cSMark Johnston if (fsopts->maxsize == 0)
217240afd8cSMark Johnston errx(1, "an image size must be specified");
218240afd8cSMark Johnston
219240afd8cSMark Johnston if (zfs->poolname == NULL)
220240afd8cSMark Johnston errx(1, "a pool name must be specified");
221a9e7a44cSMark Johnston if (!isalpha(zfs->poolname[0]))
222a9e7a44cSMark Johnston errx(1, "the pool name must begin with a letter");
223a9e7a44cSMark Johnston for (size_t i = 0, len = strlen(zfs->poolname); i < len; i++) {
224a9e7a44cSMark Johnston if (!isalnum(zfs->poolname[i]) && zfs->poolname[i] != '_')
225a9e7a44cSMark Johnston errx(1, "invalid character '%c' in pool name",
226a9e7a44cSMark Johnston zfs->poolname[i]);
227a9e7a44cSMark Johnston }
228a9e7a44cSMark Johnston if (strcmp(zfs->poolname, "mirror") == 0 ||
229a9e7a44cSMark Johnston strcmp(zfs->poolname, "raidz") == 0 ||
230a9e7a44cSMark Johnston strcmp(zfs->poolname, "draid") == 0) {
231a9e7a44cSMark Johnston errx(1, "pool name '%s' is reserved and cannot be used",
232a9e7a44cSMark Johnston zfs->poolname);
233a9e7a44cSMark Johnston }
234240afd8cSMark Johnston
235240afd8cSMark Johnston if (zfs->rootpath == NULL)
236240afd8cSMark Johnston easprintf(&zfs->rootpath, "/%s", zfs->poolname);
237240afd8cSMark Johnston if (zfs->rootpath[0] != '/')
238240afd8cSMark Johnston errx(1, "mountpoint `%s' must be absolute", zfs->rootpath);
239240afd8cSMark Johnston
240240afd8cSMark Johnston if (zfs->ashift == 0)
241240afd8cSMark Johnston zfs->ashift = 12;
242240afd8cSMark Johnston
243240afd8cSMark Johnston zfs_size_vdev(fsopts);
244240afd8cSMark Johnston }
245240afd8cSMark Johnston
246240afd8cSMark Johnston void
zfs_cleanup_opts(fsinfo_t * fsopts)247240afd8cSMark Johnston zfs_cleanup_opts(fsinfo_t *fsopts)
248240afd8cSMark Johnston {
249240afd8cSMark Johnston struct dataset_desc *d, *tmp;
250240afd8cSMark Johnston zfs_opt_t *zfs;
251240afd8cSMark Johnston
252240afd8cSMark Johnston zfs = fsopts->fs_specific;
253240afd8cSMark Johnston free(zfs->rootpath);
254240afd8cSMark Johnston free(zfs->bootfs);
255240afd8cSMark Johnston free(__DECONST(void *, zfs->poolname));
256240afd8cSMark Johnston STAILQ_FOREACH_SAFE(d, &zfs->datasetdescs, next, tmp) {
257240afd8cSMark Johnston free(d->params);
258240afd8cSMark Johnston free(d);
259240afd8cSMark Johnston }
260240afd8cSMark Johnston free(zfs);
261240afd8cSMark Johnston free(fsopts->fs_options);
262240afd8cSMark Johnston }
263240afd8cSMark Johnston
264240afd8cSMark Johnston static size_t
nvlist_size(const nvlist_t * nvl)265240afd8cSMark Johnston nvlist_size(const nvlist_t *nvl)
266240afd8cSMark Johnston {
267240afd8cSMark Johnston return (sizeof(nvl->nv_header) + nvl->nv_size);
268240afd8cSMark Johnston }
269240afd8cSMark Johnston
270240afd8cSMark Johnston static void
nvlist_copy(const nvlist_t * nvl,char * buf,size_t sz)271240afd8cSMark Johnston nvlist_copy(const nvlist_t *nvl, char *buf, size_t sz)
272240afd8cSMark Johnston {
273240afd8cSMark Johnston assert(sz >= nvlist_size(nvl));
274240afd8cSMark Johnston
275240afd8cSMark Johnston memcpy(buf, &nvl->nv_header, sizeof(nvl->nv_header));
276240afd8cSMark Johnston memcpy(buf + sizeof(nvl->nv_header), nvl->nv_data, nvl->nv_size);
277240afd8cSMark Johnston }
278240afd8cSMark Johnston
/*
 * Build a 64-bit GUID from two consecutive random() values, the first one
 * forming the upper 32 bits.
 *
 * The two calls are made in separate statements: when both appear as
 * operands of a single expression the order in which they run is
 * unspecified, so the result for a given generator state could differ
 * between compilers.
 *
 * Avoid returning a GUID of 0, just to avoid the possibility that something
 * will interpret that as meaning that the GUID is uninitialized.
 */
uint64_t
randomguid(void)
{
	uint64_t hi, lo, guid;

	do {
		hi = (uint64_t)random();
		lo = (uint64_t)random();
		guid = (hi << 32) | lo;
	} while (guid == 0);

	return (guid);
}
29414c5cf3aSMark Johnston
295240afd8cSMark Johnston static nvlist_t *
pool_config_nvcreate(zfs_opt_t * zfs)296240afd8cSMark Johnston pool_config_nvcreate(zfs_opt_t *zfs)
297240afd8cSMark Johnston {
298240afd8cSMark Johnston nvlist_t *featuresnv, *poolnv;
299240afd8cSMark Johnston
300240afd8cSMark Johnston poolnv = nvlist_create(NV_UNIQUE_NAME);
301240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_TXG, TXG);
302240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VERSION, SPA_VERSION);
303240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_EXPORTED);
304240afd8cSMark Johnston nvlist_add_string(poolnv, ZPOOL_CONFIG_POOL_NAME, zfs->poolname);
305240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_GUID, zfs->poolguid);
306240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_TOP_GUID, zfs->vdevguid);
307240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_GUID, zfs->vdevguid);
308240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VDEV_CHILDREN, 1);
309240afd8cSMark Johnston
310240afd8cSMark Johnston featuresnv = nvlist_create(NV_UNIQUE_NAME);
311240afd8cSMark Johnston nvlist_add_nvlist(poolnv, ZPOOL_CONFIG_FEATURES_FOR_READ, featuresnv);
312240afd8cSMark Johnston nvlist_destroy(featuresnv);
313240afd8cSMark Johnston
314240afd8cSMark Johnston return (poolnv);
315240afd8cSMark Johnston }
316240afd8cSMark Johnston
317240afd8cSMark Johnston static nvlist_t *
pool_disk_vdev_config_nvcreate(zfs_opt_t * zfs)318240afd8cSMark Johnston pool_disk_vdev_config_nvcreate(zfs_opt_t *zfs)
319240afd8cSMark Johnston {
320240afd8cSMark Johnston nvlist_t *diskvdevnv;
321240afd8cSMark Johnston
322240afd8cSMark Johnston assert(zfs->objarrid != 0);
323240afd8cSMark Johnston
324240afd8cSMark Johnston diskvdevnv = nvlist_create(NV_UNIQUE_NAME);
325240afd8cSMark Johnston nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK);
326240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASHIFT, zfs->ashift);
327240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASIZE, zfs->asize);
328240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_GUID, zfs->vdevguid);
329240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ID, 0);
330240afd8cSMark Johnston nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_PATH, "/dev/null");
331240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_WHOLE_DISK, 1);
332240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG);
333240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_ARRAY,
334240afd8cSMark Johnston zfs->objarrid);
335240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_SHIFT,
336240afd8cSMark Johnston zfs->msshift);
337240afd8cSMark Johnston
338240afd8cSMark Johnston return (diskvdevnv);
339240afd8cSMark Johnston }
340240afd8cSMark Johnston
341240afd8cSMark Johnston static nvlist_t *
pool_root_vdev_config_nvcreate(zfs_opt_t * zfs)342240afd8cSMark Johnston pool_root_vdev_config_nvcreate(zfs_opt_t *zfs)
343240afd8cSMark Johnston {
344240afd8cSMark Johnston nvlist_t *diskvdevnv, *rootvdevnv;
345240afd8cSMark Johnston
346240afd8cSMark Johnston diskvdevnv = pool_disk_vdev_config_nvcreate(zfs);
347240afd8cSMark Johnston rootvdevnv = nvlist_create(NV_UNIQUE_NAME);
348240afd8cSMark Johnston
349240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_ID, 0);
350240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_GUID, zfs->poolguid);
351240afd8cSMark Johnston nvlist_add_string(rootvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT);
352240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG);
353240afd8cSMark Johnston nvlist_add_nvlist_array(rootvdevnv, ZPOOL_CONFIG_CHILDREN, &diskvdevnv,
354240afd8cSMark Johnston 1);
355240afd8cSMark Johnston nvlist_destroy(diskvdevnv);
356240afd8cSMark Johnston
357240afd8cSMark Johnston return (rootvdevnv);
358240afd8cSMark Johnston }
359240afd8cSMark Johnston
360240afd8cSMark Johnston /*
361240afd8cSMark Johnston * Create the pool's "config" object, which contains an nvlist describing pool
362240afd8cSMark Johnston * parameters and the vdev topology. It is similar but not identical to the
363240afd8cSMark Johnston * nvlist stored in vdev labels. The main difference is that vdev labels do not
364240afd8cSMark Johnston * describe the full vdev tree and in particular do not contain the "root"
365240afd8cSMark Johnston * meta-vdev.
366240afd8cSMark Johnston */
367240afd8cSMark Johnston static void
pool_init_objdir_config(zfs_opt_t * zfs,zfs_zap_t * objdir)368240afd8cSMark Johnston pool_init_objdir_config(zfs_opt_t *zfs, zfs_zap_t *objdir)
369240afd8cSMark Johnston {
370240afd8cSMark Johnston dnode_phys_t *dnode;
371240afd8cSMark Johnston nvlist_t *poolconfig, *vdevconfig;
372240afd8cSMark Johnston void *configbuf;
373240afd8cSMark Johnston uint64_t dnid;
374240afd8cSMark Johnston off_t configloc, configblksz;
375240afd8cSMark Johnston int error;
376240afd8cSMark Johnston
377240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_PACKED_NVLIST,
378240afd8cSMark Johnston DMU_OT_PACKED_NVLIST_SIZE, sizeof(uint64_t), &dnid);
379240afd8cSMark Johnston
380240afd8cSMark Johnston poolconfig = pool_config_nvcreate(zfs);
381240afd8cSMark Johnston
382240afd8cSMark Johnston vdevconfig = pool_root_vdev_config_nvcreate(zfs);
383240afd8cSMark Johnston nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig);
384240afd8cSMark Johnston nvlist_destroy(vdevconfig);
385240afd8cSMark Johnston
386240afd8cSMark Johnston error = nvlist_export(poolconfig);
387240afd8cSMark Johnston if (error != 0)
388240afd8cSMark Johnston errc(1, error, "nvlist_export");
389240afd8cSMark Johnston
390240afd8cSMark Johnston configblksz = nvlist_size(poolconfig);
391240afd8cSMark Johnston configloc = objset_space_alloc(zfs, zfs->mos, &configblksz);
392240afd8cSMark Johnston configbuf = ecalloc(1, configblksz);
393240afd8cSMark Johnston nvlist_copy(poolconfig, configbuf, configblksz);
394240afd8cSMark Johnston
395240afd8cSMark Johnston vdev_pwrite_dnode_data(zfs, dnode, configbuf, configblksz, configloc);
396240afd8cSMark Johnston
397240afd8cSMark Johnston dnode->dn_datablkszsec = configblksz >> MINBLOCKSHIFT;
398240afd8cSMark Johnston dnode->dn_flags = DNODE_FLAG_USED_BYTES;
399240afd8cSMark Johnston *(uint64_t *)DN_BONUS(dnode) = nvlist_size(poolconfig);
400240afd8cSMark Johnston
401240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_CONFIG, dnid);
402240afd8cSMark Johnston
403240afd8cSMark Johnston nvlist_destroy(poolconfig);
404240afd8cSMark Johnston free(configbuf);
405240afd8cSMark Johnston }
406240afd8cSMark Johnston
407240afd8cSMark Johnston /*
408240afd8cSMark Johnston * Add objects block pointer list objects, used for deferred frees. We don't do
409240afd8cSMark Johnston * anything with them, but they need to be present or OpenZFS will refuse to
410240afd8cSMark Johnston * import the pool.
411240afd8cSMark Johnston */
412240afd8cSMark Johnston static void
pool_init_objdir_bplists(zfs_opt_t * zfs __unused,zfs_zap_t * objdir)413240afd8cSMark Johnston pool_init_objdir_bplists(zfs_opt_t *zfs __unused, zfs_zap_t *objdir)
414240afd8cSMark Johnston {
415240afd8cSMark Johnston uint64_t dnid;
416240afd8cSMark Johnston
417240afd8cSMark Johnston (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR,
418240afd8cSMark Johnston BPOBJ_SIZE_V2, &dnid);
419240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FREE_BPOBJ, dnid);
420240afd8cSMark Johnston
421240afd8cSMark Johnston (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR,
422240afd8cSMark Johnston BPOBJ_SIZE_V2, &dnid);
423240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_SYNC_BPLIST, dnid);
424240afd8cSMark Johnston }
425240afd8cSMark Johnston
426240afd8cSMark Johnston /*
427240afd8cSMark Johnston * Add required feature metadata objects. We don't know anything about ZFS
428240afd8cSMark Johnston * features, so the objects are just empty ZAPs.
429240afd8cSMark Johnston */
430240afd8cSMark Johnston static void
pool_init_objdir_feature_maps(zfs_opt_t * zfs,zfs_zap_t * objdir)431240afd8cSMark Johnston pool_init_objdir_feature_maps(zfs_opt_t *zfs, zfs_zap_t *objdir)
432240afd8cSMark Johnston {
433240afd8cSMark Johnston dnode_phys_t *dnode;
434240afd8cSMark Johnston uint64_t dnid;
435240afd8cSMark Johnston
436240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
437240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_READ, dnid);
438240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode));
439240afd8cSMark Johnston
440240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
441240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_WRITE, dnid);
442240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode));
443240afd8cSMark Johnston
444240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid);
445240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURE_DESCRIPTIONS, dnid);
446240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode));
447240afd8cSMark Johnston }
448240afd8cSMark Johnston
449240afd8cSMark Johnston static void
pool_init_objdir_dsl(zfs_opt_t * zfs,zfs_zap_t * objdir)450240afd8cSMark Johnston pool_init_objdir_dsl(zfs_opt_t *zfs, zfs_zap_t *objdir)
451240afd8cSMark Johnston {
452240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_ROOT_DATASET,
453240afd8cSMark Johnston dsl_dir_id(zfs->rootdsldir));
454240afd8cSMark Johnston }
455240afd8cSMark Johnston
456240afd8cSMark Johnston static void
pool_init_objdir_poolprops(zfs_opt_t * zfs,zfs_zap_t * objdir)457240afd8cSMark Johnston pool_init_objdir_poolprops(zfs_opt_t *zfs, zfs_zap_t *objdir)
458240afd8cSMark Johnston {
459240afd8cSMark Johnston dnode_phys_t *dnode;
460240afd8cSMark Johnston uint64_t id;
461240afd8cSMark Johnston
462240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OT_POOL_PROPS, &id);
463240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_PROPS, id);
464240afd8cSMark Johnston
465240afd8cSMark Johnston zfs->poolprops = zap_alloc(zfs->mos, dnode);
466240afd8cSMark Johnston }
467240afd8cSMark Johnston
468240afd8cSMark Johnston /*
469240afd8cSMark Johnston * Initialize the MOS object directory, the root of virtually all of the pool's
470240afd8cSMark Johnston * data and metadata.
471240afd8cSMark Johnston */
472240afd8cSMark Johnston static void
pool_init_objdir(zfs_opt_t * zfs)473240afd8cSMark Johnston pool_init_objdir(zfs_opt_t *zfs)
474240afd8cSMark Johnston {
475240afd8cSMark Johnston zfs_zap_t *zap;
476240afd8cSMark Johnston dnode_phys_t *objdir;
477240afd8cSMark Johnston
478240afd8cSMark Johnston objdir = objset_dnode_lookup(zfs->mos, DMU_POOL_DIRECTORY_OBJECT);
479240afd8cSMark Johnston
480240afd8cSMark Johnston zap = zap_alloc(zfs->mos, objdir);
481240afd8cSMark Johnston pool_init_objdir_config(zfs, zap);
482240afd8cSMark Johnston pool_init_objdir_bplists(zfs, zap);
483240afd8cSMark Johnston pool_init_objdir_feature_maps(zfs, zap);
484240afd8cSMark Johnston pool_init_objdir_dsl(zfs, zap);
485240afd8cSMark Johnston pool_init_objdir_poolprops(zfs, zap);
486240afd8cSMark Johnston zap_write(zfs, zap);
487240afd8cSMark Johnston }
488240afd8cSMark Johnston
/*
 * Initialize the meta-object set (MOS) and immediately write out several
 * special objects whose contents are already finalized, including the object
 * directory.
 *
 * Once the MOS is finalized, it'll look roughly like this:
 *
 *	object directory (ZAP)
 *	|-> vdev config object (nvlist)
 *	|-> features for read
 *	|-> features for write
 *	|-> feature descriptions
 *	|-> sync bplist
 *	|-> free bplist
 *	|-> pool properties
 *	L-> root DSL directory
 *	    |-> DSL child directory (ZAP)
 *	    |   |-> $MOS (DSL dir)
 *	    |   |   |-> child map
 *	    |   |   L-> props (ZAP)
 *	    |   |-> $FREE (DSL dir)
 *	    |   |   |-> child map
 *	    |   |   L-> props (ZAP)
 *	    |   |-> $ORIGIN (DSL dir)
 *	    |   |   |-> child map
 *	    |   |   |-> dataset
 *	    |   |   |   L-> deadlist
 *	    |   |   |-> snapshot
 *	    |   |   |   |-> deadlist
 *	    |   |   |   L-> snapshot names
 *	    |   |   |-> props (ZAP)
 *	    |   |   L-> clones (ZAP)
 *	    |   |-> dataset 1 (DSL dir)
 *	    |   |   |-> DSL dataset
 *	    |   |   |   |-> snapshot names
 *	    |   |   |   L-> deadlist
 *	    |   |   |-> child map
 *	    |   |   |   L-> ...
 *	    |   |   L-> props
 *	    |   |-> dataset 2
 *	    |   |   L-> ...
 *	    |   |-> ...
 *	    |   L-> dataset n
 *	    |-> DSL root dataset
 *	    |   |-> snapshot names
 *	    |   L-> deadlist
 *	    L-> props (ZAP)
 *	space map object array
 *	|-> space map 1
 *	|-> space map 2
 *	|-> ...
 *	L-> space map n (zfs->mscount)
 *
 * The space map object array is pointed to by the "msarray" property in the
 * pool configuration.
 */
545240afd8cSMark Johnston static void
pool_init(zfs_opt_t * zfs)546240afd8cSMark Johnston pool_init(zfs_opt_t *zfs)
547240afd8cSMark Johnston {
548240afd8cSMark Johnston uint64_t dnid;
549240afd8cSMark Johnston
55014c5cf3aSMark Johnston zfs->poolguid = randomguid();
55114c5cf3aSMark Johnston zfs->vdevguid = randomguid();
552240afd8cSMark Johnston
553240afd8cSMark Johnston zfs->mos = objset_alloc(zfs, DMU_OST_META);
554240afd8cSMark Johnston
555240afd8cSMark Johnston (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_DIRECTORY, &dnid);
556240afd8cSMark Johnston assert(dnid == DMU_POOL_DIRECTORY_OBJECT);
557240afd8cSMark Johnston
558240afd8cSMark Johnston (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_ARRAY, &zfs->objarrid);
559240afd8cSMark Johnston
560240afd8cSMark Johnston dsl_init(zfs);
561240afd8cSMark Johnston
562240afd8cSMark Johnston pool_init_objdir(zfs);
563240afd8cSMark Johnston }
564240afd8cSMark Johnston
565240afd8cSMark Johnston static void
pool_labels_write(zfs_opt_t * zfs)566240afd8cSMark Johnston pool_labels_write(zfs_opt_t *zfs)
567240afd8cSMark Johnston {
568240afd8cSMark Johnston uberblock_t *ub;
569240afd8cSMark Johnston vdev_label_t *label;
570240afd8cSMark Johnston nvlist_t *poolconfig, *vdevconfig;
571240afd8cSMark Johnston int error;
572240afd8cSMark Johnston
573240afd8cSMark Johnston label = ecalloc(1, sizeof(*label));
574240afd8cSMark Johnston
575240afd8cSMark Johnston /*
576240afd8cSMark Johnston * Assemble the vdev configuration and store it in the label.
577240afd8cSMark Johnston */
578240afd8cSMark Johnston poolconfig = pool_config_nvcreate(zfs);
579240afd8cSMark Johnston vdevconfig = pool_disk_vdev_config_nvcreate(zfs);
580240afd8cSMark Johnston nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig);
581240afd8cSMark Johnston nvlist_destroy(vdevconfig);
582240afd8cSMark Johnston
583240afd8cSMark Johnston error = nvlist_export(poolconfig);
584240afd8cSMark Johnston if (error != 0)
585240afd8cSMark Johnston errc(1, error, "nvlist_export");
586240afd8cSMark Johnston nvlist_copy(poolconfig, label->vl_vdev_phys.vp_nvlist,
587240afd8cSMark Johnston sizeof(label->vl_vdev_phys.vp_nvlist));
588240afd8cSMark Johnston nvlist_destroy(poolconfig);
589240afd8cSMark Johnston
590240afd8cSMark Johnston /*
591240afd8cSMark Johnston * Fill out the uberblock. Just make each one the same. The embedded
592240afd8cSMark Johnston * checksum is calculated in vdev_label_write().
593240afd8cSMark Johnston */
594240afd8cSMark Johnston for (size_t uoff = 0; uoff < sizeof(label->vl_uberblock);
595240afd8cSMark Johnston uoff += (1 << zfs->ashift)) {
596240afd8cSMark Johnston ub = (uberblock_t *)(&label->vl_uberblock[0] + uoff);
597240afd8cSMark Johnston ub->ub_magic = UBERBLOCK_MAGIC;
598240afd8cSMark Johnston ub->ub_version = SPA_VERSION;
599*4e15366cSMark Johnston
600*4e15366cSMark Johnston /*
601*4e15366cSMark Johnston * Upon import, OpenZFS will perform metadata verification of
602*4e15366cSMark Johnston * the last TXG by default. If all data is written in the same
603*4e15366cSMark Johnston * TXG, it'll all get verified, which can be painfully slow in
604*4e15366cSMark Johnston * some cases, e.g., initial boot in a cloud environment with
605*4e15366cSMark Johnston * slow storage. So, fabricate additional TXGs to avoid this
606*4e15366cSMark Johnston * overhead, unless the user requests otherwise.
607*4e15366cSMark Johnston */
608240afd8cSMark Johnston ub->ub_txg = TXG;
609*4e15366cSMark Johnston if (!zfs->verify_txgs)
610*4e15366cSMark Johnston ub->ub_txg += TXG_SIZE;
611240afd8cSMark Johnston ub->ub_guid_sum = zfs->poolguid + zfs->vdevguid;
612240afd8cSMark Johnston ub->ub_timestamp = 0;
613240afd8cSMark Johnston
614240afd8cSMark Johnston ub->ub_software_version = SPA_VERSION;
615240afd8cSMark Johnston ub->ub_mmp_magic = MMP_MAGIC;
616240afd8cSMark Johnston ub->ub_mmp_delay = 0;
617240afd8cSMark Johnston ub->ub_mmp_config = 0;
618240afd8cSMark Johnston ub->ub_checkpoint_txg = 0;
619240afd8cSMark Johnston objset_root_blkptr_copy(zfs->mos, &ub->ub_rootbp);
620240afd8cSMark Johnston }
621240afd8cSMark Johnston
622240afd8cSMark Johnston /*
623240afd8cSMark Johnston * Write out four copies of the label: two at the beginning of the vdev
624240afd8cSMark Johnston * and two at the end.
625240afd8cSMark Johnston */
626240afd8cSMark Johnston for (int i = 0; i < VDEV_LABELS; i++)
627240afd8cSMark Johnston vdev_label_write(zfs, i, label);
628240afd8cSMark Johnston
629240afd8cSMark Johnston free(label);
630240afd8cSMark Johnston }
631240afd8cSMark Johnston
632240afd8cSMark Johnston static void
pool_fini(zfs_opt_t * zfs)633240afd8cSMark Johnston pool_fini(zfs_opt_t *zfs)
634240afd8cSMark Johnston {
635240afd8cSMark Johnston zap_write(zfs, zfs->poolprops);
636240afd8cSMark Johnston dsl_write(zfs);
637240afd8cSMark Johnston objset_write(zfs, zfs->mos);
638240afd8cSMark Johnston pool_labels_write(zfs);
639240afd8cSMark Johnston }
640240afd8cSMark Johnston
641240afd8cSMark Johnston struct dnode_cursor *
dnode_cursor_init(zfs_opt_t * zfs,zfs_objset_t * os,dnode_phys_t * dnode,off_t size,off_t blksz)642240afd8cSMark Johnston dnode_cursor_init(zfs_opt_t *zfs, zfs_objset_t *os, dnode_phys_t *dnode,
643240afd8cSMark Johnston off_t size, off_t blksz)
644240afd8cSMark Johnston {
645240afd8cSMark Johnston struct dnode_cursor *c;
646240afd8cSMark Johnston uint64_t nbppindir, indlevel, ndatablks, nindblks;
647240afd8cSMark Johnston
648240afd8cSMark Johnston assert(dnode->dn_nblkptr == 1);
649240afd8cSMark Johnston assert(blksz <= MAXBLOCKSIZE);
650240afd8cSMark Johnston
651240afd8cSMark Johnston if (blksz == 0) {
652240afd8cSMark Johnston /* Must be between 1<<ashift and 128KB. */
653240afd8cSMark Johnston blksz = MIN(MAXBLOCKSIZE, MAX(1 << zfs->ashift,
6549821e244SJohn Baldwin powerof2(size) ? size : (1l << flsll(size))));
655240afd8cSMark Johnston }
656240afd8cSMark Johnston assert(powerof2(blksz));
657240afd8cSMark Johnston
658240afd8cSMark Johnston /*
659240afd8cSMark Johnston * Do we need indirect blocks? Figure out how many levels are needed
660240afd8cSMark Johnston * (indlevel == 1 means no indirect blocks) and how much space is needed
661240afd8cSMark Johnston * (it has to be allocated up-front to break the dependency cycle
662240afd8cSMark Johnston * described in objset_write()).
663240afd8cSMark Johnston */
664240afd8cSMark Johnston ndatablks = size == 0 ? 0 : howmany(size, blksz);
665240afd8cSMark Johnston nindblks = 0;
666240afd8cSMark Johnston for (indlevel = 1, nbppindir = 1; ndatablks > nbppindir; indlevel++) {
667240afd8cSMark Johnston nbppindir *= BLKPTR_PER_INDIR;
668240afd8cSMark Johnston nindblks += howmany(ndatablks, indlevel * nbppindir);
669240afd8cSMark Johnston }
670240afd8cSMark Johnston assert(indlevel < INDIR_LEVELS);
671240afd8cSMark Johnston
672240afd8cSMark Johnston dnode->dn_nlevels = (uint8_t)indlevel;
673240afd8cSMark Johnston dnode->dn_maxblkid = ndatablks > 0 ? ndatablks - 1 : 0;
674240afd8cSMark Johnston dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT;
675240afd8cSMark Johnston
676240afd8cSMark Johnston c = ecalloc(1, sizeof(*c));
677240afd8cSMark Johnston if (nindblks > 0) {
678240afd8cSMark Johnston c->indspace = nindblks * MAXBLOCKSIZE;
679240afd8cSMark Johnston c->indloc = objset_space_alloc(zfs, os, &c->indspace);
680240afd8cSMark Johnston }
681240afd8cSMark Johnston c->dnode = dnode;
682240afd8cSMark Johnston c->dataoff = 0;
683240afd8cSMark Johnston c->datablksz = blksz;
684240afd8cSMark Johnston
685240afd8cSMark Johnston return (c);
686240afd8cSMark Johnston }
687240afd8cSMark Johnston
688240afd8cSMark Johnston static void
_dnode_cursor_flush(zfs_opt_t * zfs,struct dnode_cursor * c,unsigned int levels)689b5a2bf51SMark Johnston _dnode_cursor_flush(zfs_opt_t *zfs, struct dnode_cursor *c, unsigned int levels)
690240afd8cSMark Johnston {
691240afd8cSMark Johnston blkptr_t *bp, *pbp;
692240afd8cSMark Johnston void *buf;
693240afd8cSMark Johnston uint64_t fill;
694240afd8cSMark Johnston off_t blkid, blksz, loc;
695240afd8cSMark Johnston
696240afd8cSMark Johnston assert(levels > 0);
6978a77bc5eSDimitry Andric assert(levels <= c->dnode->dn_nlevels - 1U);
698240afd8cSMark Johnston
699240afd8cSMark Johnston blksz = MAXBLOCKSIZE;
700240afd8cSMark Johnston blkid = (c->dataoff / c->datablksz) / BLKPTR_PER_INDIR;
701b5a2bf51SMark Johnston for (unsigned int level = 1; level <= levels; level++) {
702240afd8cSMark Johnston buf = c->inddir[level - 1];
703240afd8cSMark Johnston
7048a77bc5eSDimitry Andric if (level == c->dnode->dn_nlevels - 1U) {
705240afd8cSMark Johnston pbp = &c->dnode->dn_blkptr[0];
706240afd8cSMark Johnston } else {
707240afd8cSMark Johnston uint64_t iblkid;
708240afd8cSMark Johnston
709240afd8cSMark Johnston iblkid = blkid & (BLKPTR_PER_INDIR - 1);
710240afd8cSMark Johnston pbp = (blkptr_t *)
711240afd8cSMark Johnston &c->inddir[level][iblkid * sizeof(blkptr_t)];
712240afd8cSMark Johnston }
713240afd8cSMark Johnston
714240afd8cSMark Johnston /*
715240afd8cSMark Johnston * Space for indirect blocks is allocated up-front; see the
716240afd8cSMark Johnston * comment in objset_write().
717240afd8cSMark Johnston */
718240afd8cSMark Johnston loc = c->indloc;
719240afd8cSMark Johnston c->indloc += blksz;
720240afd8cSMark Johnston assert(c->indspace >= blksz);
721240afd8cSMark Johnston c->indspace -= blksz;
722240afd8cSMark Johnston
723240afd8cSMark Johnston bp = buf;
724240afd8cSMark Johnston fill = 0;
725240afd8cSMark Johnston for (size_t i = 0; i < BLKPTR_PER_INDIR; i++)
726240afd8cSMark Johnston fill += BP_GET_FILL(&bp[i]);
727240afd8cSMark Johnston
728240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, c->dnode, level, fill, buf, blksz,
729240afd8cSMark Johnston loc, pbp);
730240afd8cSMark Johnston memset(buf, 0, MAXBLOCKSIZE);
731240afd8cSMark Johnston
732240afd8cSMark Johnston blkid /= BLKPTR_PER_INDIR;
733240afd8cSMark Johnston }
734240afd8cSMark Johnston }
735240afd8cSMark Johnston
736240afd8cSMark Johnston blkptr_t *
dnode_cursor_next(zfs_opt_t * zfs,struct dnode_cursor * c,off_t off)737240afd8cSMark Johnston dnode_cursor_next(zfs_opt_t *zfs, struct dnode_cursor *c, off_t off)
738240afd8cSMark Johnston {
739240afd8cSMark Johnston off_t blkid, l1id;
740b5a2bf51SMark Johnston unsigned int levels;
741240afd8cSMark Johnston
742240afd8cSMark Johnston if (c->dnode->dn_nlevels == 1) {
743240afd8cSMark Johnston assert(off < MAXBLOCKSIZE);
744240afd8cSMark Johnston return (&c->dnode->dn_blkptr[0]);
745240afd8cSMark Johnston }
746240afd8cSMark Johnston
747240afd8cSMark Johnston assert(off % c->datablksz == 0);
748240afd8cSMark Johnston
749240afd8cSMark Johnston /* Do we need to flush any full indirect blocks? */
750240afd8cSMark Johnston if (off > 0) {
751240afd8cSMark Johnston blkid = off / c->datablksz;
7528a77bc5eSDimitry Andric for (levels = 0; levels < c->dnode->dn_nlevels - 1U; levels++) {
753240afd8cSMark Johnston if (blkid % BLKPTR_PER_INDIR != 0)
754240afd8cSMark Johnston break;
755240afd8cSMark Johnston blkid /= BLKPTR_PER_INDIR;
756240afd8cSMark Johnston }
757240afd8cSMark Johnston if (levels > 0)
758240afd8cSMark Johnston _dnode_cursor_flush(zfs, c, levels);
759240afd8cSMark Johnston }
760240afd8cSMark Johnston
761240afd8cSMark Johnston c->dataoff = off;
762240afd8cSMark Johnston l1id = (off / c->datablksz) & (BLKPTR_PER_INDIR - 1);
763240afd8cSMark Johnston return ((blkptr_t *)&c->inddir[0][l1id * sizeof(blkptr_t)]);
764240afd8cSMark Johnston }
765240afd8cSMark Johnston
766240afd8cSMark Johnston void
dnode_cursor_finish(zfs_opt_t * zfs,struct dnode_cursor * c)767240afd8cSMark Johnston dnode_cursor_finish(zfs_opt_t *zfs, struct dnode_cursor *c)
768240afd8cSMark Johnston {
769b5a2bf51SMark Johnston unsigned int levels;
770240afd8cSMark Johnston
771b5a2bf51SMark Johnston assert(c->dnode->dn_nlevels > 0);
772240afd8cSMark Johnston levels = c->dnode->dn_nlevels - 1;
773240afd8cSMark Johnston if (levels > 0)
774240afd8cSMark Johnston _dnode_cursor_flush(zfs, c, levels);
775240afd8cSMark Johnston assert(c->indspace == 0);
776240afd8cSMark Johnston free(c);
777240afd8cSMark Johnston }
778240afd8cSMark Johnston
779240afd8cSMark Johnston void
zfs_makefs(const char * image,const char * dir,fsnode * root,fsinfo_t * fsopts)780240afd8cSMark Johnston zfs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts)
781240afd8cSMark Johnston {
782240afd8cSMark Johnston zfs_opt_t *zfs;
783240afd8cSMark Johnston int dirfd;
784240afd8cSMark Johnston
785240afd8cSMark Johnston zfs = fsopts->fs_specific;
786240afd8cSMark Johnston
787240afd8cSMark Johnston /*
788240afd8cSMark Johnston * Use a fixed seed to provide reproducible pseudo-random numbers for
789240afd8cSMark Johnston * on-disk structures when needed (e.g., GUIDs, ZAP hash salts).
790240afd8cSMark Johnston */
791240afd8cSMark Johnston srandom(1729);
792240afd8cSMark Johnston
793240afd8cSMark Johnston zfs_check_opts(fsopts);
794240afd8cSMark Johnston
795240afd8cSMark Johnston dirfd = open(dir, O_DIRECTORY | O_RDONLY);
796240afd8cSMark Johnston if (dirfd < 0)
797240afd8cSMark Johnston err(1, "open(%s)", dir);
798240afd8cSMark Johnston
799240afd8cSMark Johnston vdev_init(zfs, image);
800240afd8cSMark Johnston pool_init(zfs);
801240afd8cSMark Johnston fs_build(zfs, dirfd, root);
802240afd8cSMark Johnston pool_fini(zfs);
803240afd8cSMark Johnston vdev_fini(zfs);
804240afd8cSMark Johnston }
805