1*240afd8cSMark Johnston /*- 2*240afd8cSMark Johnston * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3*240afd8cSMark Johnston * 4*240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation 5*240afd8cSMark Johnston * 6*240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from 7*240afd8cSMark Johnston * the FreeBSD Foundation. 8*240afd8cSMark Johnston * 9*240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without 10*240afd8cSMark Johnston * modification, are permitted provided that the following conditions are 11*240afd8cSMark Johnston * met: 12*240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright 13*240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer. 14*240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright 15*240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in 16*240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution. 17*240afd8cSMark Johnston * 18*240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19*240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20*240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21*240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22*240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23*240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24*240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25*240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26*240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27*240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28*240afd8cSMark Johnston * SUCH DAMAGE. 29*240afd8cSMark Johnston */ 30*240afd8cSMark Johnston 31*240afd8cSMark Johnston #include <sys/param.h> 32*240afd8cSMark Johnston #include <sys/errno.h> 33*240afd8cSMark Johnston #include <sys/queue.h> 34*240afd8cSMark Johnston 35*240afd8cSMark Johnston #include <assert.h> 36*240afd8cSMark Johnston #include <fcntl.h> 37*240afd8cSMark Johnston #include <stdbool.h> 38*240afd8cSMark Johnston #include <stddef.h> 39*240afd8cSMark Johnston #include <stdlib.h> 40*240afd8cSMark Johnston #include <string.h> 41*240afd8cSMark Johnston #include <unistd.h> 42*240afd8cSMark Johnston 43*240afd8cSMark Johnston #include <util.h> 44*240afd8cSMark Johnston 45*240afd8cSMark Johnston #include "makefs.h" 46*240afd8cSMark Johnston #include "zfs.h" 47*240afd8cSMark Johnston 48*240afd8cSMark Johnston #define VDEV_LABEL_SPACE \ 49*240afd8cSMark Johnston ((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) 50*240afd8cSMark Johnston _Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, ""); 51*240afd8cSMark Johnston 52*240afd8cSMark Johnston #define MINMSSIZE ((off_t)1 << 24) /* 16MB */ 53*240afd8cSMark Johnston #define DFLTMSSIZE ((off_t)1 << 29) /* 512MB */ 54*240afd8cSMark Johnston #define MAXMSSIZE ((off_t)1 << 34) /* 16GB */ 55*240afd8cSMark Johnston 56*240afd8cSMark Johnston #define INDIR_LEVELS 6 57*240afd8cSMark Johnston /* Indirect blocks are always 128KB. */ 58*240afd8cSMark Johnston #define BLKPTR_PER_INDIR (MAXBLOCKSIZE / sizeof(blkptr_t)) 59*240afd8cSMark Johnston 60*240afd8cSMark Johnston struct dnode_cursor { 61*240afd8cSMark Johnston char inddir[INDIR_LEVELS][MAXBLOCKSIZE]; 62*240afd8cSMark Johnston off_t indloc; 63*240afd8cSMark Johnston off_t indspace; 64*240afd8cSMark Johnston dnode_phys_t *dnode; 65*240afd8cSMark Johnston off_t dataoff; 66*240afd8cSMark Johnston off_t datablksz; 67*240afd8cSMark Johnston }; 68*240afd8cSMark Johnston 69*240afd8cSMark Johnston void 70*240afd8cSMark Johnston zfs_prep_opts(fsinfo_t *fsopts) 71*240afd8cSMark Johnston { 72*240afd8cSMark Johnston zfs_opt_t *zfs = ecalloc(1, sizeof(*zfs)); 73*240afd8cSMark Johnston 74*240afd8cSMark Johnston const option_t zfs_options[] = { 75*240afd8cSMark Johnston { '\0', "bootfs", &zfs->bootfs, OPT_STRPTR, 76*240afd8cSMark Johnston 0, 0, "Bootable dataset" }, 77*240afd8cSMark Johnston { '\0', "mssize", &zfs->mssize, OPT_INT64, 78*240afd8cSMark Johnston MINMSSIZE, MAXMSSIZE, "Metaslab size" }, 79*240afd8cSMark Johnston { '\0', "poolname", &zfs->poolname, OPT_STRPTR, 80*240afd8cSMark Johnston 0, 0, "ZFS pool name" }, 81*240afd8cSMark Johnston { '\0', "rootpath", &zfs->rootpath, OPT_STRPTR, 82*240afd8cSMark Johnston 0, 0, "Prefix for all dataset mount points" }, 83*240afd8cSMark Johnston { '\0', "ashift", &zfs->ashift, OPT_INT32, 84*240afd8cSMark Johnston MINBLOCKSHIFT, MAXBLOCKSHIFT, "ZFS pool ashift" }, 85*240afd8cSMark Johnston { '\0', "nowarn", &zfs->nowarn, OPT_BOOL, 86*240afd8cSMark Johnston 0, 0, "Suppress warning about experimental ZFS support" }, 87*240afd8cSMark Johnston { .name = NULL } 88*240afd8cSMark Johnston }; 89*240afd8cSMark Johnston 90*240afd8cSMark Johnston STAILQ_INIT(&zfs->datasetdescs); 91*240afd8cSMark Johnston 92*240afd8cSMark Johnston fsopts->fs_specific = zfs; 93*240afd8cSMark Johnston fsopts->fs_options = copy_opts(zfs_options); 94*240afd8cSMark Johnston } 95*240afd8cSMark Johnston 96*240afd8cSMark Johnston int 97*240afd8cSMark Johnston zfs_parse_opts(const char *option, fsinfo_t *fsopts) 98*240afd8cSMark Johnston { 99*240afd8cSMark Johnston zfs_opt_t *zfs; 100*240afd8cSMark Johnston struct dataset_desc *dsdesc; 101*240afd8cSMark Johnston char buf[BUFSIZ], *opt, *val; 102*240afd8cSMark Johnston int rv; 103*240afd8cSMark Johnston 104*240afd8cSMark Johnston zfs = fsopts->fs_specific; 105*240afd8cSMark Johnston 106*240afd8cSMark Johnston opt = val = estrdup(option); 107*240afd8cSMark Johnston opt = strsep(&val, "="); 108*240afd8cSMark Johnston if (strcmp(opt, "fs") == 0) { 109*240afd8cSMark Johnston if (val == NULL) 110*240afd8cSMark Johnston errx(1, "invalid filesystem parameters `%s'", option); 111*240afd8cSMark Johnston 112*240afd8cSMark Johnston /* 113*240afd8cSMark Johnston * Dataset descriptions will be parsed later, in dsl_init(). 114*240afd8cSMark Johnston * Just stash them away for now. 115*240afd8cSMark Johnston */ 116*240afd8cSMark Johnston dsdesc = ecalloc(1, sizeof(*dsdesc)); 117*240afd8cSMark Johnston dsdesc->params = estrdup(val); 118*240afd8cSMark Johnston free(opt); 119*240afd8cSMark Johnston STAILQ_INSERT_TAIL(&zfs->datasetdescs, dsdesc, next); 120*240afd8cSMark Johnston return (1); 121*240afd8cSMark Johnston } 122*240afd8cSMark Johnston free(opt); 123*240afd8cSMark Johnston 124*240afd8cSMark Johnston rv = set_option(fsopts->fs_options, option, buf, sizeof(buf)); 125*240afd8cSMark Johnston return (rv == -1 ? 0 : 1); 126*240afd8cSMark Johnston } 127*240afd8cSMark Johnston 128*240afd8cSMark Johnston static void 129*240afd8cSMark Johnston zfs_size_vdev(fsinfo_t *fsopts) 130*240afd8cSMark Johnston { 131*240afd8cSMark Johnston zfs_opt_t *zfs; 132*240afd8cSMark Johnston off_t asize, mssize, vdevsize, vdevsize1; 133*240afd8cSMark Johnston 134*240afd8cSMark Johnston zfs = fsopts->fs_specific; 135*240afd8cSMark Johnston 136*240afd8cSMark Johnston assert(fsopts->maxsize != 0); 137*240afd8cSMark Johnston assert(zfs->ashift != 0); 138*240afd8cSMark Johnston 139*240afd8cSMark Johnston /* 140*240afd8cSMark Johnston * Figure out how big the vdev should be. 141*240afd8cSMark Johnston */ 142*240afd8cSMark Johnston vdevsize = rounddown2(fsopts->maxsize, 1 << zfs->ashift); 143*240afd8cSMark Johnston if (vdevsize < MINDEVSIZE) 144*240afd8cSMark Johnston errx(1, "maximum image size is too small"); 145*240afd8cSMark Johnston if (vdevsize < fsopts->minsize || vdevsize > fsopts->maxsize) { 146*240afd8cSMark Johnston errx(1, "image size bounds must be multiples of %d", 147*240afd8cSMark Johnston 1 << zfs->ashift); 148*240afd8cSMark Johnston } 149*240afd8cSMark Johnston asize = vdevsize - VDEV_LABEL_SPACE; 150*240afd8cSMark Johnston 151*240afd8cSMark Johnston /* 152*240afd8cSMark Johnston * Size metaslabs according to the following heuristic: 153*240afd8cSMark Johnston * - provide at least 8 metaslabs, 154*240afd8cSMark Johnston * - without using a metaslab size larger than 512MB. 155*240afd8cSMark Johnston * This approximates what OpenZFS does without being complicated. In 156*240afd8cSMark Johnston * practice we expect pools to be expanded upon first use, and OpenZFS 157*240afd8cSMark Johnston * does not resize metaslabs in that case, so there is no right answer 158*240afd8cSMark Johnston * here. In general we want to provide large metaslabs even if the 159*240afd8cSMark Johnston * image size is small, and 512MB is a reasonable size for pools up to 160*240afd8cSMark Johnston * several hundred gigabytes. 161*240afd8cSMark Johnston * 162*240afd8cSMark Johnston * The user may override this heuristic using the "-o mssize" option. 163*240afd8cSMark Johnston */ 164*240afd8cSMark Johnston mssize = zfs->mssize; 165*240afd8cSMark Johnston if (mssize == 0) { 166*240afd8cSMark Johnston mssize = MAX(MIN(asize / 8, DFLTMSSIZE), MINMSSIZE); 167*240afd8cSMark Johnston if (!powerof2(mssize)) 168*240afd8cSMark Johnston mssize = 1l << (flsll(mssize) - 1); 169*240afd8cSMark Johnston } 170*240afd8cSMark Johnston if (!powerof2(mssize)) 171*240afd8cSMark Johnston errx(1, "metaslab size must be a power of 2"); 172*240afd8cSMark Johnston 173*240afd8cSMark Johnston /* 174*240afd8cSMark Johnston * If we have some slop left over, try to cover it by resizing the vdev, 175*240afd8cSMark Johnston * subject to the maxsize and minsize parameters. 176*240afd8cSMark Johnston */ 177*240afd8cSMark Johnston if (asize % mssize != 0) { 178*240afd8cSMark Johnston vdevsize1 = rounddown2(asize, mssize) + VDEV_LABEL_SPACE; 179*240afd8cSMark Johnston if (vdevsize1 < fsopts->minsize) 180*240afd8cSMark Johnston vdevsize1 = roundup2(asize, mssize) + VDEV_LABEL_SPACE; 181*240afd8cSMark Johnston if (vdevsize1 <= fsopts->maxsize) 182*240afd8cSMark Johnston vdevsize = vdevsize1; 183*240afd8cSMark Johnston } 184*240afd8cSMark Johnston asize = vdevsize - VDEV_LABEL_SPACE; 185*240afd8cSMark Johnston 186*240afd8cSMark Johnston zfs->asize = asize; 187*240afd8cSMark Johnston zfs->vdevsize = vdevsize; 188*240afd8cSMark Johnston zfs->mssize = mssize; 189*240afd8cSMark Johnston zfs->msshift = flsll(mssize) - 1; 190*240afd8cSMark Johnston zfs->mscount = asize / mssize; 191*240afd8cSMark Johnston } 192*240afd8cSMark Johnston 193*240afd8cSMark Johnston /* 194*240afd8cSMark Johnston * Validate options and set some default values. 195*240afd8cSMark Johnston */ 196*240afd8cSMark Johnston static void 197*240afd8cSMark Johnston zfs_check_opts(fsinfo_t *fsopts) 198*240afd8cSMark Johnston { 199*240afd8cSMark Johnston zfs_opt_t *zfs; 200*240afd8cSMark Johnston 201*240afd8cSMark Johnston zfs = fsopts->fs_specific; 202*240afd8cSMark Johnston 203*240afd8cSMark Johnston if (fsopts->offset != 0) 204*240afd8cSMark Johnston errx(1, "unhandled offset option"); 205*240afd8cSMark Johnston if (fsopts->maxsize == 0) 206*240afd8cSMark Johnston errx(1, "an image size must be specified"); 207*240afd8cSMark Johnston 208*240afd8cSMark Johnston if (zfs->poolname == NULL) 209*240afd8cSMark Johnston errx(1, "a pool name must be specified"); 210*240afd8cSMark Johnston 211*240afd8cSMark Johnston if (zfs->rootpath == NULL) 212*240afd8cSMark Johnston easprintf(&zfs->rootpath, "/%s", zfs->poolname); 213*240afd8cSMark Johnston if (zfs->rootpath[0] != '/') 214*240afd8cSMark Johnston errx(1, "mountpoint `%s' must be absolute", zfs->rootpath); 215*240afd8cSMark Johnston 216*240afd8cSMark Johnston if (zfs->ashift == 0) 217*240afd8cSMark Johnston zfs->ashift = 12; 218*240afd8cSMark Johnston 219*240afd8cSMark Johnston zfs_size_vdev(fsopts); 220*240afd8cSMark Johnston } 221*240afd8cSMark Johnston 222*240afd8cSMark Johnston void 223*240afd8cSMark Johnston zfs_cleanup_opts(fsinfo_t *fsopts) 224*240afd8cSMark Johnston { 225*240afd8cSMark Johnston struct dataset_desc *d, *tmp; 226*240afd8cSMark Johnston zfs_opt_t *zfs; 227*240afd8cSMark Johnston 228*240afd8cSMark Johnston zfs = fsopts->fs_specific; 229*240afd8cSMark Johnston free(zfs->rootpath); 230*240afd8cSMark Johnston free(zfs->bootfs); 231*240afd8cSMark Johnston free(__DECONST(void *, zfs->poolname)); 232*240afd8cSMark Johnston STAILQ_FOREACH_SAFE(d, &zfs->datasetdescs, next, tmp) { 233*240afd8cSMark Johnston free(d->params); 234*240afd8cSMark Johnston free(d); 235*240afd8cSMark Johnston } 236*240afd8cSMark Johnston free(zfs); 237*240afd8cSMark Johnston free(fsopts->fs_options); 238*240afd8cSMark Johnston } 239*240afd8cSMark Johnston 240*240afd8cSMark Johnston static size_t 241*240afd8cSMark Johnston nvlist_size(const nvlist_t *nvl) 242*240afd8cSMark Johnston { 243*240afd8cSMark Johnston return (sizeof(nvl->nv_header) + nvl->nv_size); 244*240afd8cSMark Johnston } 245*240afd8cSMark Johnston 246*240afd8cSMark Johnston static void 247*240afd8cSMark Johnston nvlist_copy(const nvlist_t *nvl, char *buf, size_t sz) 248*240afd8cSMark Johnston { 249*240afd8cSMark Johnston assert(sz >= nvlist_size(nvl)); 250*240afd8cSMark Johnston 251*240afd8cSMark Johnston memcpy(buf, &nvl->nv_header, sizeof(nvl->nv_header)); 252*240afd8cSMark Johnston memcpy(buf + sizeof(nvl->nv_header), nvl->nv_data, nvl->nv_size); 253*240afd8cSMark Johnston } 254*240afd8cSMark Johnston 255*240afd8cSMark Johnston static nvlist_t * 256*240afd8cSMark Johnston pool_config_nvcreate(zfs_opt_t *zfs) 257*240afd8cSMark Johnston { 258*240afd8cSMark Johnston nvlist_t *featuresnv, *poolnv; 259*240afd8cSMark Johnston 260*240afd8cSMark Johnston poolnv = nvlist_create(NV_UNIQUE_NAME); 261*240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_TXG, TXG); 262*240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VERSION, SPA_VERSION); 263*240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_EXPORTED); 264*240afd8cSMark Johnston nvlist_add_string(poolnv, ZPOOL_CONFIG_POOL_NAME, zfs->poolname); 265*240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_GUID, zfs->poolguid); 266*240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_TOP_GUID, zfs->vdevguid); 267*240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_GUID, zfs->vdevguid); 268*240afd8cSMark Johnston nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VDEV_CHILDREN, 1); 269*240afd8cSMark Johnston 270*240afd8cSMark Johnston featuresnv = nvlist_create(NV_UNIQUE_NAME); 271*240afd8cSMark Johnston nvlist_add_nvlist(poolnv, ZPOOL_CONFIG_FEATURES_FOR_READ, featuresnv); 272*240afd8cSMark Johnston nvlist_destroy(featuresnv); 273*240afd8cSMark Johnston 274*240afd8cSMark Johnston return (poolnv); 275*240afd8cSMark Johnston } 276*240afd8cSMark Johnston 277*240afd8cSMark Johnston static nvlist_t * 278*240afd8cSMark Johnston pool_disk_vdev_config_nvcreate(zfs_opt_t *zfs) 279*240afd8cSMark Johnston { 280*240afd8cSMark Johnston nvlist_t *diskvdevnv; 281*240afd8cSMark Johnston 282*240afd8cSMark Johnston assert(zfs->objarrid != 0); 283*240afd8cSMark Johnston 284*240afd8cSMark Johnston diskvdevnv = nvlist_create(NV_UNIQUE_NAME); 285*240afd8cSMark Johnston nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK); 286*240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASHIFT, zfs->ashift); 287*240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASIZE, zfs->asize); 288*240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_GUID, zfs->vdevguid); 289*240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ID, 0); 290*240afd8cSMark Johnston nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_PATH, "/dev/null"); 291*240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_WHOLE_DISK, 1); 292*240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG); 293*240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_ARRAY, 294*240afd8cSMark Johnston zfs->objarrid); 295*240afd8cSMark Johnston nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_SHIFT, 296*240afd8cSMark Johnston zfs->msshift); 297*240afd8cSMark Johnston 298*240afd8cSMark Johnston return (diskvdevnv); 299*240afd8cSMark Johnston } 300*240afd8cSMark Johnston 301*240afd8cSMark Johnston static nvlist_t * 302*240afd8cSMark Johnston pool_root_vdev_config_nvcreate(zfs_opt_t *zfs) 303*240afd8cSMark Johnston { 304*240afd8cSMark Johnston nvlist_t *diskvdevnv, *rootvdevnv; 305*240afd8cSMark Johnston 306*240afd8cSMark Johnston diskvdevnv = pool_disk_vdev_config_nvcreate(zfs); 307*240afd8cSMark Johnston rootvdevnv = nvlist_create(NV_UNIQUE_NAME); 308*240afd8cSMark Johnston 309*240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_ID, 0); 310*240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_GUID, zfs->poolguid); 311*240afd8cSMark Johnston nvlist_add_string(rootvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT); 312*240afd8cSMark Johnston nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG); 313*240afd8cSMark Johnston nvlist_add_nvlist_array(rootvdevnv, ZPOOL_CONFIG_CHILDREN, &diskvdevnv, 314*240afd8cSMark Johnston 1); 315*240afd8cSMark Johnston nvlist_destroy(diskvdevnv); 316*240afd8cSMark Johnston 317*240afd8cSMark Johnston return (rootvdevnv); 318*240afd8cSMark Johnston } 319*240afd8cSMark Johnston 320*240afd8cSMark Johnston /* 321*240afd8cSMark Johnston * Create the pool's "config" object, which contains an nvlist describing pool 322*240afd8cSMark Johnston * parameters and the vdev topology. It is similar but not identical to the 323*240afd8cSMark Johnston * nvlist stored in vdev labels. The main difference is that vdev labels do not 324*240afd8cSMark Johnston * describe the full vdev tree and in particular do not contain the "root" 325*240afd8cSMark Johnston * meta-vdev. 326*240afd8cSMark Johnston */ 327*240afd8cSMark Johnston static void 328*240afd8cSMark Johnston pool_init_objdir_config(zfs_opt_t *zfs, zfs_zap_t *objdir) 329*240afd8cSMark Johnston { 330*240afd8cSMark Johnston dnode_phys_t *dnode; 331*240afd8cSMark Johnston nvlist_t *poolconfig, *vdevconfig; 332*240afd8cSMark Johnston void *configbuf; 333*240afd8cSMark Johnston uint64_t dnid; 334*240afd8cSMark Johnston off_t configloc, configblksz; 335*240afd8cSMark Johnston int error; 336*240afd8cSMark Johnston 337*240afd8cSMark Johnston dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_PACKED_NVLIST, 338*240afd8cSMark Johnston DMU_OT_PACKED_NVLIST_SIZE, sizeof(uint64_t), &dnid); 339*240afd8cSMark Johnston 340*240afd8cSMark Johnston poolconfig = pool_config_nvcreate(zfs); 341*240afd8cSMark Johnston 342*240afd8cSMark Johnston vdevconfig = pool_root_vdev_config_nvcreate(zfs); 343*240afd8cSMark Johnston nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig); 344*240afd8cSMark Johnston nvlist_destroy(vdevconfig); 345*240afd8cSMark Johnston 346*240afd8cSMark Johnston error = nvlist_export(poolconfig); 347*240afd8cSMark Johnston if (error != 0) 348*240afd8cSMark Johnston errc(1, error, "nvlist_export"); 349*240afd8cSMark Johnston 350*240afd8cSMark Johnston configblksz = nvlist_size(poolconfig); 351*240afd8cSMark Johnston configloc = objset_space_alloc(zfs, zfs->mos, &configblksz); 352*240afd8cSMark Johnston configbuf = ecalloc(1, configblksz); 353*240afd8cSMark Johnston nvlist_copy(poolconfig, configbuf, configblksz); 354*240afd8cSMark Johnston 355*240afd8cSMark Johnston vdev_pwrite_dnode_data(zfs, dnode, configbuf, configblksz, configloc); 356*240afd8cSMark Johnston 357*240afd8cSMark Johnston dnode->dn_datablkszsec = configblksz >> MINBLOCKSHIFT; 358*240afd8cSMark Johnston dnode->dn_flags = DNODE_FLAG_USED_BYTES; 359*240afd8cSMark Johnston *(uint64_t *)DN_BONUS(dnode) = nvlist_size(poolconfig); 360*240afd8cSMark Johnston 361*240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_CONFIG, dnid); 362*240afd8cSMark Johnston 363*240afd8cSMark Johnston nvlist_destroy(poolconfig); 364*240afd8cSMark Johnston free(configbuf); 365*240afd8cSMark Johnston } 366*240afd8cSMark Johnston 367*240afd8cSMark Johnston /* 368*240afd8cSMark Johnston * Add objects block pointer list objects, used for deferred frees. We don't do 369*240afd8cSMark Johnston * anything with them, but they need to be present or OpenZFS will refuse to 370*240afd8cSMark Johnston * import the pool. 371*240afd8cSMark Johnston */ 372*240afd8cSMark Johnston static void 373*240afd8cSMark Johnston pool_init_objdir_bplists(zfs_opt_t *zfs __unused, zfs_zap_t *objdir) 374*240afd8cSMark Johnston { 375*240afd8cSMark Johnston uint64_t dnid; 376*240afd8cSMark Johnston 377*240afd8cSMark Johnston (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR, 378*240afd8cSMark Johnston BPOBJ_SIZE_V2, &dnid); 379*240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FREE_BPOBJ, dnid); 380*240afd8cSMark Johnston 381*240afd8cSMark Johnston (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR, 382*240afd8cSMark Johnston BPOBJ_SIZE_V2, &dnid); 383*240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_SYNC_BPLIST, dnid); 384*240afd8cSMark Johnston } 385*240afd8cSMark Johnston 386*240afd8cSMark Johnston /* 387*240afd8cSMark Johnston * Add required feature metadata objects. We don't know anything about ZFS 388*240afd8cSMark Johnston * features, so the objects are just empty ZAPs. 389*240afd8cSMark Johnston */ 390*240afd8cSMark Johnston static void 391*240afd8cSMark Johnston pool_init_objdir_feature_maps(zfs_opt_t *zfs, zfs_zap_t *objdir) 392*240afd8cSMark Johnston { 393*240afd8cSMark Johnston dnode_phys_t *dnode; 394*240afd8cSMark Johnston uint64_t dnid; 395*240afd8cSMark Johnston 396*240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); 397*240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_READ, dnid); 398*240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode)); 399*240afd8cSMark Johnston 400*240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); 401*240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_WRITE, dnid); 402*240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode)); 403*240afd8cSMark Johnston 404*240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); 405*240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_FEATURE_DESCRIPTIONS, dnid); 406*240afd8cSMark Johnston zap_write(zfs, zap_alloc(zfs->mos, dnode)); 407*240afd8cSMark Johnston } 408*240afd8cSMark Johnston 409*240afd8cSMark Johnston static void 410*240afd8cSMark Johnston pool_init_objdir_dsl(zfs_opt_t *zfs, zfs_zap_t *objdir) 411*240afd8cSMark Johnston { 412*240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_ROOT_DATASET, 413*240afd8cSMark Johnston dsl_dir_id(zfs->rootdsldir)); 414*240afd8cSMark Johnston } 415*240afd8cSMark Johnston 416*240afd8cSMark Johnston static void 417*240afd8cSMark Johnston pool_init_objdir_poolprops(zfs_opt_t *zfs, zfs_zap_t *objdir) 418*240afd8cSMark Johnston { 419*240afd8cSMark Johnston dnode_phys_t *dnode; 420*240afd8cSMark Johnston uint64_t id; 421*240afd8cSMark Johnston 422*240afd8cSMark Johnston dnode = objset_dnode_alloc(zfs->mos, DMU_OT_POOL_PROPS, &id); 423*240afd8cSMark Johnston zap_add_uint64(objdir, DMU_POOL_PROPS, id); 424*240afd8cSMark Johnston 425*240afd8cSMark Johnston zfs->poolprops = zap_alloc(zfs->mos, dnode); 426*240afd8cSMark Johnston } 427*240afd8cSMark Johnston 428*240afd8cSMark Johnston /* 429*240afd8cSMark Johnston * Initialize the MOS object directory, the root of virtually all of the pool's 430*240afd8cSMark Johnston * data and metadata. 431*240afd8cSMark Johnston */ 432*240afd8cSMark Johnston static void 433*240afd8cSMark Johnston pool_init_objdir(zfs_opt_t *zfs) 434*240afd8cSMark Johnston { 435*240afd8cSMark Johnston zfs_zap_t *zap; 436*240afd8cSMark Johnston dnode_phys_t *objdir; 437*240afd8cSMark Johnston 438*240afd8cSMark Johnston objdir = objset_dnode_lookup(zfs->mos, DMU_POOL_DIRECTORY_OBJECT); 439*240afd8cSMark Johnston 440*240afd8cSMark Johnston zap = zap_alloc(zfs->mos, objdir); 441*240afd8cSMark Johnston pool_init_objdir_config(zfs, zap); 442*240afd8cSMark Johnston pool_init_objdir_bplists(zfs, zap); 443*240afd8cSMark Johnston pool_init_objdir_feature_maps(zfs, zap); 444*240afd8cSMark Johnston pool_init_objdir_dsl(zfs, zap); 445*240afd8cSMark Johnston pool_init_objdir_poolprops(zfs, zap); 446*240afd8cSMark Johnston zap_write(zfs, zap); 447*240afd8cSMark Johnston } 448*240afd8cSMark Johnston 449*240afd8cSMark Johnston /* 450*240afd8cSMark Johnston * Initialize the meta-object set (MOS) and immediately write out several 451*240afd8cSMark Johnston * special objects whose contents are already finalized, including the object 452*240afd8cSMark Johnston * directory. 453*240afd8cSMark Johnston * 454*240afd8cSMark Johnston * Once the MOS is finalized, it'll look roughly like this: 455*240afd8cSMark Johnston * 456*240afd8cSMark Johnston * object directory (ZAP) 457*240afd8cSMark Johnston * |-> vdev config object (nvlist) 458*240afd8cSMark Johnston * |-> features for read 459*240afd8cSMark Johnston * |-> features for write 460*240afd8cSMark Johnston * |-> feature descriptions 461*240afd8cSMark Johnston * |-> sync bplist 462*240afd8cSMark Johnston * |-> free bplist 463*240afd8cSMark Johnston * |-> pool properties 464*240afd8cSMark Johnston * L-> root DSL directory 465*240afd8cSMark Johnston * |-> DSL child directory (ZAP) 466*240afd8cSMark Johnston * | |-> $MOS (DSL dir) 467*240afd8cSMark Johnston * | | |-> child map 468*240afd8cSMark Johnston * | | L-> props (ZAP) 469*240afd8cSMark Johnston * | |-> $FREE (DSL dir) 470*240afd8cSMark Johnston * | | |-> child map 471*240afd8cSMark Johnston * | | L-> props (ZAP) 472*240afd8cSMark Johnston * | |-> $ORIGIN (DSL dir) 473*240afd8cSMark Johnston * | | |-> child map 474*240afd8cSMark Johnston * | | |-> dataset 475*240afd8cSMark Johnston * | | | L-> deadlist 476*240afd8cSMark Johnston * | | |-> snapshot 477*240afd8cSMark Johnston * | | | |-> deadlist 478*240afd8cSMark Johnston * | | | L-> snapshot names 479*240afd8cSMark Johnston * | | |-> props (ZAP) 480*240afd8cSMark Johnston * | | L-> clones (ZAP) 481*240afd8cSMark Johnston * | |-> dataset 1 (DSL dir) 482*240afd8cSMark Johnston * | | |-> DSL dataset 483*240afd8cSMark Johnston * | | | |-> snapshot names 484*240afd8cSMark Johnston * | | | L-> deadlist 485*240afd8cSMark Johnston * | | |-> child map 486*240afd8cSMark Johnston * | | | L-> ... 487*240afd8cSMark Johnston * | | L-> props 488*240afd8cSMark Johnston * | |-> dataset 2 489*240afd8cSMark Johnston * | | L-> ... 490*240afd8cSMark Johnston * | |-> ... 491*240afd8cSMark Johnston * | L-> dataset n 492*240afd8cSMark Johnston * |-> DSL root dataset 493*240afd8cSMark Johnston * | |-> snapshot names 494*240afd8cSMark Johnston * | L-> deadlist 495*240afd8cSMark Johnston * L-> props (ZAP) 496*240afd8cSMark Johnston * space map object array 497*240afd8cSMark Johnston * |-> space map 1 498*240afd8cSMark Johnston * |-> space map 2 499*240afd8cSMark Johnston * |-> ... 500*240afd8cSMark Johnston * L-> space map n (zfs->mscount) 501*240afd8cSMark Johnston * 502*240afd8cSMark Johnston * The space map object array is pointed to by the "msarray" property in the 503*240afd8cSMark Johnston * pool configuration. 504*240afd8cSMark Johnston */ 505*240afd8cSMark Johnston static void 506*240afd8cSMark Johnston pool_init(zfs_opt_t *zfs) 507*240afd8cSMark Johnston { 508*240afd8cSMark Johnston uint64_t dnid; 509*240afd8cSMark Johnston 510*240afd8cSMark Johnston zfs->poolguid = ((uint64_t)random() << 32) | random(); 511*240afd8cSMark Johnston zfs->vdevguid = ((uint64_t)random() << 32) | random(); 512*240afd8cSMark Johnston 513*240afd8cSMark Johnston zfs->mos = objset_alloc(zfs, DMU_OST_META); 514*240afd8cSMark Johnston 515*240afd8cSMark Johnston (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_DIRECTORY, &dnid); 516*240afd8cSMark Johnston assert(dnid == DMU_POOL_DIRECTORY_OBJECT); 517*240afd8cSMark Johnston 518*240afd8cSMark Johnston (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_ARRAY, &zfs->objarrid); 519*240afd8cSMark Johnston 520*240afd8cSMark Johnston dsl_init(zfs); 521*240afd8cSMark Johnston 522*240afd8cSMark Johnston pool_init_objdir(zfs); 523*240afd8cSMark Johnston } 524*240afd8cSMark Johnston 525*240afd8cSMark Johnston static void 526*240afd8cSMark Johnston pool_labels_write(zfs_opt_t *zfs) 527*240afd8cSMark Johnston { 528*240afd8cSMark Johnston uberblock_t *ub; 529*240afd8cSMark Johnston vdev_label_t *label; 530*240afd8cSMark Johnston nvlist_t *poolconfig, *vdevconfig; 531*240afd8cSMark Johnston int error; 532*240afd8cSMark Johnston 533*240afd8cSMark Johnston label = ecalloc(1, sizeof(*label)); 534*240afd8cSMark Johnston 535*240afd8cSMark Johnston /* 536*240afd8cSMark Johnston * Assemble the vdev configuration and store it in the label. 537*240afd8cSMark Johnston */ 538*240afd8cSMark Johnston poolconfig = pool_config_nvcreate(zfs); 539*240afd8cSMark Johnston vdevconfig = pool_disk_vdev_config_nvcreate(zfs); 540*240afd8cSMark Johnston nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig); 541*240afd8cSMark Johnston nvlist_destroy(vdevconfig); 542*240afd8cSMark Johnston 543*240afd8cSMark Johnston error = nvlist_export(poolconfig); 544*240afd8cSMark Johnston if (error != 0) 545*240afd8cSMark Johnston errc(1, error, "nvlist_export"); 546*240afd8cSMark Johnston nvlist_copy(poolconfig, label->vl_vdev_phys.vp_nvlist, 547*240afd8cSMark Johnston sizeof(label->vl_vdev_phys.vp_nvlist)); 548*240afd8cSMark Johnston nvlist_destroy(poolconfig); 549*240afd8cSMark Johnston 550*240afd8cSMark Johnston /* 551*240afd8cSMark Johnston * Fill out the uberblock. Just make each one the same. The embedded 552*240afd8cSMark Johnston * checksum is calculated in vdev_label_write(). 553*240afd8cSMark Johnston */ 554*240afd8cSMark Johnston for (size_t uoff = 0; uoff < sizeof(label->vl_uberblock); 555*240afd8cSMark Johnston uoff += (1 << zfs->ashift)) { 556*240afd8cSMark Johnston ub = (uberblock_t *)(&label->vl_uberblock[0] + uoff); 557*240afd8cSMark Johnston ub->ub_magic = UBERBLOCK_MAGIC; 558*240afd8cSMark Johnston ub->ub_version = SPA_VERSION; 559*240afd8cSMark Johnston ub->ub_txg = TXG; 560*240afd8cSMark Johnston ub->ub_guid_sum = zfs->poolguid + zfs->vdevguid; 561*240afd8cSMark Johnston ub->ub_timestamp = 0; 562*240afd8cSMark Johnston 563*240afd8cSMark Johnston ub->ub_software_version = SPA_VERSION; 564*240afd8cSMark Johnston ub->ub_mmp_magic = MMP_MAGIC; 565*240afd8cSMark Johnston ub->ub_mmp_delay = 0; 566*240afd8cSMark Johnston ub->ub_mmp_config = 0; 567*240afd8cSMark Johnston ub->ub_checkpoint_txg = 0; 568*240afd8cSMark Johnston objset_root_blkptr_copy(zfs->mos, &ub->ub_rootbp); 569*240afd8cSMark Johnston } 570*240afd8cSMark Johnston 571*240afd8cSMark Johnston /* 572*240afd8cSMark Johnston * Write out four copies of the label: two at the beginning of the vdev 573*240afd8cSMark Johnston * and two at the end. 574*240afd8cSMark Johnston */ 575*240afd8cSMark Johnston for (int i = 0; i < VDEV_LABELS; i++) 576*240afd8cSMark Johnston vdev_label_write(zfs, i, label); 577*240afd8cSMark Johnston 578*240afd8cSMark Johnston free(label); 579*240afd8cSMark Johnston } 580*240afd8cSMark Johnston 581*240afd8cSMark Johnston static void 582*240afd8cSMark Johnston pool_fini(zfs_opt_t *zfs) 583*240afd8cSMark Johnston { 584*240afd8cSMark Johnston zap_write(zfs, zfs->poolprops); 585*240afd8cSMark Johnston dsl_write(zfs); 586*240afd8cSMark Johnston objset_write(zfs, zfs->mos); 587*240afd8cSMark Johnston pool_labels_write(zfs); 588*240afd8cSMark Johnston } 589*240afd8cSMark Johnston 590*240afd8cSMark Johnston struct dnode_cursor * 591*240afd8cSMark Johnston dnode_cursor_init(zfs_opt_t *zfs, zfs_objset_t *os, dnode_phys_t *dnode, 592*240afd8cSMark Johnston off_t size, off_t blksz) 593*240afd8cSMark Johnston { 594*240afd8cSMark Johnston struct dnode_cursor *c; 595*240afd8cSMark Johnston uint64_t nbppindir, indlevel, ndatablks, nindblks; 596*240afd8cSMark Johnston 597*240afd8cSMark Johnston assert(dnode->dn_nblkptr == 1); 598*240afd8cSMark Johnston assert(blksz <= MAXBLOCKSIZE); 599*240afd8cSMark Johnston 600*240afd8cSMark Johnston if (blksz == 0) { 601*240afd8cSMark Johnston /* Must be between 1<<ashift and 128KB. */ 602*240afd8cSMark Johnston blksz = MIN(MAXBLOCKSIZE, MAX(1 << zfs->ashift, 603*240afd8cSMark Johnston powerof2(size) ? size : (1ul << flsll(size)))); 604*240afd8cSMark Johnston } 605*240afd8cSMark Johnston assert(powerof2(blksz)); 606*240afd8cSMark Johnston 607*240afd8cSMark Johnston /* 608*240afd8cSMark Johnston * Do we need indirect blocks? Figure out how many levels are needed 609*240afd8cSMark Johnston * (indlevel == 1 means no indirect blocks) and how much space is needed 610*240afd8cSMark Johnston * (it has to be allocated up-front to break the dependency cycle 611*240afd8cSMark Johnston * described in objset_write()). 612*240afd8cSMark Johnston */ 613*240afd8cSMark Johnston ndatablks = size == 0 ? 0 : howmany(size, blksz); 614*240afd8cSMark Johnston nindblks = 0; 615*240afd8cSMark Johnston for (indlevel = 1, nbppindir = 1; ndatablks > nbppindir; indlevel++) { 616*240afd8cSMark Johnston nbppindir *= BLKPTR_PER_INDIR; 617*240afd8cSMark Johnston nindblks += howmany(ndatablks, indlevel * nbppindir); 618*240afd8cSMark Johnston } 619*240afd8cSMark Johnston assert(indlevel < INDIR_LEVELS); 620*240afd8cSMark Johnston 621*240afd8cSMark Johnston dnode->dn_nlevels = (uint8_t)indlevel; 622*240afd8cSMark Johnston dnode->dn_maxblkid = ndatablks > 0 ? ndatablks - 1 : 0; 623*240afd8cSMark Johnston dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT; 624*240afd8cSMark Johnston 625*240afd8cSMark Johnston c = ecalloc(1, sizeof(*c)); 626*240afd8cSMark Johnston if (nindblks > 0) { 627*240afd8cSMark Johnston c->indspace = nindblks * MAXBLOCKSIZE; 628*240afd8cSMark Johnston c->indloc = objset_space_alloc(zfs, os, &c->indspace); 629*240afd8cSMark Johnston } 630*240afd8cSMark Johnston c->dnode = dnode; 631*240afd8cSMark Johnston c->dataoff = 0; 632*240afd8cSMark Johnston c->datablksz = blksz; 633*240afd8cSMark Johnston 634*240afd8cSMark Johnston return (c); 635*240afd8cSMark Johnston } 636*240afd8cSMark Johnston 637*240afd8cSMark Johnston static void 638*240afd8cSMark Johnston _dnode_cursor_flush(zfs_opt_t *zfs, struct dnode_cursor *c, int levels) 639*240afd8cSMark Johnston { 640*240afd8cSMark Johnston blkptr_t *bp, *pbp; 641*240afd8cSMark Johnston void *buf; 642*240afd8cSMark Johnston uint64_t fill; 643*240afd8cSMark Johnston off_t blkid, blksz, loc; 644*240afd8cSMark Johnston 645*240afd8cSMark Johnston assert(levels > 0); 646*240afd8cSMark Johnston assert(levels <= c->dnode->dn_nlevels - 1); 647*240afd8cSMark Johnston 648*240afd8cSMark Johnston blksz = MAXBLOCKSIZE; 649*240afd8cSMark Johnston blkid = (c->dataoff / c->datablksz) / BLKPTR_PER_INDIR; 650*240afd8cSMark Johnston for (int level = 1; level <= levels; level++) { 651*240afd8cSMark Johnston buf = c->inddir[level - 1]; 652*240afd8cSMark Johnston 653*240afd8cSMark Johnston if (level == c->dnode->dn_nlevels - 1) { 654*240afd8cSMark Johnston pbp = &c->dnode->dn_blkptr[0]; 655*240afd8cSMark Johnston } else { 656*240afd8cSMark Johnston uint64_t iblkid; 657*240afd8cSMark Johnston 658*240afd8cSMark Johnston iblkid = blkid & (BLKPTR_PER_INDIR - 1); 659*240afd8cSMark Johnston pbp = (blkptr_t *) 660*240afd8cSMark Johnston &c->inddir[level][iblkid * sizeof(blkptr_t)]; 661*240afd8cSMark Johnston } 662*240afd8cSMark Johnston 663*240afd8cSMark Johnston /* 664*240afd8cSMark Johnston * Space for indirect blocks is allocated up-front; see the 665*240afd8cSMark Johnston * comment in objset_write(). 666*240afd8cSMark Johnston */ 667*240afd8cSMark Johnston loc = c->indloc; 668*240afd8cSMark Johnston c->indloc += blksz; 669*240afd8cSMark Johnston assert(c->indspace >= blksz); 670*240afd8cSMark Johnston c->indspace -= blksz; 671*240afd8cSMark Johnston 672*240afd8cSMark Johnston bp = buf; 673*240afd8cSMark Johnston fill = 0; 674*240afd8cSMark Johnston for (size_t i = 0; i < BLKPTR_PER_INDIR; i++) 675*240afd8cSMark Johnston fill += BP_GET_FILL(&bp[i]); 676*240afd8cSMark Johnston 677*240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, c->dnode, level, fill, buf, blksz, 678*240afd8cSMark Johnston loc, pbp); 679*240afd8cSMark Johnston memset(buf, 0, MAXBLOCKSIZE); 680*240afd8cSMark Johnston 681*240afd8cSMark Johnston blkid /= BLKPTR_PER_INDIR; 682*240afd8cSMark Johnston } 683*240afd8cSMark Johnston } 684*240afd8cSMark Johnston 685*240afd8cSMark Johnston blkptr_t * 686*240afd8cSMark Johnston dnode_cursor_next(zfs_opt_t *zfs, struct dnode_cursor *c, off_t off) 687*240afd8cSMark Johnston { 688*240afd8cSMark Johnston off_t blkid, l1id; 689*240afd8cSMark Johnston int levels; 690*240afd8cSMark Johnston 691*240afd8cSMark Johnston if (c->dnode->dn_nlevels == 1) { 692*240afd8cSMark Johnston assert(off < MAXBLOCKSIZE); 693*240afd8cSMark Johnston return (&c->dnode->dn_blkptr[0]); 694*240afd8cSMark Johnston } 695*240afd8cSMark Johnston 696*240afd8cSMark Johnston assert(off % c->datablksz == 0); 697*240afd8cSMark Johnston 698*240afd8cSMark Johnston /* Do we need to flush any full indirect blocks? */ 699*240afd8cSMark Johnston if (off > 0) { 700*240afd8cSMark Johnston blkid = off / c->datablksz; 701*240afd8cSMark Johnston for (levels = 0; levels < c->dnode->dn_nlevels - 1; levels++) { 702*240afd8cSMark Johnston if (blkid % BLKPTR_PER_INDIR != 0) 703*240afd8cSMark Johnston break; 704*240afd8cSMark Johnston blkid /= BLKPTR_PER_INDIR; 705*240afd8cSMark Johnston } 706*240afd8cSMark Johnston if (levels > 0) 707*240afd8cSMark Johnston _dnode_cursor_flush(zfs, c, levels); 708*240afd8cSMark Johnston } 709*240afd8cSMark Johnston 710*240afd8cSMark Johnston c->dataoff = off; 711*240afd8cSMark Johnston l1id = (off / c->datablksz) & (BLKPTR_PER_INDIR - 1); 712*240afd8cSMark Johnston return ((blkptr_t *)&c->inddir[0][l1id * sizeof(blkptr_t)]); 713*240afd8cSMark Johnston } 714*240afd8cSMark Johnston 715*240afd8cSMark Johnston void 716*240afd8cSMark Johnston dnode_cursor_finish(zfs_opt_t *zfs, struct dnode_cursor *c) 717*240afd8cSMark Johnston { 718*240afd8cSMark Johnston int levels; 719*240afd8cSMark Johnston 720*240afd8cSMark Johnston levels = c->dnode->dn_nlevels - 1; 721*240afd8cSMark Johnston if (levels > 0) 722*240afd8cSMark Johnston _dnode_cursor_flush(zfs, c, levels); 723*240afd8cSMark Johnston assert(c->indspace == 0); 724*240afd8cSMark Johnston free(c); 725*240afd8cSMark Johnston } 726*240afd8cSMark Johnston 727*240afd8cSMark Johnston void 728*240afd8cSMark Johnston zfs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts) 729*240afd8cSMark Johnston { 730*240afd8cSMark Johnston zfs_opt_t *zfs; 731*240afd8cSMark Johnston int dirfd; 732*240afd8cSMark Johnston 733*240afd8cSMark Johnston zfs = fsopts->fs_specific; 734*240afd8cSMark Johnston 735*240afd8cSMark Johnston /* 736*240afd8cSMark Johnston * Use a fixed seed to provide reproducible pseudo-random numbers for 737*240afd8cSMark Johnston * on-disk structures when needed (e.g., GUIDs, ZAP hash salts). 738*240afd8cSMark Johnston */ 739*240afd8cSMark Johnston srandom(1729); 740*240afd8cSMark Johnston 741*240afd8cSMark Johnston zfs_check_opts(fsopts); 742*240afd8cSMark Johnston 743*240afd8cSMark Johnston if (!zfs->nowarn) { 744*240afd8cSMark Johnston fprintf(stderr, 745*240afd8cSMark Johnston "ZFS support is currently considered experimental. " 746*240afd8cSMark Johnston "Do not use it for anything critical.\n"); 747*240afd8cSMark Johnston } 748*240afd8cSMark Johnston 749*240afd8cSMark Johnston dirfd = open(dir, O_DIRECTORY | O_RDONLY); 750*240afd8cSMark Johnston if (dirfd < 0) 751*240afd8cSMark Johnston err(1, "open(%s)", dir); 752*240afd8cSMark Johnston 753*240afd8cSMark Johnston vdev_init(zfs, image); 754*240afd8cSMark Johnston pool_init(zfs); 755*240afd8cSMark Johnston fs_build(zfs, dirfd, root); 756*240afd8cSMark Johnston pool_fini(zfs); 757*240afd8cSMark Johnston vdev_fini(zfs); 758*240afd8cSMark Johnston } 759