1*240afd8cSMark Johnston /*- 2*240afd8cSMark Johnston * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3*240afd8cSMark Johnston * 4*240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation 5*240afd8cSMark Johnston * 6*240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from 7*240afd8cSMark Johnston * the FreeBSD Foundation. 8*240afd8cSMark Johnston * 9*240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without 10*240afd8cSMark Johnston * modification, are permitted provided that the following conditions are 11*240afd8cSMark Johnston * met: 12*240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright 13*240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer. 14*240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright 15*240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in 16*240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution. 17*240afd8cSMark Johnston * 18*240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19*240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20*240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21*240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22*240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23*240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24*240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25*240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26*240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27*240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28*240afd8cSMark Johnston * SUCH DAMAGE. 29*240afd8cSMark Johnston */ 30*240afd8cSMark Johnston 31*240afd8cSMark Johnston #include <assert.h> 32*240afd8cSMark Johnston #include <string.h> 33*240afd8cSMark Johnston 34*240afd8cSMark Johnston #include <util.h> 35*240afd8cSMark Johnston 36*240afd8cSMark Johnston #include "zfs.h" 37*240afd8cSMark Johnston 38*240afd8cSMark Johnston #define DNODES_PER_CHUNK (MAXBLOCKSIZE / sizeof(dnode_phys_t)) 39*240afd8cSMark Johnston 40*240afd8cSMark Johnston struct objset_dnode_chunk { 41*240afd8cSMark Johnston dnode_phys_t buf[DNODES_PER_CHUNK]; 42*240afd8cSMark Johnston unsigned int nextfree; 43*240afd8cSMark Johnston STAILQ_ENTRY(objset_dnode_chunk) next; 44*240afd8cSMark Johnston }; 45*240afd8cSMark Johnston 46*240afd8cSMark Johnston typedef struct zfs_objset { 47*240afd8cSMark Johnston /* Physical object set. */ 48*240afd8cSMark Johnston objset_phys_t *phys; 49*240afd8cSMark Johnston off_t osloc; 50*240afd8cSMark Johnston off_t osblksz; 51*240afd8cSMark Johnston blkptr_t osbp; /* set in objset_write() */ 52*240afd8cSMark Johnston 53*240afd8cSMark Johnston /* Accounting. */ 54*240afd8cSMark Johnston off_t space; /* bytes allocated to this objset */ 55*240afd8cSMark Johnston 56*240afd8cSMark Johnston /* dnode allocator. */ 57*240afd8cSMark Johnston uint64_t dnodecount; 58*240afd8cSMark Johnston STAILQ_HEAD(, objset_dnode_chunk) dnodechunks; 59*240afd8cSMark Johnston } zfs_objset_t; 60*240afd8cSMark Johnston 61*240afd8cSMark Johnston static void 62*240afd8cSMark Johnston dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype, 63*240afd8cSMark Johnston uint16_t bonuslen) 64*240afd8cSMark Johnston { 65*240afd8cSMark Johnston dnode->dn_indblkshift = MAXBLOCKSHIFT; 66*240afd8cSMark Johnston dnode->dn_type = type; 67*240afd8cSMark Johnston dnode->dn_bonustype = bonustype; 68*240afd8cSMark Johnston dnode->dn_bonuslen = bonuslen; 69*240afd8cSMark Johnston dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4; 70*240afd8cSMark Johnston dnode->dn_nlevels = 1; 71*240afd8cSMark Johnston dnode->dn_nblkptr = 1; 72*240afd8cSMark Johnston dnode->dn_flags = DNODE_FLAG_USED_BYTES; 73*240afd8cSMark Johnston } 74*240afd8cSMark Johnston 75*240afd8cSMark Johnston zfs_objset_t * 76*240afd8cSMark Johnston objset_alloc(zfs_opt_t *zfs, uint64_t type) 77*240afd8cSMark Johnston { 78*240afd8cSMark Johnston struct objset_dnode_chunk *chunk; 79*240afd8cSMark Johnston zfs_objset_t *os; 80*240afd8cSMark Johnston 81*240afd8cSMark Johnston os = ecalloc(1, sizeof(*os)); 82*240afd8cSMark Johnston os->osblksz = sizeof(objset_phys_t); 83*240afd8cSMark Johnston os->osloc = objset_space_alloc(zfs, os, &os->osblksz); 84*240afd8cSMark Johnston 85*240afd8cSMark Johnston /* 86*240afd8cSMark Johnston * Object ID zero is always reserved for the meta dnode, which is 87*240afd8cSMark Johnston * embedded in the objset itself. 88*240afd8cSMark Johnston */ 89*240afd8cSMark Johnston STAILQ_INIT(&os->dnodechunks); 90*240afd8cSMark Johnston chunk = ecalloc(1, sizeof(*chunk)); 91*240afd8cSMark Johnston chunk->nextfree = 1; 92*240afd8cSMark Johnston STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next); 93*240afd8cSMark Johnston os->dnodecount = 1; 94*240afd8cSMark Johnston 95*240afd8cSMark Johnston os->phys = ecalloc(1, os->osblksz); 96*240afd8cSMark Johnston os->phys->os_type = type; 97*240afd8cSMark Johnston 98*240afd8cSMark Johnston dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0); 99*240afd8cSMark Johnston os->phys->os_meta_dnode.dn_datablkszsec = 100*240afd8cSMark Johnston DNODE_BLOCK_SIZE >> MINBLOCKSHIFT; 101*240afd8cSMark Johnston 102*240afd8cSMark Johnston return (os); 103*240afd8cSMark Johnston } 104*240afd8cSMark Johnston 105*240afd8cSMark Johnston /* 106*240afd8cSMark Johnston * Write the dnode array and physical object set to disk. 107*240afd8cSMark Johnston */ 108*240afd8cSMark Johnston static void 109*240afd8cSMark Johnston _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c, 110*240afd8cSMark Johnston off_t loc) 111*240afd8cSMark Johnston { 112*240afd8cSMark Johnston struct objset_dnode_chunk *chunk, *tmp; 113*240afd8cSMark Johnston unsigned int total; 114*240afd8cSMark Johnston 115*240afd8cSMark Johnston /* 116*240afd8cSMark Johnston * Write out the dnode array, i.e., the meta-dnode. For some reason its 117*240afd8cSMark Johnston * data blocks must be 16KB in size no matter how large the array is. 118*240afd8cSMark Johnston */ 119*240afd8cSMark Johnston total = 0; 120*240afd8cSMark Johnston STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) { 121*240afd8cSMark Johnston unsigned int i; 122*240afd8cSMark Johnston 123*240afd8cSMark Johnston assert(chunk->nextfree <= os->dnodecount); 124*240afd8cSMark Johnston assert(chunk->nextfree <= DNODES_PER_CHUNK); 125*240afd8cSMark Johnston 126*240afd8cSMark Johnston for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) { 127*240afd8cSMark Johnston blkptr_t *bp; 128*240afd8cSMark Johnston uint64_t fill; 129*240afd8cSMark Johnston 130*240afd8cSMark Johnston if (chunk->nextfree - i < DNODES_PER_BLOCK) 131*240afd8cSMark Johnston fill = DNODES_PER_BLOCK - (chunk->nextfree - i); 132*240afd8cSMark Johnston else 133*240afd8cSMark Johnston fill = 0; 134*240afd8cSMark Johnston bp = dnode_cursor_next(zfs, c, 135*240afd8cSMark Johnston (total + i) * sizeof(dnode_phys_t)); 136*240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode, 137*240afd8cSMark Johnston 0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp); 138*240afd8cSMark Johnston loc += DNODE_BLOCK_SIZE; 139*240afd8cSMark Johnston } 140*240afd8cSMark Johnston total += i; 141*240afd8cSMark Johnston 142*240afd8cSMark Johnston free(chunk); 143*240afd8cSMark Johnston } 144*240afd8cSMark Johnston dnode_cursor_finish(zfs, c); 145*240afd8cSMark Johnston STAILQ_INIT(&os->dnodechunks); 146*240afd8cSMark Johnston 147*240afd8cSMark Johnston /* 148*240afd8cSMark Johnston * Write the object set itself. The saved block pointer will be copied 149*240afd8cSMark Johnston * into the referencing DSL dataset or the uberblocks. 150*240afd8cSMark Johnston */ 151*240afd8cSMark Johnston vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0, 1, 152*240afd8cSMark Johnston os->phys, os->osblksz, os->osloc, &os->osbp); 153*240afd8cSMark Johnston } 154*240afd8cSMark Johnston 155*240afd8cSMark Johnston void 156*240afd8cSMark Johnston objset_write(zfs_opt_t *zfs, zfs_objset_t *os) 157*240afd8cSMark Johnston { 158*240afd8cSMark Johnston struct dnode_cursor *c; 159*240afd8cSMark Johnston off_t dnodeloc, dnodesz; 160*240afd8cSMark Johnston uint64_t dnodecount; 161*240afd8cSMark Johnston 162*240afd8cSMark Johnston /* 163*240afd8cSMark Johnston * There is a chicken-and-egg problem here when writing the MOS: we 164*240afd8cSMark Johnston * cannot write space maps before we're finished allocating space from 165*240afd8cSMark Johnston * the vdev, and we can't write the MOS without having allocated space 166*240afd8cSMark Johnston * for indirect dnode blocks. Thus, rather than lazily allocating 167*240afd8cSMark Johnston * indirect blocks for the meta-dnode (which would be simpler), they are 168*240afd8cSMark Johnston * allocated up-front and before writing space maps. 169*240afd8cSMark Johnston */ 170*240afd8cSMark Johnston dnodecount = os->dnodecount; 171*240afd8cSMark Johnston if (os == zfs->mos) 172*240afd8cSMark Johnston dnodecount += zfs->mscount; 173*240afd8cSMark Johnston dnodesz = dnodecount * sizeof(dnode_phys_t); 174*240afd8cSMark Johnston c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz, 175*240afd8cSMark Johnston DNODE_BLOCK_SIZE); 176*240afd8cSMark Johnston dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE); 177*240afd8cSMark Johnston dnodeloc = objset_space_alloc(zfs, os, &dnodesz); 178*240afd8cSMark Johnston 179*240afd8cSMark Johnston if (os == zfs->mos) { 180*240afd8cSMark Johnston vdev_spacemap_write(zfs); 181*240afd8cSMark Johnston 182*240afd8cSMark Johnston /* 183*240afd8cSMark Johnston * We've finished allocating space, account for it in $MOS. 184*240afd8cSMark Johnston */ 185*240afd8cSMark Johnston dsl_dir_size_set(zfs->mosdsldir, os->space); 186*240afd8cSMark Johnston } 187*240afd8cSMark Johnston _objset_write(zfs, os, c, dnodeloc); 188*240afd8cSMark Johnston } 189*240afd8cSMark Johnston 190*240afd8cSMark Johnston dnode_phys_t * 191*240afd8cSMark Johnston objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype, 192*240afd8cSMark Johnston uint16_t bonuslen, uint64_t *idp) 193*240afd8cSMark Johnston { 194*240afd8cSMark Johnston struct objset_dnode_chunk *chunk; 195*240afd8cSMark Johnston dnode_phys_t *dnode; 196*240afd8cSMark Johnston 197*240afd8cSMark Johnston assert(bonuslen <= DN_OLD_MAX_BONUSLEN); 198*240afd8cSMark Johnston assert(!STAILQ_EMPTY(&os->dnodechunks)); 199*240afd8cSMark Johnston 200*240afd8cSMark Johnston chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next); 201*240afd8cSMark Johnston if (chunk->nextfree == DNODES_PER_CHUNK) { 202*240afd8cSMark Johnston chunk = ecalloc(1, sizeof(*chunk)); 203*240afd8cSMark Johnston STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next); 204*240afd8cSMark Johnston } 205*240afd8cSMark Johnston *idp = os->dnodecount++; 206*240afd8cSMark Johnston dnode = &chunk->buf[chunk->nextfree++]; 207*240afd8cSMark Johnston dnode_init(dnode, type, bonustype, bonuslen); 208*240afd8cSMark Johnston dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT; 209*240afd8cSMark Johnston return (dnode); 210*240afd8cSMark Johnston } 211*240afd8cSMark Johnston 212*240afd8cSMark Johnston dnode_phys_t * 213*240afd8cSMark Johnston objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp) 214*240afd8cSMark Johnston { 215*240afd8cSMark Johnston return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp)); 216*240afd8cSMark Johnston } 217*240afd8cSMark Johnston 218*240afd8cSMark Johnston /* 219*240afd8cSMark Johnston * Look up a physical dnode by ID. This is not used often so a linear search is 220*240afd8cSMark Johnston * fine. 221*240afd8cSMark Johnston */ 222*240afd8cSMark Johnston dnode_phys_t * 223*240afd8cSMark Johnston objset_dnode_lookup(zfs_objset_t *os, uint64_t id) 224*240afd8cSMark Johnston { 225*240afd8cSMark Johnston struct objset_dnode_chunk *chunk; 226*240afd8cSMark Johnston 227*240afd8cSMark Johnston assert(id > 0); 228*240afd8cSMark Johnston assert(id < os->dnodecount); 229*240afd8cSMark Johnston 230*240afd8cSMark Johnston STAILQ_FOREACH(chunk, &os->dnodechunks, next) { 231*240afd8cSMark Johnston if (id < DNODES_PER_CHUNK) 232*240afd8cSMark Johnston return (&chunk->buf[id]); 233*240afd8cSMark Johnston id -= DNODES_PER_CHUNK; 234*240afd8cSMark Johnston } 235*240afd8cSMark Johnston assert(0); 236*240afd8cSMark Johnston return (NULL); 237*240afd8cSMark Johnston } 238*240afd8cSMark Johnston 239*240afd8cSMark Johnston off_t 240*240afd8cSMark Johnston objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp) 241*240afd8cSMark Johnston { 242*240afd8cSMark Johnston off_t loc; 243*240afd8cSMark Johnston 244*240afd8cSMark Johnston loc = vdev_space_alloc(zfs, lenp); 245*240afd8cSMark Johnston os->space += *lenp; 246*240afd8cSMark Johnston return (loc); 247*240afd8cSMark Johnston } 248*240afd8cSMark Johnston 249*240afd8cSMark Johnston uint64_t 250*240afd8cSMark Johnston objset_space(const zfs_objset_t *os) 251*240afd8cSMark Johnston { 252*240afd8cSMark Johnston return (os->space); 253*240afd8cSMark Johnston } 254*240afd8cSMark Johnston 255*240afd8cSMark Johnston void 256*240afd8cSMark Johnston objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp) 257*240afd8cSMark Johnston { 258*240afd8cSMark Johnston memcpy(bp, &os->osbp, sizeof(blkptr_t)); 259*240afd8cSMark Johnston } 260