1240afd8cSMark Johnston /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 3240afd8cSMark Johnston * 4240afd8cSMark Johnston * Copyright (c) 2022 The FreeBSD Foundation 5240afd8cSMark Johnston * 6240afd8cSMark Johnston * This software was developed by Mark Johnston under sponsorship from 7240afd8cSMark Johnston * the FreeBSD Foundation. 8240afd8cSMark Johnston * 9240afd8cSMark Johnston * Redistribution and use in source and binary forms, with or without 10240afd8cSMark Johnston * modification, are permitted provided that the following conditions are 11240afd8cSMark Johnston * met: 12240afd8cSMark Johnston * 1. Redistributions of source code must retain the above copyright 13240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer. 14240afd8cSMark Johnston * 2. Redistributions in binary form must reproduce the above copyright 15240afd8cSMark Johnston * notice, this list of conditions and the following disclaimer in 16240afd8cSMark Johnston * the documentation and/or other materials provided with the distribution. 17240afd8cSMark Johnston * 18240afd8cSMark Johnston * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19240afd8cSMark Johnston * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20240afd8cSMark Johnston * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21240afd8cSMark Johnston * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22240afd8cSMark Johnston * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23240afd8cSMark Johnston * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24240afd8cSMark Johnston * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25240afd8cSMark Johnston * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26240afd8cSMark Johnston * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27240afd8cSMark Johnston * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28240afd8cSMark Johnston * SUCH DAMAGE. 29240afd8cSMark Johnston */ 30240afd8cSMark Johnston 31240afd8cSMark Johnston #include <assert.h> 32c6890399SJessica Clarke #include <stdlib.h> 33240afd8cSMark Johnston #include <string.h> 34240afd8cSMark Johnston 35240afd8cSMark Johnston #include <util.h> 36240afd8cSMark Johnston 37240afd8cSMark Johnston #include "zfs.h" 38240afd8cSMark Johnston 39240afd8cSMark Johnston #define DNODES_PER_CHUNK (MAXBLOCKSIZE / sizeof(dnode_phys_t)) 40240afd8cSMark Johnston 41240afd8cSMark Johnston struct objset_dnode_chunk { 42240afd8cSMark Johnston dnode_phys_t buf[DNODES_PER_CHUNK]; 43240afd8cSMark Johnston unsigned int nextfree; 44240afd8cSMark Johnston STAILQ_ENTRY(objset_dnode_chunk) next; 45240afd8cSMark Johnston }; 46240afd8cSMark Johnston 47240afd8cSMark Johnston typedef struct zfs_objset { 48240afd8cSMark Johnston /* Physical object set. */ 49240afd8cSMark Johnston objset_phys_t *phys; 50240afd8cSMark Johnston off_t osloc; 51240afd8cSMark Johnston off_t osblksz; 52240afd8cSMark Johnston blkptr_t osbp; /* set in objset_write() */ 53240afd8cSMark Johnston 54240afd8cSMark Johnston /* Accounting. */ 55240afd8cSMark Johnston off_t space; /* bytes allocated to this objset */ 56240afd8cSMark Johnston 57240afd8cSMark Johnston /* dnode allocator. */ 58240afd8cSMark Johnston uint64_t dnodecount; 59240afd8cSMark Johnston STAILQ_HEAD(, objset_dnode_chunk) dnodechunks; 60240afd8cSMark Johnston } zfs_objset_t; 61240afd8cSMark Johnston 62240afd8cSMark Johnston static void 63240afd8cSMark Johnston dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype, 64240afd8cSMark Johnston uint16_t bonuslen) 65240afd8cSMark Johnston { 66240afd8cSMark Johnston dnode->dn_indblkshift = MAXBLOCKSHIFT; 67240afd8cSMark Johnston dnode->dn_type = type; 68240afd8cSMark Johnston dnode->dn_bonustype = bonustype; 69240afd8cSMark Johnston dnode->dn_bonuslen = bonuslen; 70240afd8cSMark Johnston dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4; 71240afd8cSMark Johnston dnode->dn_nlevels = 1; 72240afd8cSMark Johnston dnode->dn_nblkptr = 1; 73240afd8cSMark Johnston dnode->dn_flags = DNODE_FLAG_USED_BYTES; 74240afd8cSMark Johnston } 75240afd8cSMark Johnston 76240afd8cSMark Johnston zfs_objset_t * 77240afd8cSMark Johnston objset_alloc(zfs_opt_t *zfs, uint64_t type) 78240afd8cSMark Johnston { 79240afd8cSMark Johnston struct objset_dnode_chunk *chunk; 80240afd8cSMark Johnston zfs_objset_t *os; 81240afd8cSMark Johnston 82240afd8cSMark Johnston os = ecalloc(1, sizeof(*os)); 83240afd8cSMark Johnston os->osblksz = sizeof(objset_phys_t); 84240afd8cSMark Johnston os->osloc = objset_space_alloc(zfs, os, &os->osblksz); 85240afd8cSMark Johnston 86240afd8cSMark Johnston /* 87240afd8cSMark Johnston * Object ID zero is always reserved for the meta dnode, which is 88240afd8cSMark Johnston * embedded in the objset itself. 89240afd8cSMark Johnston */ 90240afd8cSMark Johnston STAILQ_INIT(&os->dnodechunks); 91240afd8cSMark Johnston chunk = ecalloc(1, sizeof(*chunk)); 92240afd8cSMark Johnston chunk->nextfree = 1; 93240afd8cSMark Johnston STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next); 94240afd8cSMark Johnston os->dnodecount = 1; 95240afd8cSMark Johnston 96240afd8cSMark Johnston os->phys = ecalloc(1, os->osblksz); 97240afd8cSMark Johnston os->phys->os_type = type; 98240afd8cSMark Johnston 99240afd8cSMark Johnston dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0); 100240afd8cSMark Johnston os->phys->os_meta_dnode.dn_datablkszsec = 101240afd8cSMark Johnston DNODE_BLOCK_SIZE >> MINBLOCKSHIFT; 102240afd8cSMark Johnston 103240afd8cSMark Johnston return (os); 104240afd8cSMark Johnston } 105240afd8cSMark Johnston 106240afd8cSMark Johnston /* 107240afd8cSMark Johnston * Write the dnode array and physical object set to disk. 108240afd8cSMark Johnston */ 109240afd8cSMark Johnston static void 110240afd8cSMark Johnston _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c, 111240afd8cSMark Johnston off_t loc) 112240afd8cSMark Johnston { 113240afd8cSMark Johnston struct objset_dnode_chunk *chunk, *tmp; 114240afd8cSMark Johnston unsigned int total; 115240afd8cSMark Johnston 116240afd8cSMark Johnston /* 117240afd8cSMark Johnston * Write out the dnode array, i.e., the meta-dnode. For some reason its 118240afd8cSMark Johnston * data blocks must be 16KB in size no matter how large the array is. 119240afd8cSMark Johnston */ 120240afd8cSMark Johnston total = 0; 121240afd8cSMark Johnston STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) { 122240afd8cSMark Johnston unsigned int i; 123240afd8cSMark Johnston 124*46402fd2SMark Johnston assert(chunk->nextfree > 0); 125240afd8cSMark Johnston assert(chunk->nextfree <= os->dnodecount); 126240afd8cSMark Johnston assert(chunk->nextfree <= DNODES_PER_CHUNK); 127240afd8cSMark Johnston 128240afd8cSMark Johnston for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) { 129240afd8cSMark Johnston blkptr_t *bp; 130240afd8cSMark Johnston uint64_t fill; 131240afd8cSMark Johnston 132240afd8cSMark Johnston if (chunk->nextfree - i < DNODES_PER_BLOCK) 133240afd8cSMark Johnston fill = DNODES_PER_BLOCK - (chunk->nextfree - i); 134240afd8cSMark Johnston else 135240afd8cSMark Johnston fill = 0; 136240afd8cSMark Johnston bp = dnode_cursor_next(zfs, c, 137240afd8cSMark Johnston (total + i) * sizeof(dnode_phys_t)); 138240afd8cSMark Johnston vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode, 139240afd8cSMark Johnston 0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp); 140240afd8cSMark Johnston loc += DNODE_BLOCK_SIZE; 141240afd8cSMark Johnston } 142240afd8cSMark Johnston total += i; 143240afd8cSMark Johnston 144240afd8cSMark Johnston free(chunk); 145240afd8cSMark Johnston } 146240afd8cSMark Johnston dnode_cursor_finish(zfs, c); 147240afd8cSMark Johnston STAILQ_INIT(&os->dnodechunks); 148240afd8cSMark Johnston 149240afd8cSMark Johnston /* 150240afd8cSMark Johnston * Write the object set itself. The saved block pointer will be copied 151240afd8cSMark Johnston * into the referencing DSL dataset or the uberblocks. 152240afd8cSMark Johnston */ 153*46402fd2SMark Johnston vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0, 154*46402fd2SMark Johnston os->dnodecount - 1, os->phys, os->osblksz, os->osloc, &os->osbp); 155240afd8cSMark Johnston } 156240afd8cSMark Johnston 157240afd8cSMark Johnston void 158240afd8cSMark Johnston objset_write(zfs_opt_t *zfs, zfs_objset_t *os) 159240afd8cSMark Johnston { 160240afd8cSMark Johnston struct dnode_cursor *c; 161240afd8cSMark Johnston off_t dnodeloc, dnodesz; 162240afd8cSMark Johnston uint64_t dnodecount; 163240afd8cSMark Johnston 164240afd8cSMark Johnston /* 165240afd8cSMark Johnston * There is a chicken-and-egg problem here when writing the MOS: we 166240afd8cSMark Johnston * cannot write space maps before we're finished allocating space from 167240afd8cSMark Johnston * the vdev, and we can't write the MOS without having allocated space 168240afd8cSMark Johnston * for indirect dnode blocks. Thus, rather than lazily allocating 169240afd8cSMark Johnston * indirect blocks for the meta-dnode (which would be simpler), they are 170240afd8cSMark Johnston * allocated up-front and before writing space maps. 171240afd8cSMark Johnston */ 172240afd8cSMark Johnston dnodecount = os->dnodecount; 173240afd8cSMark Johnston if (os == zfs->mos) 174240afd8cSMark Johnston dnodecount += zfs->mscount; 175240afd8cSMark Johnston dnodesz = dnodecount * sizeof(dnode_phys_t); 176240afd8cSMark Johnston c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz, 177240afd8cSMark Johnston DNODE_BLOCK_SIZE); 178240afd8cSMark Johnston dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE); 179240afd8cSMark Johnston dnodeloc = objset_space_alloc(zfs, os, &dnodesz); 180240afd8cSMark Johnston 181240afd8cSMark Johnston if (os == zfs->mos) { 182240afd8cSMark Johnston vdev_spacemap_write(zfs); 183240afd8cSMark Johnston 184240afd8cSMark Johnston /* 1854f816f5bSMark Johnston * We've finished allocating space, account for it in $MOS and 1864f816f5bSMark Johnston * in the parent directory. 187240afd8cSMark Johnston */ 1884f816f5bSMark Johnston dsl_dir_size_add(zfs->mosdsldir, os->space); 1894f816f5bSMark Johnston dsl_dir_size_add(zfs->rootdsldir, os->space); 190240afd8cSMark Johnston } 191240afd8cSMark Johnston _objset_write(zfs, os, c, dnodeloc); 192240afd8cSMark Johnston } 193240afd8cSMark Johnston 194240afd8cSMark Johnston dnode_phys_t * 195240afd8cSMark Johnston objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype, 196240afd8cSMark Johnston uint16_t bonuslen, uint64_t *idp) 197240afd8cSMark Johnston { 198240afd8cSMark Johnston struct objset_dnode_chunk *chunk; 199240afd8cSMark Johnston dnode_phys_t *dnode; 200240afd8cSMark Johnston 201240afd8cSMark Johnston assert(bonuslen <= DN_OLD_MAX_BONUSLEN); 202240afd8cSMark Johnston assert(!STAILQ_EMPTY(&os->dnodechunks)); 203240afd8cSMark Johnston 204240afd8cSMark Johnston chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next); 205240afd8cSMark Johnston if (chunk->nextfree == DNODES_PER_CHUNK) { 206240afd8cSMark Johnston chunk = ecalloc(1, sizeof(*chunk)); 207240afd8cSMark Johnston STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next); 208240afd8cSMark Johnston } 209240afd8cSMark Johnston *idp = os->dnodecount++; 210240afd8cSMark Johnston dnode = &chunk->buf[chunk->nextfree++]; 211240afd8cSMark Johnston dnode_init(dnode, type, bonustype, bonuslen); 212240afd8cSMark Johnston dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT; 213240afd8cSMark Johnston return (dnode); 214240afd8cSMark Johnston } 215240afd8cSMark Johnston 216240afd8cSMark Johnston dnode_phys_t * 217240afd8cSMark Johnston objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp) 218240afd8cSMark Johnston { 219240afd8cSMark Johnston return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp)); 220240afd8cSMark Johnston } 221240afd8cSMark Johnston 222240afd8cSMark Johnston /* 223240afd8cSMark Johnston * Look up a physical dnode by ID. This is not used often so a linear search is 224240afd8cSMark Johnston * fine. 225240afd8cSMark Johnston */ 226240afd8cSMark Johnston dnode_phys_t * 227240afd8cSMark Johnston objset_dnode_lookup(zfs_objset_t *os, uint64_t id) 228240afd8cSMark Johnston { 229240afd8cSMark Johnston struct objset_dnode_chunk *chunk; 230240afd8cSMark Johnston 231240afd8cSMark Johnston assert(id > 0); 232240afd8cSMark Johnston assert(id < os->dnodecount); 233240afd8cSMark Johnston 234240afd8cSMark Johnston STAILQ_FOREACH(chunk, &os->dnodechunks, next) { 235240afd8cSMark Johnston if (id < DNODES_PER_CHUNK) 236240afd8cSMark Johnston return (&chunk->buf[id]); 237240afd8cSMark Johnston id -= DNODES_PER_CHUNK; 238240afd8cSMark Johnston } 239240afd8cSMark Johnston assert(0); 240240afd8cSMark Johnston return (NULL); 241240afd8cSMark Johnston } 242240afd8cSMark Johnston 243240afd8cSMark Johnston off_t 244240afd8cSMark Johnston objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp) 245240afd8cSMark Johnston { 246240afd8cSMark Johnston off_t loc; 247240afd8cSMark Johnston 248240afd8cSMark Johnston loc = vdev_space_alloc(zfs, lenp); 249240afd8cSMark Johnston os->space += *lenp; 250240afd8cSMark Johnston return (loc); 251240afd8cSMark Johnston } 252240afd8cSMark Johnston 253240afd8cSMark Johnston uint64_t 254240afd8cSMark Johnston objset_space(const zfs_objset_t *os) 255240afd8cSMark Johnston { 256240afd8cSMark Johnston return (os->space); 257240afd8cSMark Johnston } 258240afd8cSMark Johnston 259240afd8cSMark Johnston void 260240afd8cSMark Johnston objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp) 261240afd8cSMark Johnston { 262240afd8cSMark Johnston memcpy(bp, &os->osbp, sizeof(blkptr_t)); 263240afd8cSMark Johnston } 264