1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 The FreeBSD Foundation 5 * 6 * This software was developed by Mark Johnston under sponsorship from 7 * the FreeBSD Foundation. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions are 11 * met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <assert.h> 32 #include <stdlib.h> 33 #include <string.h> 34 35 #include <util.h> 36 37 #include "zfs.h" 38 39 #define DNODES_PER_CHUNK (MAXBLOCKSIZE / sizeof(dnode_phys_t)) 40 41 struct objset_dnode_chunk { 42 dnode_phys_t buf[DNODES_PER_CHUNK]; 43 unsigned int nextfree; 44 STAILQ_ENTRY(objset_dnode_chunk) next; 45 }; 46 47 typedef struct zfs_objset { 48 /* Physical object set. */ 49 objset_phys_t *phys; 50 off_t osloc; 51 off_t osblksz; 52 blkptr_t osbp; /* set in objset_write() */ 53 54 /* Accounting. */ 55 off_t space; /* bytes allocated to this objset */ 56 57 /* dnode allocator. */ 58 uint64_t dnodecount; 59 STAILQ_HEAD(, objset_dnode_chunk) dnodechunks; 60 } zfs_objset_t; 61 62 static void 63 dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype, 64 uint16_t bonuslen) 65 { 66 dnode->dn_indblkshift = MAXBLOCKSHIFT; 67 dnode->dn_type = type; 68 dnode->dn_bonustype = bonustype; 69 dnode->dn_bonuslen = bonuslen; 70 dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4; 71 dnode->dn_nlevels = 1; 72 dnode->dn_nblkptr = 1; 73 dnode->dn_flags = DNODE_FLAG_USED_BYTES; 74 } 75 76 zfs_objset_t * 77 objset_alloc(zfs_opt_t *zfs, uint64_t type) 78 { 79 struct objset_dnode_chunk *chunk; 80 zfs_objset_t *os; 81 82 os = ecalloc(1, sizeof(*os)); 83 os->osblksz = sizeof(objset_phys_t); 84 os->osloc = objset_space_alloc(zfs, os, &os->osblksz); 85 86 /* 87 * Object ID zero is always reserved for the meta dnode, which is 88 * embedded in the objset itself. 89 */ 90 STAILQ_INIT(&os->dnodechunks); 91 chunk = ecalloc(1, sizeof(*chunk)); 92 chunk->nextfree = 1; 93 STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next); 94 os->dnodecount = 1; 95 96 os->phys = ecalloc(1, os->osblksz); 97 os->phys->os_type = type; 98 99 dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0); 100 os->phys->os_meta_dnode.dn_datablkszsec = 101 DNODE_BLOCK_SIZE >> MINBLOCKSHIFT; 102 103 return (os); 104 } 105 106 /* 107 * Write the dnode array and physical object set to disk. 108 */ 109 static void 110 _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c, 111 off_t loc) 112 { 113 struct objset_dnode_chunk *chunk, *tmp; 114 unsigned int total; 115 116 /* 117 * Write out the dnode array, i.e., the meta-dnode. For some reason its 118 * data blocks must be 16KB in size no matter how large the array is. 119 */ 120 total = 0; 121 STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) { 122 unsigned int i; 123 124 assert(chunk->nextfree > 0); 125 assert(chunk->nextfree <= os->dnodecount); 126 assert(chunk->nextfree <= DNODES_PER_CHUNK); 127 128 for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) { 129 blkptr_t *bp; 130 uint64_t fill; 131 132 if (chunk->nextfree - i < DNODES_PER_BLOCK) 133 fill = DNODES_PER_BLOCK - (chunk->nextfree - i); 134 else 135 fill = 0; 136 bp = dnode_cursor_next(zfs, c, 137 (total + i) * sizeof(dnode_phys_t)); 138 vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode, 139 0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp); 140 loc += DNODE_BLOCK_SIZE; 141 } 142 total += i; 143 144 free(chunk); 145 } 146 dnode_cursor_finish(zfs, c); 147 STAILQ_INIT(&os->dnodechunks); 148 149 /* 150 * Write the object set itself. The saved block pointer will be copied 151 * into the referencing DSL dataset or the uberblocks. 152 */ 153 vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0, 154 os->dnodecount - 1, os->phys, os->osblksz, os->osloc, &os->osbp); 155 } 156 157 void 158 objset_write(zfs_opt_t *zfs, zfs_objset_t *os) 159 { 160 struct dnode_cursor *c; 161 off_t dnodeloc, dnodesz; 162 uint64_t dnodecount; 163 164 /* 165 * There is a chicken-and-egg problem here when writing the MOS: we 166 * cannot write space maps before we're finished allocating space from 167 * the vdev, and we can't write the MOS without having allocated space 168 * for indirect dnode blocks. Thus, rather than lazily allocating 169 * indirect blocks for the meta-dnode (which would be simpler), they are 170 * allocated up-front and before writing space maps. 171 */ 172 dnodecount = os->dnodecount; 173 if (os == zfs->mos) 174 dnodecount += zfs->mscount; 175 dnodesz = dnodecount * sizeof(dnode_phys_t); 176 c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz, 177 DNODE_BLOCK_SIZE); 178 dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE); 179 dnodeloc = objset_space_alloc(zfs, os, &dnodesz); 180 181 if (os == zfs->mos) { 182 vdev_spacemap_write(zfs); 183 184 /* 185 * We've finished allocating space, account for it in $MOS and 186 * in the parent directory. 187 */ 188 dsl_dir_size_add(zfs->mosdsldir, os->space); 189 dsl_dir_size_add(zfs->rootdsldir, os->space); 190 } 191 _objset_write(zfs, os, c, dnodeloc); 192 } 193 194 dnode_phys_t * 195 objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype, 196 uint16_t bonuslen, uint64_t *idp) 197 { 198 struct objset_dnode_chunk *chunk; 199 dnode_phys_t *dnode; 200 201 assert(bonuslen <= DN_OLD_MAX_BONUSLEN); 202 assert(!STAILQ_EMPTY(&os->dnodechunks)); 203 204 chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next); 205 if (chunk->nextfree == DNODES_PER_CHUNK) { 206 chunk = ecalloc(1, sizeof(*chunk)); 207 STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next); 208 } 209 *idp = os->dnodecount++; 210 dnode = &chunk->buf[chunk->nextfree++]; 211 dnode_init(dnode, type, bonustype, bonuslen); 212 dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT; 213 return (dnode); 214 } 215 216 dnode_phys_t * 217 objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp) 218 { 219 return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp)); 220 } 221 222 /* 223 * Look up a physical dnode by ID. This is not used often so a linear search is 224 * fine. 225 */ 226 dnode_phys_t * 227 objset_dnode_lookup(zfs_objset_t *os, uint64_t id) 228 { 229 struct objset_dnode_chunk *chunk; 230 231 assert(id > 0); 232 assert(id < os->dnodecount); 233 234 STAILQ_FOREACH(chunk, &os->dnodechunks, next) { 235 if (id < DNODES_PER_CHUNK) 236 return (&chunk->buf[id]); 237 id -= DNODES_PER_CHUNK; 238 } 239 assert(0); 240 return (NULL); 241 } 242 243 off_t 244 objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp) 245 { 246 off_t loc; 247 248 loc = vdev_space_alloc(zfs, lenp); 249 os->space += *lenp; 250 return (loc); 251 } 252 253 uint64_t 254 objset_space(const zfs_objset_t *os) 255 { 256 return (os->space); 257 } 258 259 void 260 objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp) 261 { 262 memcpy(bp, &os->osbp, sizeof(blkptr_t)); 263 } 264