1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2022 The FreeBSD Foundation
5 *
6 * This software was developed by Mark Johnston under sponsorship from
7 * the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are
11 * met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/param.h>
32 #include <assert.h>
33 #include <stdlib.h>
34 #include <string.h>
35
36 #include <util.h>
37
38 #include "zfs.h"
39
40 #define DNODES_PER_CHUNK (MAXBLOCKSIZE / sizeof(dnode_phys_t))
41
42 struct objset_dnode_chunk {
43 dnode_phys_t buf[DNODES_PER_CHUNK];
44 unsigned int nextfree;
45 STAILQ_ENTRY(objset_dnode_chunk) next;
46 };
47
48 typedef struct zfs_objset {
49 /* Physical object set. */
50 objset_phys_t *phys;
51 off_t osloc;
52 off_t osblksz;
53 blkptr_t osbp; /* set in objset_write() */
54
55 /* Accounting. */
56 off_t space; /* bytes allocated to this objset */
57
58 /* dnode allocator. */
59 uint64_t dnodecount;
60 STAILQ_HEAD(, objset_dnode_chunk) dnodechunks;
61 } zfs_objset_t;
62
63 static void
dnode_init(dnode_phys_t * dnode,uint8_t type,uint8_t bonustype,uint16_t bonuslen)64 dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype,
65 uint16_t bonuslen)
66 {
67 dnode->dn_indblkshift = MAXBLOCKSHIFT;
68 dnode->dn_type = type;
69 dnode->dn_bonustype = bonustype;
70 dnode->dn_bonuslen = bonuslen;
71 dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4;
72 dnode->dn_nlevels = 1;
73 dnode->dn_nblkptr = 1;
74 dnode->dn_flags = DNODE_FLAG_USED_BYTES;
75 }
76
77 zfs_objset_t *
objset_alloc(zfs_opt_t * zfs,uint64_t type)78 objset_alloc(zfs_opt_t *zfs, uint64_t type)
79 {
80 struct objset_dnode_chunk *chunk;
81 zfs_objset_t *os;
82
83 os = ecalloc(1, sizeof(*os));
84 os->osblksz = sizeof(objset_phys_t);
85 os->osloc = objset_space_alloc(zfs, os, &os->osblksz);
86
87 /*
88 * Object ID zero is always reserved for the meta dnode, which is
89 * embedded in the objset itself.
90 */
91 STAILQ_INIT(&os->dnodechunks);
92 chunk = ecalloc(1, sizeof(*chunk));
93 chunk->nextfree = 1;
94 STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next);
95 os->dnodecount = 1;
96
97 os->phys = ecalloc(1, os->osblksz);
98 os->phys->os_type = type;
99
100 dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0);
101 os->phys->os_meta_dnode.dn_datablkszsec =
102 DNODE_BLOCK_SIZE >> MINBLOCKSHIFT;
103
104 return (os);
105 }
106
107 /*
108 * Write the dnode array and physical object set to disk.
109 */
110 static void
_objset_write(zfs_opt_t * zfs,zfs_objset_t * os,struct dnode_cursor * c,off_t loc)111 _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c,
112 off_t loc)
113 {
114 struct objset_dnode_chunk *chunk, *tmp;
115 unsigned int total;
116
117 /*
118 * Write out the dnode array, i.e., the meta-dnode. For some reason its
119 * data blocks must be 16KB in size no matter how large the array is.
120 */
121 total = 0;
122 STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) {
123 unsigned int i;
124
125 assert(chunk->nextfree > 0);
126 assert(chunk->nextfree <= os->dnodecount);
127 assert(chunk->nextfree <= DNODES_PER_CHUNK);
128
129 for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) {
130 blkptr_t *bp;
131 uint64_t fill;
132
133 if (chunk->nextfree - i < DNODES_PER_BLOCK)
134 fill = DNODES_PER_BLOCK - (chunk->nextfree - i);
135 else
136 fill = 0;
137 bp = dnode_cursor_next(zfs, c,
138 (total + i) * sizeof(dnode_phys_t));
139 vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode,
140 0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp);
141 loc += DNODE_BLOCK_SIZE;
142 }
143 total += i;
144
145 free(chunk);
146 }
147 dnode_cursor_finish(zfs, c);
148 STAILQ_INIT(&os->dnodechunks);
149
150 /*
151 * Write the object set itself. The saved block pointer will be copied
152 * into the referencing DSL dataset or the uberblocks.
153 */
154 vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0,
155 os->dnodecount - 1, os->phys, os->osblksz, os->osloc, &os->osbp);
156 }
157
158 void
objset_write(zfs_opt_t * zfs,zfs_objset_t * os)159 objset_write(zfs_opt_t *zfs, zfs_objset_t *os)
160 {
161 struct dnode_cursor *c;
162 off_t dnodeloc, dnodesz;
163 uint64_t dnodecount;
164
165 /*
166 * There is a chicken-and-egg problem here when writing the MOS: we
167 * cannot write space maps before we're finished allocating space from
168 * the vdev, and we can't write the MOS without having allocated space
169 * for indirect dnode blocks. Thus, rather than lazily allocating
170 * indirect blocks for the meta-dnode (which would be simpler), they are
171 * allocated up-front and before writing space maps.
172 */
173 dnodecount = os->dnodecount;
174 if (os == zfs->mos)
175 dnodecount += zfs->mscount;
176 dnodesz = dnodecount * sizeof(dnode_phys_t);
177 c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz,
178 DNODE_BLOCK_SIZE);
179 dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE);
180 dnodeloc = objset_space_alloc(zfs, os, &dnodesz);
181
182 if (os == zfs->mos) {
183 vdev_spacemap_write(zfs);
184
185 /*
186 * We've finished allocating space, account for it in $MOS and
187 * in the parent directory.
188 */
189 dsl_dir_root_finalize(zfs, os->space);
190 }
191 _objset_write(zfs, os, c, dnodeloc);
192 }
193
194 dnode_phys_t *
objset_dnode_bonus_alloc(zfs_objset_t * os,uint8_t type,uint8_t bonustype,uint16_t bonuslen,uint64_t * idp)195 objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype,
196 uint16_t bonuslen, uint64_t *idp)
197 {
198 struct objset_dnode_chunk *chunk;
199 dnode_phys_t *dnode;
200
201 assert(bonuslen <= DN_OLD_MAX_BONUSLEN);
202 assert(!STAILQ_EMPTY(&os->dnodechunks));
203
204 chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next);
205 if (chunk->nextfree == DNODES_PER_CHUNK) {
206 chunk = ecalloc(1, sizeof(*chunk));
207 STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next);
208 }
209 *idp = os->dnodecount++;
210 dnode = &chunk->buf[chunk->nextfree++];
211 dnode_init(dnode, type, bonustype, bonuslen);
212 dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT;
213 return (dnode);
214 }
215
216 dnode_phys_t *
objset_dnode_alloc(zfs_objset_t * os,uint8_t type,uint64_t * idp)217 objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp)
218 {
219 return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp));
220 }
221
222 /*
223 * Look up a physical dnode by ID. This is not used often so a linear search is
224 * fine.
225 */
226 dnode_phys_t *
objset_dnode_lookup(zfs_objset_t * os,uint64_t id)227 objset_dnode_lookup(zfs_objset_t *os, uint64_t id)
228 {
229 struct objset_dnode_chunk *chunk;
230
231 assert(id > 0);
232 assert(id < os->dnodecount);
233
234 STAILQ_FOREACH(chunk, &os->dnodechunks, next) {
235 if (id < DNODES_PER_CHUNK)
236 return (&chunk->buf[id]);
237 id -= DNODES_PER_CHUNK;
238 }
239 assert(0);
240 return (NULL);
241 }
242
243 off_t
objset_space_alloc(zfs_opt_t * zfs,zfs_objset_t * os,off_t * lenp)244 objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp)
245 {
246 off_t loc;
247
248 loc = vdev_space_alloc(zfs, lenp);
249 os->space += *lenp;
250 return (loc);
251 }
252
253 uint64_t
objset_space(const zfs_objset_t * os)254 objset_space(const zfs_objset_t *os)
255 {
256 return (os->space);
257 }
258
259 void
objset_root_blkptr_copy(const zfs_objset_t * os,blkptr_t * bp)260 objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp)
261 {
262 memcpy(bp, &os->osbp, sizeof(blkptr_t));
263 }
264