xref: /freebsd/usr.sbin/makefs/zfs/objset.c (revision 035dd78d30ba28a3dc15c05ec85ad10127165677)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 The FreeBSD Foundation
5  *
6  * This software was developed by Mark Johnston under sponsorship from
7  * the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are
11  * met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <assert.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #include <util.h>
36 
37 #include "zfs.h"
38 
39 #define	DNODES_PER_CHUNK	(MAXBLOCKSIZE / sizeof(dnode_phys_t))
40 
41 struct objset_dnode_chunk {
42 	dnode_phys_t	buf[DNODES_PER_CHUNK];
43 	unsigned int	nextfree;
44 	STAILQ_ENTRY(objset_dnode_chunk) next;
45 };
46 
47 typedef struct zfs_objset {
48 	/* Physical object set. */
49 	objset_phys_t	*phys;
50 	off_t		osloc;
51 	off_t		osblksz;
52 	blkptr_t	osbp;		/* set in objset_write() */
53 
54 	/* Accounting. */
55 	off_t		space;		/* bytes allocated to this objset */
56 
57 	/* dnode allocator. */
58 	uint64_t	dnodecount;
59 	STAILQ_HEAD(, objset_dnode_chunk) dnodechunks;
60 } zfs_objset_t;
61 
62 static void
63 dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype,
64     uint16_t bonuslen)
65 {
66 	dnode->dn_indblkshift = MAXBLOCKSHIFT;
67 	dnode->dn_type = type;
68 	dnode->dn_bonustype = bonustype;
69 	dnode->dn_bonuslen = bonuslen;
70 	dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4;
71 	dnode->dn_nlevels = 1;
72 	dnode->dn_nblkptr = 1;
73 	dnode->dn_flags = DNODE_FLAG_USED_BYTES;
74 }
75 
76 zfs_objset_t *
77 objset_alloc(zfs_opt_t *zfs, uint64_t type)
78 {
79 	struct objset_dnode_chunk *chunk;
80 	zfs_objset_t *os;
81 
82 	os = ecalloc(1, sizeof(*os));
83 	os->osblksz = sizeof(objset_phys_t);
84 	os->osloc = objset_space_alloc(zfs, os, &os->osblksz);
85 
86 	/*
87 	 * Object ID zero is always reserved for the meta dnode, which is
88 	 * embedded in the objset itself.
89 	 */
90 	STAILQ_INIT(&os->dnodechunks);
91 	chunk = ecalloc(1, sizeof(*chunk));
92 	chunk->nextfree = 1;
93 	STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next);
94 	os->dnodecount = 1;
95 
96 	os->phys = ecalloc(1, os->osblksz);
97 	os->phys->os_type = type;
98 
99 	dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0);
100 	os->phys->os_meta_dnode.dn_datablkszsec =
101 	    DNODE_BLOCK_SIZE >> MINBLOCKSHIFT;
102 
103 	return (os);
104 }
105 
106 /*
107  * Write the dnode array and physical object set to disk.
108  */
109 static void
110 _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c,
111     off_t loc)
112 {
113 	struct objset_dnode_chunk *chunk, *tmp;
114 	unsigned int total;
115 
116 	/*
117 	 * Write out the dnode array, i.e., the meta-dnode.  For some reason its
118 	 * data blocks must be 16KB in size no matter how large the array is.
119 	 */
120 	total = 0;
121 	STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) {
122 		unsigned int i;
123 
124 		assert(chunk->nextfree <= os->dnodecount);
125 		assert(chunk->nextfree <= DNODES_PER_CHUNK);
126 
127 		for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) {
128 			blkptr_t *bp;
129 			uint64_t fill;
130 
131 			if (chunk->nextfree - i < DNODES_PER_BLOCK)
132 				fill = DNODES_PER_BLOCK - (chunk->nextfree - i);
133 			else
134 				fill = 0;
135 			bp = dnode_cursor_next(zfs, c,
136 			    (total + i) * sizeof(dnode_phys_t));
137 			vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode,
138 			    0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp);
139 			loc += DNODE_BLOCK_SIZE;
140 		}
141 		total += i;
142 
143 		free(chunk);
144 	}
145 	dnode_cursor_finish(zfs, c);
146 	STAILQ_INIT(&os->dnodechunks);
147 
148 	/*
149 	 * Write the object set itself.  The saved block pointer will be copied
150 	 * into the referencing DSL dataset or the uberblocks.
151 	 */
152 	vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0, 1,
153 	    os->phys, os->osblksz, os->osloc, &os->osbp);
154 }
155 
156 void
157 objset_write(zfs_opt_t *zfs, zfs_objset_t *os)
158 {
159 	struct dnode_cursor *c;
160 	off_t dnodeloc, dnodesz;
161 	uint64_t dnodecount;
162 
163 	/*
164 	 * There is a chicken-and-egg problem here when writing the MOS: we
165 	 * cannot write space maps before we're finished allocating space from
166 	 * the vdev, and we can't write the MOS without having allocated space
167 	 * for indirect dnode blocks.  Thus, rather than lazily allocating
168 	 * indirect blocks for the meta-dnode (which would be simpler), they are
169 	 * allocated up-front and before writing space maps.
170 	 */
171 	dnodecount = os->dnodecount;
172 	if (os == zfs->mos)
173 		dnodecount += zfs->mscount;
174 	dnodesz = dnodecount * sizeof(dnode_phys_t);
175 	c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz,
176 	    DNODE_BLOCK_SIZE);
177 	dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE);
178 	dnodeloc = objset_space_alloc(zfs, os, &dnodesz);
179 
180 	if (os == zfs->mos) {
181 		vdev_spacemap_write(zfs);
182 
183 		/*
184 		 * We've finished allocating space, account for it in $MOS and
185 		 * in the parent directory.
186 		 */
187 		dsl_dir_size_add(zfs->mosdsldir, os->space);
188 		dsl_dir_size_add(zfs->rootdsldir, os->space);
189 	}
190 	_objset_write(zfs, os, c, dnodeloc);
191 }
192 
193 dnode_phys_t *
194 objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype,
195     uint16_t bonuslen, uint64_t *idp)
196 {
197 	struct objset_dnode_chunk *chunk;
198 	dnode_phys_t *dnode;
199 
200 	assert(bonuslen <= DN_OLD_MAX_BONUSLEN);
201 	assert(!STAILQ_EMPTY(&os->dnodechunks));
202 
203 	chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next);
204 	if (chunk->nextfree == DNODES_PER_CHUNK) {
205 		chunk = ecalloc(1, sizeof(*chunk));
206 		STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next);
207 	}
208 	*idp = os->dnodecount++;
209 	dnode = &chunk->buf[chunk->nextfree++];
210 	dnode_init(dnode, type, bonustype, bonuslen);
211 	dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT;
212 	return (dnode);
213 }
214 
215 dnode_phys_t *
216 objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp)
217 {
218 	return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp));
219 }
220 
221 /*
222  * Look up a physical dnode by ID.  This is not used often so a linear search is
223  * fine.
224  */
225 dnode_phys_t *
226 objset_dnode_lookup(zfs_objset_t *os, uint64_t id)
227 {
228 	struct objset_dnode_chunk *chunk;
229 
230 	assert(id > 0);
231 	assert(id < os->dnodecount);
232 
233 	STAILQ_FOREACH(chunk, &os->dnodechunks, next) {
234 		if (id < DNODES_PER_CHUNK)
235 			return (&chunk->buf[id]);
236 		id -= DNODES_PER_CHUNK;
237 	}
238 	assert(0);
239 	return (NULL);
240 }
241 
242 off_t
243 objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp)
244 {
245 	off_t loc;
246 
247 	loc = vdev_space_alloc(zfs, lenp);
248 	os->space += *lenp;
249 	return (loc);
250 }
251 
252 uint64_t
253 objset_space(const zfs_objset_t *os)
254 {
255 	return (os->space);
256 }
257 
258 void
259 objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp)
260 {
261 	memcpy(bp, &os->osbp, sizeof(blkptr_t));
262 }
263