xref: /freebsd/usr.sbin/makefs/zfs/objset.c (revision da1255560f36d6cacda82fa94c3ba94c12d25050)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022 The FreeBSD Foundation
5  *
6  * This software was developed by Mark Johnston under sponsorship from
7  * the FreeBSD Foundation.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are
11  * met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include <assert.h>
33 #include <stdlib.h>
34 #include <string.h>
35 
36 #include <util.h>
37 
38 #include "zfs.h"
39 
40 #define	DNODES_PER_CHUNK	(MAXBLOCKSIZE / sizeof(dnode_phys_t))
41 
42 struct objset_dnode_chunk {
43 	dnode_phys_t	buf[DNODES_PER_CHUNK];
44 	unsigned int	nextfree;
45 	STAILQ_ENTRY(objset_dnode_chunk) next;
46 };
47 
48 typedef struct zfs_objset {
49 	/* Physical object set. */
50 	objset_phys_t	*phys;
51 	off_t		osloc;
52 	off_t		osblksz;
53 	blkptr_t	osbp;		/* set in objset_write() */
54 
55 	/* Accounting. */
56 	off_t		space;		/* bytes allocated to this objset */
57 
58 	/* dnode allocator. */
59 	uint64_t	dnodecount;
60 	STAILQ_HEAD(, objset_dnode_chunk) dnodechunks;
61 } zfs_objset_t;
62 
63 static void
dnode_init(dnode_phys_t * dnode,uint8_t type,uint8_t bonustype,uint16_t bonuslen)64 dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype,
65     uint16_t bonuslen)
66 {
67 	dnode->dn_indblkshift = MAXBLOCKSHIFT;
68 	dnode->dn_type = type;
69 	dnode->dn_bonustype = bonustype;
70 	dnode->dn_bonuslen = bonuslen;
71 	dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4;
72 	dnode->dn_nlevels = 1;
73 	dnode->dn_nblkptr = 1;
74 	dnode->dn_flags = DNODE_FLAG_USED_BYTES;
75 }
76 
77 zfs_objset_t *
objset_alloc(zfs_opt_t * zfs,uint64_t type)78 objset_alloc(zfs_opt_t *zfs, uint64_t type)
79 {
80 	struct objset_dnode_chunk *chunk;
81 	zfs_objset_t *os;
82 
83 	os = ecalloc(1, sizeof(*os));
84 	os->osblksz = sizeof(objset_phys_t);
85 	os->osloc = objset_space_alloc(zfs, os, &os->osblksz);
86 
87 	/*
88 	 * Object ID zero is always reserved for the meta dnode, which is
89 	 * embedded in the objset itself.
90 	 */
91 	STAILQ_INIT(&os->dnodechunks);
92 	chunk = ecalloc(1, sizeof(*chunk));
93 	chunk->nextfree = 1;
94 	STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next);
95 	os->dnodecount = 1;
96 
97 	os->phys = ecalloc(1, os->osblksz);
98 	os->phys->os_type = type;
99 
100 	dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0);
101 	os->phys->os_meta_dnode.dn_datablkszsec =
102 	    DNODE_BLOCK_SIZE >> MINBLOCKSHIFT;
103 
104 	return (os);
105 }
106 
107 /*
108  * Write the dnode array and physical object set to disk.
109  */
110 static void
_objset_write(zfs_opt_t * zfs,zfs_objset_t * os,struct dnode_cursor * c,off_t loc)111 _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c,
112     off_t loc)
113 {
114 	struct objset_dnode_chunk *chunk, *tmp;
115 	unsigned int total;
116 
117 	/*
118 	 * Write out the dnode array, i.e., the meta-dnode.  For some reason its
119 	 * data blocks must be 16KB in size no matter how large the array is.
120 	 */
121 	total = 0;
122 	STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) {
123 		unsigned int i;
124 
125 		assert(chunk->nextfree > 0);
126 		assert(chunk->nextfree <= os->dnodecount);
127 		assert(chunk->nextfree <= DNODES_PER_CHUNK);
128 
129 		for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) {
130 			blkptr_t *bp;
131 			uint64_t fill;
132 
133 			if (chunk->nextfree - i < DNODES_PER_BLOCK)
134 				fill = DNODES_PER_BLOCK - (chunk->nextfree - i);
135 			else
136 				fill = 0;
137 			bp = dnode_cursor_next(zfs, c,
138 			    (total + i) * sizeof(dnode_phys_t));
139 			vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode,
140 			    0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp);
141 			loc += DNODE_BLOCK_SIZE;
142 		}
143 		total += i;
144 
145 		free(chunk);
146 	}
147 	dnode_cursor_finish(zfs, c);
148 	STAILQ_INIT(&os->dnodechunks);
149 
150 	/*
151 	 * Write the object set itself.  The saved block pointer will be copied
152 	 * into the referencing DSL dataset or the uberblocks.
153 	 */
154 	vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0,
155 	    os->dnodecount - 1, os->phys, os->osblksz, os->osloc, &os->osbp);
156 }
157 
158 void
objset_write(zfs_opt_t * zfs,zfs_objset_t * os)159 objset_write(zfs_opt_t *zfs, zfs_objset_t *os)
160 {
161 	struct dnode_cursor *c;
162 	off_t dnodeloc, dnodesz;
163 	uint64_t dnodecount;
164 
165 	/*
166 	 * There is a chicken-and-egg problem here when writing the MOS: we
167 	 * cannot write space maps before we're finished allocating space from
168 	 * the vdev, and we can't write the MOS without having allocated space
169 	 * for indirect dnode blocks.  Thus, rather than lazily allocating
170 	 * indirect blocks for the meta-dnode (which would be simpler), they are
171 	 * allocated up-front and before writing space maps.
172 	 */
173 	dnodecount = os->dnodecount;
174 	if (os == zfs->mos)
175 		dnodecount += zfs->mscount;
176 	dnodesz = dnodecount * sizeof(dnode_phys_t);
177 	c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz,
178 	    DNODE_BLOCK_SIZE);
179 	dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE);
180 	dnodeloc = objset_space_alloc(zfs, os, &dnodesz);
181 
182 	if (os == zfs->mos) {
183 		vdev_spacemap_write(zfs);
184 
185 		/*
186 		 * We've finished allocating space, account for it in $MOS and
187 		 * in the parent directory.
188 		 */
189 		dsl_dir_root_finalize(zfs, os->space);
190 	}
191 	_objset_write(zfs, os, c, dnodeloc);
192 }
193 
194 dnode_phys_t *
objset_dnode_bonus_alloc(zfs_objset_t * os,uint8_t type,uint8_t bonustype,uint16_t bonuslen,uint64_t * idp)195 objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype,
196     uint16_t bonuslen, uint64_t *idp)
197 {
198 	struct objset_dnode_chunk *chunk;
199 	dnode_phys_t *dnode;
200 
201 	assert(bonuslen <= DN_OLD_MAX_BONUSLEN);
202 	assert(!STAILQ_EMPTY(&os->dnodechunks));
203 
204 	chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next);
205 	if (chunk->nextfree == DNODES_PER_CHUNK) {
206 		chunk = ecalloc(1, sizeof(*chunk));
207 		STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next);
208 	}
209 	*idp = os->dnodecount++;
210 	dnode = &chunk->buf[chunk->nextfree++];
211 	dnode_init(dnode, type, bonustype, bonuslen);
212 	dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT;
213 	return (dnode);
214 }
215 
216 dnode_phys_t *
objset_dnode_alloc(zfs_objset_t * os,uint8_t type,uint64_t * idp)217 objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp)
218 {
219 	return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp));
220 }
221 
222 /*
223  * Look up a physical dnode by ID.  This is not used often so a linear search is
224  * fine.
225  */
226 dnode_phys_t *
objset_dnode_lookup(zfs_objset_t * os,uint64_t id)227 objset_dnode_lookup(zfs_objset_t *os, uint64_t id)
228 {
229 	struct objset_dnode_chunk *chunk;
230 
231 	assert(id > 0);
232 	assert(id < os->dnodecount);
233 
234 	STAILQ_FOREACH(chunk, &os->dnodechunks, next) {
235 		if (id < DNODES_PER_CHUNK)
236 			return (&chunk->buf[id]);
237 		id -= DNODES_PER_CHUNK;
238 	}
239 	assert(0);
240 	return (NULL);
241 }
242 
243 off_t
objset_space_alloc(zfs_opt_t * zfs,zfs_objset_t * os,off_t * lenp)244 objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp)
245 {
246 	off_t loc;
247 
248 	loc = vdev_space_alloc(zfs, lenp);
249 	os->space += *lenp;
250 	return (loc);
251 }
252 
253 uint64_t
objset_space(const zfs_objset_t * os)254 objset_space(const zfs_objset_t *os)
255 {
256 	return (os->space);
257 }
258 
259 void
objset_root_blkptr_copy(const zfs_objset_t * os,blkptr_t * bp)260 objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp)
261 {
262 	memcpy(bp, &os->osbp, sizeof(blkptr_t));
263 }
264