xref: /freebsd/usr.sbin/makefs/zfs/objset.c (revision 240afd8c1fcc8c5f29dbd4ff0c915795d414405d)
1*240afd8cSMark Johnston /*-
2*240afd8cSMark Johnston  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3*240afd8cSMark Johnston  *
4*240afd8cSMark Johnston  * Copyright (c) 2022 The FreeBSD Foundation
5*240afd8cSMark Johnston  *
6*240afd8cSMark Johnston  * This software was developed by Mark Johnston under sponsorship from
7*240afd8cSMark Johnston  * the FreeBSD Foundation.
8*240afd8cSMark Johnston  *
9*240afd8cSMark Johnston  * Redistribution and use in source and binary forms, with or without
10*240afd8cSMark Johnston  * modification, are permitted provided that the following conditions are
11*240afd8cSMark Johnston  * met:
12*240afd8cSMark Johnston  * 1. Redistributions of source code must retain the above copyright
13*240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer.
14*240afd8cSMark Johnston  * 2. Redistributions in binary form must reproduce the above copyright
15*240afd8cSMark Johnston  *    notice, this list of conditions and the following disclaimer in
16*240afd8cSMark Johnston  *    the documentation and/or other materials provided with the distribution.
17*240afd8cSMark Johnston  *
18*240afd8cSMark Johnston  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19*240afd8cSMark Johnston  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20*240afd8cSMark Johnston  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21*240afd8cSMark Johnston  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22*240afd8cSMark Johnston  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23*240afd8cSMark Johnston  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24*240afd8cSMark Johnston  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25*240afd8cSMark Johnston  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26*240afd8cSMark Johnston  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27*240afd8cSMark Johnston  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28*240afd8cSMark Johnston  * SUCH DAMAGE.
29*240afd8cSMark Johnston  */
30*240afd8cSMark Johnston 
31*240afd8cSMark Johnston #include <assert.h>
32*240afd8cSMark Johnston #include <string.h>
33*240afd8cSMark Johnston 
34*240afd8cSMark Johnston #include <util.h>
35*240afd8cSMark Johnston 
36*240afd8cSMark Johnston #include "zfs.h"
37*240afd8cSMark Johnston 
38*240afd8cSMark Johnston #define	DNODES_PER_CHUNK	(MAXBLOCKSIZE / sizeof(dnode_phys_t))
39*240afd8cSMark Johnston 
40*240afd8cSMark Johnston struct objset_dnode_chunk {
41*240afd8cSMark Johnston 	dnode_phys_t	buf[DNODES_PER_CHUNK];
42*240afd8cSMark Johnston 	unsigned int	nextfree;
43*240afd8cSMark Johnston 	STAILQ_ENTRY(objset_dnode_chunk) next;
44*240afd8cSMark Johnston };
45*240afd8cSMark Johnston 
46*240afd8cSMark Johnston typedef struct zfs_objset {
47*240afd8cSMark Johnston 	/* Physical object set. */
48*240afd8cSMark Johnston 	objset_phys_t	*phys;
49*240afd8cSMark Johnston 	off_t		osloc;
50*240afd8cSMark Johnston 	off_t		osblksz;
51*240afd8cSMark Johnston 	blkptr_t	osbp;		/* set in objset_write() */
52*240afd8cSMark Johnston 
53*240afd8cSMark Johnston 	/* Accounting. */
54*240afd8cSMark Johnston 	off_t		space;		/* bytes allocated to this objset */
55*240afd8cSMark Johnston 
56*240afd8cSMark Johnston 	/* dnode allocator. */
57*240afd8cSMark Johnston 	uint64_t	dnodecount;
58*240afd8cSMark Johnston 	STAILQ_HEAD(, objset_dnode_chunk) dnodechunks;
59*240afd8cSMark Johnston } zfs_objset_t;
60*240afd8cSMark Johnston 
61*240afd8cSMark Johnston static void
62*240afd8cSMark Johnston dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype,
63*240afd8cSMark Johnston     uint16_t bonuslen)
64*240afd8cSMark Johnston {
65*240afd8cSMark Johnston 	dnode->dn_indblkshift = MAXBLOCKSHIFT;
66*240afd8cSMark Johnston 	dnode->dn_type = type;
67*240afd8cSMark Johnston 	dnode->dn_bonustype = bonustype;
68*240afd8cSMark Johnston 	dnode->dn_bonuslen = bonuslen;
69*240afd8cSMark Johnston 	dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4;
70*240afd8cSMark Johnston 	dnode->dn_nlevels = 1;
71*240afd8cSMark Johnston 	dnode->dn_nblkptr = 1;
72*240afd8cSMark Johnston 	dnode->dn_flags = DNODE_FLAG_USED_BYTES;
73*240afd8cSMark Johnston }
74*240afd8cSMark Johnston 
75*240afd8cSMark Johnston zfs_objset_t *
76*240afd8cSMark Johnston objset_alloc(zfs_opt_t *zfs, uint64_t type)
77*240afd8cSMark Johnston {
78*240afd8cSMark Johnston 	struct objset_dnode_chunk *chunk;
79*240afd8cSMark Johnston 	zfs_objset_t *os;
80*240afd8cSMark Johnston 
81*240afd8cSMark Johnston 	os = ecalloc(1, sizeof(*os));
82*240afd8cSMark Johnston 	os->osblksz = sizeof(objset_phys_t);
83*240afd8cSMark Johnston 	os->osloc = objset_space_alloc(zfs, os, &os->osblksz);
84*240afd8cSMark Johnston 
85*240afd8cSMark Johnston 	/*
86*240afd8cSMark Johnston 	 * Object ID zero is always reserved for the meta dnode, which is
87*240afd8cSMark Johnston 	 * embedded in the objset itself.
88*240afd8cSMark Johnston 	 */
89*240afd8cSMark Johnston 	STAILQ_INIT(&os->dnodechunks);
90*240afd8cSMark Johnston 	chunk = ecalloc(1, sizeof(*chunk));
91*240afd8cSMark Johnston 	chunk->nextfree = 1;
92*240afd8cSMark Johnston 	STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next);
93*240afd8cSMark Johnston 	os->dnodecount = 1;
94*240afd8cSMark Johnston 
95*240afd8cSMark Johnston 	os->phys = ecalloc(1, os->osblksz);
96*240afd8cSMark Johnston 	os->phys->os_type = type;
97*240afd8cSMark Johnston 
98*240afd8cSMark Johnston 	dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0);
99*240afd8cSMark Johnston 	os->phys->os_meta_dnode.dn_datablkszsec =
100*240afd8cSMark Johnston 	    DNODE_BLOCK_SIZE >> MINBLOCKSHIFT;
101*240afd8cSMark Johnston 
102*240afd8cSMark Johnston 	return (os);
103*240afd8cSMark Johnston }
104*240afd8cSMark Johnston 
105*240afd8cSMark Johnston /*
106*240afd8cSMark Johnston  * Write the dnode array and physical object set to disk.
107*240afd8cSMark Johnston  */
108*240afd8cSMark Johnston static void
109*240afd8cSMark Johnston _objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c,
110*240afd8cSMark Johnston     off_t loc)
111*240afd8cSMark Johnston {
112*240afd8cSMark Johnston 	struct objset_dnode_chunk *chunk, *tmp;
113*240afd8cSMark Johnston 	unsigned int total;
114*240afd8cSMark Johnston 
115*240afd8cSMark Johnston 	/*
116*240afd8cSMark Johnston 	 * Write out the dnode array, i.e., the meta-dnode.  For some reason its
117*240afd8cSMark Johnston 	 * data blocks must be 16KB in size no matter how large the array is.
118*240afd8cSMark Johnston 	 */
119*240afd8cSMark Johnston 	total = 0;
120*240afd8cSMark Johnston 	STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) {
121*240afd8cSMark Johnston 		unsigned int i;
122*240afd8cSMark Johnston 
123*240afd8cSMark Johnston 		assert(chunk->nextfree <= os->dnodecount);
124*240afd8cSMark Johnston 		assert(chunk->nextfree <= DNODES_PER_CHUNK);
125*240afd8cSMark Johnston 
126*240afd8cSMark Johnston 		for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) {
127*240afd8cSMark Johnston 			blkptr_t *bp;
128*240afd8cSMark Johnston 			uint64_t fill;
129*240afd8cSMark Johnston 
130*240afd8cSMark Johnston 			if (chunk->nextfree - i < DNODES_PER_BLOCK)
131*240afd8cSMark Johnston 				fill = DNODES_PER_BLOCK - (chunk->nextfree - i);
132*240afd8cSMark Johnston 			else
133*240afd8cSMark Johnston 				fill = 0;
134*240afd8cSMark Johnston 			bp = dnode_cursor_next(zfs, c,
135*240afd8cSMark Johnston 			    (total + i) * sizeof(dnode_phys_t));
136*240afd8cSMark Johnston 			vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode,
137*240afd8cSMark Johnston 			    0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp);
138*240afd8cSMark Johnston 			loc += DNODE_BLOCK_SIZE;
139*240afd8cSMark Johnston 		}
140*240afd8cSMark Johnston 		total += i;
141*240afd8cSMark Johnston 
142*240afd8cSMark Johnston 		free(chunk);
143*240afd8cSMark Johnston 	}
144*240afd8cSMark Johnston 	dnode_cursor_finish(zfs, c);
145*240afd8cSMark Johnston 	STAILQ_INIT(&os->dnodechunks);
146*240afd8cSMark Johnston 
147*240afd8cSMark Johnston 	/*
148*240afd8cSMark Johnston 	 * Write the object set itself.  The saved block pointer will be copied
149*240afd8cSMark Johnston 	 * into the referencing DSL dataset or the uberblocks.
150*240afd8cSMark Johnston 	 */
151*240afd8cSMark Johnston 	vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0, 1,
152*240afd8cSMark Johnston 	    os->phys, os->osblksz, os->osloc, &os->osbp);
153*240afd8cSMark Johnston }
154*240afd8cSMark Johnston 
155*240afd8cSMark Johnston void
156*240afd8cSMark Johnston objset_write(zfs_opt_t *zfs, zfs_objset_t *os)
157*240afd8cSMark Johnston {
158*240afd8cSMark Johnston 	struct dnode_cursor *c;
159*240afd8cSMark Johnston 	off_t dnodeloc, dnodesz;
160*240afd8cSMark Johnston 	uint64_t dnodecount;
161*240afd8cSMark Johnston 
162*240afd8cSMark Johnston 	/*
163*240afd8cSMark Johnston 	 * There is a chicken-and-egg problem here when writing the MOS: we
164*240afd8cSMark Johnston 	 * cannot write space maps before we're finished allocating space from
165*240afd8cSMark Johnston 	 * the vdev, and we can't write the MOS without having allocated space
166*240afd8cSMark Johnston 	 * for indirect dnode blocks.  Thus, rather than lazily allocating
167*240afd8cSMark Johnston 	 * indirect blocks for the meta-dnode (which would be simpler), they are
168*240afd8cSMark Johnston 	 * allocated up-front and before writing space maps.
169*240afd8cSMark Johnston 	 */
170*240afd8cSMark Johnston 	dnodecount = os->dnodecount;
171*240afd8cSMark Johnston 	if (os == zfs->mos)
172*240afd8cSMark Johnston 		dnodecount += zfs->mscount;
173*240afd8cSMark Johnston 	dnodesz = dnodecount * sizeof(dnode_phys_t);
174*240afd8cSMark Johnston 	c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz,
175*240afd8cSMark Johnston 	    DNODE_BLOCK_SIZE);
176*240afd8cSMark Johnston 	dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE);
177*240afd8cSMark Johnston 	dnodeloc = objset_space_alloc(zfs, os, &dnodesz);
178*240afd8cSMark Johnston 
179*240afd8cSMark Johnston 	if (os == zfs->mos) {
180*240afd8cSMark Johnston 		vdev_spacemap_write(zfs);
181*240afd8cSMark Johnston 
182*240afd8cSMark Johnston 		/*
183*240afd8cSMark Johnston 		 * We've finished allocating space, account for it in $MOS.
184*240afd8cSMark Johnston 		 */
185*240afd8cSMark Johnston 		dsl_dir_size_set(zfs->mosdsldir, os->space);
186*240afd8cSMark Johnston 	}
187*240afd8cSMark Johnston 	_objset_write(zfs, os, c, dnodeloc);
188*240afd8cSMark Johnston }
189*240afd8cSMark Johnston 
190*240afd8cSMark Johnston dnode_phys_t *
191*240afd8cSMark Johnston objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype,
192*240afd8cSMark Johnston     uint16_t bonuslen, uint64_t *idp)
193*240afd8cSMark Johnston {
194*240afd8cSMark Johnston 	struct objset_dnode_chunk *chunk;
195*240afd8cSMark Johnston 	dnode_phys_t *dnode;
196*240afd8cSMark Johnston 
197*240afd8cSMark Johnston 	assert(bonuslen <= DN_OLD_MAX_BONUSLEN);
198*240afd8cSMark Johnston 	assert(!STAILQ_EMPTY(&os->dnodechunks));
199*240afd8cSMark Johnston 
200*240afd8cSMark Johnston 	chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next);
201*240afd8cSMark Johnston 	if (chunk->nextfree == DNODES_PER_CHUNK) {
202*240afd8cSMark Johnston 		chunk = ecalloc(1, sizeof(*chunk));
203*240afd8cSMark Johnston 		STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next);
204*240afd8cSMark Johnston 	}
205*240afd8cSMark Johnston 	*idp = os->dnodecount++;
206*240afd8cSMark Johnston 	dnode = &chunk->buf[chunk->nextfree++];
207*240afd8cSMark Johnston 	dnode_init(dnode, type, bonustype, bonuslen);
208*240afd8cSMark Johnston 	dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT;
209*240afd8cSMark Johnston 	return (dnode);
210*240afd8cSMark Johnston }
211*240afd8cSMark Johnston 
212*240afd8cSMark Johnston dnode_phys_t *
213*240afd8cSMark Johnston objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp)
214*240afd8cSMark Johnston {
215*240afd8cSMark Johnston 	return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp));
216*240afd8cSMark Johnston }
217*240afd8cSMark Johnston 
218*240afd8cSMark Johnston /*
219*240afd8cSMark Johnston  * Look up a physical dnode by ID.  This is not used often so a linear search is
220*240afd8cSMark Johnston  * fine.
221*240afd8cSMark Johnston  */
222*240afd8cSMark Johnston dnode_phys_t *
223*240afd8cSMark Johnston objset_dnode_lookup(zfs_objset_t *os, uint64_t id)
224*240afd8cSMark Johnston {
225*240afd8cSMark Johnston 	struct objset_dnode_chunk *chunk;
226*240afd8cSMark Johnston 
227*240afd8cSMark Johnston 	assert(id > 0);
228*240afd8cSMark Johnston 	assert(id < os->dnodecount);
229*240afd8cSMark Johnston 
230*240afd8cSMark Johnston 	STAILQ_FOREACH(chunk, &os->dnodechunks, next) {
231*240afd8cSMark Johnston 		if (id < DNODES_PER_CHUNK)
232*240afd8cSMark Johnston 			return (&chunk->buf[id]);
233*240afd8cSMark Johnston 		id -= DNODES_PER_CHUNK;
234*240afd8cSMark Johnston 	}
235*240afd8cSMark Johnston 	assert(0);
236*240afd8cSMark Johnston 	return (NULL);
237*240afd8cSMark Johnston }
238*240afd8cSMark Johnston 
239*240afd8cSMark Johnston off_t
240*240afd8cSMark Johnston objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp)
241*240afd8cSMark Johnston {
242*240afd8cSMark Johnston 	off_t loc;
243*240afd8cSMark Johnston 
244*240afd8cSMark Johnston 	loc = vdev_space_alloc(zfs, lenp);
245*240afd8cSMark Johnston 	os->space += *lenp;
246*240afd8cSMark Johnston 	return (loc);
247*240afd8cSMark Johnston }
248*240afd8cSMark Johnston 
249*240afd8cSMark Johnston uint64_t
250*240afd8cSMark Johnston objset_space(const zfs_objset_t *os)
251*240afd8cSMark Johnston {
252*240afd8cSMark Johnston 	return (os->space);
253*240afd8cSMark Johnston }
254*240afd8cSMark Johnston 
255*240afd8cSMark Johnston void
256*240afd8cSMark Johnston objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp)
257*240afd8cSMark Johnston {
258*240afd8cSMark Johnston 	memcpy(bp, &os->osbp, sizeof(blkptr_t));
259*240afd8cSMark Johnston }
260