xref: /freebsd/sys/contrib/openzfs/include/sys/space_map.h (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
29  */
30 
31 #ifndef _SYS_SPACE_MAP_H
32 #define	_SYS_SPACE_MAP_H
33 
34 #include <sys/avl.h>
35 #include <sys/range_tree.h>
36 #include <sys/dmu.h>
37 
38 #ifdef	__cplusplus
39 extern "C" {
40 #endif
41 
42 /*
43  * The size of the space map object has increased to include a histogram.
44  * SPACE_MAP_SIZE_V0 is the original size (three 64-bit words) and is used
45  * to maintain backward compatibility.
46  */
47 #define	SPACE_MAP_SIZE_V0	(3 * sizeof (uint64_t))
48 #define	SPACE_MAP_HISTOGRAM_SIZE	32	/* smp_histogram buckets */
49 
50 /*
51  * The space_map_phys is the on-disk representation of the space map.
52  * Consumers of space maps should never reference any of the members of this
53  * structure directly. These members may only be updated in syncing context.
54  *
55  * Note that smp_object is no longer used, but it remains in the structure
56  * for backward compatibility.
57  */
58 typedef struct space_map_phys {
59 	/* object number: unused, retained only to keep the on-disk layout */
60 	uint64_t	smp_object;
61 
62 	/* length of the object in bytes */
63 	uint64_t	smp_length;
64 
65 	/* space allocated from the map (signed, unlike its neighbors) */
66 	int64_t		smp_alloc;
67 	/* members above span SPACE_MAP_SIZE_V0 bytes (the original layout) */
68 	/* reserved: pads the members above out to eight 64-bit words */
69 	uint64_t	smp_pad[5];
70 
71 	/*
72 	 * The smp_histogram maintains a histogram of free regions. Each
73 	 * bucket, smp_histogram[i], contains the number of free regions
74 	 * whose size is:
75 	 * 2^(i+sm_shift) <= size of free region in bytes < 2^(i+sm_shift+1)
76 	 *
77 	 * Note that, if log space map feature is enabled, histograms of
78 	 * space maps that belong to metaslabs will take into account any
79 	 * unflushed changes for their metaslabs, even though the actual
80 	 * space map doesn't have entries for these changes.
81 	 */
82 	uint64_t	smp_histogram[SPACE_MAP_HISTOGRAM_SIZE];
83 } space_map_phys_t;
84 
85 /*
86  * The space map object defines a region of space, its size, how much is
87  * allocated, and the on-disk object that stores this information.
88  * Consumers of space maps may only access the members of this structure.
89  *
90  * Note: the space_map may not be accessed concurrently; consumers
91  * must provide external locking if required.
92  */
93 typedef struct space_map {
94 	uint64_t	sm_start;	/* start of map */
95 	uint64_t	sm_size;	/* size of map */
96 	uint8_t		sm_shift;	/* unit shift (log2 of unit size) */
97 	objset_t	*sm_os;		/* objset for this map */
98 	uint64_t	sm_object;	/* object id for this map */
99 	uint32_t	sm_blksz;	/* block size for space map */
100 	dmu_buf_t	*sm_dbuf;	/* space_map_phys_t dbuf */
101 	space_map_phys_t *sm_phys;	/* on-disk representation */
102 } space_map_t;
103 
104 /*
105  * debug entry
106  *
107  *     2     2        10                     50
108  *  +-----+-----+------------+----------------------------------+
109  *  | 1 0 | act |  syncpass  |        txg (lower bits)          |
110  *  +-----+-----+------------+----------------------------------+
111  *   63 62 61 60 59        50 49                                0
112  *
113  *
114  * one-word entry
115  *
116  *    1               47                   1           15
117  *  +-----------------------------------------------------------+
118  *  | 0 |   offset (sm_shift units)    | type |       run       |
119  *  +-----------------------------------------------------------+
120  *   63  62                          16   15   14               0
121  *
122  *
123  * two-word entry
124  *
125  *     2     2               36                      24
126  *  +-----+-----+---------------------------+-------------------+
127  *  | 1 1 | pad |            run            |       vdev        |
128  *  +-----+-----+---------------------------+-------------------+
129  *   63 62 61 60 59                       24 23                 0
130  *
131  *     1                            63
132  *  +------+----------------------------------------------------+
133  *  | type |                      offset                        |
134  *  +------+----------------------------------------------------+
135  *     63   62                                                  0
136  *
137  * Note that a two-word entry will not straddle a block boundary.
138  * If necessary, the last word of a block will be padded with a
139  * debug entry (with act = syncpass = txg = 0).
140  */
141 /* Kind of a space map entry (see sme_type); the values are 0 and 1. */
142 typedef enum {
143 	SM_ALLOC,
144 	SM_FREE
145 } maptype_t;
146 
147 typedef struct space_map_entry {
148 	maptype_t sme_type;	/* SM_ALLOC or SM_FREE */
149 	uint32_t sme_vdev;	/* max is 2^24-1; SM_NO_VDEVID if not present */
150 	uint64_t sme_offset;	/* max is 2^63-1; units of sm_shift */
151 	uint64_t sme_run;	/* max is 2^36; units of sm_shift */
152 
153 	/*
154 	 * The following fields are not part of the actual space map entry
155 	 * on-disk and they are populated with the values from the debug
156 	 * entry most recently visited starting from the beginning to the
157 	 * end of the space map.
158 	 */
159 	uint64_t sme_txg;	/* txg bits of that debug entry */
160 	uint64_t sme_sync_pass;	/* syncpass of that debug entry */
161 } space_map_entry_t;
162 /* Out-of-range vdev id: one past the SPA_VDEVBITS-bit field maximum. */
163 #define	SM_NO_VDEVID	(1 << SPA_VDEVBITS)
164 
165 /* debug and one-word entry constants (debug prefix is binary 10) */
166 #define	SM_DEBUG_PREFIX	2
167 #define	SM_OFFSET_BITS	47
168 #define	SM_RUN_BITS	15
169 
170 /* two-word entry constants (prefix is binary 11) */
171 #define	SM2_PREFIX	3
172 #define	SM2_OFFSET_BITS	63
173 #define	SM2_RUN_BITS	36
174 /* Top two bits of a word: 2 = debug, 3 = two-word, 0 or 1 = one-word. */
175 #define	SM_PREFIX_DECODE(x)	BF64_DECODE(x, 62, 2)
176 #define	SM_PREFIX_ENCODE(x)	BF64_ENCODE(x, 62, 2)
177 /* Debug entry fields: action (2 bits), syncpass (10 bits), txg (50 bits). */
178 #define	SM_DEBUG_ACTION_DECODE(x)	BF64_DECODE(x, 60, 2)
179 #define	SM_DEBUG_ACTION_ENCODE(x)	BF64_ENCODE(x, 60, 2)
180 #define	SM_DEBUG_SYNCPASS_DECODE(x)	BF64_DECODE(x, 50, 10)
181 #define	SM_DEBUG_SYNCPASS_ENCODE(x)	BF64_ENCODE(x, 50, 10)
182 #define	SM_DEBUG_TXG_DECODE(x)		BF64_DECODE(x, 0, 50)
183 #define	SM_DEBUG_TXG_ENCODE(x)		BF64_ENCODE(x, 0, 50)
184 /* One-word entry fields; run is stored minus one, so it spans 1..2^15. */
185 #define	SM_OFFSET_DECODE(x)	BF64_DECODE(x, 16, SM_OFFSET_BITS)
186 #define	SM_OFFSET_ENCODE(x)	BF64_ENCODE(x, 16, SM_OFFSET_BITS)
187 #define	SM_TYPE_DECODE(x)	BF64_DECODE(x, 15, 1)
188 #define	SM_TYPE_ENCODE(x)	BF64_ENCODE(x, 15, 1)
189 #define	SM_RUN_DECODE(x)	(BF64_DECODE(x, 0, SM_RUN_BITS) + 1)
190 #define	SM_RUN_ENCODE(x)	BF64_ENCODE((x) - 1, 0, SM_RUN_BITS)
191 #define	SM_RUN_MAX		SM_RUN_DECODE(~0ULL)
192 #define	SM_OFFSET_MAX		SM_OFFSET_DECODE(~0ULL)
193 /* Two-word entry fields; run is stored minus one, so it spans 1..2^36. */
194 #define	SM2_RUN_DECODE(x)	(BF64_DECODE(x, SPA_VDEVBITS, SM2_RUN_BITS) + 1)
195 #define	SM2_RUN_ENCODE(x)	BF64_ENCODE((x) - 1, SPA_VDEVBITS, SM2_RUN_BITS)
196 #define	SM2_VDEV_DECODE(x)	BF64_DECODE(x, 0, SPA_VDEVBITS)
197 #define	SM2_VDEV_ENCODE(x)	BF64_ENCODE(x, 0, SPA_VDEVBITS)
198 #define	SM2_TYPE_DECODE(x)	BF64_DECODE(x, SM2_OFFSET_BITS, 1)
199 #define	SM2_TYPE_ENCODE(x)	BF64_ENCODE(x, SM2_OFFSET_BITS, 1)
200 #define	SM2_OFFSET_DECODE(x)	BF64_DECODE(x, 0, SM2_OFFSET_BITS)
201 #define	SM2_OFFSET_ENCODE(x)	BF64_ENCODE(x, 0, SM2_OFFSET_BITS)
202 #define	SM2_RUN_MAX		SM2_RUN_DECODE(~0ULL)
203 #define	SM2_OFFSET_MAX		SM2_OFFSET_DECODE(~0ULL)
204 /* Predicates on a single 64-bit entry word, one per entry format. */
205 boolean_t sm_entry_is_debug(uint64_t e);
206 boolean_t sm_entry_is_single_word(uint64_t e);
207 boolean_t sm_entry_is_double_word(uint64_t e);
208 /* Callback type: receives a space_map_entry_t and an opaque arg pointer. */
209 typedef int (*sm_cb_t)(space_map_entry_t *sme, void *arg);
210 /* Load/iterate entry points; int results are presumably errno codes. */
211 int space_map_load(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype);
212 int space_map_load_length(space_map_t *sm, zfs_range_tree_t *rt,
213     maptype_t maptype, uint64_t length);
214 int space_map_iterate(space_map_t *sm, uint64_t length,
215     sm_cb_t callback, void *arg);
216 int space_map_incremental_destroy(space_map_t *sm, sm_cb_t callback, void *arg,
217     dmu_tx_t *tx);
218 /* Histogram routines; see smp_histogram in space_map_phys_t. */
219 boolean_t space_map_histogram_verify(space_map_t *sm, zfs_range_tree_t *rt);
220 void space_map_histogram_clear(space_map_t *sm);
221 void space_map_histogram_add(space_map_t *sm, zfs_range_tree_t *rt,
222     dmu_tx_t *tx);
223 /* Accessors; space_map_allocated() is signed, as is smp_alloc. */
224 uint64_t space_map_object(space_map_t *sm);
225 int64_t space_map_allocated(space_map_t *sm);
226 uint64_t space_map_length(space_map_t *sm);
227 uint64_t space_map_entries(space_map_t *sm, zfs_range_tree_t *rt);
228 uint64_t space_map_nblocks(space_map_t *sm);
229 /* Every routine here except the size estimate takes a dmu_tx_t. */
230 void space_map_write(space_map_t *sm, zfs_range_tree_t *rt, maptype_t maptype,
231     uint64_t vdev_id, dmu_tx_t *tx);
232 uint64_t space_map_estimate_optimal_size(space_map_t *sm, zfs_range_tree_t *rt,
233     uint64_t vdev_id);
234 void space_map_truncate(space_map_t *sm, int blocksize, dmu_tx_t *tx);
235 uint64_t space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx);
236 void space_map_free(space_map_t *sm, dmu_tx_t *tx);
237 void space_map_free_obj(objset_t *os, uint64_t smobj, dmu_tx_t *tx);
238 /* start/size/shift match the types of sm_start, sm_size and sm_shift. */
239 int space_map_open(space_map_t **smp, objset_t *os, uint64_t object,
240     uint64_t start, uint64_t size, uint8_t shift);
241 void space_map_close(space_map_t *sm);
242 
243 #ifdef	__cplusplus
244 }
245 #endif
246 
247 #endif	/* _SYS_SPACE_MAP_H */
248