xref: /freebsd/sys/contrib/openzfs/include/sys/ddt_impl.h (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2016 by Delphix. All rights reserved.
25  * Copyright (c) 2023, Klara Inc.
26  */
27 
28 #ifndef _SYS_DDT_IMPL_H
29 #define	_SYS_DDT_IMPL_H
30 
31 #include <sys/ddt.h>
32 #include <sys/bitops.h>
33 
34 #ifdef	__cplusplus
35 extern "C" {
36 #endif
37 
/*
 * DDT version numbers.
 */
#define	DDT_VERSION_LEGACY		(0)
#define	DDT_VERSION_FDT			(1)

/*
 * Dummy version, used to signal that configure is still necessary.
 */
#define	DDT_VERSION_UNCONFIGURED	(UINT64_MAX)

/*
 * Names of interesting objects in the DDT root dir.
 */
#define	DDT_DIR_VERSION		"version"
#define	DDT_DIR_FLAGS		"flags"
48 
/*
 * Fill a lightweight entry from a live entry.
 *
 * The lightweight entry (ddlwe) is zeroed first, then the key, type and
 * class are copied from the live entry (dde), followed by
 * DDT_PHYS_SIZE(ddt) bytes of phys data.
 *
 * This is a macro: (dde) and (ddlwe) are evaluated more than once, so do not
 * pass expressions with side effects. Every use of an argument is
 * parenthesized so that pointer expressions (e.g. "array + i") are safe.
 */
#define	DDT_ENTRY_TO_LIGHTWEIGHT(ddt, dde, ddlwe) do {			\
	memset((ddlwe), 0, sizeof (*(ddlwe)));				\
	(ddlwe)->ddlwe_key = (dde)->dde_key;				\
	(ddlwe)->ddlwe_type = (dde)->dde_type;				\
	(ddlwe)->ddlwe_class = (dde)->dde_class;			\
	memcpy(&(ddlwe)->ddlwe_phys, (dde)->dde_phys, DDT_PHYS_SIZE(ddt)); \
} while (0)
57 
/*
 * Fill a lightweight entry from a log entry.
 *
 * The lightweight entry (ddlwe) is zeroed first, then the key, type and
 * class are copied from the log entry (ddle), followed by
 * DDT_PHYS_SIZE(ddt) bytes of phys data.
 *
 * This is a macro: (ddle) and (ddlwe) are evaluated more than once, so do
 * not pass expressions with side effects. Every use of an argument is
 * parenthesized so that pointer expressions (e.g. "array + i") are safe.
 */
#define	DDT_LOG_ENTRY_TO_LIGHTWEIGHT(ddt, ddle, ddlwe) do {		\
	memset((ddlwe), 0, sizeof (*(ddlwe)));				\
	(ddlwe)->ddlwe_key = (ddle)->ddle_key;				\
	(ddlwe)->ddlwe_type = (ddle)->ddle_type;			\
	(ddlwe)->ddlwe_class = (ddle)->ddle_class;			\
	memcpy(&(ddlwe)->ddlwe_phys, (ddle)->ddle_phys, DDT_PHYS_SIZE(ddt)); \
} while (0)
65 
66 /*
67  * An entry on the log tree. These are "frozen", and a record of what's in
68  * the on-disk log. They can't be used in place, but can be "loaded" back into
69  * the live tree.
70  */
71 typedef struct {
72 	ddt_key_t	ddle_key;	/* ddt_log_tree key */
73 	avl_node_t	ddle_node;	/* ddt_log_tree node */
74 
75 	ddt_type_t	ddle_type;	/* storage type */
76 	ddt_class_t	ddle_class;	/* storage class */
77 
78 	/* extra allocation for flat/trad phys */
79 	ddt_univ_phys_t	ddle_phys[];
80 } ddt_log_entry_t;
81 
/*
 * On-disk log record types.
 */
typedef enum {
	/* end of block marker */
	DLR_INVALID	= 0,
	/* an entry to add or replace in the log tree */
	DLR_ENTRY	= 1,
} ddt_log_record_type_t;
87 
/* On-disk log record header. */
typedef struct {
	/*
	 * dlr_info is a packed u64, use the DLR_GET/DLR_SET macros below to
	 * access it.
	 *
	 * bits 0-7:   record type (ddt_log_record_type_t)
	 * bits 8-23:  length of record header+payload
	 * bits 24-47: reserved, all zero
	 * bits 48-55: if type==DLR_ENTRY, storage type (ddt_type)
	 *             otherwise all zero
	 * bits 56-63: if type==DLR_ENTRY, storage class (ddt_class)
	 *             otherwise all zero
	 */
	uint64_t	dlr_info;
	uint8_t		dlr_payload[];
} ddt_log_record_t;

/*
 * Accessors for the fields packed into dlr_info. The two numeric arguments
 * passed to BF64_GET/BF64_SET are the field's lowest bit and its width in
 * bits, so they must stay in step with the layout documented above. Note
 * that the record length is a 16-bit field.
 */
#define	DLR_GET_TYPE(dlr)		BF64_GET((dlr)->dlr_info, 0, 8)
#define	DLR_SET_TYPE(dlr, v)		BF64_SET((dlr)->dlr_info, 0, 8, v)
#define	DLR_GET_RECLEN(dlr)		BF64_GET((dlr)->dlr_info, 8, 16)
#define	DLR_SET_RECLEN(dlr, v)		BF64_SET((dlr)->dlr_info, 8, 16, v)
#define	DLR_GET_ENTRY_TYPE(dlr)		BF64_GET((dlr)->dlr_info, 48, 8)
#define	DLR_SET_ENTRY_TYPE(dlr, v)	BF64_SET((dlr)->dlr_info, 48, 8, v)
#define	DLR_GET_ENTRY_CLASS(dlr)	BF64_GET((dlr)->dlr_info, 56, 8)
#define	DLR_SET_ENTRY_CLASS(dlr, v)	BF64_SET((dlr)->dlr_info, 56, 8, v)
114 
115 /* Payload for DLR_ENTRY. */
116 typedef struct {
117 	ddt_key_t	dlre_key;
118 	ddt_univ_phys_t	dlre_phys[];
119 } ddt_log_record_entry_t;
120 
/*
 * Log flags (ddl_flags, dlh_flags).
 *
 * DDL_FLAG_FLUSHING:   this log is being flushed
 * DDL_FLAG_CHECKPOINT: header has a checkpoint
 */
#define	DDL_FLAG_FLUSHING	(1 << 0)
#define	DDL_FLAG_CHECKPOINT	(1 << 1)
124 
125 /* On-disk log header, stored in the bonus buffer. */
126 typedef struct {
127 	/*
128 	 * dlh_info is a packed u64, use the DLH_GET/DLH_SET macros below to
129 	 * access it.
130 	 *
131 	 * bits 0-7:   log version
132 	 * bits 8-15:  log flags
133 	 * bits 16-63: reserved, all zero
134 	 */
135 	uint64_t	dlh_info;
136 
137 	uint64_t	dlh_length;	/* log size in bytes */
138 	uint64_t	dlh_first_txg;	/* txg this log went active */
139 	ddt_key_t	dlh_checkpoint;	/* last checkpoint */
140 } ddt_log_header_t;
141 
/*
 * Accessors for the fields packed into dlh_info: an 8-bit log version
 * starting at bit 0, and 8 bits of log flags starting at bit 8.
 */
#define	DLH_GET_VERSION(dlh)		BF64_GET((dlh)->dlh_info, 0, 8)
#define	DLH_SET_VERSION(dlh, v)		BF64_SET((dlh)->dlh_info, 0, 8, v)
#define	DLH_GET_FLAGS(dlh)		BF64_GET((dlh)->dlh_info, 8, 8)
#define	DLH_SET_FLAGS(dlh, v)		BF64_SET((dlh)->dlh_info, 8, 8, v)
146 
147 /* DDT log update state */
148 typedef struct {
149 	dmu_tx_t	*dlu_tx;	/* tx the update is being applied to */
150 	dnode_t		*dlu_dn;	/* log object dnode */
151 	dmu_buf_t	**dlu_dbp;	/* array of block buffer pointers */
152 	int		dlu_ndbp;	/* number of block buffer pointers */
153 	uint16_t	dlu_reclen;	/* cached length of record */
154 	uint64_t	dlu_block;	/* block for next entry */
155 	uint64_t	dlu_offset;	/* offset for next entry */
156 } ddt_log_update_t;
157 
158 /*
159  * Ops vector to access a specific DDT object type.
160  */
161 typedef struct {
162 	char ddt_op_name[32];
163 	int (*ddt_op_create)(objset_t *os, uint64_t *object, dmu_tx_t *tx,
164 	    boolean_t prehash);
165 	int (*ddt_op_destroy)(objset_t *os, uint64_t object, dmu_tx_t *tx);
166 	int (*ddt_op_lookup)(objset_t *os, uint64_t object,
167 	    const ddt_key_t *ddk, void *phys, size_t psize);
168 	int (*ddt_op_contains)(objset_t *os, uint64_t object,
169 	    const ddt_key_t *ddk);
170 	void (*ddt_op_prefetch)(objset_t *os, uint64_t object,
171 	    const ddt_key_t *ddk);
172 	void (*ddt_op_prefetch_all)(objset_t *os, uint64_t object);
173 	int (*ddt_op_update)(objset_t *os, uint64_t object,
174 	    const ddt_key_t *ddk, const void *phys, size_t psize,
175 	    dmu_tx_t *tx);
176 	int (*ddt_op_remove)(objset_t *os, uint64_t object,
177 	    const ddt_key_t *ddk, dmu_tx_t *tx);
178 	int (*ddt_op_walk)(objset_t *os, uint64_t object, uint64_t *walk,
179 	    ddt_key_t *ddk, void *phys, size_t psize);
180 	int (*ddt_op_count)(objset_t *os, uint64_t object, uint64_t *count);
181 } ddt_ops_t;
182 
183 extern const ddt_ops_t ddt_zap_ops;
184 
185 /* Dedup log API */
186 extern void ddt_log_begin(ddt_t *ddt, size_t nentries, dmu_tx_t *tx,
187     ddt_log_update_t *dlu);
188 extern void ddt_log_entry(ddt_t *ddt, ddt_lightweight_entry_t *dde,
189     ddt_log_update_t *dlu);
190 extern void ddt_log_commit(ddt_t *ddt, ddt_log_update_t *dlu);
191 
192 extern boolean_t ddt_log_take_first(ddt_t *ddt, ddt_log_t *ddl,
193     ddt_lightweight_entry_t *ddlwe);
194 
195 extern boolean_t ddt_log_find_key(ddt_t *ddt, const ddt_key_t *ddk,
196     ddt_lightweight_entry_t *ddlwe);
197 extern boolean_t ddt_log_remove_key(ddt_t *ddt, ddt_log_t *ddl,
198     const ddt_key_t *ddk);
199 
200 extern void ddt_log_checkpoint(ddt_t *ddt, ddt_lightweight_entry_t *ddlwe,
201     dmu_tx_t *tx);
202 extern void ddt_log_truncate(ddt_t *ddt, dmu_tx_t *tx);
203 
204 extern boolean_t ddt_log_swap(ddt_t *ddt, dmu_tx_t *tx);
205 
206 extern void ddt_log_destroy(ddt_t *ddt, dmu_tx_t *tx);
207 
208 extern int ddt_log_load(ddt_t *ddt);
209 extern void ddt_log_alloc(ddt_t *ddt);
210 extern void ddt_log_free(ddt_t *ddt);
211 
212 extern void ddt_log_init(void);
213 extern void ddt_log_fini(void);
214 
215 /*
216  * These are only exposed so that zdb can access them. Try not to use them
217  * outside of the DDT implementation proper, and if you do, consider moving
218  * them up.
219  */
220 
/*
 * A histogram is used to turn a percentage request into a cutoff value;
 * entries older than the cutoff get pruned.
 *
 * Each bin represents hours, in power-of-two increments, so 16 bins cover
 * up to four years.
 */
#define	HIST_BINS 16

typedef struct ddt_age_histo {
	uint64_t	dah_entries;
	uint64_t	dah_age_histo[HIST_BINS];
} ddt_age_histo_t;
234 
235 void ddt_prune_walk(spa_t *spa, uint64_t cutoff, ddt_age_histo_t *histogram);
236 
/*
 * ddt_dump_age_histogram() prints an age histogram as a table using
 * printf(). In kernel builds, and in builds without ZFS_DEBUG, it is
 * compiled out to a no-op that does not evaluate its arguments.
 *
 * histogram - per-bin counts plus the total entry count. Nothing is printed
 *             when dah_entries is zero, which also keeps the percentage
 *             calculation from dividing by zero.
 * cutoff    - subtracted from gethrestime_sec() and reported in hours;
 *             presumably a timestamp in seconds (confirm against callers).
 *
 * One row is printed per bin: the bin's age label (1 << bin index), the
 * bin's count, and that count as a percentage of dah_entries.
 */
#if defined(_KERNEL) || !defined(ZFS_DEBUG)
#define	ddt_dump_age_histogram(histo, cutoff)	((void)0)
#else
static inline void
ddt_dump_age_histogram(ddt_age_histo_t *histogram, uint64_t cutoff)
{
	if (histogram->dah_entries == 0)
		return;

	(void) printf("DDT prune unique class age, %llu hour cutoff\n",
	    (u_longlong_t)(gethrestime_sec() - cutoff)/3600);
	(void) printf("%5s  %9s  %4s\n", "age", "blocks", "amnt");
	(void) printf("%5s  %9s  %4s\n", "-----", "---------", "----");
	for (int i = 0; i < HIST_BINS; i++) {
		(void) printf("%5d  %9llu %4d%%\n", 1<<i,
		    (u_longlong_t)histogram->dah_age_histo[i],
		    (int)((histogram->dah_age_histo[i] * 100) /
		    histogram->dah_entries));
	}
}
#endif
258 
/*
 * Name buffer length: enough room to expand the DMU_POOL_DDT format for all
 * possible DDT checksum/class/type combinations.
 */
#define	DDT_NAMELEN	32
264 
265 extern uint64_t ddt_phys_total_refcnt(const ddt_t *ddt,
266     const ddt_univ_phys_t *ddp);
267 
268 extern void ddt_key_fill(ddt_key_t *ddk, const blkptr_t *bp);
269 
270 extern void ddt_object_name(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
271     char *name);
272 extern int ddt_object_walk(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
273     uint64_t *walk, ddt_lightweight_entry_t *ddlwe);
274 extern int ddt_object_count(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
275     uint64_t *count);
276 extern int ddt_object_info(ddt_t *ddt, ddt_type_t type, ddt_class_t clazz,
277     dmu_object_info_t *);
278 
279 #ifdef	__cplusplus
280 }
281 #endif
282 
#endif	/* _SYS_DDT_IMPL_H */
284