xref: /freebsd/sys/contrib/openzfs/module/zfs/ddt_zap.c (revision 4e62c3cafa4c4e41efd6f87b7fe559cf819cf3e4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2018 by Delphix. All rights reserved.
25  * Copyright (c) 2023, Klara Inc.
26  */
27 
28 #include <sys/zfs_context.h>
29 #include <sys/spa.h>
30 #include <sys/zio.h>
31 #include <sys/ddt.h>
32 #include <sys/ddt_impl.h>
33 #include <sys/zap.h>
34 #include <sys/dmu_tx.h>
35 #include <sys/zio_compress.h>
36 
/*
 * Block shifts handed to zap_create_flags() by ddt_zap_create() when a new
 * DDT ZAP object is created: the leaf blockshift and the indirect blockshift
 * respectively.  Both are exported as module parameters at the end of this
 * file.
 */
static unsigned int ddt_zap_default_bs = 15;
static unsigned int ddt_zap_default_ibs = 15;

/*
 * Layout of the one-byte header that precedes every value stored in the ZAP:
 * the top bit records whether ZFS_HOST_BYTEORDER was non-zero on the writer,
 * the low seven bits hold the compression function that was used.
 */
#define	DDT_ZAP_COMPRESS_BYTEORDER_MASK	0x80
#define	DDT_ZAP_COMPRESS_FUNCTION_MASK	0x7f

/* Length of a ddt_key_t expressed in uint64_t words, as the ZAP key length. */
#define	DDT_KEY_WORDS	(sizeof (ddt_key_t) / sizeof (uint64_t))
44 
45 static size_t
46 ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)
47 {
48 	uchar_t *version = dst++;
49 	int cpfunc = ZIO_COMPRESS_ZLE;
50 	zio_compress_info_t *ci = &zio_compress_table[cpfunc];
51 	size_t c_len;
52 
53 	ASSERT3U(d_len, >=, s_len + 1);	/* no compression plus version byte */
54 
55 	/* Call compress function directly to avoid hole detection. */
56 	abd_t sabd, dabd;
57 	abd_get_from_buf_struct(&sabd, (void *)src, s_len);
58 	abd_get_from_buf_struct(&dabd, dst, d_len);
59 	c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level);
60 	abd_free(&dabd);
61 	abd_free(&sabd);
62 
63 	if (c_len == s_len) {
64 		cpfunc = ZIO_COMPRESS_OFF;
65 		memcpy(dst, src, s_len);
66 	}
67 
68 	*version = cpfunc;
69 	if (ZFS_HOST_BYTEORDER)
70 		*version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK;
71 
72 	return (c_len + 1);
73 }
74 
75 static void
76 ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
77 {
78 	uchar_t version = *src++;
79 	int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK;
80 
81 	if (zio_compress_table[cpfunc].ci_decompress == NULL) {
82 		memcpy(dst, src, d_len);
83 		return;
84 	}
85 
86 	abd_t sabd, dabd;
87 	abd_get_from_buf_struct(&sabd, src, s_len);
88 	abd_get_from_buf_struct(&dabd, dst, d_len);
89 	VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL));
90 	abd_free(&dabd);
91 	abd_free(&sabd);
92 
93 	if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) !=
94 	    (ZFS_HOST_BYTEORDER != 0))
95 		byteswap_uint64_array(dst, d_len);
96 }
97 
98 static int
99 ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
100 {
101 	zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY;
102 
103 	if (prehash)
104 		flags |= ZAP_FLAG_PRE_HASHED_KEY;
105 
106 	*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
107 	    ddt_zap_default_bs, ddt_zap_default_ibs,
108 	    DMU_OT_NONE, 0, tx);
109 	if (*objectp == 0)
110 		return (SET_ERROR(ENOTSUP));
111 
112 	return (0);
113 }
114 
115 static int
116 ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
117 {
118 	return (zap_destroy(os, object, tx));
119 }
120 
121 static int
122 ddt_zap_lookup(objset_t *os, uint64_t object,
123     const ddt_key_t *ddk, void *phys, size_t psize)
124 {
125 	uchar_t *cbuf;
126 	uint64_t one, csize;
127 	int error;
128 
129 	error = zap_length_uint64(os, object, (uint64_t *)ddk,
130 	    DDT_KEY_WORDS, &one, &csize);
131 	if (error)
132 		return (error);
133 
134 	ASSERT3U(one, ==, 1);
135 	ASSERT3U(csize, <=, psize + 1);
136 
137 	cbuf = kmem_alloc(csize, KM_SLEEP);
138 
139 	error = zap_lookup_uint64(os, object, (uint64_t *)ddk,
140 	    DDT_KEY_WORDS, 1, csize, cbuf);
141 	if (error == 0)
142 		ddt_zap_decompress(cbuf, phys, csize, psize);
143 
144 	kmem_free(cbuf, csize);
145 
146 	return (error);
147 }
148 
149 static int
150 ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)
151 {
152 	return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS,
153 	    NULL, NULL));
154 }
155 
156 static void
157 ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
158 {
159 	(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
160 }
161 
162 static void
163 ddt_zap_prefetch_all(objset_t *os, uint64_t object)
164 {
165 	(void) zap_prefetch_object(os, object);
166 }
167 
168 static int
169 ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
170     const void *phys, size_t psize, dmu_tx_t *tx)
171 {
172 	const size_t cbuf_size = psize + 1;
173 
174 	uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP);
175 
176 	uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);
177 
178 	int error = zap_update_uint64(os, object, (uint64_t *)ddk,
179 	    DDT_KEY_WORDS, 1, csize, cbuf, tx);
180 
181 	kmem_free(cbuf, cbuf_size);
182 
183 	return (error);
184 }
185 
186 static int
187 ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
188     dmu_tx_t *tx)
189 {
190 	return (zap_remove_uint64(os, object, (uint64_t *)ddk,
191 	    DDT_KEY_WORDS, tx));
192 }
193 
194 static int
195 ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
196     void *phys, size_t psize)
197 {
198 	zap_cursor_t zc;
199 	zap_attribute_t *za;
200 	int error;
201 
202 	za = zap_attribute_alloc();
203 	if (*walk == 0) {
204 		/*
205 		 * We don't want to prefetch the entire ZAP object, because
206 		 * it can be enormous.  Also the primary use of DDT iteration
207 		 * is for scrubbing, in which case we will be issuing many
208 		 * scrub I/Os for each ZAP block that we read in, so
209 		 * reading the ZAP is unlikely to be the bottleneck.
210 		 */
211 		zap_cursor_init_noprefetch(&zc, os, object);
212 	} else {
213 		zap_cursor_init_serialized(&zc, os, object, *walk);
214 	}
215 	if ((error = zap_cursor_retrieve(&zc, za)) == 0) {
216 		uint64_t csize = za->za_num_integers;
217 
218 		ASSERT3U(za->za_integer_length, ==, 1);
219 		ASSERT3U(csize, <=, psize + 1);
220 
221 		uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);
222 
223 		error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name,
224 		    DDT_KEY_WORDS, 1, csize, cbuf);
225 		ASSERT0(error);
226 		if (error == 0) {
227 			ddt_zap_decompress(cbuf, phys, csize, psize);
228 			*ddk = *(ddt_key_t *)za->za_name;
229 		}
230 
231 		kmem_free(cbuf, csize);
232 
233 		zap_cursor_advance(&zc);
234 		*walk = zap_cursor_serialize(&zc);
235 	}
236 	zap_cursor_fini(&zc);
237 	zap_attribute_free(za);
238 	return (error);
239 }
240 
241 static int
242 ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count)
243 {
244 	return (zap_count(os, object, count));
245 }
246 
/*
 * Operations table exporting the ddt_zap_* entry points defined in this
 * file, preceded by the string "zap" (presumably the implementation name;
 * confirm against the ddt_ops_t declaration).
 *
 * The initializer is positional, so the entries must stay in the same order
 * as the members of ddt_ops_t, which is declared outside this file.
 */
const ddt_ops_t ddt_zap_ops = {
	"zap",
	ddt_zap_create,
	ddt_zap_destroy,
	ddt_zap_lookup,
	ddt_zap_contains,
	ddt_zap_prefetch,
	ddt_zap_prefetch_all,
	ddt_zap_update,
	ddt_zap_remove,
	ddt_zap_walk,
	ddt_zap_count,
};
260 
/*
 * Export the two block shifts used by ddt_zap_create() as unsigned-integer
 * module parameters in the zfs_dedup group.  ZMOD_RW presumably makes them
 * writable at runtime; since they are only read when a DDT ZAP object is
 * created, a changed value would not affect objects that already exist.
 */
ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,
	"DDT ZAP leaf blockshift");
ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,
	"DDT ZAP indirect blockshift");
265