xref: /freebsd/sys/contrib/openzfs/module/zfs/ddt_zap.c (revision 4b15965daa99044daf184221b7c283bf7f2d7e66)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2018 by Delphix. All rights reserved.
26  * Copyright (c) 2023, Klara Inc.
27  */
28 
29 #include <sys/zfs_context.h>
30 #include <sys/spa.h>
31 #include <sys/zio.h>
32 #include <sys/ddt.h>
33 #include <sys/ddt_impl.h>
34 #include <sys/zap.h>
35 #include <sys/dmu_tx.h>
36 #include <sys/zio_compress.h>
37 
38 static unsigned int ddt_zap_default_bs = 15;
39 static unsigned int ddt_zap_default_ibs = 15;
40 
41 #define	DDT_ZAP_COMPRESS_BYTEORDER_MASK	0x80
42 #define	DDT_ZAP_COMPRESS_FUNCTION_MASK	0x7f
43 
44 #define	DDT_KEY_WORDS	(sizeof (ddt_key_t) / sizeof (uint64_t))
45 
46 static size_t
47 ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)
48 {
49 	uchar_t *version = dst++;
50 	int cpfunc = ZIO_COMPRESS_ZLE;
51 	zio_compress_info_t *ci = &zio_compress_table[cpfunc];
52 	size_t c_len;
53 
54 	ASSERT3U(d_len, >=, s_len + 1);	/* no compression plus version byte */
55 
56 	/* Call compress function directly to avoid hole detection. */
57 	abd_t sabd, dabd;
58 	abd_get_from_buf_struct(&sabd, (void *)src, s_len);
59 	abd_get_from_buf_struct(&dabd, dst, d_len);
60 	c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level);
61 	abd_free(&dabd);
62 	abd_free(&sabd);
63 
64 	if (c_len == s_len) {
65 		cpfunc = ZIO_COMPRESS_OFF;
66 		memcpy(dst, src, s_len);
67 	}
68 
69 	*version = cpfunc;
70 	if (ZFS_HOST_BYTEORDER)
71 		*version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK;
72 
73 	return (c_len + 1);
74 }
75 
76 static void
77 ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
78 {
79 	uchar_t version = *src++;
80 	int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK;
81 
82 	if (zio_compress_table[cpfunc].ci_decompress == NULL) {
83 		memcpy(dst, src, d_len);
84 		return;
85 	}
86 
87 	abd_t sabd, dabd;
88 	abd_get_from_buf_struct(&sabd, src, s_len);
89 	abd_get_from_buf_struct(&dabd, dst, d_len);
90 	VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL));
91 	abd_free(&dabd);
92 	abd_free(&sabd);
93 
94 	if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) !=
95 	    (ZFS_HOST_BYTEORDER != 0))
96 		byteswap_uint64_array(dst, d_len);
97 }
98 
99 static int
100 ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
101 {
102 	zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY;
103 
104 	if (prehash)
105 		flags |= ZAP_FLAG_PRE_HASHED_KEY;
106 
107 	*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
108 	    ddt_zap_default_bs, ddt_zap_default_ibs,
109 	    DMU_OT_NONE, 0, tx);
110 	if (*objectp == 0)
111 		return (SET_ERROR(ENOTSUP));
112 
113 	return (0);
114 }
115 
116 static int
117 ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
118 {
119 	return (zap_destroy(os, object, tx));
120 }
121 
122 static int
123 ddt_zap_lookup(objset_t *os, uint64_t object,
124     const ddt_key_t *ddk, void *phys, size_t psize)
125 {
126 	uchar_t *cbuf;
127 	uint64_t one, csize;
128 	int error;
129 
130 	error = zap_length_uint64(os, object, (uint64_t *)ddk,
131 	    DDT_KEY_WORDS, &one, &csize);
132 	if (error)
133 		return (error);
134 
135 	ASSERT3U(one, ==, 1);
136 	ASSERT3U(csize, <=, psize + 1);
137 
138 	cbuf = kmem_alloc(csize, KM_SLEEP);
139 
140 	error = zap_lookup_uint64(os, object, (uint64_t *)ddk,
141 	    DDT_KEY_WORDS, 1, csize, cbuf);
142 	if (error == 0)
143 		ddt_zap_decompress(cbuf, phys, csize, psize);
144 
145 	kmem_free(cbuf, csize);
146 
147 	return (error);
148 }
149 
150 static int
151 ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)
152 {
153 	return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS,
154 	    NULL, NULL));
155 }
156 
157 static void
158 ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
159 {
160 	(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
161 }
162 
163 static void
164 ddt_zap_prefetch_all(objset_t *os, uint64_t object)
165 {
166 	(void) zap_prefetch_object(os, object);
167 }
168 
169 static int
170 ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
171     const void *phys, size_t psize, dmu_tx_t *tx)
172 {
173 	const size_t cbuf_size = psize + 1;
174 
175 	uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP);
176 
177 	uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);
178 
179 	int error = zap_update_uint64(os, object, (uint64_t *)ddk,
180 	    DDT_KEY_WORDS, 1, csize, cbuf, tx);
181 
182 	kmem_free(cbuf, cbuf_size);
183 
184 	return (error);
185 }
186 
187 static int
188 ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
189     dmu_tx_t *tx)
190 {
191 	return (zap_remove_uint64(os, object, (uint64_t *)ddk,
192 	    DDT_KEY_WORDS, tx));
193 }
194 
195 static int
196 ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
197     void *phys, size_t psize)
198 {
199 	zap_cursor_t zc;
200 	zap_attribute_t *za;
201 	int error;
202 
203 	za = zap_attribute_alloc();
204 	if (*walk == 0) {
205 		/*
206 		 * We don't want to prefetch the entire ZAP object, because
207 		 * it can be enormous.  Also the primary use of DDT iteration
208 		 * is for scrubbing, in which case we will be issuing many
209 		 * scrub I/Os for each ZAP block that we read in, so
210 		 * reading the ZAP is unlikely to be the bottleneck.
211 		 */
212 		zap_cursor_init_noprefetch(&zc, os, object);
213 	} else {
214 		zap_cursor_init_serialized(&zc, os, object, *walk);
215 	}
216 	if ((error = zap_cursor_retrieve(&zc, za)) == 0) {
217 		uint64_t csize = za->za_num_integers;
218 
219 		ASSERT3U(za->za_integer_length, ==, 1);
220 		ASSERT3U(csize, <=, psize + 1);
221 
222 		uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);
223 
224 		error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name,
225 		    DDT_KEY_WORDS, 1, csize, cbuf);
226 		ASSERT0(error);
227 		if (error == 0) {
228 			ddt_zap_decompress(cbuf, phys, csize, psize);
229 			*ddk = *(ddt_key_t *)za->za_name;
230 		}
231 
232 		kmem_free(cbuf, csize);
233 
234 		zap_cursor_advance(&zc);
235 		*walk = zap_cursor_serialize(&zc);
236 	}
237 	zap_cursor_fini(&zc);
238 	zap_attribute_free(za);
239 	return (error);
240 }
241 
242 static int
243 ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count)
244 {
245 	return (zap_count(os, object, count));
246 }
247 
248 const ddt_ops_t ddt_zap_ops = {
249 	"zap",
250 	ddt_zap_create,
251 	ddt_zap_destroy,
252 	ddt_zap_lookup,
253 	ddt_zap_contains,
254 	ddt_zap_prefetch,
255 	ddt_zap_prefetch_all,
256 	ddt_zap_update,
257 	ddt_zap_remove,
258 	ddt_zap_walk,
259 	ddt_zap_count,
260 };
261 
262 ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,
263 	"DDT ZAP leaf blockshift");
264 ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,
265 	"DDT ZAP indirect blockshift");
266