// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
22eda14cbcSMatt Macy 
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018 by Delphix. All rights reserved.
 * Copyright (c) 2023, Klara Inc.
 */
28eda14cbcSMatt Macy 
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/ddt.h>
#include <sys/ddt_impl.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
#include <sys/zio_compress.h>
37eda14cbcSMatt Macy 
/*
 * Default block shifts handed to zap_create_flags() when a new DDT ZAP
 * object is created: "bs" is the leaf blockshift and "ibs" the indirect
 * blockshift.  Both are exposed as module parameters at the end of this
 * file.
 */
static unsigned int ddt_zap_default_bs = 15;
static unsigned int ddt_zap_default_ibs = 15;
40eda14cbcSMatt Macy 
/*
 * Layout of the one-byte header stored in front of every DDT ZAP value:
 * the top bit is set when the writer's ZFS_HOST_BYTEORDER was nonzero,
 * and the low seven bits hold the zio compression function that was
 * applied to the payload.
 */
#define	DDT_ZAP_COMPRESS_BYTEORDER_MASK	0x80
#define	DDT_ZAP_COMPRESS_FUNCTION_MASK	0x7f

/* Size of a ddt_key_t in uint64_t words, as passed to the zap_*_uint64() calls. */
#define	DDT_KEY_WORDS	(sizeof (ddt_key_t) / sizeof (uint64_t))
454fefe1b7SMartin Matuska 
464fefe1b7SMartin Matuska static size_t
ddt_zap_compress(const void * src,uchar_t * dst,size_t s_len,size_t d_len)474fefe1b7SMartin Matuska ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len)
484fefe1b7SMartin Matuska {
494fefe1b7SMartin Matuska 	uchar_t *version = dst++;
504fefe1b7SMartin Matuska 	int cpfunc = ZIO_COMPRESS_ZLE;
514fefe1b7SMartin Matuska 	zio_compress_info_t *ci = &zio_compress_table[cpfunc];
524fefe1b7SMartin Matuska 	size_t c_len;
534fefe1b7SMartin Matuska 
544fefe1b7SMartin Matuska 	ASSERT3U(d_len, >=, s_len + 1);	/* no compression plus version byte */
554fefe1b7SMartin Matuska 
56e2df9bb4SMartin Matuska 	/* Call compress function directly to avoid hole detection. */
57e2df9bb4SMartin Matuska 	abd_t sabd, dabd;
58e2df9bb4SMartin Matuska 	abd_get_from_buf_struct(&sabd, (void *)src, s_len);
59e2df9bb4SMartin Matuska 	abd_get_from_buf_struct(&dabd, dst, d_len);
60e2df9bb4SMartin Matuska 	c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level);
61e2df9bb4SMartin Matuska 	abd_free(&dabd);
62e2df9bb4SMartin Matuska 	abd_free(&sabd);
634fefe1b7SMartin Matuska 
644fefe1b7SMartin Matuska 	if (c_len == s_len) {
654fefe1b7SMartin Matuska 		cpfunc = ZIO_COMPRESS_OFF;
664fefe1b7SMartin Matuska 		memcpy(dst, src, s_len);
674fefe1b7SMartin Matuska 	}
684fefe1b7SMartin Matuska 
694fefe1b7SMartin Matuska 	*version = cpfunc;
704fefe1b7SMartin Matuska 	if (ZFS_HOST_BYTEORDER)
714fefe1b7SMartin Matuska 		*version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK;
724fefe1b7SMartin Matuska 
734fefe1b7SMartin Matuska 	return (c_len + 1);
744fefe1b7SMartin Matuska }
754fefe1b7SMartin Matuska 
764fefe1b7SMartin Matuska static void
ddt_zap_decompress(uchar_t * src,void * dst,size_t s_len,size_t d_len)774fefe1b7SMartin Matuska ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len)
784fefe1b7SMartin Matuska {
794fefe1b7SMartin Matuska 	uchar_t version = *src++;
804fefe1b7SMartin Matuska 	int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK;
814fefe1b7SMartin Matuska 
82e2df9bb4SMartin Matuska 	if (zio_compress_table[cpfunc].ci_decompress == NULL) {
834fefe1b7SMartin Matuska 		memcpy(dst, src, d_len);
84e2df9bb4SMartin Matuska 		return;
85e2df9bb4SMartin Matuska 	}
86e2df9bb4SMartin Matuska 
87e2df9bb4SMartin Matuska 	abd_t sabd, dabd;
88e2df9bb4SMartin Matuska 	abd_get_from_buf_struct(&sabd, src, s_len);
89e2df9bb4SMartin Matuska 	abd_get_from_buf_struct(&dabd, dst, d_len);
90e2df9bb4SMartin Matuska 	VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL));
91e2df9bb4SMartin Matuska 	abd_free(&dabd);
92e2df9bb4SMartin Matuska 	abd_free(&sabd);
934fefe1b7SMartin Matuska 
944fefe1b7SMartin Matuska 	if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) !=
954fefe1b7SMartin Matuska 	    (ZFS_HOST_BYTEORDER != 0))
964fefe1b7SMartin Matuska 		byteswap_uint64_array(dst, d_len);
974fefe1b7SMartin Matuska }
984fefe1b7SMartin Matuska 
99eda14cbcSMatt Macy static int
ddt_zap_create(objset_t * os,uint64_t * objectp,dmu_tx_t * tx,boolean_t prehash)100eda14cbcSMatt Macy ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
101eda14cbcSMatt Macy {
102eda14cbcSMatt Macy 	zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY;
103eda14cbcSMatt Macy 
104eda14cbcSMatt Macy 	if (prehash)
105eda14cbcSMatt Macy 		flags |= ZAP_FLAG_PRE_HASHED_KEY;
106eda14cbcSMatt Macy 
107eda14cbcSMatt Macy 	*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
1080a97523dSMartin Matuska 	    ddt_zap_default_bs, ddt_zap_default_ibs,
109eda14cbcSMatt Macy 	    DMU_OT_NONE, 0, tx);
1104fefe1b7SMartin Matuska 	if (*objectp == 0)
1114fefe1b7SMartin Matuska 		return (SET_ERROR(ENOTSUP));
112eda14cbcSMatt Macy 
1134fefe1b7SMartin Matuska 	return (0);
114eda14cbcSMatt Macy }
115eda14cbcSMatt Macy 
116eda14cbcSMatt Macy static int
ddt_zap_destroy(objset_t * os,uint64_t object,dmu_tx_t * tx)117eda14cbcSMatt Macy ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx)
118eda14cbcSMatt Macy {
119eda14cbcSMatt Macy 	return (zap_destroy(os, object, tx));
120eda14cbcSMatt Macy }
121eda14cbcSMatt Macy 
122eda14cbcSMatt Macy static int
ddt_zap_lookup(objset_t * os,uint64_t object,const ddt_key_t * ddk,void * phys,size_t psize)1234fefe1b7SMartin Matuska ddt_zap_lookup(objset_t *os, uint64_t object,
124e2df9bb4SMartin Matuska     const ddt_key_t *ddk, void *phys, size_t psize)
125eda14cbcSMatt Macy {
126eda14cbcSMatt Macy 	uchar_t *cbuf;
127eda14cbcSMatt Macy 	uint64_t one, csize;
128eda14cbcSMatt Macy 	int error;
129eda14cbcSMatt Macy 
1304fefe1b7SMartin Matuska 	error = zap_length_uint64(os, object, (uint64_t *)ddk,
131eda14cbcSMatt Macy 	    DDT_KEY_WORDS, &one, &csize);
132eda14cbcSMatt Macy 	if (error)
1334fefe1b7SMartin Matuska 		return (error);
134eda14cbcSMatt Macy 
1354fefe1b7SMartin Matuska 	ASSERT3U(one, ==, 1);
1364fefe1b7SMartin Matuska 	ASSERT3U(csize, <=, psize + 1);
137eda14cbcSMatt Macy 
1384fefe1b7SMartin Matuska 	cbuf = kmem_alloc(csize, KM_SLEEP);
1394fefe1b7SMartin Matuska 
1404fefe1b7SMartin Matuska 	error = zap_lookup_uint64(os, object, (uint64_t *)ddk,
141eda14cbcSMatt Macy 	    DDT_KEY_WORDS, 1, csize, cbuf);
1424fefe1b7SMartin Matuska 	if (error == 0)
1434fefe1b7SMartin Matuska 		ddt_zap_decompress(cbuf, phys, csize, psize);
144eda14cbcSMatt Macy 
1454fefe1b7SMartin Matuska 	kmem_free(cbuf, csize);
146eda14cbcSMatt Macy 
147eda14cbcSMatt Macy 	return (error);
148eda14cbcSMatt Macy }
149eda14cbcSMatt Macy 
1504fefe1b7SMartin Matuska static int
ddt_zap_contains(objset_t * os,uint64_t object,const ddt_key_t * ddk)1514fefe1b7SMartin Matuska ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk)
1524fefe1b7SMartin Matuska {
1534fefe1b7SMartin Matuska 	return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS,
1544fefe1b7SMartin Matuska 	    NULL, NULL));
1554fefe1b7SMartin Matuska }
1564fefe1b7SMartin Matuska 
157eda14cbcSMatt Macy static void
ddt_zap_prefetch(objset_t * os,uint64_t object,const ddt_key_t * ddk)1584fefe1b7SMartin Matuska ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk)
159eda14cbcSMatt Macy {
1604fefe1b7SMartin Matuska 	(void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS);
161eda14cbcSMatt Macy }
162eda14cbcSMatt Macy 
163ce4dcb97SMartin Matuska static void
ddt_zap_prefetch_all(objset_t * os,uint64_t object)164ce4dcb97SMartin Matuska ddt_zap_prefetch_all(objset_t *os, uint64_t object)
165ce4dcb97SMartin Matuska {
166ce4dcb97SMartin Matuska 	(void) zap_prefetch_object(os, object);
167ce4dcb97SMartin Matuska }
168ce4dcb97SMartin Matuska 
169eda14cbcSMatt Macy static int
ddt_zap_update(objset_t * os,uint64_t object,const ddt_key_t * ddk,const void * phys,size_t psize,dmu_tx_t * tx)1704fefe1b7SMartin Matuska ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk,
171e2df9bb4SMartin Matuska     const void *phys, size_t psize, dmu_tx_t *tx)
172eda14cbcSMatt Macy {
1734fefe1b7SMartin Matuska 	const size_t cbuf_size = psize + 1;
174eda14cbcSMatt Macy 
1754fefe1b7SMartin Matuska 	uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP);
176eda14cbcSMatt Macy 
1774fefe1b7SMartin Matuska 	uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size);
1784fefe1b7SMartin Matuska 
1794fefe1b7SMartin Matuska 	int error = zap_update_uint64(os, object, (uint64_t *)ddk,
1804fefe1b7SMartin Matuska 	    DDT_KEY_WORDS, 1, csize, cbuf, tx);
1814fefe1b7SMartin Matuska 
1824fefe1b7SMartin Matuska 	kmem_free(cbuf, cbuf_size);
1834fefe1b7SMartin Matuska 
1844fefe1b7SMartin Matuska 	return (error);
185eda14cbcSMatt Macy }
186eda14cbcSMatt Macy 
187eda14cbcSMatt Macy static int
ddt_zap_remove(objset_t * os,uint64_t object,const ddt_key_t * ddk,dmu_tx_t * tx)1884fefe1b7SMartin Matuska ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk,
1894fefe1b7SMartin Matuska     dmu_tx_t *tx)
190eda14cbcSMatt Macy {
1914fefe1b7SMartin Matuska 	return (zap_remove_uint64(os, object, (uint64_t *)ddk,
192eda14cbcSMatt Macy 	    DDT_KEY_WORDS, tx));
193eda14cbcSMatt Macy }
194eda14cbcSMatt Macy 
195eda14cbcSMatt Macy static int
ddt_zap_walk(objset_t * os,uint64_t object,uint64_t * walk,ddt_key_t * ddk,void * phys,size_t psize)1964fefe1b7SMartin Matuska ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk,
197e2df9bb4SMartin Matuska     void *phys, size_t psize)
198eda14cbcSMatt Macy {
199eda14cbcSMatt Macy 	zap_cursor_t zc;
2007a7741afSMartin Matuska 	zap_attribute_t *za;
201eda14cbcSMatt Macy 	int error;
202eda14cbcSMatt Macy 
2037a7741afSMartin Matuska 	za = zap_attribute_alloc();
204eda14cbcSMatt Macy 	if (*walk == 0) {
205eda14cbcSMatt Macy 		/*
206eda14cbcSMatt Macy 		 * We don't want to prefetch the entire ZAP object, because
207eda14cbcSMatt Macy 		 * it can be enormous.  Also the primary use of DDT iteration
208eda14cbcSMatt Macy 		 * is for scrubbing, in which case we will be issuing many
209eda14cbcSMatt Macy 		 * scrub I/Os for each ZAP block that we read in, so
210eda14cbcSMatt Macy 		 * reading the ZAP is unlikely to be the bottleneck.
211eda14cbcSMatt Macy 		 */
212eda14cbcSMatt Macy 		zap_cursor_init_noprefetch(&zc, os, object);
213eda14cbcSMatt Macy 	} else {
214eda14cbcSMatt Macy 		zap_cursor_init_serialized(&zc, os, object, *walk);
215eda14cbcSMatt Macy 	}
2167a7741afSMartin Matuska 	if ((error = zap_cursor_retrieve(&zc, za)) == 0) {
2177a7741afSMartin Matuska 		uint64_t csize = za->za_num_integers;
2184fefe1b7SMartin Matuska 
2197a7741afSMartin Matuska 		ASSERT3U(za->za_integer_length, ==, 1);
2204fefe1b7SMartin Matuska 		ASSERT3U(csize, <=, psize + 1);
2214fefe1b7SMartin Matuska 
2224fefe1b7SMartin Matuska 		uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP);
2234fefe1b7SMartin Matuska 
2247a7741afSMartin Matuska 		error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name,
225eda14cbcSMatt Macy 		    DDT_KEY_WORDS, 1, csize, cbuf);
2264fefe1b7SMartin Matuska 		ASSERT0(error);
227eda14cbcSMatt Macy 		if (error == 0) {
2284fefe1b7SMartin Matuska 			ddt_zap_decompress(cbuf, phys, csize, psize);
2297a7741afSMartin Matuska 			*ddk = *(ddt_key_t *)za->za_name;
230eda14cbcSMatt Macy 		}
2314fefe1b7SMartin Matuska 
2324fefe1b7SMartin Matuska 		kmem_free(cbuf, csize);
2334fefe1b7SMartin Matuska 
234eda14cbcSMatt Macy 		zap_cursor_advance(&zc);
235eda14cbcSMatt Macy 		*walk = zap_cursor_serialize(&zc);
236eda14cbcSMatt Macy 	}
237eda14cbcSMatt Macy 	zap_cursor_fini(&zc);
2387a7741afSMartin Matuska 	zap_attribute_free(za);
239eda14cbcSMatt Macy 	return (error);
240eda14cbcSMatt Macy }
241eda14cbcSMatt Macy 
242eda14cbcSMatt Macy static int
ddt_zap_count(objset_t * os,uint64_t object,uint64_t * count)243eda14cbcSMatt Macy ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count)
244eda14cbcSMatt Macy {
245eda14cbcSMatt Macy 	return (zap_count(os, object, count));
246eda14cbcSMatt Macy }
247eda14cbcSMatt Macy 
/*
 * Operations vector for the ZAP-backed DDT storage type.  The initializer
 * is positional, so the entries must stay in the member order of
 * ddt_ops_t, which is declared outside this file.
 */
const ddt_ops_t ddt_zap_ops = {
	"zap",
	ddt_zap_create,
	ddt_zap_destroy,
	ddt_zap_lookup,
	ddt_zap_contains,
	ddt_zap_prefetch,
	ddt_zap_prefetch_all,
	ddt_zap_update,
	ddt_zap_remove,
	ddt_zap_walk,
	ddt_zap_count,
};
2610a97523dSMartin Matuska 
/*
 * Register the two block-shift defaults used by ddt_zap_create() as
 * zfs_dedup module parameters (declared with ZMOD_RW).
 */
ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,
	"DDT ZAP leaf blockshift");
ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,
	"DDT ZAP indirect blockshift");
266