1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 25 * Copyright (c) 2018 by Delphix. All rights reserved. 26 * Copyright (c) 2023, Klara Inc. 27 */ 28 29 #include <sys/zfs_context.h> 30 #include <sys/spa.h> 31 #include <sys/zio.h> 32 #include <sys/ddt.h> 33 #include <sys/ddt_impl.h> 34 #include <sys/zap.h> 35 #include <sys/dmu_tx.h> 36 #include <sys/zio_compress.h> 37 38 static unsigned int ddt_zap_default_bs = 15; 39 static unsigned int ddt_zap_default_ibs = 15; 40 41 #define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x80 42 #define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f 43 44 #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) 45 46 static size_t 47 ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len) 48 { 49 uchar_t *version = dst++; 50 int cpfunc = ZIO_COMPRESS_ZLE; 51 zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 52 size_t c_len; 53 54 ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */ 55 56 /* Call compress function directly to avoid hole detection. */ 57 abd_t sabd, dabd; 58 abd_get_from_buf_struct(&sabd, (void *)src, s_len); 59 abd_get_from_buf_struct(&dabd, dst, d_len); 60 c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level); 61 abd_free(&dabd); 62 abd_free(&sabd); 63 64 if (c_len == s_len) { 65 cpfunc = ZIO_COMPRESS_OFF; 66 memcpy(dst, src, s_len); 67 } 68 69 *version = cpfunc; 70 if (ZFS_HOST_BYTEORDER) 71 *version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK; 72 73 return (c_len + 1); 74 } 75 76 static void 77 ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 78 { 79 uchar_t version = *src++; 80 int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK; 81 82 if (zio_compress_table[cpfunc].ci_decompress == NULL) { 83 memcpy(dst, src, d_len); 84 return; 85 } 86 87 abd_t sabd, dabd; 88 abd_get_from_buf_struct(&sabd, src, s_len); 89 abd_get_from_buf_struct(&dabd, dst, d_len); 90 VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL)); 91 abd_free(&dabd); 92 abd_free(&sabd); 93 94 if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) != 95 (ZFS_HOST_BYTEORDER != 0)) 96 byteswap_uint64_array(dst, d_len); 97 } 98 99 static int 100 ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) 101 { 102 zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY; 103 104 if (prehash) 105 flags |= ZAP_FLAG_PRE_HASHED_KEY; 106 107 *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, 108 ddt_zap_default_bs, ddt_zap_default_ibs, 109 DMU_OT_NONE, 0, tx); 110 if (*objectp == 0) 111 return (SET_ERROR(ENOTSUP)); 112 113 return (0); 114 } 115 116 static int 117 ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) 118 { 119 return (zap_destroy(os, object, tx)); 120 } 121 122 static int 123 ddt_zap_lookup(objset_t *os, uint64_t object, 124 const ddt_key_t *ddk, void *phys, size_t psize) 125 { 126 uchar_t *cbuf; 127 uint64_t one, csize; 128 int error; 129 130 error = zap_length_uint64(os, object, (uint64_t *)ddk, 131 DDT_KEY_WORDS, &one, &csize); 132 if (error) 133 return (error); 134 135 ASSERT3U(one, ==, 1); 136 ASSERT3U(csize, <=, psize + 1); 137 138 cbuf = kmem_alloc(csize, KM_SLEEP); 139 140 error = zap_lookup_uint64(os, object, (uint64_t *)ddk, 141 DDT_KEY_WORDS, 1, csize, cbuf); 142 if (error == 0) 143 ddt_zap_decompress(cbuf, phys, csize, psize); 144 145 kmem_free(cbuf, csize); 146 147 return (error); 148 } 149 150 static int 151 ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk) 152 { 153 return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, 154 NULL, NULL)); 155 } 156 157 static void 158 ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk) 159 { 160 (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS); 161 } 162 163 static void 164 ddt_zap_prefetch_all(objset_t *os, uint64_t object) 165 { 166 (void) zap_prefetch_object(os, object); 167 } 168 169 static int 170 ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk, 171 const void *phys, size_t psize, dmu_tx_t *tx) 172 { 173 const size_t cbuf_size = psize + 1; 174 175 uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP); 176 177 uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size); 178 179 int error = zap_update_uint64(os, object, (uint64_t *)ddk, 180 DDT_KEY_WORDS, 1, csize, cbuf, tx); 181 182 kmem_free(cbuf, cbuf_size); 183 184 return (error); 185 } 186 187 static int 188 ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk, 189 dmu_tx_t *tx) 190 { 191 return (zap_remove_uint64(os, object, (uint64_t *)ddk, 192 DDT_KEY_WORDS, tx)); 193 } 194 195 static int 196 ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, 197 void *phys, size_t psize) 198 { 199 zap_cursor_t zc; 200 zap_attribute_t *za; 201 int error; 202 203 za = zap_attribute_alloc(); 204 if (*walk == 0) { 205 /* 206 * We don't want to prefetch the entire ZAP object, because 207 * it can be enormous. Also the primary use of DDT iteration 208 * is for scrubbing, in which case we will be issuing many 209 * scrub I/Os for each ZAP block that we read in, so 210 * reading the ZAP is unlikely to be the bottleneck. 211 */ 212 zap_cursor_init_noprefetch(&zc, os, object); 213 } else { 214 zap_cursor_init_serialized(&zc, os, object, *walk); 215 } 216 if ((error = zap_cursor_retrieve(&zc, za)) == 0) { 217 uint64_t csize = za->za_num_integers; 218 219 ASSERT3U(za->za_integer_length, ==, 1); 220 ASSERT3U(csize, <=, psize + 1); 221 222 uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP); 223 224 error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name, 225 DDT_KEY_WORDS, 1, csize, cbuf); 226 ASSERT0(error); 227 if (error == 0) { 228 ddt_zap_decompress(cbuf, phys, csize, psize); 229 *ddk = *(ddt_key_t *)za->za_name; 230 } 231 232 kmem_free(cbuf, csize); 233 234 zap_cursor_advance(&zc); 235 *walk = zap_cursor_serialize(&zc); 236 } 237 zap_cursor_fini(&zc); 238 zap_attribute_free(za); 239 return (error); 240 } 241 242 static int 243 ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count) 244 { 245 return (zap_count(os, object, count)); 246 } 247 248 const ddt_ops_t ddt_zap_ops = { 249 "zap", 250 ddt_zap_create, 251 ddt_zap_destroy, 252 ddt_zap_lookup, 253 ddt_zap_contains, 254 ddt_zap_prefetch, 255 ddt_zap_prefetch_all, 256 ddt_zap_update, 257 ddt_zap_remove, 258 ddt_zap_walk, 259 ddt_zap_count, 260 }; 261 262 ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW, 263 "DDT ZAP leaf blockshift"); 264 ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW, 265 "DDT ZAP indirect blockshift"); 266