1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2018 by Delphix. All rights reserved. 25 * Copyright (c) 2023, Klara Inc. 26 */ 27 28 #include <sys/zfs_context.h> 29 #include <sys/spa.h> 30 #include <sys/zio.h> 31 #include <sys/ddt.h> 32 #include <sys/ddt_impl.h> 33 #include <sys/zap.h> 34 #include <sys/dmu_tx.h> 35 #include <sys/zio_compress.h> 36 37 static unsigned int ddt_zap_default_bs = 15; 38 static unsigned int ddt_zap_default_ibs = 15; 39 40 #define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x80 41 #define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f 42 43 #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) 44 45 static size_t 46 ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len) 47 { 48 uchar_t *version = dst++; 49 int cpfunc = ZIO_COMPRESS_ZLE; 50 zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 51 size_t c_len; 52 53 ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */ 54 55 /* Call compress function directly to avoid hole detection. */ 56 abd_t sabd, dabd; 57 abd_get_from_buf_struct(&sabd, (void *)src, s_len); 58 abd_get_from_buf_struct(&dabd, dst, d_len); 59 c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level); 60 abd_free(&dabd); 61 abd_free(&sabd); 62 63 if (c_len == s_len) { 64 cpfunc = ZIO_COMPRESS_OFF; 65 memcpy(dst, src, s_len); 66 } 67 68 *version = cpfunc; 69 if (ZFS_HOST_BYTEORDER) 70 *version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK; 71 72 return (c_len + 1); 73 } 74 75 static void 76 ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 77 { 78 uchar_t version = *src++; 79 int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK; 80 81 if (zio_compress_table[cpfunc].ci_decompress == NULL) { 82 memcpy(dst, src, d_len); 83 return; 84 } 85 86 abd_t sabd, dabd; 87 abd_get_from_buf_struct(&sabd, src, s_len); 88 abd_get_from_buf_struct(&dabd, dst, d_len); 89 VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL)); 90 abd_free(&dabd); 91 abd_free(&sabd); 92 93 if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) != 94 (ZFS_HOST_BYTEORDER != 0)) 95 byteswap_uint64_array(dst, d_len); 96 } 97 98 static int 99 ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) 100 { 101 zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY; 102 103 if (prehash) 104 flags |= ZAP_FLAG_PRE_HASHED_KEY; 105 106 *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, 107 ddt_zap_default_bs, ddt_zap_default_ibs, 108 DMU_OT_NONE, 0, tx); 109 if (*objectp == 0) 110 return (SET_ERROR(ENOTSUP)); 111 112 return (0); 113 } 114 115 static int 116 ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) 117 { 118 return (zap_destroy(os, object, tx)); 119 } 120 121 static int 122 ddt_zap_lookup(objset_t *os, uint64_t object, 123 const ddt_key_t *ddk, void *phys, size_t psize) 124 { 125 uchar_t *cbuf; 126 uint64_t one, csize; 127 int error; 128 129 error = zap_length_uint64(os, object, (uint64_t *)ddk, 130 DDT_KEY_WORDS, &one, &csize); 131 if (error) 132 return (error); 133 134 ASSERT3U(one, ==, 1); 135 ASSERT3U(csize, <=, psize + 1); 136 137 cbuf = kmem_alloc(csize, KM_SLEEP); 138 139 error = zap_lookup_uint64(os, object, (uint64_t *)ddk, 140 DDT_KEY_WORDS, 1, csize, cbuf); 141 if (error == 0) 142 ddt_zap_decompress(cbuf, phys, csize, psize); 143 144 kmem_free(cbuf, csize); 145 146 return (error); 147 } 148 149 static int 150 ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk) 151 { 152 return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, 153 NULL, NULL)); 154 } 155 156 static void 157 ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk) 158 { 159 (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS); 160 } 161 162 static void 163 ddt_zap_prefetch_all(objset_t *os, uint64_t object) 164 { 165 (void) zap_prefetch_object(os, object); 166 } 167 168 static int 169 ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk, 170 const void *phys, size_t psize, dmu_tx_t *tx) 171 { 172 const size_t cbuf_size = psize + 1; 173 174 uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP); 175 176 uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size); 177 178 int error = zap_update_uint64(os, object, (uint64_t *)ddk, 179 DDT_KEY_WORDS, 1, csize, cbuf, tx); 180 181 kmem_free(cbuf, cbuf_size); 182 183 return (error); 184 } 185 186 static int 187 ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk, 188 dmu_tx_t *tx) 189 { 190 return (zap_remove_uint64(os, object, (uint64_t *)ddk, 191 DDT_KEY_WORDS, tx)); 192 } 193 194 static int 195 ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, 196 void *phys, size_t psize) 197 { 198 zap_cursor_t zc; 199 zap_attribute_t *za; 200 int error; 201 202 za = zap_attribute_alloc(); 203 if (*walk == 0) { 204 /* 205 * We don't want to prefetch the entire ZAP object, because 206 * it can be enormous. Also the primary use of DDT iteration 207 * is for scrubbing, in which case we will be issuing many 208 * scrub I/Os for each ZAP block that we read in, so 209 * reading the ZAP is unlikely to be the bottleneck. 210 */ 211 zap_cursor_init_noprefetch(&zc, os, object); 212 } else { 213 zap_cursor_init_serialized(&zc, os, object, *walk); 214 } 215 if ((error = zap_cursor_retrieve(&zc, za)) == 0) { 216 uint64_t csize = za->za_num_integers; 217 218 ASSERT3U(za->za_integer_length, ==, 1); 219 ASSERT3U(csize, <=, psize + 1); 220 221 uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP); 222 223 error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name, 224 DDT_KEY_WORDS, 1, csize, cbuf); 225 ASSERT0(error); 226 if (error == 0) { 227 ddt_zap_decompress(cbuf, phys, csize, psize); 228 *ddk = *(ddt_key_t *)za->za_name; 229 } 230 231 kmem_free(cbuf, csize); 232 233 zap_cursor_advance(&zc); 234 *walk = zap_cursor_serialize(&zc); 235 } 236 zap_cursor_fini(&zc); 237 zap_attribute_free(za); 238 return (error); 239 } 240 241 static int 242 ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count) 243 { 244 return (zap_count(os, object, count)); 245 } 246 247 const ddt_ops_t ddt_zap_ops = { 248 "zap", 249 ddt_zap_create, 250 ddt_zap_destroy, 251 ddt_zap_lookup, 252 ddt_zap_contains, 253 ddt_zap_prefetch, 254 ddt_zap_prefetch_all, 255 ddt_zap_update, 256 ddt_zap_remove, 257 ddt_zap_walk, 258 ddt_zap_count, 259 }; 260 261 ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW, 262 "DDT ZAP leaf blockshift"); 263 ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW, 264 "DDT ZAP indirect blockshift"); 265