1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2018 by Delphix. All rights reserved. 25 */ 26 27 #include <sys/zfs_context.h> 28 #include <sys/spa.h> 29 #include <sys/zio.h> 30 #include <sys/ddt.h> 31 #include <sys/ddt_impl.h> 32 #include <sys/zap.h> 33 #include <sys/dmu_tx.h> 34 #include <sys/zio_compress.h> 35 36 static unsigned int ddt_zap_default_bs = 15; 37 static unsigned int ddt_zap_default_ibs = 15; 38 39 #define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x80 40 #define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f 41 42 #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) 43 44 static size_t 45 ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len) 46 { 47 uchar_t *version = dst++; 48 int cpfunc = ZIO_COMPRESS_ZLE; 49 zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 50 size_t c_len; 51 52 ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */ 53 54 c_len = ci->ci_compress((void *)src, dst, s_len, d_len - 1, 55 ci->ci_level); 56 57 if (c_len == s_len) { 58 cpfunc = ZIO_COMPRESS_OFF; 59 memcpy(dst, src, s_len); 60 } 61 62 *version = cpfunc; 63 if (ZFS_HOST_BYTEORDER) 64 *version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK; 65 66 return (c_len + 1); 67 } 68 69 static void 70 ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 71 { 72 uchar_t version = *src++; 73 int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK; 74 zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 75 76 if (ci->ci_decompress != NULL) 77 (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); 78 else 79 memcpy(dst, src, d_len); 80 81 if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) != 82 (ZFS_HOST_BYTEORDER != 0)) 83 byteswap_uint64_array(dst, d_len); 84 } 85 86 static int 87 ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) 88 { 89 zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY; 90 91 if (prehash) 92 flags |= ZAP_FLAG_PRE_HASHED_KEY; 93 94 *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, 95 ddt_zap_default_bs, ddt_zap_default_ibs, 96 DMU_OT_NONE, 0, tx); 97 if (*objectp == 0) 98 return (SET_ERROR(ENOTSUP)); 99 100 return (0); 101 } 102 103 static int 104 ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) 105 { 106 return (zap_destroy(os, object, tx)); 107 } 108 109 static int 110 ddt_zap_lookup(objset_t *os, uint64_t object, 111 const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize) 112 { 113 uchar_t *cbuf; 114 uint64_t one, csize; 115 int error; 116 117 error = zap_length_uint64(os, object, (uint64_t *)ddk, 118 DDT_KEY_WORDS, &one, &csize); 119 if (error) 120 return (error); 121 122 ASSERT3U(one, ==, 1); 123 ASSERT3U(csize, <=, psize + 1); 124 125 cbuf = kmem_alloc(csize, KM_SLEEP); 126 127 error = zap_lookup_uint64(os, object, (uint64_t *)ddk, 128 DDT_KEY_WORDS, 1, csize, cbuf); 129 if (error == 0) 130 ddt_zap_decompress(cbuf, phys, csize, psize); 131 132 kmem_free(cbuf, csize); 133 134 return (error); 135 } 136 137 static int 138 ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk) 139 { 140 return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, 141 NULL, NULL)); 142 } 143 144 static void 145 ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk) 146 { 147 (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS); 148 } 149 150 static void 151 ddt_zap_prefetch_all(objset_t *os, uint64_t object) 152 { 153 (void) zap_prefetch_object(os, object); 154 } 155 156 static int 157 ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk, 158 const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx) 159 { 160 const size_t cbuf_size = psize + 1; 161 162 uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP); 163 164 uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size); 165 166 int error = zap_update_uint64(os, object, (uint64_t *)ddk, 167 DDT_KEY_WORDS, 1, csize, cbuf, tx); 168 169 kmem_free(cbuf, cbuf_size); 170 171 return (error); 172 } 173 174 static int 175 ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk, 176 dmu_tx_t *tx) 177 { 178 return (zap_remove_uint64(os, object, (uint64_t *)ddk, 179 DDT_KEY_WORDS, tx)); 180 } 181 182 static int 183 ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, 184 ddt_phys_t *phys, size_t psize) 185 { 186 zap_cursor_t zc; 187 zap_attribute_t za; 188 int error; 189 190 if (*walk == 0) { 191 /* 192 * We don't want to prefetch the entire ZAP object, because 193 * it can be enormous. Also the primary use of DDT iteration 194 * is for scrubbing, in which case we will be issuing many 195 * scrub I/Os for each ZAP block that we read in, so 196 * reading the ZAP is unlikely to be the bottleneck. 197 */ 198 zap_cursor_init_noprefetch(&zc, os, object); 199 } else { 200 zap_cursor_init_serialized(&zc, os, object, *walk); 201 } 202 if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { 203 uint64_t csize = za.za_num_integers; 204 205 ASSERT3U(za.za_integer_length, ==, 1); 206 ASSERT3U(csize, <=, psize + 1); 207 208 uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP); 209 210 error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name, 211 DDT_KEY_WORDS, 1, csize, cbuf); 212 ASSERT0(error); 213 if (error == 0) { 214 ddt_zap_decompress(cbuf, phys, csize, psize); 215 *ddk = *(ddt_key_t *)za.za_name; 216 } 217 218 kmem_free(cbuf, csize); 219 220 zap_cursor_advance(&zc); 221 *walk = zap_cursor_serialize(&zc); 222 } 223 zap_cursor_fini(&zc); 224 return (error); 225 } 226 227 static int 228 ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count) 229 { 230 return (zap_count(os, object, count)); 231 } 232 233 const ddt_ops_t ddt_zap_ops = { 234 "zap", 235 ddt_zap_create, 236 ddt_zap_destroy, 237 ddt_zap_lookup, 238 ddt_zap_contains, 239 ddt_zap_prefetch, 240 ddt_zap_prefetch_all, 241 ddt_zap_update, 242 ddt_zap_remove, 243 ddt_zap_walk, 244 ddt_zap_count, 245 }; 246 247 /* BEGIN CSTYLED */ 248 ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW, 249 "DDT ZAP leaf blockshift"); 250 ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW, 251 "DDT ZAP indirect blockshift"); 252 /* END CSTYLED */ 253