1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24eda14cbcSMatt Macy * Copyright (c) 2018 by Delphix. All rights reserved. 25eda14cbcSMatt Macy */ 26eda14cbcSMatt Macy 27eda14cbcSMatt Macy #include <sys/zfs_context.h> 28eda14cbcSMatt Macy #include <sys/spa.h> 29eda14cbcSMatt Macy #include <sys/zio.h> 30eda14cbcSMatt Macy #include <sys/ddt.h> 314fefe1b7SMartin Matuska #include <sys/ddt_impl.h> 32eda14cbcSMatt Macy #include <sys/zap.h> 33eda14cbcSMatt Macy #include <sys/dmu_tx.h> 344fefe1b7SMartin Matuska #include <sys/zio_compress.h> 35eda14cbcSMatt Macy 360a97523dSMartin Matuska static unsigned int ddt_zap_default_bs = 15; 370a97523dSMartin Matuska static unsigned int ddt_zap_default_ibs = 15; 38eda14cbcSMatt Macy 394fefe1b7SMartin Matuska #define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x80 404fefe1b7SMartin Matuska #define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f 414fefe1b7SMartin Matuska 424fefe1b7SMartin Matuska #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) 434fefe1b7SMartin Matuska 444fefe1b7SMartin Matuska static size_t 454fefe1b7SMartin Matuska ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len) 464fefe1b7SMartin Matuska { 474fefe1b7SMartin Matuska uchar_t *version = dst++; 484fefe1b7SMartin Matuska int cpfunc = ZIO_COMPRESS_ZLE; 494fefe1b7SMartin Matuska zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 504fefe1b7SMartin Matuska size_t c_len; 514fefe1b7SMartin Matuska 524fefe1b7SMartin Matuska ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */ 534fefe1b7SMartin Matuska 544fefe1b7SMartin Matuska c_len = ci->ci_compress((void *)src, dst, s_len, d_len - 1, 554fefe1b7SMartin Matuska ci->ci_level); 564fefe1b7SMartin Matuska 574fefe1b7SMartin Matuska if (c_len == s_len) { 584fefe1b7SMartin Matuska cpfunc = ZIO_COMPRESS_OFF; 594fefe1b7SMartin Matuska memcpy(dst, src, s_len); 604fefe1b7SMartin Matuska } 614fefe1b7SMartin Matuska 624fefe1b7SMartin Matuska *version = cpfunc; 634fefe1b7SMartin Matuska if (ZFS_HOST_BYTEORDER) 644fefe1b7SMartin Matuska *version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK; 654fefe1b7SMartin Matuska 664fefe1b7SMartin Matuska return (c_len + 1); 674fefe1b7SMartin Matuska } 684fefe1b7SMartin Matuska 694fefe1b7SMartin Matuska static void 704fefe1b7SMartin Matuska ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 714fefe1b7SMartin Matuska { 724fefe1b7SMartin Matuska uchar_t version = *src++; 734fefe1b7SMartin Matuska int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK; 744fefe1b7SMartin Matuska zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 754fefe1b7SMartin Matuska 764fefe1b7SMartin Matuska if (ci->ci_decompress != NULL) 774fefe1b7SMartin Matuska (void) ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level); 784fefe1b7SMartin Matuska else 794fefe1b7SMartin Matuska memcpy(dst, src, d_len); 804fefe1b7SMartin Matuska 814fefe1b7SMartin Matuska if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) != 824fefe1b7SMartin Matuska (ZFS_HOST_BYTEORDER != 0)) 834fefe1b7SMartin Matuska byteswap_uint64_array(dst, d_len); 844fefe1b7SMartin Matuska } 854fefe1b7SMartin Matuska 86eda14cbcSMatt Macy static int 87eda14cbcSMatt Macy ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) 88eda14cbcSMatt Macy { 89eda14cbcSMatt Macy zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY; 90eda14cbcSMatt Macy 91eda14cbcSMatt Macy if (prehash) 92eda14cbcSMatt Macy flags |= ZAP_FLAG_PRE_HASHED_KEY; 93eda14cbcSMatt Macy 94eda14cbcSMatt Macy *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, 950a97523dSMartin Matuska ddt_zap_default_bs, ddt_zap_default_ibs, 96eda14cbcSMatt Macy DMU_OT_NONE, 0, tx); 974fefe1b7SMartin Matuska if (*objectp == 0) 984fefe1b7SMartin Matuska return (SET_ERROR(ENOTSUP)); 99eda14cbcSMatt Macy 1004fefe1b7SMartin Matuska return (0); 101eda14cbcSMatt Macy } 102eda14cbcSMatt Macy 103eda14cbcSMatt Macy static int 104eda14cbcSMatt Macy ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) 105eda14cbcSMatt Macy { 106eda14cbcSMatt Macy return (zap_destroy(os, object, tx)); 107eda14cbcSMatt Macy } 108eda14cbcSMatt Macy 109eda14cbcSMatt Macy static int 1104fefe1b7SMartin Matuska ddt_zap_lookup(objset_t *os, uint64_t object, 1114fefe1b7SMartin Matuska const ddt_key_t *ddk, ddt_phys_t *phys, size_t psize) 112eda14cbcSMatt Macy { 113eda14cbcSMatt Macy uchar_t *cbuf; 114eda14cbcSMatt Macy uint64_t one, csize; 115eda14cbcSMatt Macy int error; 116eda14cbcSMatt Macy 1174fefe1b7SMartin Matuska error = zap_length_uint64(os, object, (uint64_t *)ddk, 118eda14cbcSMatt Macy DDT_KEY_WORDS, &one, &csize); 119eda14cbcSMatt Macy if (error) 1204fefe1b7SMartin Matuska return (error); 121eda14cbcSMatt Macy 1224fefe1b7SMartin Matuska ASSERT3U(one, ==, 1); 1234fefe1b7SMartin Matuska ASSERT3U(csize, <=, psize + 1); 124eda14cbcSMatt Macy 1254fefe1b7SMartin Matuska cbuf = kmem_alloc(csize, KM_SLEEP); 1264fefe1b7SMartin Matuska 1274fefe1b7SMartin Matuska error = zap_lookup_uint64(os, object, (uint64_t *)ddk, 128eda14cbcSMatt Macy DDT_KEY_WORDS, 1, csize, cbuf); 1294fefe1b7SMartin Matuska if (error == 0) 1304fefe1b7SMartin Matuska ddt_zap_decompress(cbuf, phys, csize, psize); 131eda14cbcSMatt Macy 1324fefe1b7SMartin Matuska kmem_free(cbuf, csize); 133eda14cbcSMatt Macy 134eda14cbcSMatt Macy return (error); 135eda14cbcSMatt Macy } 136eda14cbcSMatt Macy 1374fefe1b7SMartin Matuska static int 1384fefe1b7SMartin Matuska ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk) 1394fefe1b7SMartin Matuska { 1404fefe1b7SMartin Matuska return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, 1414fefe1b7SMartin Matuska NULL, NULL)); 1424fefe1b7SMartin Matuska } 1434fefe1b7SMartin Matuska 144eda14cbcSMatt Macy static void 1454fefe1b7SMartin Matuska ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk) 146eda14cbcSMatt Macy { 1474fefe1b7SMartin Matuska (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS); 148eda14cbcSMatt Macy } 149eda14cbcSMatt Macy 150*ce4dcb97SMartin Matuska static void 151*ce4dcb97SMartin Matuska ddt_zap_prefetch_all(objset_t *os, uint64_t object) 152*ce4dcb97SMartin Matuska { 153*ce4dcb97SMartin Matuska (void) zap_prefetch_object(os, object); 154*ce4dcb97SMartin Matuska } 155*ce4dcb97SMartin Matuska 156eda14cbcSMatt Macy static int 1574fefe1b7SMartin Matuska ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk, 1584fefe1b7SMartin Matuska const ddt_phys_t *phys, size_t psize, dmu_tx_t *tx) 159eda14cbcSMatt Macy { 1604fefe1b7SMartin Matuska const size_t cbuf_size = psize + 1; 161eda14cbcSMatt Macy 1624fefe1b7SMartin Matuska uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP); 163eda14cbcSMatt Macy 1644fefe1b7SMartin Matuska uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size); 1654fefe1b7SMartin Matuska 1664fefe1b7SMartin Matuska int error = zap_update_uint64(os, object, (uint64_t *)ddk, 1674fefe1b7SMartin Matuska DDT_KEY_WORDS, 1, csize, cbuf, tx); 1684fefe1b7SMartin Matuska 1694fefe1b7SMartin Matuska kmem_free(cbuf, cbuf_size); 1704fefe1b7SMartin Matuska 1714fefe1b7SMartin Matuska return (error); 172eda14cbcSMatt Macy } 173eda14cbcSMatt Macy 174eda14cbcSMatt Macy static int 1754fefe1b7SMartin Matuska ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk, 1764fefe1b7SMartin Matuska dmu_tx_t *tx) 177eda14cbcSMatt Macy { 1784fefe1b7SMartin Matuska return (zap_remove_uint64(os, object, (uint64_t *)ddk, 179eda14cbcSMatt Macy DDT_KEY_WORDS, tx)); 180eda14cbcSMatt Macy } 181eda14cbcSMatt Macy 182eda14cbcSMatt Macy static int 1834fefe1b7SMartin Matuska ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, 1844fefe1b7SMartin Matuska ddt_phys_t *phys, size_t psize) 185eda14cbcSMatt Macy { 186eda14cbcSMatt Macy zap_cursor_t zc; 187eda14cbcSMatt Macy zap_attribute_t za; 188eda14cbcSMatt Macy int error; 189eda14cbcSMatt Macy 190eda14cbcSMatt Macy if (*walk == 0) { 191eda14cbcSMatt Macy /* 192eda14cbcSMatt Macy * We don't want to prefetch the entire ZAP object, because 193eda14cbcSMatt Macy * it can be enormous. Also the primary use of DDT iteration 194eda14cbcSMatt Macy * is for scrubbing, in which case we will be issuing many 195eda14cbcSMatt Macy * scrub I/Os for each ZAP block that we read in, so 196eda14cbcSMatt Macy * reading the ZAP is unlikely to be the bottleneck. 197eda14cbcSMatt Macy */ 198eda14cbcSMatt Macy zap_cursor_init_noprefetch(&zc, os, object); 199eda14cbcSMatt Macy } else { 200eda14cbcSMatt Macy zap_cursor_init_serialized(&zc, os, object, *walk); 201eda14cbcSMatt Macy } 202eda14cbcSMatt Macy if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { 203eda14cbcSMatt Macy uint64_t csize = za.za_num_integers; 2044fefe1b7SMartin Matuska 2054fefe1b7SMartin Matuska ASSERT3U(za.za_integer_length, ==, 1); 2064fefe1b7SMartin Matuska ASSERT3U(csize, <=, psize + 1); 2074fefe1b7SMartin Matuska 2084fefe1b7SMartin Matuska uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP); 2094fefe1b7SMartin Matuska 210eda14cbcSMatt Macy error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name, 211eda14cbcSMatt Macy DDT_KEY_WORDS, 1, csize, cbuf); 2124fefe1b7SMartin Matuska ASSERT0(error); 213eda14cbcSMatt Macy if (error == 0) { 2144fefe1b7SMartin Matuska ddt_zap_decompress(cbuf, phys, csize, psize); 2154fefe1b7SMartin Matuska *ddk = *(ddt_key_t *)za.za_name; 216eda14cbcSMatt Macy } 2174fefe1b7SMartin Matuska 2184fefe1b7SMartin Matuska kmem_free(cbuf, csize); 2194fefe1b7SMartin Matuska 220eda14cbcSMatt Macy zap_cursor_advance(&zc); 221eda14cbcSMatt Macy *walk = zap_cursor_serialize(&zc); 222eda14cbcSMatt Macy } 223eda14cbcSMatt Macy zap_cursor_fini(&zc); 224eda14cbcSMatt Macy return (error); 225eda14cbcSMatt Macy } 226eda14cbcSMatt Macy 227eda14cbcSMatt Macy static int 228eda14cbcSMatt Macy ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count) 229eda14cbcSMatt Macy { 230eda14cbcSMatt Macy return (zap_count(os, object, count)); 231eda14cbcSMatt Macy } 232eda14cbcSMatt Macy 233eda14cbcSMatt Macy const ddt_ops_t ddt_zap_ops = { 234eda14cbcSMatt Macy "zap", 235eda14cbcSMatt Macy ddt_zap_create, 236eda14cbcSMatt Macy ddt_zap_destroy, 237eda14cbcSMatt Macy ddt_zap_lookup, 2384fefe1b7SMartin Matuska ddt_zap_contains, 239eda14cbcSMatt Macy ddt_zap_prefetch, 240*ce4dcb97SMartin Matuska ddt_zap_prefetch_all, 241eda14cbcSMatt Macy ddt_zap_update, 242eda14cbcSMatt Macy ddt_zap_remove, 243eda14cbcSMatt Macy ddt_zap_walk, 244eda14cbcSMatt Macy ddt_zap_count, 245eda14cbcSMatt Macy }; 2460a97523dSMartin Matuska 2470a97523dSMartin Matuska /* BEGIN CSTYLED */ 2480a97523dSMartin Matuska ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW, 2490a97523dSMartin Matuska "DDT ZAP leaf blockshift"); 2500a97523dSMartin Matuska ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW, 2510a97523dSMartin Matuska "DDT ZAP indirect blockshift"); 2520a97523dSMartin Matuska /* END CSTYLED */ 253