1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6*eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7*eda14cbcSMatt Macy * 8*eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10*eda14cbcSMatt Macy * See the License for the specific language governing permissions 11*eda14cbcSMatt Macy * and limitations under the License. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14*eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16*eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17*eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18*eda14cbcSMatt Macy * 19*eda14cbcSMatt Macy * CDDL HEADER END 20*eda14cbcSMatt Macy */ 21*eda14cbcSMatt Macy 22*eda14cbcSMatt Macy /* 23*eda14cbcSMatt Macy * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24*eda14cbcSMatt Macy * Copyright (c) 2018 by Delphix. All rights reserved. 25*eda14cbcSMatt Macy */ 26*eda14cbcSMatt Macy 27*eda14cbcSMatt Macy #include <sys/zfs_context.h> 28*eda14cbcSMatt Macy #include <sys/spa.h> 29*eda14cbcSMatt Macy #include <sys/zio.h> 30*eda14cbcSMatt Macy #include <sys/ddt.h> 31*eda14cbcSMatt Macy #include <sys/zap.h> 32*eda14cbcSMatt Macy #include <sys/dmu_tx.h> 33*eda14cbcSMatt Macy 34*eda14cbcSMatt Macy int ddt_zap_leaf_blockshift = 12; 35*eda14cbcSMatt Macy int ddt_zap_indirect_blockshift = 12; 36*eda14cbcSMatt Macy 37*eda14cbcSMatt Macy static int 38*eda14cbcSMatt Macy ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) 39*eda14cbcSMatt Macy { 40*eda14cbcSMatt Macy zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY; 41*eda14cbcSMatt Macy 42*eda14cbcSMatt Macy if (prehash) 43*eda14cbcSMatt Macy flags |= ZAP_FLAG_PRE_HASHED_KEY; 44*eda14cbcSMatt Macy 45*eda14cbcSMatt Macy *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, 46*eda14cbcSMatt Macy ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift, 47*eda14cbcSMatt Macy DMU_OT_NONE, 0, tx); 48*eda14cbcSMatt Macy 49*eda14cbcSMatt Macy return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0); 50*eda14cbcSMatt Macy } 51*eda14cbcSMatt Macy 52*eda14cbcSMatt Macy static int 53*eda14cbcSMatt Macy ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) 54*eda14cbcSMatt Macy { 55*eda14cbcSMatt Macy return (zap_destroy(os, object, tx)); 56*eda14cbcSMatt Macy } 57*eda14cbcSMatt Macy 58*eda14cbcSMatt Macy static int 59*eda14cbcSMatt Macy ddt_zap_lookup(objset_t *os, uint64_t object, ddt_entry_t *dde) 60*eda14cbcSMatt Macy { 61*eda14cbcSMatt Macy uchar_t *cbuf; 62*eda14cbcSMatt Macy uint64_t one, csize; 63*eda14cbcSMatt Macy int error; 64*eda14cbcSMatt Macy 65*eda14cbcSMatt Macy cbuf = kmem_alloc(sizeof (dde->dde_phys) + 1, KM_SLEEP); 66*eda14cbcSMatt Macy 67*eda14cbcSMatt Macy error = zap_length_uint64(os, object, (uint64_t *)&dde->dde_key, 68*eda14cbcSMatt Macy DDT_KEY_WORDS, &one, &csize); 69*eda14cbcSMatt Macy if (error) 70*eda14cbcSMatt Macy goto out; 71*eda14cbcSMatt Macy 72*eda14cbcSMatt Macy ASSERT(one == 1); 73*eda14cbcSMatt Macy ASSERT(csize <= (sizeof (dde->dde_phys) + 1)); 74*eda14cbcSMatt Macy 75*eda14cbcSMatt Macy error = zap_lookup_uint64(os, object, (uint64_t *)&dde->dde_key, 76*eda14cbcSMatt Macy DDT_KEY_WORDS, 1, csize, cbuf); 77*eda14cbcSMatt Macy if (error) 78*eda14cbcSMatt Macy goto out; 79*eda14cbcSMatt Macy 80*eda14cbcSMatt Macy ddt_decompress(cbuf, dde->dde_phys, csize, sizeof (dde->dde_phys)); 81*eda14cbcSMatt Macy out: 82*eda14cbcSMatt Macy kmem_free(cbuf, sizeof (dde->dde_phys) + 1); 83*eda14cbcSMatt Macy 84*eda14cbcSMatt Macy return (error); 85*eda14cbcSMatt Macy } 86*eda14cbcSMatt Macy 87*eda14cbcSMatt Macy static void 88*eda14cbcSMatt Macy ddt_zap_prefetch(objset_t *os, uint64_t object, ddt_entry_t *dde) 89*eda14cbcSMatt Macy { 90*eda14cbcSMatt Macy (void) zap_prefetch_uint64(os, object, (uint64_t *)&dde->dde_key, 91*eda14cbcSMatt Macy DDT_KEY_WORDS); 92*eda14cbcSMatt Macy } 93*eda14cbcSMatt Macy 94*eda14cbcSMatt Macy static int 95*eda14cbcSMatt Macy ddt_zap_update(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx) 96*eda14cbcSMatt Macy { 97*eda14cbcSMatt Macy uchar_t cbuf[sizeof (dde->dde_phys) + 1]; 98*eda14cbcSMatt Macy uint64_t csize; 99*eda14cbcSMatt Macy 100*eda14cbcSMatt Macy csize = ddt_compress(dde->dde_phys, cbuf, 101*eda14cbcSMatt Macy sizeof (dde->dde_phys), sizeof (cbuf)); 102*eda14cbcSMatt Macy 103*eda14cbcSMatt Macy return (zap_update_uint64(os, object, (uint64_t *)&dde->dde_key, 104*eda14cbcSMatt Macy DDT_KEY_WORDS, 1, csize, cbuf, tx)); 105*eda14cbcSMatt Macy } 106*eda14cbcSMatt Macy 107*eda14cbcSMatt Macy static int 108*eda14cbcSMatt Macy ddt_zap_remove(objset_t *os, uint64_t object, ddt_entry_t *dde, dmu_tx_t *tx) 109*eda14cbcSMatt Macy { 110*eda14cbcSMatt Macy return (zap_remove_uint64(os, object, (uint64_t *)&dde->dde_key, 111*eda14cbcSMatt Macy DDT_KEY_WORDS, tx)); 112*eda14cbcSMatt Macy } 113*eda14cbcSMatt Macy 114*eda14cbcSMatt Macy static int 115*eda14cbcSMatt Macy ddt_zap_walk(objset_t *os, uint64_t object, ddt_entry_t *dde, uint64_t *walk) 116*eda14cbcSMatt Macy { 117*eda14cbcSMatt Macy zap_cursor_t zc; 118*eda14cbcSMatt Macy zap_attribute_t za; 119*eda14cbcSMatt Macy int error; 120*eda14cbcSMatt Macy 121*eda14cbcSMatt Macy if (*walk == 0) { 122*eda14cbcSMatt Macy /* 123*eda14cbcSMatt Macy * We don't want to prefetch the entire ZAP object, because 124*eda14cbcSMatt Macy * it can be enormous. Also the primary use of DDT iteration 125*eda14cbcSMatt Macy * is for scrubbing, in which case we will be issuing many 126*eda14cbcSMatt Macy * scrub I/Os for each ZAP block that we read in, so 127*eda14cbcSMatt Macy * reading the ZAP is unlikely to be the bottleneck. 128*eda14cbcSMatt Macy */ 129*eda14cbcSMatt Macy zap_cursor_init_noprefetch(&zc, os, object); 130*eda14cbcSMatt Macy } else { 131*eda14cbcSMatt Macy zap_cursor_init_serialized(&zc, os, object, *walk); 132*eda14cbcSMatt Macy } 133*eda14cbcSMatt Macy if ((error = zap_cursor_retrieve(&zc, &za)) == 0) { 134*eda14cbcSMatt Macy uchar_t cbuf[sizeof (dde->dde_phys) + 1]; 135*eda14cbcSMatt Macy uint64_t csize = za.za_num_integers; 136*eda14cbcSMatt Macy ASSERT(za.za_integer_length == 1); 137*eda14cbcSMatt Macy error = zap_lookup_uint64(os, object, (uint64_t *)za.za_name, 138*eda14cbcSMatt Macy DDT_KEY_WORDS, 1, csize, cbuf); 139*eda14cbcSMatt Macy ASSERT(error == 0); 140*eda14cbcSMatt Macy if (error == 0) { 141*eda14cbcSMatt Macy ddt_decompress(cbuf, dde->dde_phys, csize, 142*eda14cbcSMatt Macy sizeof (dde->dde_phys)); 143*eda14cbcSMatt Macy dde->dde_key = *(ddt_key_t *)za.za_name; 144*eda14cbcSMatt Macy } 145*eda14cbcSMatt Macy zap_cursor_advance(&zc); 146*eda14cbcSMatt Macy *walk = zap_cursor_serialize(&zc); 147*eda14cbcSMatt Macy } 148*eda14cbcSMatt Macy zap_cursor_fini(&zc); 149*eda14cbcSMatt Macy return (error); 150*eda14cbcSMatt Macy } 151*eda14cbcSMatt Macy 152*eda14cbcSMatt Macy static int 153*eda14cbcSMatt Macy ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count) 154*eda14cbcSMatt Macy { 155*eda14cbcSMatt Macy return (zap_count(os, object, count)); 156*eda14cbcSMatt Macy } 157*eda14cbcSMatt Macy 158*eda14cbcSMatt Macy const ddt_ops_t ddt_zap_ops = { 159*eda14cbcSMatt Macy "zap", 160*eda14cbcSMatt Macy ddt_zap_create, 161*eda14cbcSMatt Macy ddt_zap_destroy, 162*eda14cbcSMatt Macy ddt_zap_lookup, 163*eda14cbcSMatt Macy ddt_zap_prefetch, 164*eda14cbcSMatt Macy ddt_zap_update, 165*eda14cbcSMatt Macy ddt_zap_remove, 166*eda14cbcSMatt Macy ddt_zap_walk, 167*eda14cbcSMatt Macy ddt_zap_count, 168*eda14cbcSMatt Macy }; 169