1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/zfs_context.h> 27 #include <sys/spa.h> 28 #include <sys/zio.h> 29 #include <sys/zio_checksum.h> 30 31 /* 32 * Checksum vectors. 33 * 34 * In the SPA, everything is checksummed. We support checksum vectors 35 * for three distinct reasons: 36 * 37 * 1. Different kinds of data need different levels of protection. 38 * For SPA metadata, we always want a very strong checksum. 39 * For user data, we let users make the trade-off between speed 40 * and checksum strength. 41 * 42 * 2. Cryptographic hash and MAC algorithms are an area of active research. 43 * It is likely that in future hash functions will be at least as strong 44 * as current best-of-breed, and may be substantially faster as well. 45 * We want the ability to take advantage of these new hashes as soon as 46 * they become available. 47 * 48 * 3. If someone develops hardware that can compute a strong hash quickly, 49 * we want the ability to take advantage of that hardware. 50 * 51 * Of course, we don't want a checksum upgrade to invalidate existing 52 * data, so we store the checksum *function* in eight bits of the bp. 53 * This gives us room for up to 256 different checksum functions. 54 * 55 * When writing a block, we always checksum it with the latest-and-greatest 56 * checksum function of the appropriate strength. When reading a block, 57 * we compare the expected checksum against the actual checksum, which we 58 * compute via the checksum function specified by BP_GET_CHECKSUM(bp). 59 */ 60 61 /*ARGSUSED*/ 62 static void 63 zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) 64 { 65 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); 66 } 67 68 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { 69 {{NULL, NULL}, 0, 0, 0, "inherit"}, 70 {{NULL, NULL}, 0, 0, 0, "on"}, 71 {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, 72 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, 73 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, 74 {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, 75 {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, 76 {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, 77 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, 78 }; 79 80 enum zio_checksum 81 zio_checksum_select(enum zio_checksum child, enum zio_checksum parent) 82 { 83 ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); 84 ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); 85 ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 86 87 if (child == ZIO_CHECKSUM_INHERIT) 88 return (parent); 89 90 if (child == ZIO_CHECKSUM_ON) 91 return (ZIO_CHECKSUM_ON_VALUE); 92 93 return (child); 94 } 95 96 enum zio_checksum 97 zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, 98 enum zio_checksum parent) 99 { 100 ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 101 ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 102 ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 103 104 if (child == ZIO_CHECKSUM_INHERIT) 105 return (parent); 106 107 if (child == ZIO_CHECKSUM_ON) 108 return (spa_dedup_checksum(spa)); 109 110 if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) 111 return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); 112 113 ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || 114 (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); 115 116 return (child); 117 } 118 119 /* 120 * Set the external verifier for a gang block based on <vdev, offset, txg>, 121 * a tuple which is guaranteed to be unique for the life of the pool. 122 */ 123 static void 124 zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) 125 { 126 dva_t *dva = BP_IDENTITY(bp); 127 uint64_t txg = BP_PHYSICAL_BIRTH(bp); 128 129 ASSERT(BP_IS_GANG(bp)); 130 131 ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); 132 } 133 134 /* 135 * Set the external verifier for a label block based on its offset. 136 * The vdev is implicit, and the txg is unknowable at pool open time -- 137 * hence the logic in vdev_uberblock_load() to find the most recent copy. 138 */ 139 static void 140 zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) 141 { 142 ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); 143 } 144 145 /* 146 * Generate the checksum. 147 */ 148 void 149 zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, 150 void *data, uint64_t size) 151 { 152 blkptr_t *bp = zio->io_bp; 153 uint64_t offset = zio->io_offset; 154 zio_block_tail_t *zbt = (zio_block_tail_t *)((char *)data + size) - 1; 155 zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 156 zio_cksum_t zbt_cksum; 157 158 ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); 159 ASSERT(ci->ci_func[0] != NULL); 160 161 if (ci->ci_zbt) { 162 if (checksum == ZIO_CHECKSUM_GANG_HEADER) 163 zio_checksum_gang_verifier(&zbt->zbt_cksum, bp); 164 else if (checksum == ZIO_CHECKSUM_LABEL) 165 zio_checksum_label_verifier(&zbt->zbt_cksum, offset); 166 else 167 bp->blk_cksum = zbt->zbt_cksum; 168 zbt->zbt_magic = ZBT_MAGIC; 169 ci->ci_func[0](data, size, &zbt_cksum); 170 zbt->zbt_cksum = zbt_cksum; 171 } else { 172 ci->ci_func[0](data, size, &bp->blk_cksum); 173 } 174 } 175 176 int 177 zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) 178 { 179 blkptr_t *bp = zio->io_bp; 180 uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : 181 (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); 182 int byteswap; 183 int error; 184 uint64_t size = (bp == NULL ? zio->io_size : 185 (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); 186 uint64_t offset = zio->io_offset; 187 void *data = zio->io_data; 188 zio_block_tail_t *zbt = (zio_block_tail_t *)((char *)data + size) - 1; 189 zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 190 zio_cksum_t actual_cksum, expected_cksum, verifier; 191 192 if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) 193 return (EINVAL); 194 195 if (ci->ci_zbt) { 196 if (checksum == ZIO_CHECKSUM_GANG_HEADER) 197 zio_checksum_gang_verifier(&verifier, bp); 198 else if (checksum == ZIO_CHECKSUM_LABEL) 199 zio_checksum_label_verifier(&verifier, offset); 200 else 201 verifier = bp->blk_cksum; 202 203 byteswap = (zbt->zbt_magic == BSWAP_64(ZBT_MAGIC)); 204 205 if (byteswap) 206 byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 207 208 expected_cksum = zbt->zbt_cksum; 209 zbt->zbt_cksum = verifier; 210 ci->ci_func[byteswap](data, size, &actual_cksum); 211 zbt->zbt_cksum = expected_cksum; 212 213 if (byteswap) 214 byteswap_uint64_array(&expected_cksum, 215 sizeof (zio_cksum_t)); 216 } else { 217 ASSERT(!BP_IS_GANG(bp)); 218 byteswap = BP_SHOULD_BYTESWAP(bp); 219 expected_cksum = bp->blk_cksum; 220 ci->ci_func[byteswap](data, size, &actual_cksum); 221 } 222 223 info->zbc_expected = expected_cksum; 224 info->zbc_actual = actual_cksum; 225 info->zbc_checksum_name = ci->ci_name; 226 info->zbc_byteswapped = byteswap; 227 info->zbc_injected = 0; 228 info->zbc_has_cksum = 1; 229 230 if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 231 return (ECKSUM); 232 233 if (zio_injection_enabled && !zio->io_error && 234 (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { 235 236 info->zbc_injected = 1; 237 return (error); 238 } 239 240 return (0); 241 } 242