1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013 by Delphix. All rights reserved. 24 */ 25 26 #include <sys/zfs_context.h> 27 #include <sys/spa.h> 28 #include <sys/zio.h> 29 #include <sys/zio_checksum.h> 30 #include <sys/zil.h> 31 #include <zfs_fletcher.h> 32 33 /* 34 * Checksum vectors. 35 * 36 * In the SPA, everything is checksummed. We support checksum vectors 37 * for three distinct reasons: 38 * 39 * 1. Different kinds of data need different levels of protection. 40 * For SPA metadata, we always want a very strong checksum. 41 * For user data, we let users make the trade-off between speed 42 * and checksum strength. 43 * 44 * 2. Cryptographic hash and MAC algorithms are an area of active research. 45 * It is likely that in future hash functions will be at least as strong 46 * as current best-of-breed, and may be substantially faster as well. 47 * We want the ability to take advantage of these new hashes as soon as 48 * they become available. 49 * 50 * 3. If someone develops hardware that can compute a strong hash quickly, 51 * we want the ability to take advantage of that hardware. 52 * 53 * Of course, we don't want a checksum upgrade to invalidate existing 54 * data, so we store the checksum *function* in eight bits of the bp. 55 * This gives us room for up to 256 different checksum functions. 56 * 57 * When writing a block, we always checksum it with the latest-and-greatest 58 * checksum function of the appropriate strength. When reading a block, 59 * we compare the expected checksum against the actual checksum, which we 60 * compute via the checksum function specified by BP_GET_CHECKSUM(bp). 61 */ 62 63 /*ARGSUSED*/ 64 static void 65 zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp) 66 { 67 ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0); 68 } 69 70 zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = { 71 {{NULL, NULL}, 0, 0, 0, "inherit"}, 72 {{NULL, NULL}, 0, 0, 0, "on"}, 73 {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"}, 74 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"}, 75 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"}, 76 {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"}, 77 {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"}, 78 {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"}, 79 {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"}, 80 {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"}, 81 }; 82 83 enum zio_checksum 84 zio_checksum_select(enum zio_checksum child, enum zio_checksum parent) 85 { 86 ASSERT(child < ZIO_CHECKSUM_FUNCTIONS); 87 ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS); 88 ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 89 90 if (child == ZIO_CHECKSUM_INHERIT) 91 return (parent); 92 93 if (child == ZIO_CHECKSUM_ON) 94 return (ZIO_CHECKSUM_ON_VALUE); 95 96 return (child); 97 } 98 99 enum zio_checksum 100 zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child, 101 enum zio_checksum parent) 102 { 103 ASSERT((child & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 104 ASSERT((parent & ZIO_CHECKSUM_MASK) < ZIO_CHECKSUM_FUNCTIONS); 105 ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON); 106 107 if (child == ZIO_CHECKSUM_INHERIT) 108 return (parent); 109 110 if (child == ZIO_CHECKSUM_ON) 111 return (spa_dedup_checksum(spa)); 112 113 if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY)) 114 return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY); 115 116 ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup || 117 (child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF); 118 119 return (child); 120 } 121 122 /* 123 * Set the external verifier for a gang block based on <vdev, offset, txg>, 124 * a tuple which is guaranteed to be unique for the life of the pool. 125 */ 126 static void 127 zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp) 128 { 129 dva_t *dva = BP_IDENTITY(bp); 130 uint64_t txg = BP_PHYSICAL_BIRTH(bp); 131 132 ASSERT(BP_IS_GANG(bp)); 133 134 ZIO_SET_CHECKSUM(zcp, DVA_GET_VDEV(dva), DVA_GET_OFFSET(dva), txg, 0); 135 } 136 137 /* 138 * Set the external verifier for a label block based on its offset. 139 * The vdev is implicit, and the txg is unknowable at pool open time -- 140 * hence the logic in vdev_uberblock_load() to find the most recent copy. 141 */ 142 static void 143 zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset) 144 { 145 ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0); 146 } 147 148 /* 149 * Generate the checksum. 150 */ 151 void 152 zio_checksum_compute(zio_t *zio, enum zio_checksum checksum, 153 void *data, uint64_t size) 154 { 155 blkptr_t *bp = zio->io_bp; 156 uint64_t offset = zio->io_offset; 157 zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 158 zio_cksum_t cksum; 159 160 ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS); 161 ASSERT(ci->ci_func[0] != NULL); 162 163 if (ci->ci_eck) { 164 zio_eck_t *eck; 165 166 if (checksum == ZIO_CHECKSUM_ZILOG2) { 167 zil_chain_t *zilc = data; 168 169 size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ, 170 uint64_t); 171 eck = &zilc->zc_eck; 172 } else { 173 eck = (zio_eck_t *)((char *)data + size) - 1; 174 } 175 if (checksum == ZIO_CHECKSUM_GANG_HEADER) 176 zio_checksum_gang_verifier(&eck->zec_cksum, bp); 177 else if (checksum == ZIO_CHECKSUM_LABEL) 178 zio_checksum_label_verifier(&eck->zec_cksum, offset); 179 else 180 bp->blk_cksum = eck->zec_cksum; 181 eck->zec_magic = ZEC_MAGIC; 182 ci->ci_func[0](data, size, &cksum); 183 eck->zec_cksum = cksum; 184 } else { 185 ci->ci_func[0](data, size, &bp->blk_cksum); 186 } 187 } 188 189 int 190 zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info) 191 { 192 blkptr_t *bp = zio->io_bp; 193 uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum : 194 (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp))); 195 int byteswap; 196 int error; 197 uint64_t size = (bp == NULL ? zio->io_size : 198 (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp))); 199 uint64_t offset = zio->io_offset; 200 void *data = zio->io_data; 201 zio_checksum_info_t *ci = &zio_checksum_table[checksum]; 202 zio_cksum_t actual_cksum, expected_cksum, verifier; 203 204 if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL) 205 return (SET_ERROR(EINVAL)); 206 207 if (ci->ci_eck) { 208 zio_eck_t *eck; 209 210 if (checksum == ZIO_CHECKSUM_ZILOG2) { 211 zil_chain_t *zilc = data; 212 uint64_t nused; 213 214 eck = &zilc->zc_eck; 215 if (eck->zec_magic == ZEC_MAGIC) 216 nused = zilc->zc_nused; 217 else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC)) 218 nused = BSWAP_64(zilc->zc_nused); 219 else 220 return (SET_ERROR(ECKSUM)); 221 222 if (nused > size) 223 return (SET_ERROR(ECKSUM)); 224 225 size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t); 226 } else { 227 eck = (zio_eck_t *)((char *)data + size) - 1; 228 } 229 230 if (checksum == ZIO_CHECKSUM_GANG_HEADER) 231 zio_checksum_gang_verifier(&verifier, bp); 232 else if (checksum == ZIO_CHECKSUM_LABEL) 233 zio_checksum_label_verifier(&verifier, offset); 234 else 235 verifier = bp->blk_cksum; 236 237 byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC)); 238 239 if (byteswap) 240 byteswap_uint64_array(&verifier, sizeof (zio_cksum_t)); 241 242 expected_cksum = eck->zec_cksum; 243 eck->zec_cksum = verifier; 244 ci->ci_func[byteswap](data, size, &actual_cksum); 245 eck->zec_cksum = expected_cksum; 246 247 if (byteswap) 248 byteswap_uint64_array(&expected_cksum, 249 sizeof (zio_cksum_t)); 250 } else { 251 ASSERT(!BP_IS_GANG(bp)); 252 byteswap = BP_SHOULD_BYTESWAP(bp); 253 expected_cksum = bp->blk_cksum; 254 ci->ci_func[byteswap](data, size, &actual_cksum); 255 } 256 257 info->zbc_expected = expected_cksum; 258 info->zbc_actual = actual_cksum; 259 info->zbc_checksum_name = ci->ci_name; 260 info->zbc_byteswapped = byteswap; 261 info->zbc_injected = 0; 262 info->zbc_has_cksum = 1; 263 264 if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum)) 265 return (SET_ERROR(ECKSUM)); 266 267 if (zio_injection_enabled && !zio->io_error && 268 (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) { 269 270 info->zbc_injected = 1; 271 return (error); 272 } 273 274 return (0); 275 } 276