1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (C) 2016 Gvozden Nešković. All rights reserved. 24 */ 25 26 #include <sys/vdev_raidz_impl.h> 27 28 /* 29 * Provide native CPU scalar routines. 30 * Support 32bit and 64bit CPUs. 31 */ 32 #if ((~(0x0ULL)) >> 24) == 0xffULL 33 #define ELEM_SIZE 4 34 typedef uint32_t iv_t; 35 #elif ((~(0x0ULL)) >> 56) == 0xffULL 36 #define ELEM_SIZE 8 37 typedef uint64_t iv_t; 38 #endif 39 40 /* 41 * Vector type used in scalar implementation 42 * 43 * The union is expected to be of native CPU register size. Since addition 44 * uses XOR operation, it can be performed an all byte elements at once. 45 * Multiplication requires per byte access. 46 */ 47 typedef union { 48 iv_t e; 49 uint8_t b[ELEM_SIZE]; 50 } v_t; 51 52 /* 53 * Precomputed lookup tables for multiplication by a constant 54 * 55 * Reconstruction path requires multiplication by a constant factors. Instead of 56 * performing two step lookup (log & exp tables), a direct lookup can be used 57 * instead. Multiplication of element 'a' by a constant 'c' is obtained as: 58 * 59 * r = vdev_raidz_mul_lt[c_log][a]; 60 * 61 * where c_log = vdev_raidz_log2[c]. 
Log of coefficient factors is used because 62 * they are faster to obtain while solving the syndrome equations. 63 * 64 * PERFORMANCE NOTE: 65 * Even though the complete lookup table uses 64kiB, only relatively small 66 * portion of it is used at the same time. Following shows number of accessed 67 * bytes for different cases: 68 * - 1 failed disk: 256B (1 mul. coefficient) 69 * - 2 failed disks: 512B (2 mul. coefficients) 70 * - 3 failed disks: 1536B (6 mul. coefficients) 71 * 72 * Size of actually accessed lookup table regions is only larger for 73 * reconstruction of 3 failed disks, when compared to traditional log/exp 74 * method. But since the result is obtained in one lookup step performance is 75 * doubled. 76 */ 77 static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256))); 78 79 static void 80 raidz_init_scalar(void) 81 { 82 int c, i; 83 for (c = 0; c < 256; c++) 84 for (i = 0; i < 256; i++) 85 vdev_raidz_mul_lt[c][i] = gf_mul(c, i); 86 87 } 88 89 #define PREFETCHNTA(ptr, offset) {} 90 #define PREFETCH(ptr, offset) {} 91 92 #define XOR_ACC(src, acc) acc.e ^= ((v_t *)src)[0].e 93 #define XOR(src, acc) acc.e ^= src.e 94 #define ZERO(acc) acc.e = 0 95 #define COPY(src, dst) dst = src 96 #define LOAD(src, val) val = ((v_t *)src)[0] 97 #define STORE(dst, val) ((v_t *)dst)[0] = val 98 99 /* 100 * Constants used for optimized multiplication by 2. 
101 */ 102 static const struct { 103 iv_t mod; 104 iv_t mask; 105 iv_t msb; 106 } scalar_mul2_consts = { 107 #if ELEM_SIZE == 8 108 .mod = 0x1d1d1d1d1d1d1d1dULL, 109 .mask = 0xfefefefefefefefeULL, 110 .msb = 0x8080808080808080ULL, 111 #else 112 .mod = 0x1d1d1d1dULL, 113 .mask = 0xfefefefeULL, 114 .msb = 0x80808080ULL, 115 #endif 116 }; 117 118 #define MUL2_SETUP() {} 119 120 #define MUL2(a) \ 121 { \ 122 iv_t _mask; \ 123 \ 124 _mask = (a).e & scalar_mul2_consts.msb; \ 125 _mask = (_mask << 1) - (_mask >> 7); \ 126 (a).e = ((a).e << 1) & scalar_mul2_consts.mask; \ 127 (a).e = (a).e ^ (_mask & scalar_mul2_consts.mod); \ 128 } 129 130 #define MUL4(a) \ 131 { \ 132 MUL2(a); \ 133 MUL2(a); \ 134 } 135 136 #define MUL(c, a) \ 137 { \ 138 const uint8_t *mul_lt = vdev_raidz_mul_lt[c]; \ 139 switch (ELEM_SIZE) { \ 140 case 8: \ 141 a.b[7] = mul_lt[a.b[7]]; \ 142 a.b[6] = mul_lt[a.b[6]]; \ 143 a.b[5] = mul_lt[a.b[5]]; \ 144 a.b[4] = mul_lt[a.b[4]]; \ 145 zfs_fallthrough; \ 146 case 4: \ 147 a.b[3] = mul_lt[a.b[3]]; \ 148 a.b[2] = mul_lt[a.b[2]]; \ 149 a.b[1] = mul_lt[a.b[1]]; \ 150 a.b[0] = mul_lt[a.b[0]]; \ 151 break; \ 152 } \ 153 } 154 155 #define raidz_math_begin() {} 156 #define raidz_math_end() {} 157 158 #define SYN_STRIDE 1 159 160 #define ZERO_DEFINE() v_t d0 161 #define ZERO_STRIDE 1 162 #define ZERO_D d0 163 164 #define COPY_DEFINE() v_t d0 165 #define COPY_STRIDE 1 166 #define COPY_D d0 167 168 #define ADD_DEFINE() v_t d0 169 #define ADD_STRIDE 1 170 #define ADD_D d0 171 172 #define MUL_DEFINE() v_t d0 173 #define MUL_STRIDE 1 174 #define MUL_D d0 175 176 #define GEN_P_STRIDE 1 177 #define GEN_P_DEFINE() v_t p0 178 #define GEN_P_P p0 179 180 #define GEN_PQ_STRIDE 1 181 #define GEN_PQ_DEFINE() v_t d0, c0 182 #define GEN_PQ_D d0 183 #define GEN_PQ_C c0 184 185 #define GEN_PQR_STRIDE 1 186 #define GEN_PQR_DEFINE() v_t d0, c0 187 #define GEN_PQR_D d0 188 #define GEN_PQR_C c0 189 190 #define SYN_Q_DEFINE() v_t d0, x0 191 #define SYN_Q_D d0 192 #define SYN_Q_X x0 193 
/*
 * Working-variable declarations and strides for the syndrome (SYN_*) and
 * reconstruction (REC_*) operations.  Every *_DEFINE() declares v_t locals
 * and the companion macros name them; all strides are 1.  Presumably
 * consumed by the template code in vdev_raidz_math_impl.h included below --
 * confirm against that header.
 */
#define	SYN_R_DEFINE()		v_t d0, x0
#define	SYN_R_D			d0
#define	SYN_R_X			x0


#define	SYN_PQ_DEFINE()		v_t d0, x0
#define	SYN_PQ_D		d0
#define	SYN_PQ_X		x0


#define	REC_PQ_STRIDE		1
#define	REC_PQ_DEFINE()		v_t x0, y0, t0
#define	REC_PQ_X		x0
#define	REC_PQ_Y		y0
#define	REC_PQ_T		t0


#define	SYN_PR_DEFINE()		v_t d0, x0
#define	SYN_PR_D		d0
#define	SYN_PR_X		x0

#define	REC_PR_STRIDE		1
#define	REC_PR_DEFINE()		v_t x0, y0, t0
#define	REC_PR_X		x0
#define	REC_PR_Y		y0
#define	REC_PR_T		t0


#define	SYN_QR_DEFINE()		v_t d0, x0
#define	SYN_QR_D		d0
#define	SYN_QR_X		x0


#define	REC_QR_STRIDE		1
#define	REC_QR_DEFINE()		v_t x0, y0, t0
#define	REC_QR_X		x0
#define	REC_QR_Y		y0
#define	REC_QR_T		t0


#define	SYN_PQR_DEFINE()	v_t d0, x0
#define	SYN_PQR_D		d0
#define	SYN_PQR_X		x0

#define	REC_PQR_STRIDE		1
#define	REC_PQR_DEFINE()	v_t x0, y0, z0, xs0, ys0
#define	REC_PQR_X		x0
#define	REC_PQR_Y		y0
#define	REC_PQR_Z		z0
#define	REC_PQR_XS		xs0
#define	REC_PQR_YS		ys0

/*
 * The header is included only after all of the macros above are defined;
 * the two DEFINE_*_METHODS() invocations then instantiate the "scalar"
 * parity generation and reconstruction methods referenced by the ops table.
 */
#include "vdev_raidz_math_impl.h"

DEFINE_GEN_METHODS(scalar);
DEFINE_REC_METHODS(scalar);

/*
 * Support check for the scalar implementation: unconditionally returns
 * B_TRUE.  Installed as the .is_supported hook of vdev_raidz_scalar_impl.
 *
 * NOTE(review): unlike raidz_init_scalar() this function has external
 * linkage although nothing in this file requires it -- confirm there are no
 * outside users before narrowing it to static.
 */
boolean_t
raidz_will_scalar_work(void)
{
	return (B_TRUE); /* always */
}

/*
 * Operations table describing the scalar implementation: table
 * initialisation hook, no teardown hook, the generated gen/rec method sets,
 * the support check and the implementation name "scalar".
 */
const raidz_impl_ops_t vdev_raidz_scalar_impl = {
	.init = raidz_init_scalar,
	.fini = NULL,
	.gen = RAIDZ_GEN_METHODS(scalar),
	.rec = RAIDZ_REC_METHODS(scalar),
	.is_supported = &raidz_will_scalar_work,
	.name = "scalar"
};

/*
 * Powers of 2 in the RAID-Z Galois field.
 *
 * vdev_raidz_pow2[i] holds 2^i.  Each entry is the previous one doubled,
 * with 0x1d XORed in whenever the doubling overflows a byte.  The sequence
 * returns to 0x01 at index 255.
 */
const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
	0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
	0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
	0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
	0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
	0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
	0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
	0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
	0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
	0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
	0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
	0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
	0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
	0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
	0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
	0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
	0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
	0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
	0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
	0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
	0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
	0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
	0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
	0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
	0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
	0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
	0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
	0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
	0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
	0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
	0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
	0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
};

/*
 * Logs of 2 in the RAID-Z Galois field.
 *
 * For non-zero x, vdev_raidz_log2[x] is the exponent i for which
 * vdev_raidz_pow2[i] == x.  Zero has no logarithm; its slot holds 0x00.
 */
const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
	0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
	0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
	0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
	0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
	0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
	0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
	0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
	0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
	0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
	0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
	0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
	0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
	0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
	0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
	0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
	0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
	0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
	0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
	0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
	0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
	0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
	0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
	0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
	0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
	0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
	0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
	0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
	0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
	0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
	0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
	0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
	0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
};