1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
25 */
26
27 #include <sys/vdev_raidz_impl.h>
28
29 /*
30 * Provide native CPU scalar routines.
31 * Support 32bit and 64bit CPUs.
32 */
33 #if ((~(0x0ULL)) >> 24) == 0xffULL
34 #define ELEM_SIZE 4
35 typedef uint32_t iv_t;
36 #elif ((~(0x0ULL)) >> 56) == 0xffULL
37 #define ELEM_SIZE 8
38 typedef uint64_t iv_t;
39 #endif
40
41 /*
42 * Vector type used in scalar implementation
43 *
44 * The union is expected to be of native CPU register size. Since addition
45 * uses XOR operation, it can be performed an all byte elements at once.
46 * Multiplication requires per byte access.
47 */
48 typedef union {
49 iv_t e;
50 uint8_t b[ELEM_SIZE];
51 } v_t;
52
53 /*
54 * Precomputed lookup tables for multiplication by a constant
55 *
56 * Reconstruction path requires multiplication by a constant factors. Instead of
57 * performing two step lookup (log & exp tables), a direct lookup can be used
58 * instead. Multiplication of element 'a' by a constant 'c' is obtained as:
59 *
60 * r = vdev_raidz_mul_lt[c_log][a];
61 *
62 * where c_log = vdev_raidz_log2[c]. Log of coefficient factors is used because
63 * they are faster to obtain while solving the syndrome equations.
64 *
65 * PERFORMANCE NOTE:
66 * Even though the complete lookup table uses 64kiB, only relatively small
67 * portion of it is used at the same time. Following shows number of accessed
68 * bytes for different cases:
69 * - 1 failed disk: 256B (1 mul. coefficient)
70 * - 2 failed disks: 512B (2 mul. coefficients)
71 * - 3 failed disks: 1536B (6 mul. coefficients)
72 *
73 * Size of actually accessed lookup table regions is only larger for
74 * reconstruction of 3 failed disks, when compared to traditional log/exp
75 * method. But since the result is obtained in one lookup step performance is
76 * doubled.
77 */
78 static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));
79
80 static void
raidz_init_scalar(void)81 raidz_init_scalar(void)
82 {
83 int c, i;
84 for (c = 0; c < 256; c++)
85 for (i = 0; i < 256; i++)
86 vdev_raidz_mul_lt[c][i] = gf_mul(c, i);
87
88 }
89
90 #define PREFETCHNTA(ptr, offset) {}
91 #define PREFETCH(ptr, offset) {}
92
93 #define XOR_ACC(src, acc) acc.e ^= ((v_t *)src)[0].e
94 #define XOR(src, acc) acc.e ^= src.e
95 #define ZERO(acc) acc.e = 0
96 #define COPY(src, dst) dst = src
97 #define LOAD(src, val) val = ((v_t *)src)[0]
98 #define STORE(dst, val) ((v_t *)dst)[0] = val
99
100 /*
101 * Constants used for optimized multiplication by 2.
102 */
103 static const struct {
104 iv_t mod;
105 iv_t mask;
106 iv_t msb;
107 } scalar_mul2_consts = {
108 #if ELEM_SIZE == 8
109 .mod = 0x1d1d1d1d1d1d1d1dULL,
110 .mask = 0xfefefefefefefefeULL,
111 .msb = 0x8080808080808080ULL,
112 #else
113 .mod = 0x1d1d1d1dULL,
114 .mask = 0xfefefefeULL,
115 .msb = 0x80808080ULL,
116 #endif
117 };
118
119 #define MUL2_SETUP() {}
120
121 #define MUL2(a) \
122 { \
123 iv_t _mask; \
124 \
125 _mask = (a).e & scalar_mul2_consts.msb; \
126 _mask = (_mask << 1) - (_mask >> 7); \
127 (a).e = ((a).e << 1) & scalar_mul2_consts.mask; \
128 (a).e = (a).e ^ (_mask & scalar_mul2_consts.mod); \
129 }
130
131 #define MUL4(a) \
132 { \
133 MUL2(a); \
134 MUL2(a); \
135 }
136
137 #define MUL(c, a) \
138 { \
139 const uint8_t *mul_lt = vdev_raidz_mul_lt[c]; \
140 switch (ELEM_SIZE) { \
141 case 8: \
142 a.b[7] = mul_lt[a.b[7]]; \
143 a.b[6] = mul_lt[a.b[6]]; \
144 a.b[5] = mul_lt[a.b[5]]; \
145 a.b[4] = mul_lt[a.b[4]]; \
146 zfs_fallthrough; \
147 case 4: \
148 a.b[3] = mul_lt[a.b[3]]; \
149 a.b[2] = mul_lt[a.b[2]]; \
150 a.b[1] = mul_lt[a.b[1]]; \
151 a.b[0] = mul_lt[a.b[0]]; \
152 break; \
153 } \
154 }
155
156 #define raidz_math_begin() {}
157 #define raidz_math_end() {}
158
159 #define SYN_STRIDE 1
160
161 #define ZERO_DEFINE() v_t d0
162 #define ZERO_STRIDE 1
163 #define ZERO_D d0
164
165 #define COPY_DEFINE() v_t d0
166 #define COPY_STRIDE 1
167 #define COPY_D d0
168
169 #define ADD_DEFINE() v_t d0
170 #define ADD_STRIDE 1
171 #define ADD_D d0
172
173 #define MUL_DEFINE() v_t d0
174 #define MUL_STRIDE 1
175 #define MUL_D d0
176
177 #define GEN_P_STRIDE 1
178 #define GEN_P_DEFINE() v_t p0
179 #define GEN_P_P p0
180
181 #define GEN_PQ_STRIDE 1
182 #define GEN_PQ_DEFINE() v_t d0, c0
183 #define GEN_PQ_D d0
184 #define GEN_PQ_C c0
185
186 #define GEN_PQR_STRIDE 1
187 #define GEN_PQR_DEFINE() v_t d0, c0
188 #define GEN_PQR_D d0
189 #define GEN_PQR_C c0
190
191 #define SYN_Q_DEFINE() v_t d0, x0
192 #define SYN_Q_D d0
193 #define SYN_Q_X x0
194
195
196 #define SYN_R_DEFINE() v_t d0, x0
197 #define SYN_R_D d0
198 #define SYN_R_X x0
199
200
201 #define SYN_PQ_DEFINE() v_t d0, x0
202 #define SYN_PQ_D d0
203 #define SYN_PQ_X x0
204
205
206 #define REC_PQ_STRIDE 1
207 #define REC_PQ_DEFINE() v_t x0, y0, t0
208 #define REC_PQ_X x0
209 #define REC_PQ_Y y0
210 #define REC_PQ_T t0
211
212
213 #define SYN_PR_DEFINE() v_t d0, x0
214 #define SYN_PR_D d0
215 #define SYN_PR_X x0
216
217 #define REC_PR_STRIDE 1
218 #define REC_PR_DEFINE() v_t x0, y0, t0
219 #define REC_PR_X x0
220 #define REC_PR_Y y0
221 #define REC_PR_T t0
222
223
224 #define SYN_QR_DEFINE() v_t d0, x0
225 #define SYN_QR_D d0
226 #define SYN_QR_X x0
227
228
229 #define REC_QR_STRIDE 1
230 #define REC_QR_DEFINE() v_t x0, y0, t0
231 #define REC_QR_X x0
232 #define REC_QR_Y y0
233 #define REC_QR_T t0
234
235
236 #define SYN_PQR_DEFINE() v_t d0, x0
237 #define SYN_PQR_D d0
238 #define SYN_PQR_X x0
239
240 #define REC_PQR_STRIDE 1
241 #define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0
242 #define REC_PQR_X x0
243 #define REC_PQR_Y y0
244 #define REC_PQR_Z z0
245 #define REC_PQR_XS xs0
246 #define REC_PQR_YS ys0
247
248 #include "vdev_raidz_math_impl.h"
249
250 DEFINE_GEN_METHODS(scalar);
251 DEFINE_REC_METHODS(scalar);
252
253 boolean_t
raidz_will_scalar_work(void)254 raidz_will_scalar_work(void)
255 {
256 return (B_TRUE); /* always */
257 }
258
259 const raidz_impl_ops_t vdev_raidz_scalar_impl = {
260 .init = raidz_init_scalar,
261 .fini = NULL,
262 .gen = RAIDZ_GEN_METHODS(scalar),
263 .rec = RAIDZ_REC_METHODS(scalar),
264 .is_supported = &raidz_will_scalar_work,
265 .name = "scalar"
266 };
267
268 /* Powers of 2 in the RAID-Z Galois field. */
269 const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
270 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
271 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
272 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
273 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
274 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
275 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
276 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
277 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
278 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
279 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
280 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
281 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
282 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
283 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
284 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
285 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
286 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
287 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
288 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
289 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
290 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
291 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
292 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
293 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
294 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
295 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
296 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
297 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
298 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
299 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
300 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
301 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
302 };
303
304 /* Logs of 2 in the RAID-Z Galois field. */
305 const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
306 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
307 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
308 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
309 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
310 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
311 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
312 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
313 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
314 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
315 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
316 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
317 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
318 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
319 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
320 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
321 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
322 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
323 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
324 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
325 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
326 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
327 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
328 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
329 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
330 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
331 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
332 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
333 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
334 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
335 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
336 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
337 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
338 };
339