xref: /freebsd/sys/contrib/openzfs/module/zfs/vdev_raidz_math_impl.h (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
24  */
25 
26 #ifndef _VDEV_RAIDZ_MATH_IMPL_H
27 #define	_VDEV_RAIDZ_MATH_IMPL_H
28 
29 #include <sys/types.h>
30 #include <sys/vdev_raidz_impl.h>
31 
32 #define	raidz_inline inline __attribute__((always_inline))
33 #ifndef noinline
34 #define	noinline __attribute__((noinline))
35 #endif
36 
37 /*
38  * Functions calculate multiplication constants for data reconstruction.
39  * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
40  * used parity columns for reconstruction.
41  * @rr			RAIDZ row
42  * @tgtidx		array of missing data indexes
43  * @coeff		output array of coefficients. Array must be provided by
44  *         		user and must hold minimum MUL_CNT values.
45  */
46 static noinline void
raidz_rec_q_coeff(const raidz_row_t * rr,const int * tgtidx,unsigned * coeff)47 raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
48 {
49 	const unsigned ncols = rr->rr_cols;
50 	const unsigned x = tgtidx[TARGET_X];
51 
52 	coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
53 }
54 
55 static noinline void
raidz_rec_r_coeff(const raidz_row_t * rr,const int * tgtidx,unsigned * coeff)56 raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
57 {
58 	const unsigned ncols = rr->rr_cols;
59 	const unsigned x = tgtidx[TARGET_X];
60 
61 	coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
62 }
63 
64 static noinline void
raidz_rec_pq_coeff(const raidz_row_t * rr,const int * tgtidx,unsigned * coeff)65 raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
66 {
67 	const unsigned ncols = rr->rr_cols;
68 	const unsigned x = tgtidx[TARGET_X];
69 	const unsigned y = tgtidx[TARGET_Y];
70 	gf_t a, b, e;
71 
72 	a = gf_exp2(x + 255 - y);
73 	b = gf_exp2(255 - (ncols - x - 1));
74 	e = a ^ 0x01;
75 
76 	coeff[MUL_PQ_X] = gf_div(a, e);
77 	coeff[MUL_PQ_Y] = gf_div(b, e);
78 }
79 
80 static noinline void
raidz_rec_pr_coeff(const raidz_row_t * rr,const int * tgtidx,unsigned * coeff)81 raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
82 {
83 	const unsigned ncols = rr->rr_cols;
84 	const unsigned x = tgtidx[TARGET_X];
85 	const unsigned y = tgtidx[TARGET_Y];
86 
87 	gf_t a, b, e;
88 
89 	a = gf_exp4(x + 255 - y);
90 	b = gf_exp4(255 - (ncols - x - 1));
91 	e = a ^ 0x01;
92 
93 	coeff[MUL_PR_X] = gf_div(a, e);
94 	coeff[MUL_PR_Y] = gf_div(b, e);
95 }
96 
97 static noinline void
raidz_rec_qr_coeff(const raidz_row_t * rr,const int * tgtidx,unsigned * coeff)98 raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
99 {
100 	const unsigned ncols = rr->rr_cols;
101 	const unsigned x = tgtidx[TARGET_X];
102 	const unsigned y = tgtidx[TARGET_Y];
103 
104 	gf_t nx, ny, nxxy, nxyy, d;
105 
106 	nx = gf_exp2(ncols - x - 1);
107 	ny = gf_exp2(ncols - y - 1);
108 	nxxy = gf_mul(gf_mul(nx, nx), ny);
109 	nxyy = gf_mul(gf_mul(nx, ny), ny);
110 	d = nxxy ^ nxyy;
111 
112 	coeff[MUL_QR_XQ] = ny;
113 	coeff[MUL_QR_X]	= gf_div(ny, d);
114 	coeff[MUL_QR_YQ] = nx;
115 	coeff[MUL_QR_Y]	= gf_div(nx, d);
116 }
117 
118 static noinline void
raidz_rec_pqr_coeff(const raidz_row_t * rr,const int * tgtidx,unsigned * coeff)119 raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
120 {
121 	const unsigned ncols = rr->rr_cols;
122 	const unsigned x = tgtidx[TARGET_X];
123 	const unsigned y = tgtidx[TARGET_Y];
124 	const unsigned z = tgtidx[TARGET_Z];
125 
126 	gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
127 
128 	nx = gf_exp2(ncols - x - 1);
129 	ny = gf_exp2(ncols - y - 1);
130 	nz = gf_exp2(ncols - z - 1);
131 
132 	nxx = gf_exp4(ncols - x - 1);
133 	nyy = gf_exp4(ncols - y - 1);
134 	nzz = gf_exp4(ncols - z - 1);
135 
136 	nyyz = gf_mul(gf_mul(ny, nz), ny);
137 	nyzz = gf_mul(nzz, ny);
138 
139 	xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
140 	    gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^  nyzz;
141 
142 	yd = gf_inv(ny ^ nz);
143 
144 	coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
145 	coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
146 	coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
147 	coeff[MUL_PQR_YU] = nx;
148 	coeff[MUL_PQR_YP] = gf_mul(nz, yd);
149 	coeff[MUL_PQR_YQ] = yd;
150 }
151 
152 /*
153  * Method for zeroing a buffer (can be implemented using SIMD).
154  * This method is used by multiple for gen/rec functions.
155  *
156  * @dc		Destination buffer
157  * @dsize	Destination buffer size
158  * @private	Unused
159  */
160 static int
raidz_zero_abd_cb(void * dc,size_t dsize,void * private)161 raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
162 {
163 	v_t *dst = (v_t *)dc;
164 	size_t i;
165 
166 	ZERO_DEFINE();
167 
168 	(void) private; /* unused */
169 
170 	ZERO(ZERO_D);
171 
172 	for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
173 		STORE(dst + i, ZERO_D);
174 		STORE(dst + i + ZERO_STRIDE, ZERO_D);
175 	}
176 
177 	return (0);
178 }
179 
/*
 * Run raidz_zero_abd_cb() over dabd through abd_iterate_func(), passing
 * 0 and size as the range arguments.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and can be used safely in an unbraced if/else. Arguments are
 * parenthesized so that arbitrary expressions can be passed.
 */
#define	raidz_zero(dabd, size)						\
do {									\
	abd_iterate_func((dabd), 0, (size), raidz_zero_abd_cb, NULL);	\
} while (0)
184 
185 /*
186  * Method for copying two buffers (can be implemented using SIMD).
187  * This method is used by multiple for gen/rec functions.
188  *
189  * @dc		Destination buffer
190  * @sc		Source buffer
191  * @dsize	Destination buffer size
192  * @ssize	Source buffer size
193  * @private	Unused
194  */
195 static int
raidz_copy_abd_cb(void * dc,void * sc,size_t size,void * private)196 raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
197 {
198 	v_t *dst = (v_t *)dc;
199 	const v_t *src = (v_t *)sc;
200 	size_t i;
201 
202 	COPY_DEFINE();
203 
204 	(void) private; /* unused */
205 
206 	for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
207 		LOAD(src + i, COPY_D);
208 		STORE(dst + i, COPY_D);
209 
210 		LOAD(src + i + COPY_STRIDE, COPY_D);
211 		STORE(dst + i + COPY_STRIDE, COPY_D);
212 	}
213 
214 	return (0);
215 }
216 
217 
/*
 * Run raidz_copy_abd_cb() over dabd/sabd through abd_iterate_func2().
 * The same off value is passed for both buffers, so off is evaluated
 * twice (as in the original macro).
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and can be used safely in an unbraced if/else. Arguments are
 * parenthesized so that arbitrary expressions can be passed.
 */
#define	raidz_copy(dabd, sabd, off, size)				\
do {									\
	abd_iterate_func2((dabd), (sabd), (off), (off), (size),	\
	    raidz_copy_abd_cb, NULL);					\
} while (0)
223 
224 /*
225  * Method for adding (XORing) two buffers.
226  * Source and destination are XORed together and result is stored in
227  * destination buffer. This method is used by multiple for gen/rec functions.
228  *
229  * @dc		Destination buffer
230  * @sc		Source buffer
231  * @dsize	Destination buffer size
232  * @ssize	Source buffer size
233  * @private	Unused
234  */
235 static int
raidz_add_abd_cb(void * dc,void * sc,size_t size,void * private)236 raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
237 {
238 	v_t *dst = (v_t *)dc;
239 	const v_t *src = (v_t *)sc;
240 	size_t i;
241 
242 	ADD_DEFINE();
243 
244 	(void) private; /* unused */
245 
246 	for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
247 		LOAD(dst + i, ADD_D);
248 		XOR_ACC(src + i, ADD_D);
249 		STORE(dst + i, ADD_D);
250 
251 		LOAD(dst + i + ADD_STRIDE, ADD_D);
252 		XOR_ACC(src + i + ADD_STRIDE, ADD_D);
253 		STORE(dst + i + ADD_STRIDE, ADD_D);
254 	}
255 
256 	return (0);
257 }
258 
/*
 * Run raidz_add_abd_cb() over dabd/sabd through abd_iterate_func2().
 * The same off value is passed for both buffers, so off is evaluated
 * twice (as in the original macro).
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and can be used safely in an unbraced if/else. Arguments are
 * parenthesized so that arbitrary expressions can be passed.
 */
#define	raidz_add(dabd, sabd, off, size)				\
do {									\
	abd_iterate_func2((dabd), (sabd), (off), (off), (size),	\
	    raidz_add_abd_cb, NULL);					\
} while (0)
264 
265 /*
266  * Method for multiplying a buffer with a constant in GF(2^8).
267  * Symbols from buffer are multiplied by a constant and result is stored
268  * back in the same buffer.
269  *
270  * @dc		In/Out data buffer.
271  * @size	Size of the buffer
272  * @private	pointer to the multiplication constant (unsigned)
273  */
274 static int
raidz_mul_abd_cb(void * dc,size_t size,void * private)275 raidz_mul_abd_cb(void *dc, size_t size, void *private)
276 {
277 	const unsigned mul = *((unsigned *)private);
278 	v_t *d = (v_t *)dc;
279 	size_t i;
280 
281 	MUL_DEFINE();
282 
283 	for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
284 		LOAD(d + i, MUL_D);
285 		MUL(mul, MUL_D);
286 		STORE(d + i, MUL_D);
287 
288 		LOAD(d + i + MUL_STRIDE, MUL_D);
289 		MUL(mul, MUL_D);
290 		STORE(d + i + MUL_STRIDE, MUL_D);
291 	}
292 
293 	return (0);
294 }
295 
296 
/*
 * Syndrome generation/update macros
 *
 * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
 *
 * D	data operand, combined into T by XOR()
 * T	working operand, loaded from and stored back to t
 * t	pointer to the parity/syndrome location being updated
 *
 * Each macro is wrapped in do { } while (0) so it acts as a single
 * statement and is safe in an unbraced if/else. D and T are forwarded
 * to the helper macros without added parentheses, exactly as before
 * (NOTE(review): implementations presumably may define them as
 * comma-separated operand lists - confirm before parenthesizing).
 */

/* t = t combined with D via XOR() */
#define	P_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* t = MUL2(t) combined with D via XOR() */
#define	Q_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	MUL2(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* t = MUL2(t); no data operand */
#define	Q_SYNDROME(T, t)		\
do {					\
	LOAD((t), T);			\
	MUL2(T);			\
	STORE((t), T);			\
} while (0)

/* t = MUL4(t) combined with D via XOR() */
#define	R_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	MUL4(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* t = MUL4(t); no data operand */
#define	R_SYNDROME(T, t)		\
do {					\
	LOAD((t), T);			\
	MUL4(T);			\
	STORE((t), T);			\
} while (0)
338 
339 
340 /*
341  * PARITY CALCULATION
342  *
343  * Macros *_SYNDROME are used for parity/syndrome calculation.
344  * *_D_SYNDROME() macros are used to calculate syndrome between 0 and
345  * length of data column, and *_SYNDROME() macros are only for updating
346  * the parity/syndrome if data column is shorter.
347  *
348  * P parity is calculated using raidz_add_abd().
349  *
350  * For CPU L2 cache blocking we process 64KB at a time.
351  */
352 #define	CHUNK		65536
353 
354 /*
355  * Generate P parity (RAIDZ1)
356  *
357  * @rr	RAIDZ row
358  */
359 static raidz_inline void
raidz_generate_p_impl(raidz_row_t * const rr)360 raidz_generate_p_impl(raidz_row_t * const rr)
361 {
362 	size_t c;
363 	const size_t ncols = rr->rr_cols;
364 	const size_t psize = rr->rr_col[CODE_P].rc_size;
365 	abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
366 	size_t off, size;
367 
368 	raidz_math_begin();
369 
370 	for (off = 0; off < psize; off += CHUNK) {
371 
372 		/* start with first data column */
373 		size = MIN(CHUNK, psize - off);
374 		raidz_copy(pabd, rr->rr_col[1].rc_abd, off, size);
375 
376 		for (c = 2; c < ncols; c++) {
377 			size = rr->rr_col[c].rc_size;
378 			if (size <= off)
379 				continue;
380 
381 			/* add data column */
382 			size = MIN(CHUNK, size - off);
383 			abd_t *dabd = rr->rr_col[c].rc_abd;
384 			raidz_add(pabd, dabd, off, size);
385 		}
386 	}
387 
388 	raidz_math_end();
389 }
390 
391 
392 /*
393  * Generate PQ parity (RAIDZ2)
394  * The function is called per data column.
395  *
396  * @c		array of pointers to parity (code) columns
397  * @dc		pointer to data column
398  * @csize	size of parity columns
399  * @dsize	size of data column
400  */
401 static void
raidz_gen_pq_add(void ** c,const void * dc,const size_t csize,const size_t dsize)402 raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
403     const size_t dsize)
404 {
405 	v_t *p = (v_t *)c[0];
406 	v_t *q = (v_t *)c[1];
407 	const v_t *d = (const v_t *)dc;
408 	const v_t * const dend = d + (dsize / sizeof (v_t));
409 	const v_t * const qend = q + (csize / sizeof (v_t));
410 
411 	GEN_PQ_DEFINE();
412 
413 	MUL2_SETUP();
414 
415 	for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
416 	    q += GEN_PQ_STRIDE) {
417 		LOAD(d, GEN_PQ_D);
418 		P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
419 		Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
420 	}
421 	for (; q < qend; q += GEN_PQ_STRIDE) {
422 		Q_SYNDROME(GEN_PQ_C, q);
423 	}
424 }
425 
426 
427 /*
428  * Generate PQ parity (RAIDZ2)
429  *
430  * @rr	RAIDZ row
431  */
432 static raidz_inline void
raidz_generate_pq_impl(raidz_row_t * const rr)433 raidz_generate_pq_impl(raidz_row_t * const rr)
434 {
435 	size_t c;
436 	const size_t ncols = rr->rr_cols;
437 	const size_t csize = rr->rr_col[CODE_P].rc_size;
438 	size_t off, size, dsize;
439 	abd_t *dabd;
440 	abd_t *cabds[] = {
441 		rr->rr_col[CODE_P].rc_abd,
442 		rr->rr_col[CODE_Q].rc_abd
443 	};
444 
445 	raidz_math_begin();
446 
447 	for (off = 0; off < csize; off += CHUNK) {
448 
449 		size = MIN(CHUNK, csize - off);
450 		raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, off, size);
451 		raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, off, size);
452 
453 		for (c = 3; c < ncols; c++) {
454 			dabd = rr->rr_col[c].rc_abd;
455 			dsize = rr->rr_col[c].rc_size;
456 			dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
457 
458 			abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 2,
459 			    raidz_gen_pq_add);
460 		}
461 	}
462 
463 	raidz_math_end();
464 }
465 
466 
467 /*
468  * Generate PQR parity (RAIDZ3)
469  * The function is called per data column.
470  *
471  * @c		array of pointers to parity (code) columns
472  * @dc		pointer to data column
473  * @csize	size of parity columns
474  * @dsize	size of data column
475  */
476 static void
raidz_gen_pqr_add(void ** c,const void * dc,const size_t csize,const size_t dsize)477 raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
478     const size_t dsize)
479 {
480 	v_t *p = (v_t *)c[CODE_P];
481 	v_t *q = (v_t *)c[CODE_Q];
482 	v_t *r = (v_t *)c[CODE_R];
483 	const v_t *d = (const v_t *)dc;
484 	const v_t * const dend = d + (dsize / sizeof (v_t));
485 	const v_t * const qend = q + (csize / sizeof (v_t));
486 
487 	GEN_PQR_DEFINE();
488 
489 	MUL2_SETUP();
490 
491 	for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
492 	    q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
493 		LOAD(d, GEN_PQR_D);
494 		P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
495 		Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
496 		R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
497 	}
498 	for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
499 		Q_SYNDROME(GEN_PQR_C, q);
500 		R_SYNDROME(GEN_PQR_C, r);
501 	}
502 }
503 
504 
505 /*
506  * Generate PQR parity (RAIDZ3)
507  *
508  * @rr	RAIDZ row
509  */
510 static raidz_inline void
raidz_generate_pqr_impl(raidz_row_t * const rr)511 raidz_generate_pqr_impl(raidz_row_t * const rr)
512 {
513 	size_t c;
514 	const size_t ncols = rr->rr_cols;
515 	const size_t csize = rr->rr_col[CODE_P].rc_size;
516 	size_t off, size, dsize;
517 	abd_t *dabd;
518 	abd_t *cabds[] = {
519 		rr->rr_col[CODE_P].rc_abd,
520 		rr->rr_col[CODE_Q].rc_abd,
521 		rr->rr_col[CODE_R].rc_abd
522 	};
523 
524 	raidz_math_begin();
525 
526 	for (off = 0; off < csize; off += CHUNK) {
527 
528 		size = MIN(CHUNK, csize - off);
529 		raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, off, size);
530 		raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, off, size);
531 		raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, off, size);
532 
533 		for (c = 4; c < ncols; c++) {
534 			dabd = rr->rr_col[c].rc_abd;
535 			dsize = rr->rr_col[c].rc_size;
536 			dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
537 
538 			abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 3,
539 			    raidz_gen_pqr_add);
540 		}
541 	}
542 
543 	raidz_math_end();
544 }
545 
546 
547 /*
548  * DATA RECONSTRUCTION
549  *
550  * Data reconstruction process consists of two phases:
551  * 	- Syndrome calculation
552  * 	- Data reconstruction
553  *
554  * Syndrome is calculated by generating parity using available data columns
555  * and zeros in places of erasure. Existing parity is added to corresponding
556  * syndrome value to obtain the [P|Q|R]syn values from equation:
557  * 	P = Psyn + Dx + Dy + Dz
558  * 	Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
559  * 	R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
560  *
561  * For data reconstruction phase, the corresponding equations are solved
562  * for missing data (Dx, Dy, Dz). This generally involves multiplying known
563  * symbols by a coefficient and adding them together. The multiplication
564  * constant coefficients are calculated ahead of the operation in
565  * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
566  *
567  * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
568  * and "short" columns.
569  * For this reason, reconstruction is performed in minimum of
570  * two steps. First, from offset 0 to short_size, then from short_size to
571  * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
572  * over both ranges. The split also enables removal of conditional expressions
573  * from loop bodies, improving throughput of SIMD implementations.
574  * For the best performance, all functions marked with raidz_inline attribute
575  * must be inlined by compiler.
576  *
577  *    parity          data
578  *    columns         columns
579  * <----------> <------------------>
580  *                   x       y  <----+ missing columns (x, y)
581  *                   |       |
582  * +---+---+---+---+-v-+---+-v-+---+   ^ 0
583  * |   |   |   |   |   |   |   |   |   |
584  * |   |   |   |   |   |   |   |   |   |
585  * | P | Q | R | D | D | D | D | D |   |
586  * |   |   |   | 0 | 1 | 2 | 3 | 4 |   |
587  * |   |   |   |   |   |   |   |   |   v
588  * |   |   |   |   |   +---+---+---+   ^ short_size
589  * |   |   |   |   |   |               |
590  * +---+---+---+---+---+               v big_size
591  * <------------------> <---------->
592  *      big columns     short columns
593  *
594  */
595 
596 
597 
598 
599 /*
600  * Reconstruct single data column using P parity
601  *
602  * @syn_method	raidz_add_abd()
603  * @rec_method	not applicable
604  *
605  * @rr		RAIDZ row
606  * @tgtidx	array of missing data indexes
607  */
608 static raidz_inline int
raidz_reconstruct_p_impl(raidz_row_t * rr,const int * tgtidx)609 raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
610 {
611 	size_t c;
612 	const size_t firstdc = rr->rr_firstdatacol;
613 	const size_t ncols = rr->rr_cols;
614 	const size_t x = tgtidx[TARGET_X];
615 	const size_t xsize = rr->rr_col[x].rc_size;
616 	abd_t *xabd = rr->rr_col[x].rc_abd;
617 	size_t off, size;
618 
619 	if (xabd == NULL)
620 		return (1 << CODE_P);
621 
622 	raidz_math_begin();
623 
624 	for (off = 0; off < xsize; off += CHUNK) {
625 
626 		/* copy P into target */
627 		size = MIN(CHUNK, xsize - off);
628 		raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, off, size);
629 
630 		/* generate p_syndrome */
631 		for (c = firstdc; c < ncols; c++) {
632 			if (c == x)
633 				continue;
634 			size = rr->rr_col[c].rc_size;
635 			if (size <= off)
636 				continue;
637 
638 			size = MIN(CHUNK, MIN(size, xsize) - off);
639 			abd_t *dabd = rr->rr_col[c].rc_abd;
640 			raidz_add(xabd, dabd, off, size);
641 		}
642 	}
643 
644 	raidz_math_end();
645 
646 	return (1 << CODE_P);
647 }
648 
649 
650 /*
651  * Generate Q syndrome (Qsyn)
652  *
653  * @xc		array of pointers to syndrome columns
654  * @dc		data column (NULL if missing)
655  * @xsize	size of syndrome columns
656  * @dsize	size of data column (0 if missing)
657  */
658 static void
raidz_syn_q_abd(void ** xc,const void * dc,const size_t xsize,const size_t dsize)659 raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
660     const size_t dsize)
661 {
662 	v_t *x = (v_t *)xc[TARGET_X];
663 	const v_t *d = (const v_t *)dc;
664 	const v_t * const dend = d + (dsize / sizeof (v_t));
665 	const v_t * const xend = x + (xsize / sizeof (v_t));
666 
667 	SYN_Q_DEFINE();
668 
669 	MUL2_SETUP();
670 
671 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
672 		LOAD(d, SYN_Q_D);
673 		Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
674 	}
675 	for (; x < xend; x += SYN_STRIDE) {
676 		Q_SYNDROME(SYN_Q_X, x);
677 	}
678 }
679 
680 
681 /*
682  * Reconstruct single data column using Q parity
683  *
684  * @syn_method	raidz_add_abd()
685  * @rec_method	raidz_mul_abd_cb()
686  *
687  * @rr		RAIDZ row
688  * @tgtidx	array of missing data indexes
689  */
690 static raidz_inline int
raidz_reconstruct_q_impl(raidz_row_t * rr,const int * tgtidx)691 raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
692 {
693 	size_t c;
694 	size_t dsize;
695 	abd_t *dabd;
696 	const size_t firstdc = rr->rr_firstdatacol;
697 	const size_t ncols = rr->rr_cols;
698 	const size_t x = tgtidx[TARGET_X];
699 	abd_t *xabd = rr->rr_col[x].rc_abd;
700 	const size_t xsize = rr->rr_col[x].rc_size;
701 	abd_t *tabds[] = { xabd };
702 
703 	if (xabd == NULL)
704 		return (1 << CODE_Q);
705 
706 	unsigned coeff[MUL_CNT];
707 	raidz_rec_q_coeff(rr, tgtidx, coeff);
708 
709 	raidz_math_begin();
710 
711 	/* Start with first data column if present */
712 	if (firstdc != x) {
713 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
714 	} else {
715 		raidz_zero(xabd, xsize);
716 	}
717 
718 	/* generate q_syndrome */
719 	for (c = firstdc+1; c < ncols; c++) {
720 		if (c == x) {
721 			dabd = NULL;
722 			dsize = 0;
723 		} else {
724 			dabd = rr->rr_col[c].rc_abd;
725 			dsize = rr->rr_col[c].rc_size;
726 		}
727 
728 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
729 		    raidz_syn_q_abd);
730 	}
731 
732 	/* add Q to the syndrome */
733 	raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, 0, xsize);
734 
735 	/* transform the syndrome */
736 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
737 
738 	raidz_math_end();
739 
740 	return (1 << CODE_Q);
741 }
742 
743 
744 /*
745  * Generate R syndrome (Rsyn)
746  *
747  * @xc		array of pointers to syndrome columns
748  * @dc		data column (NULL if missing)
749  * @tsize	size of syndrome columns
750  * @dsize	size of data column (0 if missing)
751  */
752 static void
raidz_syn_r_abd(void ** xc,const void * dc,const size_t tsize,const size_t dsize)753 raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
754     const size_t dsize)
755 {
756 	v_t *x = (v_t *)xc[TARGET_X];
757 	const v_t *d = (const v_t *)dc;
758 	const v_t * const dend = d + (dsize / sizeof (v_t));
759 	const v_t * const xend = x + (tsize / sizeof (v_t));
760 
761 	SYN_R_DEFINE();
762 
763 	MUL2_SETUP();
764 
765 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
766 		LOAD(d, SYN_R_D);
767 		R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
768 	}
769 	for (; x < xend; x += SYN_STRIDE) {
770 		R_SYNDROME(SYN_R_X, x);
771 	}
772 }
773 
774 
775 /*
776  * Reconstruct single data column using R parity
777  *
778  * @syn_method	raidz_add_abd()
779  * @rec_method	raidz_mul_abd_cb()
780  *
781  * @rr		RAIDZ rr
782  * @tgtidx	array of missing data indexes
783  */
784 static raidz_inline int
raidz_reconstruct_r_impl(raidz_row_t * rr,const int * tgtidx)785 raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
786 {
787 	size_t c;
788 	size_t dsize;
789 	abd_t *dabd;
790 	const size_t firstdc = rr->rr_firstdatacol;
791 	const size_t ncols = rr->rr_cols;
792 	const size_t x = tgtidx[TARGET_X];
793 	const size_t xsize = rr->rr_col[x].rc_size;
794 	abd_t *xabd = rr->rr_col[x].rc_abd;
795 	abd_t *tabds[] = { xabd };
796 
797 	if (xabd == NULL)
798 		return (1 << CODE_R);
799 
800 	unsigned coeff[MUL_CNT];
801 	raidz_rec_r_coeff(rr, tgtidx, coeff);
802 
803 	raidz_math_begin();
804 
805 	/* Start with first data column if present */
806 	if (firstdc != x) {
807 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
808 	} else {
809 		raidz_zero(xabd, xsize);
810 	}
811 
812 
813 	/* generate q_syndrome */
814 	for (c = firstdc+1; c < ncols; c++) {
815 		if (c == x) {
816 			dabd = NULL;
817 			dsize = 0;
818 		} else {
819 			dabd = rr->rr_col[c].rc_abd;
820 			dsize = rr->rr_col[c].rc_size;
821 		}
822 
823 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
824 		    raidz_syn_r_abd);
825 	}
826 
827 	/* add R to the syndrome */
828 	raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, 0, xsize);
829 
830 	/* transform the syndrome */
831 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
832 
833 	raidz_math_end();
834 
835 	return (1 << CODE_R);
836 }
837 
838 
839 /*
840  * Generate P and Q syndromes
841  *
842  * @xc		array of pointers to syndrome columns
843  * @dc		data column (NULL if missing)
844  * @tsize	size of syndrome columns
845  * @dsize	size of data column (0 if missing)
846  */
847 static void
raidz_syn_pq_abd(void ** tc,const void * dc,const size_t tsize,const size_t dsize)848 raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
849     const size_t dsize)
850 {
851 	v_t *x = (v_t *)tc[TARGET_X];
852 	v_t *y = (v_t *)tc[TARGET_Y];
853 	const v_t *d = (const v_t *)dc;
854 	const v_t * const dend = d + (dsize / sizeof (v_t));
855 	const v_t * const yend = y + (tsize / sizeof (v_t));
856 
857 	SYN_PQ_DEFINE();
858 
859 	MUL2_SETUP();
860 
861 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
862 		LOAD(d, SYN_PQ_D);
863 		P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
864 		Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
865 	}
866 	for (; y < yend; y += SYN_STRIDE) {
867 		Q_SYNDROME(SYN_PQ_X, y);
868 	}
869 }
870 
871 /*
872  * Reconstruct data using PQ parity and PQ syndromes
873  *
874  * @tc		syndrome/result columns
875  * @tsize	size of syndrome/result columns
876  * @c		parity columns
877  * @mul		array of multiplication constants
878  */
879 static void
raidz_rec_pq_abd(void ** tc,const size_t tsize,void ** c,const unsigned * mul)880 raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
881     const unsigned *mul)
882 {
883 	v_t *x = (v_t *)tc[TARGET_X];
884 	v_t *y = (v_t *)tc[TARGET_Y];
885 	const v_t * const xend = x + (tsize / sizeof (v_t));
886 	const v_t *p = (v_t *)c[CODE_P];
887 	const v_t *q = (v_t *)c[CODE_Q];
888 
889 	REC_PQ_DEFINE();
890 
891 	for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
892 	    p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
893 		LOAD(x, REC_PQ_X);
894 		LOAD(y, REC_PQ_Y);
895 
896 		XOR_ACC(p, REC_PQ_X);
897 		XOR_ACC(q, REC_PQ_Y);
898 
899 		/* Save Pxy */
900 		COPY(REC_PQ_X,  REC_PQ_T);
901 
902 		/* Calc X */
903 		MUL(mul[MUL_PQ_X], REC_PQ_X);
904 		MUL(mul[MUL_PQ_Y], REC_PQ_Y);
905 		XOR(REC_PQ_Y,  REC_PQ_X);
906 		STORE(x, REC_PQ_X);
907 
908 		/* Calc Y */
909 		XOR(REC_PQ_T,  REC_PQ_X);
910 		STORE(y, REC_PQ_X);
911 	}
912 }
913 
914 
915 /*
916  * Reconstruct two data columns using PQ parity
917  *
918  * @syn_method	raidz_syn_pq_abd()
919  * @rec_method	raidz_rec_pq_abd()
920  *
921  * @rr		RAIDZ row
922  * @tgtidx	array of missing data indexes
923  */
924 static raidz_inline int
raidz_reconstruct_pq_impl(raidz_row_t * rr,const int * tgtidx)925 raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
926 {
927 	size_t c;
928 	size_t dsize;
929 	abd_t *dabd;
930 	const size_t firstdc = rr->rr_firstdatacol;
931 	const size_t ncols = rr->rr_cols;
932 	const size_t x = tgtidx[TARGET_X];
933 	const size_t y = tgtidx[TARGET_Y];
934 	const size_t xsize = rr->rr_col[x].rc_size;
935 	const size_t ysize = rr->rr_col[y].rc_size;
936 	abd_t *xabd = rr->rr_col[x].rc_abd;
937 	abd_t *yabd = rr->rr_col[y].rc_abd;
938 	abd_t *tabds[2] = { xabd, yabd };
939 	abd_t *cabds[] = {
940 		rr->rr_col[CODE_P].rc_abd,
941 		rr->rr_col[CODE_Q].rc_abd
942 	};
943 
944 	if (xabd == NULL)
945 		return ((1 << CODE_P) | (1 << CODE_Q));
946 
947 	unsigned coeff[MUL_CNT];
948 	raidz_rec_pq_coeff(rr, tgtidx, coeff);
949 
950 	/*
951 	 * Check if some of targets is shorter then others
952 	 * In this case, shorter target needs to be replaced with
953 	 * new buffer so that syndrome can be calculated.
954 	 */
955 	if (ysize < xsize) {
956 		yabd = abd_alloc(xsize, B_FALSE);
957 		tabds[1] = yabd;
958 	}
959 
960 	raidz_math_begin();
961 
962 	/* Start with first data column if present */
963 	if (firstdc != x) {
964 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
965 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
966 	} else {
967 		raidz_zero(xabd, xsize);
968 		raidz_zero(yabd, xsize);
969 	}
970 
971 	/* generate q_syndrome */
972 	for (c = firstdc+1; c < ncols; c++) {
973 		if (c == x || c == y) {
974 			dabd = NULL;
975 			dsize = 0;
976 		} else {
977 			dabd = rr->rr_col[c].rc_abd;
978 			dsize = rr->rr_col[c].rc_size;
979 		}
980 
981 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
982 		    raidz_syn_pq_abd);
983 	}
984 
985 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
986 
987 	/* Copy shorter targets back to the original abd buffer */
988 	if (ysize < xsize)
989 		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
990 
991 	raidz_math_end();
992 
993 	if (ysize < xsize)
994 		abd_free(yabd);
995 
996 	return ((1 << CODE_P) | (1 << CODE_Q));
997 }
998 
999 
1000 /*
1001  * Generate P and R syndromes
1002  *
1003  * @xc		array of pointers to syndrome columns
1004  * @dc		data column (NULL if missing)
1005  * @tsize	size of syndrome columns
1006  * @dsize	size of data column (0 if missing)
1007  */
1008 static void
raidz_syn_pr_abd(void ** c,const void * dc,const size_t tsize,const size_t dsize)1009 raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
1010     const size_t dsize)
1011 {
1012 	v_t *x = (v_t *)c[TARGET_X];
1013 	v_t *y = (v_t *)c[TARGET_Y];
1014 	const v_t *d = (const v_t *)dc;
1015 	const v_t * const dend = d + (dsize / sizeof (v_t));
1016 	const v_t * const yend = y + (tsize / sizeof (v_t));
1017 
1018 	SYN_PR_DEFINE();
1019 
1020 	MUL2_SETUP();
1021 
1022 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1023 		LOAD(d, SYN_PR_D);
1024 		P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
1025 		R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
1026 	}
1027 	for (; y < yend; y += SYN_STRIDE) {
1028 		R_SYNDROME(SYN_PR_X, y);
1029 	}
1030 }
1031 
1032 /*
1033  * Reconstruct data using PR parity and PR syndromes
1034  *
1035  * @tc		syndrome/result columns
1036  * @tsize	size of syndrome/result columns
1037  * @c		parity columns
1038  * @mul		array of multiplication constants
1039  */
1040 static void
raidz_rec_pr_abd(void ** t,const size_t tsize,void ** c,const unsigned * mul)1041 raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
1042     const unsigned *mul)
1043 {
1044 	v_t *x = (v_t *)t[TARGET_X];
1045 	v_t *y = (v_t *)t[TARGET_Y];
1046 	const v_t * const xend = x + (tsize / sizeof (v_t));
1047 	const v_t *p = (v_t *)c[CODE_P];
1048 	const v_t *q = (v_t *)c[CODE_Q];
1049 
1050 	REC_PR_DEFINE();
1051 
1052 	for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
1053 	    p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
1054 		LOAD(x, REC_PR_X);
1055 		LOAD(y, REC_PR_Y);
1056 		XOR_ACC(p, REC_PR_X);
1057 		XOR_ACC(q, REC_PR_Y);
1058 
1059 		/* Save Pxy */
1060 		COPY(REC_PR_X,  REC_PR_T);
1061 
1062 		/* Calc X */
1063 		MUL(mul[MUL_PR_X], REC_PR_X);
1064 		MUL(mul[MUL_PR_Y], REC_PR_Y);
1065 		XOR(REC_PR_Y,  REC_PR_X);
1066 		STORE(x, REC_PR_X);
1067 
1068 		/* Calc Y */
1069 		XOR(REC_PR_T,  REC_PR_X);
1070 		STORE(y, REC_PR_X);
1071 	}
1072 }
1073 
1074 
1075 /*
1076  * Reconstruct two data columns using PR parity
1077  *
1078  * @syn_method	raidz_syn_pr_abd()
1079  * @rec_method	raidz_rec_pr_abd()
1080  *
1081  * @rr		RAIDZ row
1082  * @tgtidx	array of missing data indexes
1083  */
1084 static raidz_inline int
raidz_reconstruct_pr_impl(raidz_row_t * rr,const int * tgtidx)1085 raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
1086 {
1087 	size_t c;
1088 	size_t dsize;
1089 	abd_t *dabd;
1090 	const size_t firstdc = rr->rr_firstdatacol;
1091 	const size_t ncols = rr->rr_cols;
1092 	const size_t x = tgtidx[0];
1093 	const size_t y = tgtidx[1];
1094 	const size_t xsize = rr->rr_col[x].rc_size;
1095 	const size_t ysize = rr->rr_col[y].rc_size;
1096 	abd_t *xabd = rr->rr_col[x].rc_abd;
1097 	abd_t *yabd = rr->rr_col[y].rc_abd;
1098 	abd_t *tabds[2] = { xabd, yabd };
1099 	abd_t *cabds[] = {
1100 		rr->rr_col[CODE_P].rc_abd,
1101 		rr->rr_col[CODE_R].rc_abd
1102 	};
1103 
1104 	if (xabd == NULL)
1105 		return ((1 << CODE_P) | (1 << CODE_R));
1106 
1107 	unsigned coeff[MUL_CNT];
1108 	raidz_rec_pr_coeff(rr, tgtidx, coeff);
1109 
1110 	/*
1111 	 * Check if some of targets are shorter then others.
1112 	 * They need to be replaced with a new buffer so that syndrome can
1113 	 * be calculated on full length.
1114 	 */
1115 	if (ysize < xsize) {
1116 		yabd = abd_alloc(xsize, B_FALSE);
1117 		tabds[1] = yabd;
1118 	}
1119 
1120 	raidz_math_begin();
1121 
1122 	/* Start with first data column if present */
1123 	if (firstdc != x) {
1124 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1125 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1126 	} else {
1127 		raidz_zero(xabd, xsize);
1128 		raidz_zero(yabd, xsize);
1129 	}
1130 
1131 	/* generate q_syndrome */
1132 	for (c = firstdc+1; c < ncols; c++) {
1133 		if (c == x || c == y) {
1134 			dabd = NULL;
1135 			dsize = 0;
1136 		} else {
1137 			dabd = rr->rr_col[c].rc_abd;
1138 			dsize = rr->rr_col[c].rc_size;
1139 		}
1140 
1141 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
1142 		    raidz_syn_pr_abd);
1143 	}
1144 
1145 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
1146 
1147 	/*
1148 	 * Copy shorter targets back to the original abd buffer
1149 	 */
1150 	if (ysize < xsize)
1151 		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1152 
1153 	raidz_math_end();
1154 
1155 	if (ysize < xsize)
1156 		abd_free(yabd);
1157 
1158 	return ((1 << CODE_P) | (1 << CODE_R));
1159 }
1160 
1161 
1162 /*
1163  * Generate Q and R syndromes
1164  *
1165  * @xc		array of pointers to syndrome columns
1166  * @dc		data column (NULL if missing)
1167  * @tsize	size of syndrome columns
1168  * @dsize	size of data column (0 if missing)
1169  */
1170 static void
raidz_syn_qr_abd(void ** c,const void * dc,const size_t tsize,const size_t dsize)1171 raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
1172     const size_t dsize)
1173 {
1174 	v_t *x = (v_t *)c[TARGET_X];
1175 	v_t *y = (v_t *)c[TARGET_Y];
1176 	const v_t * const xend = x + (tsize / sizeof (v_t));
1177 	const v_t *d = (const v_t *)dc;
1178 	const v_t * const dend = d + (dsize / sizeof (v_t));
1179 
1180 	SYN_QR_DEFINE();
1181 
1182 	MUL2_SETUP();
1183 
1184 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1185 		LOAD(d, SYN_PQ_D);
1186 		Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
1187 		R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
1188 	}
1189 	for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
1190 		Q_SYNDROME(SYN_QR_X, x);
1191 		R_SYNDROME(SYN_QR_X, y);
1192 	}
1193 }
1194 
1195 
1196 /*
1197  * Reconstruct data using QR parity and QR syndromes
1198  *
1199  * @tc		syndrome/result columns
1200  * @tsize	size of syndrome/result columns
1201  * @c		parity columns
1202  * @mul		array of multiplication constants
1203  */
1204 static void
raidz_rec_qr_abd(void ** t,const size_t tsize,void ** c,const unsigned * mul)1205 raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
1206     const unsigned *mul)
1207 {
1208 	v_t *x = (v_t *)t[TARGET_X];
1209 	v_t *y = (v_t *)t[TARGET_Y];
1210 	const v_t * const xend = x + (tsize / sizeof (v_t));
1211 	const v_t *p = (v_t *)c[CODE_P];
1212 	const v_t *q = (v_t *)c[CODE_Q];
1213 
1214 	REC_QR_DEFINE();
1215 
1216 	for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
1217 	    p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
1218 		LOAD(x, REC_QR_X);
1219 		LOAD(y, REC_QR_Y);
1220 
1221 		XOR_ACC(p, REC_QR_X);
1222 		XOR_ACC(q, REC_QR_Y);
1223 
1224 		/* Save Pxy */
1225 		COPY(REC_QR_X,  REC_QR_T);
1226 
1227 		/* Calc X */
1228 		MUL(mul[MUL_QR_XQ], REC_QR_X);	/* X = Q * xqm */
1229 		XOR(REC_QR_Y, REC_QR_X);	/* X = R ^ X   */
1230 		MUL(mul[MUL_QR_X], REC_QR_X);	/* X = X * xm  */
1231 		STORE(x, REC_QR_X);
1232 
1233 		/* Calc Y */
1234 		MUL(mul[MUL_QR_YQ], REC_QR_T);	/* X = Q * xqm */
1235 		XOR(REC_QR_Y, REC_QR_T);	/* X = R ^ X   */
1236 		MUL(mul[MUL_QR_Y], REC_QR_T);	/* X = X * xm  */
1237 		STORE(y, REC_QR_T);
1238 	}
1239 }
1240 
1241 
1242 /*
1243  * Reconstruct two data columns using QR parity
1244  *
1245  * @syn_method	raidz_syn_qr_abd()
1246  * @rec_method	raidz_rec_qr_abd()
1247  *
1248  * @rr		RAIDZ row
1249  * @tgtidx	array of missing data indexes
1250  */
1251 static raidz_inline int
raidz_reconstruct_qr_impl(raidz_row_t * rr,const int * tgtidx)1252 raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
1253 {
1254 	size_t c;
1255 	size_t dsize;
1256 	abd_t *dabd;
1257 	const size_t firstdc = rr->rr_firstdatacol;
1258 	const size_t ncols = rr->rr_cols;
1259 	const size_t x = tgtidx[TARGET_X];
1260 	const size_t y = tgtidx[TARGET_Y];
1261 	const size_t xsize = rr->rr_col[x].rc_size;
1262 	const size_t ysize = rr->rr_col[y].rc_size;
1263 	abd_t *xabd = rr->rr_col[x].rc_abd;
1264 	abd_t *yabd = rr->rr_col[y].rc_abd;
1265 	abd_t *tabds[2] = { xabd, yabd };
1266 	abd_t *cabds[] = {
1267 		rr->rr_col[CODE_Q].rc_abd,
1268 		rr->rr_col[CODE_R].rc_abd
1269 	};
1270 
1271 	if (xabd == NULL)
1272 		return ((1 << CODE_Q) | (1 << CODE_R));
1273 
1274 	unsigned coeff[MUL_CNT];
1275 	raidz_rec_qr_coeff(rr, tgtidx, coeff);
1276 
1277 	/*
1278 	 * Check if some of targets is shorter then others
1279 	 * In this case, shorter target needs to be replaced with
1280 	 * new buffer so that syndrome can be calculated.
1281 	 */
1282 	if (ysize < xsize) {
1283 		yabd = abd_alloc(xsize, B_FALSE);
1284 		tabds[1] = yabd;
1285 	}
1286 
1287 	raidz_math_begin();
1288 
1289 	/* Start with first data column if present */
1290 	if (firstdc != x) {
1291 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1292 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1293 	} else {
1294 		raidz_zero(xabd, xsize);
1295 		raidz_zero(yabd, xsize);
1296 	}
1297 
1298 	/* generate q_syndrome */
1299 	for (c = firstdc+1; c < ncols; c++) {
1300 		if (c == x || c == y) {
1301 			dabd = NULL;
1302 			dsize = 0;
1303 		} else {
1304 			dabd = rr->rr_col[c].rc_abd;
1305 			dsize = rr->rr_col[c].rc_size;
1306 		}
1307 
1308 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
1309 		    raidz_syn_qr_abd);
1310 	}
1311 
1312 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
1313 
1314 	/*
1315 	 * Copy shorter targets back to the original abd buffer
1316 	 */
1317 	if (ysize < xsize)
1318 		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1319 
1320 	raidz_math_end();
1321 
1322 	if (ysize < xsize)
1323 		abd_free(yabd);
1324 
1325 
1326 	return ((1 << CODE_Q) | (1 << CODE_R));
1327 }
1328 
1329 
1330 /*
1331  * Generate P, Q, and R syndromes
1332  *
1333  * @xc		array of pointers to syndrome columns
1334  * @dc		data column (NULL if missing)
1335  * @tsize	size of syndrome columns
1336  * @dsize	size of data column (0 if missing)
1337  */
1338 static void
raidz_syn_pqr_abd(void ** c,const void * dc,const size_t tsize,const size_t dsize)1339 raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
1340     const size_t dsize)
1341 {
1342 	v_t *x = (v_t *)c[TARGET_X];
1343 	v_t *y = (v_t *)c[TARGET_Y];
1344 	v_t *z = (v_t *)c[TARGET_Z];
1345 	const v_t * const yend = y + (tsize / sizeof (v_t));
1346 	const v_t *d = (const v_t *)dc;
1347 	const v_t * const dend = d + (dsize / sizeof (v_t));
1348 
1349 	SYN_PQR_DEFINE();
1350 
1351 	MUL2_SETUP();
1352 
1353 	for (; d < dend;  d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
1354 	    z += SYN_STRIDE) {
1355 		LOAD(d, SYN_PQR_D);
1356 		P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x)
1357 		Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
1358 		R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
1359 	}
1360 	for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
1361 		Q_SYNDROME(SYN_PQR_X, y);
1362 		R_SYNDROME(SYN_PQR_X, z);
1363 	}
1364 }
1365 
1366 
1367 /*
1368  * Reconstruct data using PRQ parity and PQR syndromes
1369  *
1370  * @tc		syndrome/result columns
1371  * @tsize	size of syndrome/result columns
1372  * @c		parity columns
1373  * @mul		array of multiplication constants
1374  */
1375 static void
raidz_rec_pqr_abd(void ** t,const size_t tsize,void ** c,const unsigned * const mul)1376 raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
1377     const unsigned * const mul)
1378 {
1379 	v_t *x = (v_t *)t[TARGET_X];
1380 	v_t *y = (v_t *)t[TARGET_Y];
1381 	v_t *z = (v_t *)t[TARGET_Z];
1382 	const v_t * const xend = x + (tsize / sizeof (v_t));
1383 	const v_t *p = (v_t *)c[CODE_P];
1384 	const v_t *q = (v_t *)c[CODE_Q];
1385 	const v_t *r = (v_t *)c[CODE_R];
1386 
1387 	REC_PQR_DEFINE();
1388 
1389 	for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
1390 	    z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
1391 	    r += REC_PQR_STRIDE) {
1392 		LOAD(x, REC_PQR_X);
1393 		LOAD(y, REC_PQR_Y);
1394 		LOAD(z, REC_PQR_Z);
1395 
1396 		XOR_ACC(p, REC_PQR_X);
1397 		XOR_ACC(q, REC_PQR_Y);
1398 		XOR_ACC(r, REC_PQR_Z);
1399 
1400 		/* Save Pxyz and Qxyz */
1401 		COPY(REC_PQR_X, REC_PQR_XS);
1402 		COPY(REC_PQR_Y, REC_PQR_YS);
1403 
1404 		/* Calc X */
1405 		MUL(mul[MUL_PQR_XP], REC_PQR_X);	/* Xp = Pxyz * xp   */
1406 		MUL(mul[MUL_PQR_XQ], REC_PQR_Y);	/* Xq = Qxyz * xq   */
1407 		XOR(REC_PQR_Y, REC_PQR_X);
1408 		MUL(mul[MUL_PQR_XR], REC_PQR_Z);	/* Xr = Rxyz * xr   */
1409 		XOR(REC_PQR_Z, REC_PQR_X);		/* X = Xp + Xq + Xr */
1410 		STORE(x, REC_PQR_X);
1411 
1412 		/* Calc Y */
1413 		XOR(REC_PQR_X, REC_PQR_XS); 		/* Pyz = Pxyz + X */
1414 		MUL(mul[MUL_PQR_YU], REC_PQR_X);  	/* Xq = X * upd_q */
1415 		XOR(REC_PQR_X, REC_PQR_YS); 		/* Qyz = Qxyz + Xq */
1416 		COPY(REC_PQR_XS, REC_PQR_X);		/* restore Pyz */
1417 		MUL(mul[MUL_PQR_YP], REC_PQR_X);	/* Yp = Pyz * yp */
1418 		MUL(mul[MUL_PQR_YQ], REC_PQR_YS);	/* Yq = Qyz * yq */
1419 		XOR(REC_PQR_X, REC_PQR_YS); 		/* Y = Yp + Yq */
1420 		STORE(y, REC_PQR_YS);
1421 
1422 		/* Calc Z */
1423 		XOR(REC_PQR_XS, REC_PQR_YS);		/* Z = Pz = Pyz + Y */
1424 		STORE(z, REC_PQR_YS);
1425 	}
1426 }
1427 
1428 
1429 /*
1430  * Reconstruct three data columns using PQR parity
1431  *
1432  * @syn_method	raidz_syn_pqr_abd()
1433  * @rec_method	raidz_rec_pqr_abd()
1434  *
1435  * @rr		RAIDZ row
1436  * @tgtidx	array of missing data indexes
1437  */
1438 static raidz_inline int
raidz_reconstruct_pqr_impl(raidz_row_t * rr,const int * tgtidx)1439 raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
1440 {
1441 	size_t c;
1442 	size_t dsize;
1443 	abd_t *dabd;
1444 	const size_t firstdc = rr->rr_firstdatacol;
1445 	const size_t ncols = rr->rr_cols;
1446 	const size_t x = tgtidx[TARGET_X];
1447 	const size_t y = tgtidx[TARGET_Y];
1448 	const size_t z = tgtidx[TARGET_Z];
1449 	const size_t xsize = rr->rr_col[x].rc_size;
1450 	const size_t ysize = rr->rr_col[y].rc_size;
1451 	const size_t zsize = rr->rr_col[z].rc_size;
1452 	abd_t *xabd = rr->rr_col[x].rc_abd;
1453 	abd_t *yabd = rr->rr_col[y].rc_abd;
1454 	abd_t *zabd = rr->rr_col[z].rc_abd;
1455 	abd_t *tabds[] = { xabd, yabd, zabd };
1456 	abd_t *cabds[] = {
1457 		rr->rr_col[CODE_P].rc_abd,
1458 		rr->rr_col[CODE_Q].rc_abd,
1459 		rr->rr_col[CODE_R].rc_abd
1460 	};
1461 
1462 	if (xabd == NULL)
1463 		return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1464 
1465 	unsigned coeff[MUL_CNT];
1466 	raidz_rec_pqr_coeff(rr, tgtidx, coeff);
1467 
1468 	/*
1469 	 * Check if some of targets is shorter then others
1470 	 * In this case, shorter target needs to be replaced with
1471 	 * new buffer so that syndrome can be calculated.
1472 	 */
1473 	if (ysize < xsize) {
1474 		yabd = abd_alloc(xsize, B_FALSE);
1475 		tabds[1] = yabd;
1476 	}
1477 	if (zsize < xsize) {
1478 		zabd = abd_alloc(xsize, B_FALSE);
1479 		tabds[2] = zabd;
1480 	}
1481 
1482 	raidz_math_begin();
1483 
1484 	/* Start with first data column if present */
1485 	if (firstdc != x) {
1486 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1487 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1488 		raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1489 	} else {
1490 		raidz_zero(xabd, xsize);
1491 		raidz_zero(yabd, xsize);
1492 		raidz_zero(zabd, xsize);
1493 	}
1494 
1495 	/* generate q_syndrome */
1496 	for (c = firstdc+1; c < ncols; c++) {
1497 		if (c == x || c == y || c == z) {
1498 			dabd = NULL;
1499 			dsize = 0;
1500 		} else {
1501 			dabd = rr->rr_col[c].rc_abd;
1502 			dsize = rr->rr_col[c].rc_size;
1503 		}
1504 
1505 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 3,
1506 		    raidz_syn_pqr_abd);
1507 	}
1508 
1509 	abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
1510 
1511 	/*
1512 	 * Copy shorter targets back to the original abd buffer
1513 	 */
1514 	if (ysize < xsize)
1515 		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1516 	if (zsize < xsize)
1517 		raidz_copy(rr->rr_col[z].rc_abd, zabd, 0, zsize);
1518 
1519 	raidz_math_end();
1520 
1521 	if (ysize < xsize)
1522 		abd_free(yabd);
1523 	if (zsize < xsize)
1524 		abd_free(zabd);
1525 
1526 	return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1527 }
1528 
1529 #endif /* _VDEV_RAIDZ_MATH_IMPL_H */
1530