xref: /freebsd/sys/contrib/openzfs/module/zfs/vdev_raidz_math_impl.h (revision f5f40dd63bc7acbb5312b26ac1ea1103c12352a6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23  */
24 
25 #ifndef _VDEV_RAIDZ_MATH_IMPL_H
26 #define	_VDEV_RAIDZ_MATH_IMPL_H
27 
28 #include <sys/types.h>
29 #include <sys/vdev_raidz_impl.h>
30 
/* Shorthand for functions that must always be inlined by the compiler. */
#define	raidz_inline inline __attribute__((always_inline))
/* Define noinline here only when nothing included so far provides it. */
#ifndef noinline
#define	noinline __attribute__((noinline))
#endif
35 
36 /*
37  * Functions calculate multiplication constants for data reconstruction.
38  * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
39  * used parity columns for reconstruction.
40  * @rr			RAIDZ row
41  * @tgtidx		array of missing data indexes
42  * @coeff		output array of coefficients. Array must be provided by
43  *         		user and must hold minimum MUL_CNT values.
44  */
45 static noinline void
46 raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
47 {
48 	const unsigned ncols = rr->rr_cols;
49 	const unsigned x = tgtidx[TARGET_X];
50 
51 	coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
52 }
53 
54 static noinline void
55 raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
56 {
57 	const unsigned ncols = rr->rr_cols;
58 	const unsigned x = tgtidx[TARGET_X];
59 
60 	coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
61 }
62 
63 static noinline void
64 raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
65 {
66 	const unsigned ncols = rr->rr_cols;
67 	const unsigned x = tgtidx[TARGET_X];
68 	const unsigned y = tgtidx[TARGET_Y];
69 	gf_t a, b, e;
70 
71 	a = gf_exp2(x + 255 - y);
72 	b = gf_exp2(255 - (ncols - x - 1));
73 	e = a ^ 0x01;
74 
75 	coeff[MUL_PQ_X] = gf_div(a, e);
76 	coeff[MUL_PQ_Y] = gf_div(b, e);
77 }
78 
79 static noinline void
80 raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
81 {
82 	const unsigned ncols = rr->rr_cols;
83 	const unsigned x = tgtidx[TARGET_X];
84 	const unsigned y = tgtidx[TARGET_Y];
85 
86 	gf_t a, b, e;
87 
88 	a = gf_exp4(x + 255 - y);
89 	b = gf_exp4(255 - (ncols - x - 1));
90 	e = a ^ 0x01;
91 
92 	coeff[MUL_PR_X] = gf_div(a, e);
93 	coeff[MUL_PR_Y] = gf_div(b, e);
94 }
95 
96 static noinline void
97 raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
98 {
99 	const unsigned ncols = rr->rr_cols;
100 	const unsigned x = tgtidx[TARGET_X];
101 	const unsigned y = tgtidx[TARGET_Y];
102 
103 	gf_t nx, ny, nxxy, nxyy, d;
104 
105 	nx = gf_exp2(ncols - x - 1);
106 	ny = gf_exp2(ncols - y - 1);
107 	nxxy = gf_mul(gf_mul(nx, nx), ny);
108 	nxyy = gf_mul(gf_mul(nx, ny), ny);
109 	d = nxxy ^ nxyy;
110 
111 	coeff[MUL_QR_XQ] = ny;
112 	coeff[MUL_QR_X]	= gf_div(ny, d);
113 	coeff[MUL_QR_YQ] = nx;
114 	coeff[MUL_QR_Y]	= gf_div(nx, d);
115 }
116 
117 static noinline void
118 raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
119 {
120 	const unsigned ncols = rr->rr_cols;
121 	const unsigned x = tgtidx[TARGET_X];
122 	const unsigned y = tgtidx[TARGET_Y];
123 	const unsigned z = tgtidx[TARGET_Z];
124 
125 	gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
126 
127 	nx = gf_exp2(ncols - x - 1);
128 	ny = gf_exp2(ncols - y - 1);
129 	nz = gf_exp2(ncols - z - 1);
130 
131 	nxx = gf_exp4(ncols - x - 1);
132 	nyy = gf_exp4(ncols - y - 1);
133 	nzz = gf_exp4(ncols - z - 1);
134 
135 	nyyz = gf_mul(gf_mul(ny, nz), ny);
136 	nyzz = gf_mul(nzz, ny);
137 
138 	xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
139 	    gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^  nyzz;
140 
141 	yd = gf_inv(ny ^ nz);
142 
143 	coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
144 	coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
145 	coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
146 	coeff[MUL_PQR_YU] = nx;
147 	coeff[MUL_PQR_YP] = gf_mul(nz, yd);
148 	coeff[MUL_PQR_YQ] = yd;
149 }
150 
151 /*
152  * Method for zeroing a buffer (can be implemented using SIMD).
153  * This method is used by multiple for gen/rec functions.
154  *
155  * @dc		Destination buffer
156  * @dsize	Destination buffer size
157  * @private	Unused
158  */
159 static int
160 raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
161 {
162 	v_t *dst = (v_t *)dc;
163 	size_t i;
164 
165 	ZERO_DEFINE();
166 
167 	(void) private; /* unused */
168 
169 	ZERO(ZERO_D);
170 
171 	for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
172 		STORE(dst + i, ZERO_D);
173 		STORE(dst + i + ZERO_STRIDE, ZERO_D);
174 	}
175 
176 	return (0);
177 }
178 
/*
 * Zero the first `size` bytes of `dabd` by running raidz_zero_abd_cb over
 * it with abd_iterate_func(). Wrapped in do/while (0) so the macro acts as
 * a single statement, including in an unbraced if/else.
 */
#define	raidz_zero(dabd, size)						\
do {									\
	(void) abd_iterate_func((dabd), 0, (size), raidz_zero_abd_cb,	\
	    NULL);							\
} while (0)
183 
184 /*
185  * Method for copying two buffers (can be implemented using SIMD).
186  * This method is used by multiple for gen/rec functions.
187  *
188  * @dc		Destination buffer
189  * @sc		Source buffer
190  * @dsize	Destination buffer size
191  * @ssize	Source buffer size
192  * @private	Unused
193  */
194 static int
195 raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
196 {
197 	v_t *dst = (v_t *)dc;
198 	const v_t *src = (v_t *)sc;
199 	size_t i;
200 
201 	COPY_DEFINE();
202 
203 	(void) private; /* unused */
204 
205 	for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
206 		LOAD(src + i, COPY_D);
207 		STORE(dst + i, COPY_D);
208 
209 		LOAD(src + i + COPY_STRIDE, COPY_D);
210 		STORE(dst + i + COPY_STRIDE, COPY_D);
211 	}
212 
213 	return (0);
214 }
215 
216 
/*
 * Copy `size` bytes from `sabd` to `dabd`, starting at offset `off` in
 * both, by running raidz_copy_abd_cb through abd_iterate_func2().
 * Wrapped in do/while (0) so the macro acts as a single statement,
 * including in an unbraced if/else. Note that `off` is evaluated twice.
 */
#define	raidz_copy(dabd, sabd, off, size)				\
do {									\
	(void) abd_iterate_func2((dabd), (sabd), (off), (off), (size),	\
	    raidz_copy_abd_cb, NULL);					\
} while (0)
222 
223 /*
224  * Method for adding (XORing) two buffers.
225  * Source and destination are XORed together and result is stored in
226  * destination buffer. This method is used by multiple for gen/rec functions.
227  *
228  * @dc		Destination buffer
229  * @sc		Source buffer
230  * @dsize	Destination buffer size
231  * @ssize	Source buffer size
232  * @private	Unused
233  */
234 static int
235 raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
236 {
237 	v_t *dst = (v_t *)dc;
238 	const v_t *src = (v_t *)sc;
239 	size_t i;
240 
241 	ADD_DEFINE();
242 
243 	(void) private; /* unused */
244 
245 	for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
246 		LOAD(dst + i, ADD_D);
247 		XOR_ACC(src + i, ADD_D);
248 		STORE(dst + i, ADD_D);
249 
250 		LOAD(dst + i + ADD_STRIDE, ADD_D);
251 		XOR_ACC(src + i + ADD_STRIDE, ADD_D);
252 		STORE(dst + i + ADD_STRIDE, ADD_D);
253 	}
254 
255 	return (0);
256 }
257 
/*
 * XOR `size` bytes of `sabd` into `dabd`, starting at offset `off` in
 * both, by running raidz_add_abd_cb through abd_iterate_func2().
 * Wrapped in do/while (0) so the macro acts as a single statement,
 * including in an unbraced if/else. Note that `off` is evaluated twice.
 */
#define	raidz_add(dabd, sabd, off, size)				\
do {									\
	(void) abd_iterate_func2((dabd), (sabd), (off), (off), (size),	\
	    raidz_add_abd_cb, NULL);					\
} while (0)
263 
264 /*
265  * Method for multiplying a buffer with a constant in GF(2^8).
266  * Symbols from buffer are multiplied by a constant and result is stored
267  * back in the same buffer.
268  *
269  * @dc		In/Out data buffer.
270  * @size	Size of the buffer
271  * @private	pointer to the multiplication constant (unsigned)
272  */
273 static int
274 raidz_mul_abd_cb(void *dc, size_t size, void *private)
275 {
276 	const unsigned mul = *((unsigned *)private);
277 	v_t *d = (v_t *)dc;
278 	size_t i;
279 
280 	MUL_DEFINE();
281 
282 	for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
283 		LOAD(d + i, MUL_D);
284 		MUL(mul, MUL_D);
285 		STORE(d + i, MUL_D);
286 
287 		LOAD(d + i + MUL_STRIDE, MUL_D);
288 		MUL(mul, MUL_D);
289 		STORE(d + i + MUL_STRIDE, MUL_D);
290 	}
291 
292 	return (0);
293 }
294 
295 
/*
 * Syndrome generation/update macros
 *
 * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
 *
 * @D	register list holding the data symbols
 * @T	register list used as scratch for the parity/syndrome symbols
 * @t	pointer to the parity/syndrome location that is updated in place
 *
 * D and T are passed on verbatim to the helper macros and therefore must
 * not be parenthesized here. Every macro is wrapped in do/while (0) so it
 * behaves as a single statement at the call site.
 */

/* P update: t = t ^ D */
#define	P_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* Q update with data: t = MUL2(t) ^ D */
#define	Q_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	MUL2(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* Q update without data: t = MUL2(t) */
#define	Q_SYNDROME(T, t)		\
do {					\
	LOAD((t), T);			\
	MUL2(T);			\
	STORE((t), T);			\
} while (0)

/* R update with data: t = MUL4(t) ^ D */
#define	R_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	MUL4(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* R update without data: t = MUL4(t) */
#define	R_SYNDROME(T, t)		\
do {					\
	LOAD((t), T);			\
	MUL4(T);			\
	STORE((t), T);			\
} while (0)
337 
338 
339 /*
340  * PARITY CALCULATION
341  *
342  * Macros *_SYNDROME are used for parity/syndrome calculation.
343  * *_D_SYNDROME() macros are used to calculate syndrome between 0 and
344  * length of data column, and *_SYNDROME() macros are only for updating
345  * the parity/syndrome if data column is shorter.
346  *
 * P-only parity (raidz_generate_p_impl()) is calculated using raidz_copy()
 * and raidz_add().
348  *
349  * For CPU L2 cache blocking we process 64KB at a time.
350  */
#define	CHUNK		(64 * 1024)	/* bytes processed per pass (64KB) */
352 
353 /*
354  * Generate P parity (RAIDZ1)
355  *
356  * @rr	RAIDZ row
357  */
358 static raidz_inline void
359 raidz_generate_p_impl(raidz_row_t * const rr)
360 {
361 	size_t c;
362 	const size_t ncols = rr->rr_cols;
363 	const size_t psize = rr->rr_col[CODE_P].rc_size;
364 	abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
365 	size_t off, size;
366 
367 	raidz_math_begin();
368 
369 	for (off = 0; off < psize; off += CHUNK) {
370 
371 		/* start with first data column */
372 		size = MIN(CHUNK, psize - off);
373 		raidz_copy(pabd, rr->rr_col[1].rc_abd, off, size);
374 
375 		for (c = 2; c < ncols; c++) {
376 			size = rr->rr_col[c].rc_size;
377 			if (size <= off)
378 				continue;
379 
380 			/* add data column */
381 			size = MIN(CHUNK, size - off);
382 			abd_t *dabd = rr->rr_col[c].rc_abd;
383 			raidz_add(pabd, dabd, off, size);
384 		}
385 	}
386 
387 	raidz_math_end();
388 }
389 
390 
391 /*
392  * Generate PQ parity (RAIDZ2)
393  * The function is called per data column.
394  *
395  * @c		array of pointers to parity (code) columns
396  * @dc		pointer to data column
397  * @csize	size of parity columns
398  * @dsize	size of data column
399  */
400 static void
401 raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
402     const size_t dsize)
403 {
404 	v_t *p = (v_t *)c[0];
405 	v_t *q = (v_t *)c[1];
406 	const v_t *d = (const v_t *)dc;
407 	const v_t * const dend = d + (dsize / sizeof (v_t));
408 	const v_t * const qend = q + (csize / sizeof (v_t));
409 
410 	GEN_PQ_DEFINE();
411 
412 	MUL2_SETUP();
413 
414 	for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
415 	    q += GEN_PQ_STRIDE) {
416 		LOAD(d, GEN_PQ_D);
417 		P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
418 		Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
419 	}
420 	for (; q < qend; q += GEN_PQ_STRIDE) {
421 		Q_SYNDROME(GEN_PQ_C, q);
422 	}
423 }
424 
425 
426 /*
427  * Generate PQ parity (RAIDZ2)
428  *
429  * @rr	RAIDZ row
430  */
431 static raidz_inline void
432 raidz_generate_pq_impl(raidz_row_t * const rr)
433 {
434 	size_t c;
435 	const size_t ncols = rr->rr_cols;
436 	const size_t csize = rr->rr_col[CODE_P].rc_size;
437 	size_t off, size, dsize;
438 	abd_t *dabd;
439 	abd_t *cabds[] = {
440 		rr->rr_col[CODE_P].rc_abd,
441 		rr->rr_col[CODE_Q].rc_abd
442 	};
443 
444 	raidz_math_begin();
445 
446 	for (off = 0; off < csize; off += CHUNK) {
447 
448 		size = MIN(CHUNK, csize - off);
449 		raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, off, size);
450 		raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, off, size);
451 
452 		for (c = 3; c < ncols; c++) {
453 			dabd = rr->rr_col[c].rc_abd;
454 			dsize = rr->rr_col[c].rc_size;
455 			dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
456 
457 			abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 2,
458 			    raidz_gen_pq_add);
459 		}
460 	}
461 
462 	raidz_math_end();
463 }
464 
465 
466 /*
467  * Generate PQR parity (RAIDZ3)
468  * The function is called per data column.
469  *
470  * @c		array of pointers to parity (code) columns
471  * @dc		pointer to data column
472  * @csize	size of parity columns
473  * @dsize	size of data column
474  */
475 static void
476 raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
477     const size_t dsize)
478 {
479 	v_t *p = (v_t *)c[CODE_P];
480 	v_t *q = (v_t *)c[CODE_Q];
481 	v_t *r = (v_t *)c[CODE_R];
482 	const v_t *d = (const v_t *)dc;
483 	const v_t * const dend = d + (dsize / sizeof (v_t));
484 	const v_t * const qend = q + (csize / sizeof (v_t));
485 
486 	GEN_PQR_DEFINE();
487 
488 	MUL2_SETUP();
489 
490 	for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
491 	    q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
492 		LOAD(d, GEN_PQR_D);
493 		P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
494 		Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
495 		R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
496 	}
497 	for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
498 		Q_SYNDROME(GEN_PQR_C, q);
499 		R_SYNDROME(GEN_PQR_C, r);
500 	}
501 }
502 
503 
504 /*
505  * Generate PQR parity (RAIDZ3)
506  *
507  * @rr	RAIDZ row
508  */
509 static raidz_inline void
510 raidz_generate_pqr_impl(raidz_row_t * const rr)
511 {
512 	size_t c;
513 	const size_t ncols = rr->rr_cols;
514 	const size_t csize = rr->rr_col[CODE_P].rc_size;
515 	size_t off, size, dsize;
516 	abd_t *dabd;
517 	abd_t *cabds[] = {
518 		rr->rr_col[CODE_P].rc_abd,
519 		rr->rr_col[CODE_Q].rc_abd,
520 		rr->rr_col[CODE_R].rc_abd
521 	};
522 
523 	raidz_math_begin();
524 
525 	for (off = 0; off < csize; off += CHUNK) {
526 
527 		size = MIN(CHUNK, csize - off);
528 		raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, off, size);
529 		raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, off, size);
530 		raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, off, size);
531 
532 		for (c = 4; c < ncols; c++) {
533 			dabd = rr->rr_col[c].rc_abd;
534 			dsize = rr->rr_col[c].rc_size;
535 			dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0;
536 
537 			abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 3,
538 			    raidz_gen_pqr_add);
539 		}
540 	}
541 
542 	raidz_math_end();
543 }
544 
545 
546 /*
547  * DATA RECONSTRUCTION
548  *
549  * Data reconstruction process consists of two phases:
550  * 	- Syndrome calculation
551  * 	- Data reconstruction
552  *
553  * Syndrome is calculated by generating parity using available data columns
554  * and zeros in places of erasure. Existing parity is added to corresponding
555  * syndrome value to obtain the [P|Q|R]syn values from equation:
556  * 	P = Psyn + Dx + Dy + Dz
557  * 	Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
558  * 	R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
559  *
 * For data reconstruction phase, the corresponding equations are solved
 * for missing data (Dx, Dy, Dz). This generally involves multiplying known
 * symbols by a coefficient and adding them together. The multiplication
 * constant coefficients are calculated ahead of the operation in
 * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
565  *
 * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
 * and "short" columns.
 * For this reason, reconstruction is performed in a minimum of
 * two steps. First, from offset 0 to short_size, then from short_size to
 * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
 * over both ranges. The split also enables removal of conditional expressions
 * from loop bodies, improving throughput of SIMD implementations.
 * For the best performance, all functions marked with raidz_inline attribute
 * must be inlined by compiler.
575  *
576  *    parity          data
577  *    columns         columns
578  * <----------> <------------------>
579  *                   x       y  <----+ missing columns (x, y)
580  *                   |       |
581  * +---+---+---+---+-v-+---+-v-+---+   ^ 0
582  * |   |   |   |   |   |   |   |   |   |
583  * |   |   |   |   |   |   |   |   |   |
584  * | P | Q | R | D | D | D | D | D |   |
585  * |   |   |   | 0 | 1 | 2 | 3 | 4 |   |
586  * |   |   |   |   |   |   |   |   |   v
587  * |   |   |   |   |   +---+---+---+   ^ short_size
588  * |   |   |   |   |   |               |
589  * +---+---+---+---+---+               v big_size
590  * <------------------> <---------->
591  *      big columns     short columns
592  *
593  */
594 
595 
596 
597 
598 /*
599  * Reconstruct single data column using P parity
600  *
601  * @syn_method	raidz_add_abd()
602  * @rec_method	not applicable
603  *
604  * @rr		RAIDZ row
605  * @tgtidx	array of missing data indexes
606  */
607 static raidz_inline int
608 raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
609 {
610 	size_t c;
611 	const size_t firstdc = rr->rr_firstdatacol;
612 	const size_t ncols = rr->rr_cols;
613 	const size_t x = tgtidx[TARGET_X];
614 	const size_t xsize = rr->rr_col[x].rc_size;
615 	abd_t *xabd = rr->rr_col[x].rc_abd;
616 	size_t off, size;
617 
618 	if (xabd == NULL)
619 		return (1 << CODE_P);
620 
621 	raidz_math_begin();
622 
623 	for (off = 0; off < xsize; off += CHUNK) {
624 
625 		/* copy P into target */
626 		size = MIN(CHUNK, xsize - off);
627 		raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, off, size);
628 
629 		/* generate p_syndrome */
630 		for (c = firstdc; c < ncols; c++) {
631 			if (c == x)
632 				continue;
633 			size = rr->rr_col[c].rc_size;
634 			if (size <= off)
635 				continue;
636 
637 			size = MIN(CHUNK, MIN(size, xsize) - off);
638 			abd_t *dabd = rr->rr_col[c].rc_abd;
639 			raidz_add(xabd, dabd, off, size);
640 		}
641 	}
642 
643 	raidz_math_end();
644 
645 	return (1 << CODE_P);
646 }
647 
648 
649 /*
650  * Generate Q syndrome (Qsyn)
651  *
652  * @xc		array of pointers to syndrome columns
653  * @dc		data column (NULL if missing)
654  * @xsize	size of syndrome columns
655  * @dsize	size of data column (0 if missing)
656  */
657 static void
658 raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
659     const size_t dsize)
660 {
661 	v_t *x = (v_t *)xc[TARGET_X];
662 	const v_t *d = (const v_t *)dc;
663 	const v_t * const dend = d + (dsize / sizeof (v_t));
664 	const v_t * const xend = x + (xsize / sizeof (v_t));
665 
666 	SYN_Q_DEFINE();
667 
668 	MUL2_SETUP();
669 
670 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
671 		LOAD(d, SYN_Q_D);
672 		Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
673 	}
674 	for (; x < xend; x += SYN_STRIDE) {
675 		Q_SYNDROME(SYN_Q_X, x);
676 	}
677 }
678 
679 
680 /*
681  * Reconstruct single data column using Q parity
682  *
683  * @syn_method	raidz_add_abd()
684  * @rec_method	raidz_mul_abd_cb()
685  *
686  * @rr		RAIDZ row
687  * @tgtidx	array of missing data indexes
688  */
689 static raidz_inline int
690 raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
691 {
692 	size_t c;
693 	size_t dsize;
694 	abd_t *dabd;
695 	const size_t firstdc = rr->rr_firstdatacol;
696 	const size_t ncols = rr->rr_cols;
697 	const size_t x = tgtidx[TARGET_X];
698 	abd_t *xabd = rr->rr_col[x].rc_abd;
699 	const size_t xsize = rr->rr_col[x].rc_size;
700 	abd_t *tabds[] = { xabd };
701 
702 	if (xabd == NULL)
703 		return (1 << CODE_Q);
704 
705 	unsigned coeff[MUL_CNT];
706 	raidz_rec_q_coeff(rr, tgtidx, coeff);
707 
708 	raidz_math_begin();
709 
710 	/* Start with first data column if present */
711 	if (firstdc != x) {
712 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
713 	} else {
714 		raidz_zero(xabd, xsize);
715 	}
716 
717 	/* generate q_syndrome */
718 	for (c = firstdc+1; c < ncols; c++) {
719 		if (c == x) {
720 			dabd = NULL;
721 			dsize = 0;
722 		} else {
723 			dabd = rr->rr_col[c].rc_abd;
724 			dsize = rr->rr_col[c].rc_size;
725 		}
726 
727 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
728 		    raidz_syn_q_abd);
729 	}
730 
731 	/* add Q to the syndrome */
732 	raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, 0, xsize);
733 
734 	/* transform the syndrome */
735 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
736 
737 	raidz_math_end();
738 
739 	return (1 << CODE_Q);
740 }
741 
742 
743 /*
744  * Generate R syndrome (Rsyn)
745  *
746  * @xc		array of pointers to syndrome columns
747  * @dc		data column (NULL if missing)
748  * @tsize	size of syndrome columns
749  * @dsize	size of data column (0 if missing)
750  */
751 static void
752 raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
753     const size_t dsize)
754 {
755 	v_t *x = (v_t *)xc[TARGET_X];
756 	const v_t *d = (const v_t *)dc;
757 	const v_t * const dend = d + (dsize / sizeof (v_t));
758 	const v_t * const xend = x + (tsize / sizeof (v_t));
759 
760 	SYN_R_DEFINE();
761 
762 	MUL2_SETUP();
763 
764 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
765 		LOAD(d, SYN_R_D);
766 		R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
767 	}
768 	for (; x < xend; x += SYN_STRIDE) {
769 		R_SYNDROME(SYN_R_X, x);
770 	}
771 }
772 
773 
774 /*
775  * Reconstruct single data column using R parity
776  *
777  * @syn_method	raidz_add_abd()
778  * @rec_method	raidz_mul_abd_cb()
779  *
780  * @rr		RAIDZ rr
781  * @tgtidx	array of missing data indexes
782  */
783 static raidz_inline int
784 raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
785 {
786 	size_t c;
787 	size_t dsize;
788 	abd_t *dabd;
789 	const size_t firstdc = rr->rr_firstdatacol;
790 	const size_t ncols = rr->rr_cols;
791 	const size_t x = tgtidx[TARGET_X];
792 	const size_t xsize = rr->rr_col[x].rc_size;
793 	abd_t *xabd = rr->rr_col[x].rc_abd;
794 	abd_t *tabds[] = { xabd };
795 
796 	if (xabd == NULL)
797 		return (1 << CODE_R);
798 
799 	unsigned coeff[MUL_CNT];
800 	raidz_rec_r_coeff(rr, tgtidx, coeff);
801 
802 	raidz_math_begin();
803 
804 	/* Start with first data column if present */
805 	if (firstdc != x) {
806 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
807 	} else {
808 		raidz_zero(xabd, xsize);
809 	}
810 
811 
812 	/* generate q_syndrome */
813 	for (c = firstdc+1; c < ncols; c++) {
814 		if (c == x) {
815 			dabd = NULL;
816 			dsize = 0;
817 		} else {
818 			dabd = rr->rr_col[c].rc_abd;
819 			dsize = rr->rr_col[c].rc_size;
820 		}
821 
822 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1,
823 		    raidz_syn_r_abd);
824 	}
825 
826 	/* add R to the syndrome */
827 	raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, 0, xsize);
828 
829 	/* transform the syndrome */
830 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
831 
832 	raidz_math_end();
833 
834 	return (1 << CODE_R);
835 }
836 
837 
838 /*
839  * Generate P and Q syndromes
840  *
841  * @xc		array of pointers to syndrome columns
842  * @dc		data column (NULL if missing)
843  * @tsize	size of syndrome columns
844  * @dsize	size of data column (0 if missing)
845  */
846 static void
847 raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
848     const size_t dsize)
849 {
850 	v_t *x = (v_t *)tc[TARGET_X];
851 	v_t *y = (v_t *)tc[TARGET_Y];
852 	const v_t *d = (const v_t *)dc;
853 	const v_t * const dend = d + (dsize / sizeof (v_t));
854 	const v_t * const yend = y + (tsize / sizeof (v_t));
855 
856 	SYN_PQ_DEFINE();
857 
858 	MUL2_SETUP();
859 
860 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
861 		LOAD(d, SYN_PQ_D);
862 		P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
863 		Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
864 	}
865 	for (; y < yend; y += SYN_STRIDE) {
866 		Q_SYNDROME(SYN_PQ_X, y);
867 	}
868 }
869 
870 /*
871  * Reconstruct data using PQ parity and PQ syndromes
872  *
873  * @tc		syndrome/result columns
874  * @tsize	size of syndrome/result columns
875  * @c		parity columns
876  * @mul		array of multiplication constants
877  */
878 static void
879 raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
880     const unsigned *mul)
881 {
882 	v_t *x = (v_t *)tc[TARGET_X];
883 	v_t *y = (v_t *)tc[TARGET_Y];
884 	const v_t * const xend = x + (tsize / sizeof (v_t));
885 	const v_t *p = (v_t *)c[CODE_P];
886 	const v_t *q = (v_t *)c[CODE_Q];
887 
888 	REC_PQ_DEFINE();
889 
890 	for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
891 	    p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
892 		LOAD(x, REC_PQ_X);
893 		LOAD(y, REC_PQ_Y);
894 
895 		XOR_ACC(p, REC_PQ_X);
896 		XOR_ACC(q, REC_PQ_Y);
897 
898 		/* Save Pxy */
899 		COPY(REC_PQ_X,  REC_PQ_T);
900 
901 		/* Calc X */
902 		MUL(mul[MUL_PQ_X], REC_PQ_X);
903 		MUL(mul[MUL_PQ_Y], REC_PQ_Y);
904 		XOR(REC_PQ_Y,  REC_PQ_X);
905 		STORE(x, REC_PQ_X);
906 
907 		/* Calc Y */
908 		XOR(REC_PQ_T,  REC_PQ_X);
909 		STORE(y, REC_PQ_X);
910 	}
911 }
912 
913 
914 /*
915  * Reconstruct two data columns using PQ parity
916  *
917  * @syn_method	raidz_syn_pq_abd()
918  * @rec_method	raidz_rec_pq_abd()
919  *
920  * @rr		RAIDZ row
921  * @tgtidx	array of missing data indexes
922  */
923 static raidz_inline int
924 raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
925 {
926 	size_t c;
927 	size_t dsize;
928 	abd_t *dabd;
929 	const size_t firstdc = rr->rr_firstdatacol;
930 	const size_t ncols = rr->rr_cols;
931 	const size_t x = tgtidx[TARGET_X];
932 	const size_t y = tgtidx[TARGET_Y];
933 	const size_t xsize = rr->rr_col[x].rc_size;
934 	const size_t ysize = rr->rr_col[y].rc_size;
935 	abd_t *xabd = rr->rr_col[x].rc_abd;
936 	abd_t *yabd = rr->rr_col[y].rc_abd;
937 	abd_t *tabds[2] = { xabd, yabd };
938 	abd_t *cabds[] = {
939 		rr->rr_col[CODE_P].rc_abd,
940 		rr->rr_col[CODE_Q].rc_abd
941 	};
942 
943 	if (xabd == NULL)
944 		return ((1 << CODE_P) | (1 << CODE_Q));
945 
946 	unsigned coeff[MUL_CNT];
947 	raidz_rec_pq_coeff(rr, tgtidx, coeff);
948 
949 	/*
950 	 * Check if some of targets is shorter then others
951 	 * In this case, shorter target needs to be replaced with
952 	 * new buffer so that syndrome can be calculated.
953 	 */
954 	if (ysize < xsize) {
955 		yabd = abd_alloc(xsize, B_FALSE);
956 		tabds[1] = yabd;
957 	}
958 
959 	raidz_math_begin();
960 
961 	/* Start with first data column if present */
962 	if (firstdc != x) {
963 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
964 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
965 	} else {
966 		raidz_zero(xabd, xsize);
967 		raidz_zero(yabd, xsize);
968 	}
969 
970 	/* generate q_syndrome */
971 	for (c = firstdc+1; c < ncols; c++) {
972 		if (c == x || c == y) {
973 			dabd = NULL;
974 			dsize = 0;
975 		} else {
976 			dabd = rr->rr_col[c].rc_abd;
977 			dsize = rr->rr_col[c].rc_size;
978 		}
979 
980 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
981 		    raidz_syn_pq_abd);
982 	}
983 
984 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
985 
986 	/* Copy shorter targets back to the original abd buffer */
987 	if (ysize < xsize)
988 		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
989 
990 	raidz_math_end();
991 
992 	if (ysize < xsize)
993 		abd_free(yabd);
994 
995 	return ((1 << CODE_P) | (1 << CODE_Q));
996 }
997 
998 
999 /*
1000  * Generate P and R syndromes
1001  *
1002  * @xc		array of pointers to syndrome columns
1003  * @dc		data column (NULL if missing)
1004  * @tsize	size of syndrome columns
1005  * @dsize	size of data column (0 if missing)
1006  */
1007 static void
1008 raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
1009     const size_t dsize)
1010 {
1011 	v_t *x = (v_t *)c[TARGET_X];
1012 	v_t *y = (v_t *)c[TARGET_Y];
1013 	const v_t *d = (const v_t *)dc;
1014 	const v_t * const dend = d + (dsize / sizeof (v_t));
1015 	const v_t * const yend = y + (tsize / sizeof (v_t));
1016 
1017 	SYN_PR_DEFINE();
1018 
1019 	MUL2_SETUP();
1020 
1021 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1022 		LOAD(d, SYN_PR_D);
1023 		P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
1024 		R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
1025 	}
1026 	for (; y < yend; y += SYN_STRIDE) {
1027 		R_SYNDROME(SYN_PR_X, y);
1028 	}
1029 }
1030 
1031 /*
1032  * Reconstruct data using PR parity and PR syndromes
1033  *
1034  * @tc		syndrome/result columns
1035  * @tsize	size of syndrome/result columns
1036  * @c		parity columns
1037  * @mul		array of multiplication constants
1038  */
1039 static void
1040 raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
1041     const unsigned *mul)
1042 {
1043 	v_t *x = (v_t *)t[TARGET_X];
1044 	v_t *y = (v_t *)t[TARGET_Y];
1045 	const v_t * const xend = x + (tsize / sizeof (v_t));
1046 	const v_t *p = (v_t *)c[CODE_P];
1047 	const v_t *q = (v_t *)c[CODE_Q];
1048 
1049 	REC_PR_DEFINE();
1050 
1051 	for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
1052 	    p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
1053 		LOAD(x, REC_PR_X);
1054 		LOAD(y, REC_PR_Y);
1055 		XOR_ACC(p, REC_PR_X);
1056 		XOR_ACC(q, REC_PR_Y);
1057 
1058 		/* Save Pxy */
1059 		COPY(REC_PR_X,  REC_PR_T);
1060 
1061 		/* Calc X */
1062 		MUL(mul[MUL_PR_X], REC_PR_X);
1063 		MUL(mul[MUL_PR_Y], REC_PR_Y);
1064 		XOR(REC_PR_Y,  REC_PR_X);
1065 		STORE(x, REC_PR_X);
1066 
1067 		/* Calc Y */
1068 		XOR(REC_PR_T,  REC_PR_X);
1069 		STORE(y, REC_PR_X);
1070 	}
1071 }
1072 
1073 
1074 /*
1075  * Reconstruct two data columns using PR parity
1076  *
1077  * @syn_method	raidz_syn_pr_abd()
1078  * @rec_method	raidz_rec_pr_abd()
1079  *
1080  * @rr		RAIDZ row
1081  * @tgtidx	array of missing data indexes
1082  */
1083 static raidz_inline int
1084 raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
1085 {
1086 	size_t c;
1087 	size_t dsize;
1088 	abd_t *dabd;
1089 	const size_t firstdc = rr->rr_firstdatacol;
1090 	const size_t ncols = rr->rr_cols;
1091 	const size_t x = tgtidx[0];
1092 	const size_t y = tgtidx[1];
1093 	const size_t xsize = rr->rr_col[x].rc_size;
1094 	const size_t ysize = rr->rr_col[y].rc_size;
1095 	abd_t *xabd = rr->rr_col[x].rc_abd;
1096 	abd_t *yabd = rr->rr_col[y].rc_abd;
1097 	abd_t *tabds[2] = { xabd, yabd };
1098 	abd_t *cabds[] = {
1099 		rr->rr_col[CODE_P].rc_abd,
1100 		rr->rr_col[CODE_R].rc_abd
1101 	};
1102 
1103 	if (xabd == NULL)
1104 		return ((1 << CODE_P) | (1 << CODE_R));
1105 
1106 	unsigned coeff[MUL_CNT];
1107 	raidz_rec_pr_coeff(rr, tgtidx, coeff);
1108 
1109 	/*
1110 	 * Check if some of targets are shorter then others.
1111 	 * They need to be replaced with a new buffer so that syndrome can
1112 	 * be calculated on full length.
1113 	 */
1114 	if (ysize < xsize) {
1115 		yabd = abd_alloc(xsize, B_FALSE);
1116 		tabds[1] = yabd;
1117 	}
1118 
1119 	raidz_math_begin();
1120 
1121 	/* Start with first data column if present */
1122 	if (firstdc != x) {
1123 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1124 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1125 	} else {
1126 		raidz_zero(xabd, xsize);
1127 		raidz_zero(yabd, xsize);
1128 	}
1129 
1130 	/* generate q_syndrome */
1131 	for (c = firstdc+1; c < ncols; c++) {
1132 		if (c == x || c == y) {
1133 			dabd = NULL;
1134 			dsize = 0;
1135 		} else {
1136 			dabd = rr->rr_col[c].rc_abd;
1137 			dsize = rr->rr_col[c].rc_size;
1138 		}
1139 
1140 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
1141 		    raidz_syn_pr_abd);
1142 	}
1143 
1144 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
1145 
1146 	/*
1147 	 * Copy shorter targets back to the original abd buffer
1148 	 */
1149 	if (ysize < xsize)
1150 		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1151 
1152 	raidz_math_end();
1153 
1154 	if (ysize < xsize)
1155 		abd_free(yabd);
1156 
1157 	return ((1 << CODE_P) | (1 << CODE_R));
1158 }
1159 
1160 
1161 /*
1162  * Generate Q and R syndromes
1163  *
1164  * @xc		array of pointers to syndrome columns
1165  * @dc		data column (NULL if missing)
1166  * @tsize	size of syndrome columns
1167  * @dsize	size of data column (0 if missing)
1168  */
1169 static void
1170 raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
1171     const size_t dsize)
1172 {
1173 	v_t *x = (v_t *)c[TARGET_X];
1174 	v_t *y = (v_t *)c[TARGET_Y];
1175 	const v_t * const xend = x + (tsize / sizeof (v_t));
1176 	const v_t *d = (const v_t *)dc;
1177 	const v_t * const dend = d + (dsize / sizeof (v_t));
1178 
1179 	SYN_QR_DEFINE();
1180 
1181 	MUL2_SETUP();
1182 
1183 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1184 		LOAD(d, SYN_PQ_D);
1185 		Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
1186 		R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
1187 	}
1188 	for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
1189 		Q_SYNDROME(SYN_QR_X, x);
1190 		R_SYNDROME(SYN_QR_X, y);
1191 	}
1192 }
1193 
1194 
1195 /*
1196  * Reconstruct data using QR parity and QR syndromes
1197  *
1198  * @tc		syndrome/result columns
1199  * @tsize	size of syndrome/result columns
1200  * @c		parity columns
1201  * @mul		array of multiplication constants
1202  */
1203 static void
1204 raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
1205     const unsigned *mul)
1206 {
1207 	v_t *x = (v_t *)t[TARGET_X];
1208 	v_t *y = (v_t *)t[TARGET_Y];
1209 	const v_t * const xend = x + (tsize / sizeof (v_t));
1210 	const v_t *p = (v_t *)c[CODE_P];
1211 	const v_t *q = (v_t *)c[CODE_Q];
1212 
1213 	REC_QR_DEFINE();
1214 
1215 	for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
1216 	    p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
1217 		LOAD(x, REC_QR_X);
1218 		LOAD(y, REC_QR_Y);
1219 
1220 		XOR_ACC(p, REC_QR_X);
1221 		XOR_ACC(q, REC_QR_Y);
1222 
1223 		/* Save Pxy */
1224 		COPY(REC_QR_X,  REC_QR_T);
1225 
1226 		/* Calc X */
1227 		MUL(mul[MUL_QR_XQ], REC_QR_X);	/* X = Q * xqm */
1228 		XOR(REC_QR_Y, REC_QR_X);	/* X = R ^ X   */
1229 		MUL(mul[MUL_QR_X], REC_QR_X);	/* X = X * xm  */
1230 		STORE(x, REC_QR_X);
1231 
1232 		/* Calc Y */
1233 		MUL(mul[MUL_QR_YQ], REC_QR_T);	/* X = Q * xqm */
1234 		XOR(REC_QR_Y, REC_QR_T);	/* X = R ^ X   */
1235 		MUL(mul[MUL_QR_Y], REC_QR_T);	/* X = X * xm  */
1236 		STORE(y, REC_QR_T);
1237 	}
1238 }
1239 
1240 
1241 /*
1242  * Reconstruct two data columns using QR parity
1243  *
1244  * @syn_method	raidz_syn_qr_abd()
1245  * @rec_method	raidz_rec_qr_abd()
1246  *
1247  * @rr		RAIDZ row
1248  * @tgtidx	array of missing data indexes
1249  */
1250 static raidz_inline int
1251 raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
1252 {
1253 	size_t c;
1254 	size_t dsize;
1255 	abd_t *dabd;
1256 	const size_t firstdc = rr->rr_firstdatacol;
1257 	const size_t ncols = rr->rr_cols;
1258 	const size_t x = tgtidx[TARGET_X];
1259 	const size_t y = tgtidx[TARGET_Y];
1260 	const size_t xsize = rr->rr_col[x].rc_size;
1261 	const size_t ysize = rr->rr_col[y].rc_size;
1262 	abd_t *xabd = rr->rr_col[x].rc_abd;
1263 	abd_t *yabd = rr->rr_col[y].rc_abd;
1264 	abd_t *tabds[2] = { xabd, yabd };
1265 	abd_t *cabds[] = {
1266 		rr->rr_col[CODE_Q].rc_abd,
1267 		rr->rr_col[CODE_R].rc_abd
1268 	};
1269 
1270 	if (xabd == NULL)
1271 		return ((1 << CODE_Q) | (1 << CODE_R));
1272 
1273 	unsigned coeff[MUL_CNT];
1274 	raidz_rec_qr_coeff(rr, tgtidx, coeff);
1275 
1276 	/*
1277 	 * Check if some of targets is shorter then others
1278 	 * In this case, shorter target needs to be replaced with
1279 	 * new buffer so that syndrome can be calculated.
1280 	 */
1281 	if (ysize < xsize) {
1282 		yabd = abd_alloc(xsize, B_FALSE);
1283 		tabds[1] = yabd;
1284 	}
1285 
1286 	raidz_math_begin();
1287 
1288 	/* Start with first data column if present */
1289 	if (firstdc != x) {
1290 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1291 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1292 	} else {
1293 		raidz_zero(xabd, xsize);
1294 		raidz_zero(yabd, xsize);
1295 	}
1296 
1297 	/* generate q_syndrome */
1298 	for (c = firstdc+1; c < ncols; c++) {
1299 		if (c == x || c == y) {
1300 			dabd = NULL;
1301 			dsize = 0;
1302 		} else {
1303 			dabd = rr->rr_col[c].rc_abd;
1304 			dsize = rr->rr_col[c].rc_size;
1305 		}
1306 
1307 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2,
1308 		    raidz_syn_qr_abd);
1309 	}
1310 
1311 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
1312 
1313 	/*
1314 	 * Copy shorter targets back to the original abd buffer
1315 	 */
1316 	if (ysize < xsize)
1317 		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1318 
1319 	raidz_math_end();
1320 
1321 	if (ysize < xsize)
1322 		abd_free(yabd);
1323 
1324 
1325 	return ((1 << CODE_Q) | (1 << CODE_R));
1326 }
1327 
1328 
1329 /*
1330  * Generate P, Q, and R syndromes
1331  *
1332  * @xc		array of pointers to syndrome columns
1333  * @dc		data column (NULL if missing)
1334  * @tsize	size of syndrome columns
1335  * @dsize	size of data column (0 if missing)
1336  */
1337 static void
1338 raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
1339     const size_t dsize)
1340 {
1341 	v_t *x = (v_t *)c[TARGET_X];
1342 	v_t *y = (v_t *)c[TARGET_Y];
1343 	v_t *z = (v_t *)c[TARGET_Z];
1344 	const v_t * const yend = y + (tsize / sizeof (v_t));
1345 	const v_t *d = (const v_t *)dc;
1346 	const v_t * const dend = d + (dsize / sizeof (v_t));
1347 
1348 	SYN_PQR_DEFINE();
1349 
1350 	MUL2_SETUP();
1351 
1352 	for (; d < dend;  d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
1353 	    z += SYN_STRIDE) {
1354 		LOAD(d, SYN_PQR_D);
1355 		P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x)
1356 		Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
1357 		R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
1358 	}
1359 	for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
1360 		Q_SYNDROME(SYN_PQR_X, y);
1361 		R_SYNDROME(SYN_PQR_X, z);
1362 	}
1363 }
1364 
1365 
1366 /*
1367  * Reconstruct data using PRQ parity and PQR syndromes
1368  *
1369  * @tc		syndrome/result columns
1370  * @tsize	size of syndrome/result columns
1371  * @c		parity columns
1372  * @mul		array of multiplication constants
1373  */
1374 static void
1375 raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
1376     const unsigned * const mul)
1377 {
1378 	v_t *x = (v_t *)t[TARGET_X];
1379 	v_t *y = (v_t *)t[TARGET_Y];
1380 	v_t *z = (v_t *)t[TARGET_Z];
1381 	const v_t * const xend = x + (tsize / sizeof (v_t));
1382 	const v_t *p = (v_t *)c[CODE_P];
1383 	const v_t *q = (v_t *)c[CODE_Q];
1384 	const v_t *r = (v_t *)c[CODE_R];
1385 
1386 	REC_PQR_DEFINE();
1387 
1388 	for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
1389 	    z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
1390 	    r += REC_PQR_STRIDE) {
1391 		LOAD(x, REC_PQR_X);
1392 		LOAD(y, REC_PQR_Y);
1393 		LOAD(z, REC_PQR_Z);
1394 
1395 		XOR_ACC(p, REC_PQR_X);
1396 		XOR_ACC(q, REC_PQR_Y);
1397 		XOR_ACC(r, REC_PQR_Z);
1398 
1399 		/* Save Pxyz and Qxyz */
1400 		COPY(REC_PQR_X, REC_PQR_XS);
1401 		COPY(REC_PQR_Y, REC_PQR_YS);
1402 
1403 		/* Calc X */
1404 		MUL(mul[MUL_PQR_XP], REC_PQR_X);	/* Xp = Pxyz * xp   */
1405 		MUL(mul[MUL_PQR_XQ], REC_PQR_Y);	/* Xq = Qxyz * xq   */
1406 		XOR(REC_PQR_Y, REC_PQR_X);
1407 		MUL(mul[MUL_PQR_XR], REC_PQR_Z);	/* Xr = Rxyz * xr   */
1408 		XOR(REC_PQR_Z, REC_PQR_X);		/* X = Xp + Xq + Xr */
1409 		STORE(x, REC_PQR_X);
1410 
1411 		/* Calc Y */
1412 		XOR(REC_PQR_X, REC_PQR_XS); 		/* Pyz = Pxyz + X */
1413 		MUL(mul[MUL_PQR_YU], REC_PQR_X);  	/* Xq = X * upd_q */
1414 		XOR(REC_PQR_X, REC_PQR_YS); 		/* Qyz = Qxyz + Xq */
1415 		COPY(REC_PQR_XS, REC_PQR_X);		/* restore Pyz */
1416 		MUL(mul[MUL_PQR_YP], REC_PQR_X);	/* Yp = Pyz * yp */
1417 		MUL(mul[MUL_PQR_YQ], REC_PQR_YS);	/* Yq = Qyz * yq */
1418 		XOR(REC_PQR_X, REC_PQR_YS); 		/* Y = Yp + Yq */
1419 		STORE(y, REC_PQR_YS);
1420 
1421 		/* Calc Z */
1422 		XOR(REC_PQR_XS, REC_PQR_YS);		/* Z = Pz = Pyz + Y */
1423 		STORE(z, REC_PQR_YS);
1424 	}
1425 }
1426 
1427 
1428 /*
1429  * Reconstruct three data columns using PQR parity
1430  *
1431  * @syn_method	raidz_syn_pqr_abd()
1432  * @rec_method	raidz_rec_pqr_abd()
1433  *
1434  * @rr		RAIDZ row
1435  * @tgtidx	array of missing data indexes
1436  */
1437 static raidz_inline int
1438 raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
1439 {
1440 	size_t c;
1441 	size_t dsize;
1442 	abd_t *dabd;
1443 	const size_t firstdc = rr->rr_firstdatacol;
1444 	const size_t ncols = rr->rr_cols;
1445 	const size_t x = tgtidx[TARGET_X];
1446 	const size_t y = tgtidx[TARGET_Y];
1447 	const size_t z = tgtidx[TARGET_Z];
1448 	const size_t xsize = rr->rr_col[x].rc_size;
1449 	const size_t ysize = rr->rr_col[y].rc_size;
1450 	const size_t zsize = rr->rr_col[z].rc_size;
1451 	abd_t *xabd = rr->rr_col[x].rc_abd;
1452 	abd_t *yabd = rr->rr_col[y].rc_abd;
1453 	abd_t *zabd = rr->rr_col[z].rc_abd;
1454 	abd_t *tabds[] = { xabd, yabd, zabd };
1455 	abd_t *cabds[] = {
1456 		rr->rr_col[CODE_P].rc_abd,
1457 		rr->rr_col[CODE_Q].rc_abd,
1458 		rr->rr_col[CODE_R].rc_abd
1459 	};
1460 
1461 	if (xabd == NULL)
1462 		return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1463 
1464 	unsigned coeff[MUL_CNT];
1465 	raidz_rec_pqr_coeff(rr, tgtidx, coeff);
1466 
1467 	/*
1468 	 * Check if some of targets is shorter then others
1469 	 * In this case, shorter target needs to be replaced with
1470 	 * new buffer so that syndrome can be calculated.
1471 	 */
1472 	if (ysize < xsize) {
1473 		yabd = abd_alloc(xsize, B_FALSE);
1474 		tabds[1] = yabd;
1475 	}
1476 	if (zsize < xsize) {
1477 		zabd = abd_alloc(xsize, B_FALSE);
1478 		tabds[2] = zabd;
1479 	}
1480 
1481 	raidz_math_begin();
1482 
1483 	/* Start with first data column if present */
1484 	if (firstdc != x) {
1485 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1486 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1487 		raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, 0, xsize);
1488 	} else {
1489 		raidz_zero(xabd, xsize);
1490 		raidz_zero(yabd, xsize);
1491 		raidz_zero(zabd, xsize);
1492 	}
1493 
1494 	/* generate q_syndrome */
1495 	for (c = firstdc+1; c < ncols; c++) {
1496 		if (c == x || c == y || c == z) {
1497 			dabd = NULL;
1498 			dsize = 0;
1499 		} else {
1500 			dabd = rr->rr_col[c].rc_abd;
1501 			dsize = rr->rr_col[c].rc_size;
1502 		}
1503 
1504 		abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 3,
1505 		    raidz_syn_pqr_abd);
1506 	}
1507 
1508 	abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
1509 
1510 	/*
1511 	 * Copy shorter targets back to the original abd buffer
1512 	 */
1513 	if (ysize < xsize)
1514 		raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize);
1515 	if (zsize < xsize)
1516 		raidz_copy(rr->rr_col[z].rc_abd, zabd, 0, zsize);
1517 
1518 	raidz_math_end();
1519 
1520 	if (ysize < xsize)
1521 		abd_free(yabd);
1522 	if (zsize < xsize)
1523 		abd_free(zabd);
1524 
1525 	return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1526 }
1527 
1528 #endif /* _VDEV_RAIDZ_MATH_IMPL_H */
1529