xref: /freebsd/sys/contrib/openzfs/module/zfs/vdev_raidz_math_impl.h (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23  */
24 
25 #ifndef _VDEV_RAIDZ_MATH_IMPL_H
26 #define	_VDEV_RAIDZ_MATH_IMPL_H
27 
28 #include <sys/types.h>
29 
/*
 * raidz_inline requests unconditional inlining through the compiler's
 * always_inline attribute.
 */
#define	raidz_inline inline __attribute__((always_inline))
/* Provide noinline only when the build environment has not defined it. */
#ifndef noinline
#define	noinline __attribute__((noinline))
#endif
34 
35 /*
36  * Functions calculate multiplication constants for data reconstruction.
37  * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
38  * used parity columns for reconstruction.
39  * @rm			RAIDZ map
40  * @tgtidx		array of missing data indexes
41  * @coeff		output array of coefficients. Array must be provided by
42  *         		user and must hold minimum MUL_CNT values.
43  */
44 static noinline void
45 raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
46 {
47 	const unsigned ncols = raidz_ncols(rm);
48 	const unsigned x = tgtidx[TARGET_X];
49 
50 	coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
51 }
52 
53 static noinline void
54 raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
55 {
56 	const unsigned ncols = raidz_ncols(rm);
57 	const unsigned x = tgtidx[TARGET_X];
58 
59 	coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
60 }
61 
62 static noinline void
63 raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
64 {
65 	const unsigned ncols = raidz_ncols(rm);
66 	const unsigned x = tgtidx[TARGET_X];
67 	const unsigned y = tgtidx[TARGET_Y];
68 	gf_t a, b, e;
69 
70 	a = gf_exp2(x + 255 - y);
71 	b = gf_exp2(255 - (ncols - x - 1));
72 	e = a ^ 0x01;
73 
74 	coeff[MUL_PQ_X] = gf_div(a, e);
75 	coeff[MUL_PQ_Y] = gf_div(b, e);
76 }
77 
78 static noinline void
79 raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
80 {
81 	const unsigned ncols = raidz_ncols(rm);
82 	const unsigned x = tgtidx[TARGET_X];
83 	const unsigned y = tgtidx[TARGET_Y];
84 
85 	gf_t a, b, e;
86 
87 	a = gf_exp4(x + 255 - y);
88 	b = gf_exp4(255 - (ncols - x - 1));
89 	e = a ^ 0x01;
90 
91 	coeff[MUL_PR_X] = gf_div(a, e);
92 	coeff[MUL_PR_Y] = gf_div(b, e);
93 }
94 
95 static noinline void
96 raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
97 {
98 	const unsigned ncols = raidz_ncols(rm);
99 	const unsigned x = tgtidx[TARGET_X];
100 	const unsigned y = tgtidx[TARGET_Y];
101 
102 	gf_t nx, ny, nxxy, nxyy, d;
103 
104 	nx = gf_exp2(ncols - x - 1);
105 	ny = gf_exp2(ncols - y - 1);
106 	nxxy = gf_mul(gf_mul(nx, nx), ny);
107 	nxyy = gf_mul(gf_mul(nx, ny), ny);
108 	d = nxxy ^ nxyy;
109 
110 	coeff[MUL_QR_XQ] = ny;
111 	coeff[MUL_QR_X]	= gf_div(ny, d);
112 	coeff[MUL_QR_YQ] = nx;
113 	coeff[MUL_QR_Y]	= gf_div(nx, d);
114 }
115 
116 static noinline void
117 raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
118 {
119 	const unsigned ncols = raidz_ncols(rm);
120 	const unsigned x = tgtidx[TARGET_X];
121 	const unsigned y = tgtidx[TARGET_Y];
122 	const unsigned z = tgtidx[TARGET_Z];
123 
124 	gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
125 
126 	nx = gf_exp2(ncols - x - 1);
127 	ny = gf_exp2(ncols - y - 1);
128 	nz = gf_exp2(ncols - z - 1);
129 
130 	nxx = gf_exp4(ncols - x - 1);
131 	nyy = gf_exp4(ncols - y - 1);
132 	nzz = gf_exp4(ncols - z - 1);
133 
134 	nyyz = gf_mul(gf_mul(ny, nz), ny);
135 	nyzz = gf_mul(nzz, ny);
136 
137 	xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
138 	    gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^  nyzz;
139 
140 	yd = gf_inv(ny ^ nz);
141 
142 	coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
143 	coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
144 	coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
145 	coeff[MUL_PQR_YU] = nx;
146 	coeff[MUL_PQR_YP] = gf_mul(nz, yd);
147 	coeff[MUL_PQR_YQ] = yd;
148 }
149 
150 /*
151  * Method for zeroing a buffer (can be implemented using SIMD).
152  * This method is used by multiple for gen/rec functions.
153  *
154  * @dc		Destination buffer
155  * @dsize	Destination buffer size
156  * @private	Unused
157  */
158 static int
159 raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
160 {
161 	v_t *dst = (v_t *)dc;
162 	size_t i;
163 
164 	ZERO_DEFINE();
165 
166 	(void) private; /* unused */
167 
168 	ZERO(ZERO_D);
169 
170 	for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
171 		STORE(dst + i, ZERO_D);
172 		STORE(dst + i + ZERO_STRIDE, ZERO_D);
173 	}
174 
175 	return (0);
176 }
177 
/*
 * Apply raidz_zero_abd_cb() to @dabd through abd_iterate_func()
 * (offset 0, length @size).
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * "raidz_zero(a, n);" is safe inside an unbraced if/else.
 */
#define	raidz_zero(dabd, size)						\
do {									\
	abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL);	\
} while (0)
182 
183 /*
184  * Method for copying two buffers (can be implemented using SIMD).
185  * This method is used by multiple for gen/rec functions.
186  *
187  * @dc		Destination buffer
188  * @sc		Source buffer
189  * @dsize	Destination buffer size
190  * @ssize	Source buffer size
191  * @private	Unused
192  */
193 static int
194 raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
195 {
196 	v_t *dst = (v_t *)dc;
197 	const v_t *src = (v_t *)sc;
198 	size_t i;
199 
200 	COPY_DEFINE();
201 
202 	(void) private; /* unused */
203 
204 	for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
205 		LOAD(src + i, COPY_D);
206 		STORE(dst + i, COPY_D);
207 
208 		LOAD(src + i + COPY_STRIDE, COPY_D);
209 		STORE(dst + i + COPY_STRIDE, COPY_D);
210 	}
211 
212 	return (0);
213 }
214 
215 
/*
 * Apply raidz_copy_abd_cb() to @dabd and @sabd through abd_iterate_func2()
 * (both offsets 0, length @size).
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * "raidz_copy(d, s, n);" is safe inside an unbraced if/else.
 */
#define	raidz_copy(dabd, sabd, size)					\
do {									\
	abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
} while (0)
220 
221 /*
222  * Method for adding (XORing) two buffers.
223  * Source and destination are XORed together and result is stored in
224  * destination buffer. This method is used by multiple for gen/rec functions.
225  *
226  * @dc		Destination buffer
227  * @sc		Source buffer
228  * @dsize	Destination buffer size
229  * @ssize	Source buffer size
230  * @private	Unused
231  */
232 static int
233 raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
234 {
235 	v_t *dst = (v_t *)dc;
236 	const v_t *src = (v_t *)sc;
237 	size_t i;
238 
239 	ADD_DEFINE();
240 
241 	(void) private; /* unused */
242 
243 	for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
244 		LOAD(dst + i, ADD_D);
245 		XOR_ACC(src + i, ADD_D);
246 		STORE(dst + i, ADD_D);
247 
248 		LOAD(dst + i + ADD_STRIDE, ADD_D);
249 		XOR_ACC(src + i + ADD_STRIDE, ADD_D);
250 		STORE(dst + i + ADD_STRIDE, ADD_D);
251 	}
252 
253 	return (0);
254 }
255 
/*
 * Apply raidz_add_abd_cb() to @dabd and @sabd through abd_iterate_func2()
 * (both offsets 0, length @size).
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * "raidz_add(d, s, n);" is safe inside an unbraced if/else.
 */
#define	raidz_add(dabd, sabd, size)					\
do {									\
	abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
} while (0)
260 
261 /*
262  * Method for multiplying a buffer with a constant in GF(2^8).
263  * Symbols from buffer are multiplied by a constant and result is stored
264  * back in the same buffer.
265  *
266  * @dc		In/Out data buffer.
267  * @size	Size of the buffer
268  * @private	pointer to the multiplication constant (unsigned)
269  */
270 static int
271 raidz_mul_abd_cb(void *dc, size_t size, void *private)
272 {
273 	const unsigned mul = *((unsigned *)private);
274 	v_t *d = (v_t *)dc;
275 	size_t i;
276 
277 	MUL_DEFINE();
278 
279 	for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
280 		LOAD(d + i, MUL_D);
281 		MUL(mul, MUL_D);
282 		STORE(d + i, MUL_D);
283 
284 		LOAD(d + i + MUL_STRIDE, MUL_D);
285 		MUL(mul, MUL_D);
286 		STORE(d + i + MUL_STRIDE, MUL_D);
287 	}
288 
289 	return (0);
290 }
291 
292 
/*
 * Syndrome generation/update macros
 *
 * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
 *
 * Arguments used below:
 *   D	register set that already holds the data element(s)
 *   T	scratch register set, overwritten by every macro
 *   t	address of the parity/syndrome element(s) to update in place
 */

/* t = t ^ D: load t into T, XOR D into T, store T back to t. */
#define	P_D_SYNDROME(D, T, t)		\
{					\
	LOAD((t), T);			\
	XOR(D, T);			\
	STORE((t), T);			\
}

/* t = MUL2(t) ^ D: as P_D_SYNDROME, with MUL2() applied before the XOR. */
#define	Q_D_SYNDROME(D, T, t)		\
{					\
	LOAD((t), T);			\
	MUL2(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
}

/* t = MUL2(t): update step used where there is no data to add. */
#define	Q_SYNDROME(T, t)		\
{					\
	LOAD((t), T);			\
	MUL2(T);			\
	STORE((t), T);			\
}

/* t = MUL4(t) ^ D: as P_D_SYNDROME, with MUL4() applied before the XOR. */
#define	R_D_SYNDROME(D, T, t)		\
{					\
	LOAD((t), T);			\
	MUL4(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
}

/* t = MUL4(t): update step used where there is no data to add. */
#define	R_SYNDROME(T, t)		\
{					\
	LOAD((t), T);			\
	MUL4(T);			\
	STORE((t), T);			\
}
334 
335 
336 /*
337  * PARITY CALCULATION
338  *
339  * Macros *_SYNDROME are used for parity/syndrome calculation.
340  * *_D_SYNDROME() macros are used to calculate syndrome between 0 and
341  * length of data column, and *_SYNDROME() macros are only for updating
342  * the parity/syndrome if data column is shorter.
343  *
344  * P parity is calculated using raidz_add_abd().
345  */
346 
347 /*
348  * Generate P parity (RAIDZ1)
349  *
350  * @rm	RAIDZ map
351  */
352 static raidz_inline void
353 raidz_generate_p_impl(raidz_map_t * const rm)
354 {
355 	size_t c;
356 	const size_t ncols = raidz_ncols(rm);
357 	const size_t psize = rm->rm_col[CODE_P].rc_size;
358 	abd_t *pabd = rm->rm_col[CODE_P].rc_abd;
359 	size_t size;
360 	abd_t *dabd;
361 
362 	raidz_math_begin();
363 
364 	/* start with first data column */
365 	raidz_copy(pabd, rm->rm_col[1].rc_abd, psize);
366 
367 	for (c = 2; c < ncols; c++) {
368 		dabd = rm->rm_col[c].rc_abd;
369 		size = rm->rm_col[c].rc_size;
370 
371 		/* add data column */
372 		raidz_add(pabd, dabd, size);
373 	}
374 
375 	raidz_math_end();
376 }
377 
378 
379 /*
380  * Generate PQ parity (RAIDZ2)
381  * The function is called per data column.
382  *
383  * @c		array of pointers to parity (code) columns
384  * @dc		pointer to data column
385  * @csize	size of parity columns
386  * @dsize	size of data column
387  */
388 static void
389 raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
390     const size_t dsize)
391 {
392 	v_t *p = (v_t *)c[0];
393 	v_t *q = (v_t *)c[1];
394 	const v_t *d = (const v_t *)dc;
395 	const v_t * const dend = d + (dsize / sizeof (v_t));
396 	const v_t * const qend = q + (csize / sizeof (v_t));
397 
398 	GEN_PQ_DEFINE();
399 
400 	MUL2_SETUP();
401 
402 	for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
403 	    q += GEN_PQ_STRIDE) {
404 		LOAD(d, GEN_PQ_D);
405 		P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
406 		Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
407 	}
408 	for (; q < qend; q += GEN_PQ_STRIDE) {
409 		Q_SYNDROME(GEN_PQ_C, q);
410 	}
411 }
412 
413 
414 /*
415  * Generate PQ parity (RAIDZ2)
416  *
417  * @rm	RAIDZ map
418  */
419 static raidz_inline void
420 raidz_generate_pq_impl(raidz_map_t * const rm)
421 {
422 	size_t c;
423 	const size_t ncols = raidz_ncols(rm);
424 	const size_t csize = rm->rm_col[CODE_P].rc_size;
425 	size_t dsize;
426 	abd_t *dabd;
427 	abd_t *cabds[] = {
428 		rm->rm_col[CODE_P].rc_abd,
429 		rm->rm_col[CODE_Q].rc_abd
430 	};
431 
432 	raidz_math_begin();
433 
434 	raidz_copy(cabds[CODE_P], rm->rm_col[2].rc_abd, csize);
435 	raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize);
436 
437 	for (c = 3; c < ncols; c++) {
438 		dabd = rm->rm_col[c].rc_abd;
439 		dsize = rm->rm_col[c].rc_size;
440 
441 		abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
442 		    raidz_gen_pq_add);
443 	}
444 
445 	raidz_math_end();
446 }
447 
448 
449 /*
450  * Generate PQR parity (RAIDZ3)
451  * The function is called per data column.
452  *
453  * @c		array of pointers to parity (code) columns
454  * @dc		pointer to data column
455  * @csize	size of parity columns
456  * @dsize	size of data column
457  */
458 static void
459 raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
460     const size_t dsize)
461 {
462 	v_t *p = (v_t *)c[0];
463 	v_t *q = (v_t *)c[1];
464 	v_t *r = (v_t *)c[CODE_R];
465 	const v_t *d = (const v_t *)dc;
466 	const v_t * const dend = d + (dsize / sizeof (v_t));
467 	const v_t * const qend = q + (csize / sizeof (v_t));
468 
469 	GEN_PQR_DEFINE();
470 
471 	MUL2_SETUP();
472 
473 	for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
474 	    q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
475 		LOAD(d, GEN_PQR_D);
476 		P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
477 		Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
478 		R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
479 	}
480 	for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
481 		Q_SYNDROME(GEN_PQR_C, q);
482 		R_SYNDROME(GEN_PQR_C, r);
483 	}
484 }
485 
486 
487 /*
488  * Generate PQR parity (RAIDZ2)
489  *
490  * @rm	RAIDZ map
491  */
492 static raidz_inline void
493 raidz_generate_pqr_impl(raidz_map_t * const rm)
494 {
495 	size_t c;
496 	const size_t ncols = raidz_ncols(rm);
497 	const size_t csize = rm->rm_col[CODE_P].rc_size;
498 	size_t dsize;
499 	abd_t *dabd;
500 	abd_t *cabds[] = {
501 		rm->rm_col[CODE_P].rc_abd,
502 		rm->rm_col[CODE_Q].rc_abd,
503 		rm->rm_col[CODE_R].rc_abd
504 	};
505 
506 	raidz_math_begin();
507 
508 	raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize);
509 	raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize);
510 	raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize);
511 
512 	for (c = 4; c < ncols; c++) {
513 		dabd = rm->rm_col[c].rc_abd;
514 		dsize = rm->rm_col[c].rc_size;
515 
516 		abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
517 		    raidz_gen_pqr_add);
518 	}
519 
520 	raidz_math_end();
521 }
522 
523 
524 /*
525  * DATA RECONSTRUCTION
526  *
527  * Data reconstruction process consists of two phases:
528  * 	- Syndrome calculation
529  * 	- Data reconstruction
530  *
531  * Syndrome is calculated by generating parity using available data columns
532  * and zeros in places of erasure. Existing parity is added to corresponding
533  * syndrome value to obtain the [P|Q|R]syn values from equation:
534  * 	P = Psyn + Dx + Dy + Dz
535  * 	Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
536  * 	R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
537  *
538  * For data reconstruction phase, the corresponding equations are solved
539  * for missing data (Dx, Dy, Dz). This generally involves multiplying known
540  * symbols by an coefficient and adding them together. The multiplication
541  * constant coefficients are calculated ahead of the operation in
542  * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
543  *
544  * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
545  * and "short" columns.
546  * For this reason, reconstruction is performed in minimum of
547  * two steps. First, from offset 0 to short_size, then from short_size to
548  * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
549  * over both ranges. The split also enables removal of conditional expressions
550  * from loop bodies, improving throughput of SIMD implementations.
551  * For the best performance, all functions marked with raidz_inline attribute
552  * must be inlined by compiler.
553  *
554  *    parity          data
555  *    columns         columns
556  * <----------> <------------------>
557  *                   x       y  <----+ missing columns (x, y)
558  *                   |       |
559  * +---+---+---+---+-v-+---+-v-+---+   ^ 0
560  * |   |   |   |   |   |   |   |   |   |
561  * |   |   |   |   |   |   |   |   |   |
562  * | P | Q | R | D | D | D | D | D |   |
563  * |   |   |   | 0 | 1 | 2 | 3 | 4 |   |
564  * |   |   |   |   |   |   |   |   |   v
565  * |   |   |   |   |   +---+---+---+   ^ short_size
566  * |   |   |   |   |   |               |
567  * +---+---+---+---+---+               v big_size
568  * <------------------> <---------->
569  *      big columns     short columns
570  *
571  */
572 
573 
574 
575 
576 /*
577  * Reconstruct single data column using P parity
578  *
579  * @syn_method	raidz_add_abd()
580  * @rec_method	not applicable
581  *
582  * @rm		RAIDZ map
583  * @tgtidx	array of missing data indexes
584  */
585 static raidz_inline int
586 raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
587 {
588 	size_t c;
589 	const size_t firstdc = raidz_parity(rm);
590 	const size_t ncols = raidz_ncols(rm);
591 	const size_t x = tgtidx[TARGET_X];
592 	const size_t xsize = rm->rm_col[x].rc_size;
593 	abd_t *xabd = rm->rm_col[x].rc_abd;
594 	size_t size;
595 	abd_t *dabd;
596 
597 	raidz_math_begin();
598 
599 	/* copy P into target */
600 	raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize);
601 
602 	/* generate p_syndrome */
603 	for (c = firstdc; c < ncols; c++) {
604 		if (c == x)
605 			continue;
606 
607 		dabd = rm->rm_col[c].rc_abd;
608 		size = MIN(rm->rm_col[c].rc_size, xsize);
609 
610 		raidz_add(xabd, dabd, size);
611 	}
612 
613 	raidz_math_end();
614 
615 	return (1 << CODE_P);
616 }
617 
618 
619 /*
620  * Generate Q syndrome (Qsyn)
621  *
622  * @xc		array of pointers to syndrome columns
623  * @dc		data column (NULL if missing)
624  * @xsize	size of syndrome columns
625  * @dsize	size of data column (0 if missing)
626  */
627 static void
628 raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
629     const size_t dsize)
630 {
631 	v_t *x = (v_t *)xc[TARGET_X];
632 	const v_t *d = (const v_t *)dc;
633 	const v_t * const dend = d + (dsize / sizeof (v_t));
634 	const v_t * const xend = x + (xsize / sizeof (v_t));
635 
636 	SYN_Q_DEFINE();
637 
638 	MUL2_SETUP();
639 
640 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
641 		LOAD(d, SYN_Q_D);
642 		Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
643 	}
644 	for (; x < xend; x += SYN_STRIDE) {
645 		Q_SYNDROME(SYN_Q_X, x);
646 	}
647 }
648 
649 
650 /*
651  * Reconstruct single data column using Q parity
652  *
653  * @syn_method	raidz_add_abd()
654  * @rec_method	raidz_mul_abd_cb()
655  *
656  * @rm		RAIDZ map
657  * @tgtidx	array of missing data indexes
658  */
659 static raidz_inline int
660 raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
661 {
662 	size_t c;
663 	size_t dsize;
664 	abd_t *dabd;
665 	const size_t firstdc = raidz_parity(rm);
666 	const size_t ncols = raidz_ncols(rm);
667 	const size_t x = tgtidx[TARGET_X];
668 	abd_t *xabd = rm->rm_col[x].rc_abd;
669 	const size_t xsize = rm->rm_col[x].rc_size;
670 	abd_t *tabds[] = { xabd };
671 
672 	unsigned coeff[MUL_CNT];
673 	raidz_rec_q_coeff(rm, tgtidx, coeff);
674 
675 	raidz_math_begin();
676 
677 	/* Start with first data column if present */
678 	if (firstdc != x) {
679 		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
680 	} else {
681 		raidz_zero(xabd, xsize);
682 	}
683 
684 	/* generate q_syndrome */
685 	for (c = firstdc+1; c < ncols; c++) {
686 		if (c == x) {
687 			dabd = NULL;
688 			dsize = 0;
689 		} else {
690 			dabd = rm->rm_col[c].rc_abd;
691 			dsize = rm->rm_col[c].rc_size;
692 		}
693 
694 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
695 		    raidz_syn_q_abd);
696 	}
697 
698 	/* add Q to the syndrome */
699 	raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize);
700 
701 	/* transform the syndrome */
702 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
703 
704 	raidz_math_end();
705 
706 	return (1 << CODE_Q);
707 }
708 
709 
710 /*
711  * Generate R syndrome (Rsyn)
712  *
713  * @xc		array of pointers to syndrome columns
714  * @dc		data column (NULL if missing)
715  * @tsize	size of syndrome columns
716  * @dsize	size of data column (0 if missing)
717  */
718 static void
719 raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
720     const size_t dsize)
721 {
722 	v_t *x = (v_t *)xc[TARGET_X];
723 	const v_t *d = (const v_t *)dc;
724 	const v_t * const dend = d + (dsize / sizeof (v_t));
725 	const v_t * const xend = x + (tsize / sizeof (v_t));
726 
727 	SYN_R_DEFINE();
728 
729 	MUL2_SETUP();
730 
731 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
732 		LOAD(d, SYN_R_D);
733 		R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
734 	}
735 	for (; x < xend; x += SYN_STRIDE) {
736 		R_SYNDROME(SYN_R_X, x);
737 	}
738 }
739 
740 
741 /*
742  * Reconstruct single data column using R parity
743  *
744  * @syn_method	raidz_add_abd()
745  * @rec_method	raidz_mul_abd_cb()
746  *
747  * @rm		RAIDZ map
748  * @tgtidx	array of missing data indexes
749  */
750 static raidz_inline int
751 raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
752 {
753 	size_t c;
754 	size_t dsize;
755 	abd_t *dabd;
756 	const size_t firstdc = raidz_parity(rm);
757 	const size_t ncols = raidz_ncols(rm);
758 	const size_t x = tgtidx[TARGET_X];
759 	const size_t xsize = rm->rm_col[x].rc_size;
760 	abd_t *xabd = rm->rm_col[x].rc_abd;
761 	abd_t *tabds[] = { xabd };
762 
763 	unsigned coeff[MUL_CNT];
764 	raidz_rec_r_coeff(rm, tgtidx, coeff);
765 
766 	raidz_math_begin();
767 
768 	/* Start with first data column if present */
769 	if (firstdc != x) {
770 		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
771 	} else {
772 		raidz_zero(xabd, xsize);
773 	}
774 
775 
776 	/* generate q_syndrome */
777 	for (c = firstdc+1; c < ncols; c++) {
778 		if (c == x) {
779 			dabd = NULL;
780 			dsize = 0;
781 		} else {
782 			dabd = rm->rm_col[c].rc_abd;
783 			dsize = rm->rm_col[c].rc_size;
784 		}
785 
786 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
787 		    raidz_syn_r_abd);
788 	}
789 
790 	/* add R to the syndrome */
791 	raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize);
792 
793 	/* transform the syndrome */
794 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
795 
796 	raidz_math_end();
797 
798 	return (1 << CODE_R);
799 }
800 
801 
802 /*
803  * Generate P and Q syndromes
804  *
805  * @xc		array of pointers to syndrome columns
806  * @dc		data column (NULL if missing)
807  * @tsize	size of syndrome columns
808  * @dsize	size of data column (0 if missing)
809  */
810 static void
811 raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
812     const size_t dsize)
813 {
814 	v_t *x = (v_t *)tc[TARGET_X];
815 	v_t *y = (v_t *)tc[TARGET_Y];
816 	const v_t *d = (const v_t *)dc;
817 	const v_t * const dend = d + (dsize / sizeof (v_t));
818 	const v_t * const yend = y + (tsize / sizeof (v_t));
819 
820 	SYN_PQ_DEFINE();
821 
822 	MUL2_SETUP();
823 
824 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
825 		LOAD(d, SYN_PQ_D);
826 		P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
827 		Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
828 	}
829 	for (; y < yend; y += SYN_STRIDE) {
830 		Q_SYNDROME(SYN_PQ_X, y);
831 	}
832 }
833 
834 /*
835  * Reconstruct data using PQ parity and PQ syndromes
836  *
837  * @tc		syndrome/result columns
838  * @tsize	size of syndrome/result columns
839  * @c		parity columns
840  * @mul		array of multiplication constants
841  */
842 static void
843 raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
844     const unsigned *mul)
845 {
846 	v_t *x = (v_t *)tc[TARGET_X];
847 	v_t *y = (v_t *)tc[TARGET_Y];
848 	const v_t * const xend = x + (tsize / sizeof (v_t));
849 	const v_t *p = (v_t *)c[CODE_P];
850 	const v_t *q = (v_t *)c[CODE_Q];
851 
852 	REC_PQ_DEFINE();
853 
854 	for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
855 	    p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
856 		LOAD(x, REC_PQ_X);
857 		LOAD(y, REC_PQ_Y);
858 
859 		XOR_ACC(p, REC_PQ_X);
860 		XOR_ACC(q, REC_PQ_Y);
861 
862 		/* Save Pxy */
863 		COPY(REC_PQ_X,  REC_PQ_T);
864 
865 		/* Calc X */
866 		MUL(mul[MUL_PQ_X], REC_PQ_X);
867 		MUL(mul[MUL_PQ_Y], REC_PQ_Y);
868 		XOR(REC_PQ_Y,  REC_PQ_X);
869 		STORE(x, REC_PQ_X);
870 
871 		/* Calc Y */
872 		XOR(REC_PQ_T,  REC_PQ_X);
873 		STORE(y, REC_PQ_X);
874 	}
875 }
876 
877 
878 /*
879  * Reconstruct two data columns using PQ parity
880  *
881  * @syn_method	raidz_syn_pq_abd()
882  * @rec_method	raidz_rec_pq_abd()
883  *
884  * @rm		RAIDZ map
885  * @tgtidx	array of missing data indexes
886  */
887 static raidz_inline int
888 raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
889 {
890 	size_t c;
891 	size_t dsize;
892 	abd_t *dabd;
893 	const size_t firstdc = raidz_parity(rm);
894 	const size_t ncols = raidz_ncols(rm);
895 	const size_t x = tgtidx[TARGET_X];
896 	const size_t y = tgtidx[TARGET_Y];
897 	const size_t xsize = rm->rm_col[x].rc_size;
898 	const size_t ysize = rm->rm_col[y].rc_size;
899 	abd_t *xabd = rm->rm_col[x].rc_abd;
900 	abd_t *yabd = rm->rm_col[y].rc_abd;
901 	abd_t *tabds[2] = { xabd, yabd };
902 	abd_t *cabds[] = {
903 		rm->rm_col[CODE_P].rc_abd,
904 		rm->rm_col[CODE_Q].rc_abd
905 	};
906 
907 	unsigned coeff[MUL_CNT];
908 	raidz_rec_pq_coeff(rm, tgtidx, coeff);
909 
910 	/*
911 	 * Check if some of targets is shorter then others
912 	 * In this case, shorter target needs to be replaced with
913 	 * new buffer so that syndrome can be calculated.
914 	 */
915 	if (ysize < xsize) {
916 		yabd = abd_alloc(xsize, B_FALSE);
917 		tabds[1] = yabd;
918 	}
919 
920 	raidz_math_begin();
921 
922 	/* Start with first data column if present */
923 	if (firstdc != x) {
924 		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
925 		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
926 	} else {
927 		raidz_zero(xabd, xsize);
928 		raidz_zero(yabd, xsize);
929 	}
930 
931 	/* generate q_syndrome */
932 	for (c = firstdc+1; c < ncols; c++) {
933 		if (c == x || c == y) {
934 			dabd = NULL;
935 			dsize = 0;
936 		} else {
937 			dabd = rm->rm_col[c].rc_abd;
938 			dsize = rm->rm_col[c].rc_size;
939 		}
940 
941 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
942 		    raidz_syn_pq_abd);
943 	}
944 
945 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
946 
947 	/* Copy shorter targets back to the original abd buffer */
948 	if (ysize < xsize)
949 		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
950 
951 	raidz_math_end();
952 
953 	if (ysize < xsize)
954 		abd_free(yabd);
955 
956 	return ((1 << CODE_P) | (1 << CODE_Q));
957 }
958 
959 
960 /*
961  * Generate P and R syndromes
962  *
963  * @xc		array of pointers to syndrome columns
964  * @dc		data column (NULL if missing)
965  * @tsize	size of syndrome columns
966  * @dsize	size of data column (0 if missing)
967  */
968 static void
969 raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
970     const size_t dsize)
971 {
972 	v_t *x = (v_t *)c[TARGET_X];
973 	v_t *y = (v_t *)c[TARGET_Y];
974 	const v_t *d = (const v_t *)dc;
975 	const v_t * const dend = d + (dsize / sizeof (v_t));
976 	const v_t * const yend = y + (tsize / sizeof (v_t));
977 
978 	SYN_PR_DEFINE();
979 
980 	MUL2_SETUP();
981 
982 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
983 		LOAD(d, SYN_PR_D);
984 		P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
985 		R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
986 	}
987 	for (; y < yend; y += SYN_STRIDE) {
988 		R_SYNDROME(SYN_PR_X, y);
989 	}
990 }
991 
992 /*
993  * Reconstruct data using PR parity and PR syndromes
994  *
995  * @tc		syndrome/result columns
996  * @tsize	size of syndrome/result columns
997  * @c		parity columns
998  * @mul		array of multiplication constants
999  */
1000 static void
1001 raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
1002     const unsigned *mul)
1003 {
1004 	v_t *x = (v_t *)t[TARGET_X];
1005 	v_t *y = (v_t *)t[TARGET_Y];
1006 	const v_t * const xend = x + (tsize / sizeof (v_t));
1007 	const v_t *p = (v_t *)c[CODE_P];
1008 	const v_t *q = (v_t *)c[CODE_Q];
1009 
1010 	REC_PR_DEFINE();
1011 
1012 	for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
1013 	    p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
1014 		LOAD(x, REC_PR_X);
1015 		LOAD(y, REC_PR_Y);
1016 		XOR_ACC(p, REC_PR_X);
1017 		XOR_ACC(q, REC_PR_Y);
1018 
1019 		/* Save Pxy */
1020 		COPY(REC_PR_X,  REC_PR_T);
1021 
1022 		/* Calc X */
1023 		MUL(mul[MUL_PR_X], REC_PR_X);
1024 		MUL(mul[MUL_PR_Y], REC_PR_Y);
1025 		XOR(REC_PR_Y,  REC_PR_X);
1026 		STORE(x, REC_PR_X);
1027 
1028 		/* Calc Y */
1029 		XOR(REC_PR_T,  REC_PR_X);
1030 		STORE(y, REC_PR_X);
1031 	}
1032 }
1033 
1034 
1035 /*
1036  * Reconstruct two data columns using PR parity
1037  *
1038  * @syn_method	raidz_syn_pr_abd()
1039  * @rec_method	raidz_rec_pr_abd()
1040  *
1041  * @rm		RAIDZ map
1042  * @tgtidx	array of missing data indexes
1043  */
1044 static raidz_inline int
1045 raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
1046 {
1047 	size_t c;
1048 	size_t dsize;
1049 	abd_t *dabd;
1050 	const size_t firstdc = raidz_parity(rm);
1051 	const size_t ncols = raidz_ncols(rm);
1052 	const size_t x = tgtidx[0];
1053 	const size_t y = tgtidx[1];
1054 	const size_t xsize = rm->rm_col[x].rc_size;
1055 	const size_t ysize = rm->rm_col[y].rc_size;
1056 	abd_t *xabd = rm->rm_col[x].rc_abd;
1057 	abd_t *yabd = rm->rm_col[y].rc_abd;
1058 	abd_t *tabds[2] = { xabd, yabd };
1059 	abd_t *cabds[] = {
1060 		rm->rm_col[CODE_P].rc_abd,
1061 		rm->rm_col[CODE_R].rc_abd
1062 	};
1063 	unsigned coeff[MUL_CNT];
1064 	raidz_rec_pr_coeff(rm, tgtidx, coeff);
1065 
1066 	/*
1067 	 * Check if some of targets are shorter then others.
1068 	 * They need to be replaced with a new buffer so that syndrome can
1069 	 * be calculated on full length.
1070 	 */
1071 	if (ysize < xsize) {
1072 		yabd = abd_alloc(xsize, B_FALSE);
1073 		tabds[1] = yabd;
1074 	}
1075 
1076 	raidz_math_begin();
1077 
1078 	/* Start with first data column if present */
1079 	if (firstdc != x) {
1080 		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
1081 		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
1082 	} else {
1083 		raidz_zero(xabd, xsize);
1084 		raidz_zero(yabd, xsize);
1085 	}
1086 
1087 	/* generate q_syndrome */
1088 	for (c = firstdc+1; c < ncols; c++) {
1089 		if (c == x || c == y) {
1090 			dabd = NULL;
1091 			dsize = 0;
1092 		} else {
1093 			dabd = rm->rm_col[c].rc_abd;
1094 			dsize = rm->rm_col[c].rc_size;
1095 		}
1096 
1097 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
1098 		    raidz_syn_pr_abd);
1099 	}
1100 
1101 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
1102 
1103 	/*
1104 	 * Copy shorter targets back to the original abd buffer
1105 	 */
1106 	if (ysize < xsize)
1107 		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
1108 
1109 	raidz_math_end();
1110 
1111 	if (ysize < xsize)
1112 		abd_free(yabd);
1113 
1114 	return ((1 << CODE_P) | (1 << CODE_Q));
1115 }
1116 
1117 
1118 /*
1119  * Generate Q and R syndromes
1120  *
1121  * @xc		array of pointers to syndrome columns
1122  * @dc		data column (NULL if missing)
1123  * @tsize	size of syndrome columns
1124  * @dsize	size of data column (0 if missing)
1125  */
1126 static void
1127 raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
1128     const size_t dsize)
1129 {
1130 	v_t *x = (v_t *)c[TARGET_X];
1131 	v_t *y = (v_t *)c[TARGET_Y];
1132 	const v_t * const xend = x + (tsize / sizeof (v_t));
1133 	const v_t *d = (const v_t *)dc;
1134 	const v_t * const dend = d + (dsize / sizeof (v_t));
1135 
1136 	SYN_QR_DEFINE();
1137 
1138 	MUL2_SETUP();
1139 
1140 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1141 		LOAD(d, SYN_PQ_D);
1142 		Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
1143 		R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
1144 	}
1145 	for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
1146 		Q_SYNDROME(SYN_QR_X, x);
1147 		R_SYNDROME(SYN_QR_X, y);
1148 	}
1149 }
1150 
1151 
1152 /*
1153  * Reconstruct data using QR parity and QR syndromes
1154  *
1155  * @tc		syndrome/result columns
1156  * @tsize	size of syndrome/result columns
1157  * @c		parity columns
1158  * @mul		array of multiplication constants
1159  */
1160 static void
1161 raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
1162     const unsigned *mul)
1163 {
1164 	v_t *x = (v_t *)t[TARGET_X];
1165 	v_t *y = (v_t *)t[TARGET_Y];
1166 	const v_t * const xend = x + (tsize / sizeof (v_t));
1167 	const v_t *p = (v_t *)c[CODE_P];
1168 	const v_t *q = (v_t *)c[CODE_Q];
1169 
1170 	REC_QR_DEFINE();
1171 
1172 	for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
1173 	    p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
1174 		LOAD(x, REC_QR_X);
1175 		LOAD(y, REC_QR_Y);
1176 
1177 		XOR_ACC(p, REC_QR_X);
1178 		XOR_ACC(q, REC_QR_Y);
1179 
1180 		/* Save Pxy */
1181 		COPY(REC_QR_X,  REC_QR_T);
1182 
1183 		/* Calc X */
1184 		MUL(mul[MUL_QR_XQ], REC_QR_X);	/* X = Q * xqm */
1185 		XOR(REC_QR_Y, REC_QR_X);	/* X = R ^ X   */
1186 		MUL(mul[MUL_QR_X], REC_QR_X);	/* X = X * xm  */
1187 		STORE(x, REC_QR_X);
1188 
1189 		/* Calc Y */
1190 		MUL(mul[MUL_QR_YQ], REC_QR_T);	/* X = Q * xqm */
1191 		XOR(REC_QR_Y, REC_QR_T);	/* X = R ^ X   */
1192 		MUL(mul[MUL_QR_Y], REC_QR_T);	/* X = X * xm  */
1193 		STORE(y, REC_QR_T);
1194 	}
1195 }
1196 
1197 
1198 /*
1199  * Reconstruct two data columns using QR parity
1200  *
1201  * @syn_method	raidz_syn_qr_abd()
1202  * @rec_method	raidz_rec_qr_abd()
1203  *
1204  * @rm		RAIDZ map
1205  * @tgtidx	array of missing data indexes
1206  */
1207 static raidz_inline int
1208 raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
1209 {
1210 	size_t c;
1211 	size_t dsize;
1212 	abd_t *dabd;
1213 	const size_t firstdc = raidz_parity(rm);
1214 	const size_t ncols = raidz_ncols(rm);
1215 	const size_t x = tgtidx[TARGET_X];
1216 	const size_t y = tgtidx[TARGET_Y];
1217 	const size_t xsize = rm->rm_col[x].rc_size;
1218 	const size_t ysize = rm->rm_col[y].rc_size;
1219 	abd_t *xabd = rm->rm_col[x].rc_abd;
1220 	abd_t *yabd = rm->rm_col[y].rc_abd;
1221 	abd_t *tabds[2] = { xabd, yabd };
1222 	abd_t *cabds[] = {
1223 		rm->rm_col[CODE_Q].rc_abd,
1224 		rm->rm_col[CODE_R].rc_abd
1225 	};
1226 	unsigned coeff[MUL_CNT];
1227 	raidz_rec_qr_coeff(rm, tgtidx, coeff);
1228 
1229 	/*
1230 	 * Check if some of targets is shorter then others
1231 	 * In this case, shorter target needs to be replaced with
1232 	 * new buffer so that syndrome can be calculated.
1233 	 */
1234 	if (ysize < xsize) {
1235 		yabd = abd_alloc(xsize, B_FALSE);
1236 		tabds[1] = yabd;
1237 	}
1238 
1239 	raidz_math_begin();
1240 
1241 	/* Start with first data column if present */
1242 	if (firstdc != x) {
1243 		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
1244 		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
1245 	} else {
1246 		raidz_zero(xabd, xsize);
1247 		raidz_zero(yabd, xsize);
1248 	}
1249 
1250 	/* generate q_syndrome */
1251 	for (c = firstdc+1; c < ncols; c++) {
1252 		if (c == x || c == y) {
1253 			dabd = NULL;
1254 			dsize = 0;
1255 		} else {
1256 			dabd = rm->rm_col[c].rc_abd;
1257 			dsize = rm->rm_col[c].rc_size;
1258 		}
1259 
1260 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
1261 		    raidz_syn_qr_abd);
1262 	}
1263 
1264 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
1265 
1266 	/*
1267 	 * Copy shorter targets back to the original abd buffer
1268 	 */
1269 	if (ysize < xsize)
1270 		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
1271 
1272 	raidz_math_end();
1273 
1274 	if (ysize < xsize)
1275 		abd_free(yabd);
1276 
1277 
1278 	return ((1 << CODE_Q) | (1 << CODE_R));
1279 }
1280 
1281 
1282 /*
1283  * Generate P, Q, and R syndromes
1284  *
1285  * @xc		array of pointers to syndrome columns
1286  * @dc		data column (NULL if missing)
1287  * @tsize	size of syndrome columns
1288  * @dsize	size of data column (0 if missing)
1289  */
1290 static void
1291 raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
1292     const size_t dsize)
1293 {
1294 	v_t *x = (v_t *)c[TARGET_X];
1295 	v_t *y = (v_t *)c[TARGET_Y];
1296 	v_t *z = (v_t *)c[TARGET_Z];
1297 	const v_t * const yend = y + (tsize / sizeof (v_t));
1298 	const v_t *d = (const v_t *)dc;
1299 	const v_t * const dend = d + (dsize / sizeof (v_t));
1300 
1301 	SYN_PQR_DEFINE();
1302 
1303 	MUL2_SETUP();
1304 
1305 	for (; d < dend;  d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
1306 	    z += SYN_STRIDE) {
1307 		LOAD(d, SYN_PQR_D);
1308 		P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x)
1309 		Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
1310 		R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
1311 	}
1312 	for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
1313 		Q_SYNDROME(SYN_PQR_X, y);
1314 		R_SYNDROME(SYN_PQR_X, z);
1315 	}
1316 }
1317 
1318 
1319 /*
1320  * Reconstruct data using PRQ parity and PQR syndromes
1321  *
1322  * @tc		syndrome/result columns
1323  * @tsize	size of syndrome/result columns
1324  * @c		parity columns
1325  * @mul		array of multiplication constants
1326  */
1327 static void
1328 raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
1329     const unsigned * const mul)
1330 {
1331 	v_t *x = (v_t *)t[TARGET_X];
1332 	v_t *y = (v_t *)t[TARGET_Y];
1333 	v_t *z = (v_t *)t[TARGET_Z];
1334 	const v_t * const xend = x + (tsize / sizeof (v_t));
1335 	const v_t *p = (v_t *)c[CODE_P];
1336 	const v_t *q = (v_t *)c[CODE_Q];
1337 	const v_t *r = (v_t *)c[CODE_R];
1338 
1339 	REC_PQR_DEFINE();
1340 
1341 	for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
1342 	    z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
1343 	    r += REC_PQR_STRIDE) {
1344 		LOAD(x, REC_PQR_X);
1345 		LOAD(y, REC_PQR_Y);
1346 		LOAD(z, REC_PQR_Z);
1347 
1348 		XOR_ACC(p, REC_PQR_X);
1349 		XOR_ACC(q, REC_PQR_Y);
1350 		XOR_ACC(r, REC_PQR_Z);
1351 
1352 		/* Save Pxyz and Qxyz */
1353 		COPY(REC_PQR_X, REC_PQR_XS);
1354 		COPY(REC_PQR_Y, REC_PQR_YS);
1355 
1356 		/* Calc X */
1357 		MUL(mul[MUL_PQR_XP], REC_PQR_X);	/* Xp = Pxyz * xp   */
1358 		MUL(mul[MUL_PQR_XQ], REC_PQR_Y);	/* Xq = Qxyz * xq   */
1359 		XOR(REC_PQR_Y, REC_PQR_X);
1360 		MUL(mul[MUL_PQR_XR], REC_PQR_Z);	/* Xr = Rxyz * xr   */
1361 		XOR(REC_PQR_Z, REC_PQR_X);		/* X = Xp + Xq + Xr */
1362 		STORE(x, REC_PQR_X);
1363 
1364 		/* Calc Y */
1365 		XOR(REC_PQR_X, REC_PQR_XS); 		/* Pyz = Pxyz + X */
1366 		MUL(mul[MUL_PQR_YU], REC_PQR_X);  	/* Xq = X * upd_q */
1367 		XOR(REC_PQR_X, REC_PQR_YS); 		/* Qyz = Qxyz + Xq */
1368 		COPY(REC_PQR_XS, REC_PQR_X);		/* restore Pyz */
1369 		MUL(mul[MUL_PQR_YP], REC_PQR_X);	/* Yp = Pyz * yp */
1370 		MUL(mul[MUL_PQR_YQ], REC_PQR_YS);	/* Yq = Qyz * yq */
1371 		XOR(REC_PQR_X, REC_PQR_YS); 		/* Y = Yp + Yq */
1372 		STORE(y, REC_PQR_YS);
1373 
1374 		/* Calc Z */
1375 		XOR(REC_PQR_XS, REC_PQR_YS);		/* Z = Pz = Pyz + Y */
1376 		STORE(z, REC_PQR_YS);
1377 	}
1378 }
1379 
1380 
1381 /*
1382  * Reconstruct three data columns using PQR parity
1383  *
1384  * @syn_method	raidz_syn_pqr_abd()
1385  * @rec_method	raidz_rec_pqr_abd()
1386  *
1387  * @rm		RAIDZ map
1388  * @tgtidx	array of missing data indexes
1389  */
1390 static raidz_inline int
1391 raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
1392 {
1393 	size_t c;
1394 	size_t dsize;
1395 	abd_t *dabd;
1396 	const size_t firstdc = raidz_parity(rm);
1397 	const size_t ncols = raidz_ncols(rm);
1398 	const size_t x = tgtidx[TARGET_X];
1399 	const size_t y = tgtidx[TARGET_Y];
1400 	const size_t z = tgtidx[TARGET_Z];
1401 	const size_t xsize = rm->rm_col[x].rc_size;
1402 	const size_t ysize = rm->rm_col[y].rc_size;
1403 	const size_t zsize = rm->rm_col[z].rc_size;
1404 	abd_t *xabd = rm->rm_col[x].rc_abd;
1405 	abd_t *yabd = rm->rm_col[y].rc_abd;
1406 	abd_t *zabd = rm->rm_col[z].rc_abd;
1407 	abd_t *tabds[] = { xabd, yabd, zabd };
1408 	abd_t *cabds[] = {
1409 		rm->rm_col[CODE_P].rc_abd,
1410 		rm->rm_col[CODE_Q].rc_abd,
1411 		rm->rm_col[CODE_R].rc_abd
1412 	};
1413 	unsigned coeff[MUL_CNT];
1414 	raidz_rec_pqr_coeff(rm, tgtidx, coeff);
1415 
1416 	/*
1417 	 * Check if some of targets is shorter then others
1418 	 * In this case, shorter target needs to be replaced with
1419 	 * new buffer so that syndrome can be calculated.
1420 	 */
1421 	if (ysize < xsize) {
1422 		yabd = abd_alloc(xsize, B_FALSE);
1423 		tabds[1] = yabd;
1424 	}
1425 	if (zsize < xsize) {
1426 		zabd = abd_alloc(xsize, B_FALSE);
1427 		tabds[2] = zabd;
1428 	}
1429 
1430 	raidz_math_begin();
1431 
1432 	/* Start with first data column if present */
1433 	if (firstdc != x) {
1434 		raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
1435 		raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
1436 		raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize);
1437 	} else {
1438 		raidz_zero(xabd, xsize);
1439 		raidz_zero(yabd, xsize);
1440 		raidz_zero(zabd, xsize);
1441 	}
1442 
1443 	/* generate q_syndrome */
1444 	for (c = firstdc+1; c < ncols; c++) {
1445 		if (c == x || c == y || c == z) {
1446 			dabd = NULL;
1447 			dsize = 0;
1448 		} else {
1449 			dabd = rm->rm_col[c].rc_abd;
1450 			dsize = rm->rm_col[c].rc_size;
1451 		}
1452 
1453 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
1454 		    raidz_syn_pqr_abd);
1455 	}
1456 
1457 	abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
1458 
1459 	/*
1460 	 * Copy shorter targets back to the original abd buffer
1461 	 */
1462 	if (ysize < xsize)
1463 		raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
1464 	if (zsize < xsize)
1465 		raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize);
1466 
1467 	raidz_math_end();
1468 
1469 	if (ysize < xsize)
1470 		abd_free(yabd);
1471 	if (zsize < xsize)
1472 		abd_free(zabd);
1473 
1474 	return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1475 }
1476 
1477 #endif /* _VDEV_RAIDZ_MATH_IMPL_H */
1478