xref: /freebsd/sys/contrib/openzfs/module/zfs/vdev_raidz_math_impl.h (revision b197d4b893974c9eb4d7b38704c6d5c486235d6f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23  */
24 
25 #ifndef _VDEV_RAIDZ_MATH_IMPL_H
26 #define	_VDEV_RAIDZ_MATH_IMPL_H
27 
28 #include <sys/types.h>
29 #include <sys/vdev_raidz_impl.h>
30 
/*
 * raidz_inline marks functions that must always be inlined
 * (always_inline attribute).
 */
#define	raidz_inline inline __attribute__((always_inline))
/* Provide noinline only when the build environment has not defined it yet. */
#ifndef noinline
#define	noinline __attribute__((noinline))
#endif
35 
36 /*
37  * Functions calculate multiplication constants for data reconstruction.
38  * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
39  * used parity columns for reconstruction.
40  * @rr			RAIDZ row
41  * @tgtidx		array of missing data indexes
42  * @coeff		output array of coefficients. Array must be provided by
43  *         		user and must hold minimum MUL_CNT values.
44  */
45 static noinline void
46 raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
47 {
48 	const unsigned ncols = rr->rr_cols;
49 	const unsigned x = tgtidx[TARGET_X];
50 
51 	coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
52 }
53 
54 static noinline void
55 raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
56 {
57 	const unsigned ncols = rr->rr_cols;
58 	const unsigned x = tgtidx[TARGET_X];
59 
60 	coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
61 }
62 
63 static noinline void
64 raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
65 {
66 	const unsigned ncols = rr->rr_cols;
67 	const unsigned x = tgtidx[TARGET_X];
68 	const unsigned y = tgtidx[TARGET_Y];
69 	gf_t a, b, e;
70 
71 	a = gf_exp2(x + 255 - y);
72 	b = gf_exp2(255 - (ncols - x - 1));
73 	e = a ^ 0x01;
74 
75 	coeff[MUL_PQ_X] = gf_div(a, e);
76 	coeff[MUL_PQ_Y] = gf_div(b, e);
77 }
78 
79 static noinline void
80 raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
81 {
82 	const unsigned ncols = rr->rr_cols;
83 	const unsigned x = tgtidx[TARGET_X];
84 	const unsigned y = tgtidx[TARGET_Y];
85 
86 	gf_t a, b, e;
87 
88 	a = gf_exp4(x + 255 - y);
89 	b = gf_exp4(255 - (ncols - x - 1));
90 	e = a ^ 0x01;
91 
92 	coeff[MUL_PR_X] = gf_div(a, e);
93 	coeff[MUL_PR_Y] = gf_div(b, e);
94 }
95 
96 static noinline void
97 raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
98 {
99 	const unsigned ncols = rr->rr_cols;
100 	const unsigned x = tgtidx[TARGET_X];
101 	const unsigned y = tgtidx[TARGET_Y];
102 
103 	gf_t nx, ny, nxxy, nxyy, d;
104 
105 	nx = gf_exp2(ncols - x - 1);
106 	ny = gf_exp2(ncols - y - 1);
107 	nxxy = gf_mul(gf_mul(nx, nx), ny);
108 	nxyy = gf_mul(gf_mul(nx, ny), ny);
109 	d = nxxy ^ nxyy;
110 
111 	coeff[MUL_QR_XQ] = ny;
112 	coeff[MUL_QR_X]	= gf_div(ny, d);
113 	coeff[MUL_QR_YQ] = nx;
114 	coeff[MUL_QR_Y]	= gf_div(nx, d);
115 }
116 
117 static noinline void
118 raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
119 {
120 	const unsigned ncols = rr->rr_cols;
121 	const unsigned x = tgtidx[TARGET_X];
122 	const unsigned y = tgtidx[TARGET_Y];
123 	const unsigned z = tgtidx[TARGET_Z];
124 
125 	gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
126 
127 	nx = gf_exp2(ncols - x - 1);
128 	ny = gf_exp2(ncols - y - 1);
129 	nz = gf_exp2(ncols - z - 1);
130 
131 	nxx = gf_exp4(ncols - x - 1);
132 	nyy = gf_exp4(ncols - y - 1);
133 	nzz = gf_exp4(ncols - z - 1);
134 
135 	nyyz = gf_mul(gf_mul(ny, nz), ny);
136 	nyzz = gf_mul(nzz, ny);
137 
138 	xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
139 	    gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^  nyzz;
140 
141 	yd = gf_inv(ny ^ nz);
142 
143 	coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
144 	coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
145 	coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
146 	coeff[MUL_PQR_YU] = nx;
147 	coeff[MUL_PQR_YP] = gf_mul(nz, yd);
148 	coeff[MUL_PQR_YQ] = yd;
149 }
150 
151 /*
152  * Method for zeroing a buffer (can be implemented using SIMD).
153  * This method is used by multiple for gen/rec functions.
154  *
155  * @dc		Destination buffer
156  * @dsize	Destination buffer size
157  * @private	Unused
158  */
159 static int
160 raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
161 {
162 	v_t *dst = (v_t *)dc;
163 	size_t i;
164 
165 	ZERO_DEFINE();
166 
167 	(void) private; /* unused */
168 
169 	ZERO(ZERO_D);
170 
171 	for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
172 		STORE(dst + i, ZERO_D);
173 		STORE(dst + i + ZERO_STRIDE, ZERO_D);
174 	}
175 
176 	return (0);
177 }
178 
/*
 * Run raidz_zero_abd_cb() over dabd, starting at offset 0 for the given
 * size. Wrapped in do { } while (0) so that "raidz_zero(a, s);" is exactly
 * one statement and can be used as the body of an unbraced if/else.
 */
#define	raidz_zero(dabd, size)						\
do {									\
	abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL);	\
} while (0)
183 
184 /*
185  * Method for copying two buffers (can be implemented using SIMD).
186  * This method is used by multiple for gen/rec functions.
187  *
188  * @dc		Destination buffer
189  * @sc		Source buffer
190  * @dsize	Destination buffer size
191  * @ssize	Source buffer size
192  * @private	Unused
193  */
194 static int
195 raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
196 {
197 	v_t *dst = (v_t *)dc;
198 	const v_t *src = (v_t *)sc;
199 	size_t i;
200 
201 	COPY_DEFINE();
202 
203 	(void) private; /* unused */
204 
205 	for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
206 		LOAD(src + i, COPY_D);
207 		STORE(dst + i, COPY_D);
208 
209 		LOAD(src + i + COPY_STRIDE, COPY_D);
210 		STORE(dst + i + COPY_STRIDE, COPY_D);
211 	}
212 
213 	return (0);
214 }
215 
216 
/*
 * Run raidz_copy_abd_cb() over dabd (destination) and sabd (source), both
 * starting at offset 0, for the given size. Wrapped in do { } while (0) so
 * that "raidz_copy(d, s, n);" is exactly one statement and can be used as
 * the body of an unbraced if/else.
 */
#define	raidz_copy(dabd, sabd, size)					\
do {									\
	abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
} while (0)
221 
222 /*
223  * Method for adding (XORing) two buffers.
224  * Source and destination are XORed together and result is stored in
225  * destination buffer. This method is used by multiple for gen/rec functions.
226  *
227  * @dc		Destination buffer
228  * @sc		Source buffer
229  * @dsize	Destination buffer size
230  * @ssize	Source buffer size
231  * @private	Unused
232  */
233 static int
234 raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
235 {
236 	v_t *dst = (v_t *)dc;
237 	const v_t *src = (v_t *)sc;
238 	size_t i;
239 
240 	ADD_DEFINE();
241 
242 	(void) private; /* unused */
243 
244 	for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
245 		LOAD(dst + i, ADD_D);
246 		XOR_ACC(src + i, ADD_D);
247 		STORE(dst + i, ADD_D);
248 
249 		LOAD(dst + i + ADD_STRIDE, ADD_D);
250 		XOR_ACC(src + i + ADD_STRIDE, ADD_D);
251 		STORE(dst + i + ADD_STRIDE, ADD_D);
252 	}
253 
254 	return (0);
255 }
256 
/*
 * Run raidz_add_abd_cb() over dabd (destination) and sabd (source), both
 * starting at offset 0, for the given size. Wrapped in do { } while (0) so
 * that "raidz_add(d, s, n);" is exactly one statement and can be used as
 * the body of an unbraced if/else.
 */
#define	raidz_add(dabd, sabd, size)					\
do {									\
	abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
} while (0)
261 
262 /*
263  * Method for multiplying a buffer with a constant in GF(2^8).
264  * Symbols from buffer are multiplied by a constant and result is stored
265  * back in the same buffer.
266  *
267  * @dc		In/Out data buffer.
268  * @size	Size of the buffer
269  * @private	pointer to the multiplication constant (unsigned)
270  */
271 static int
272 raidz_mul_abd_cb(void *dc, size_t size, void *private)
273 {
274 	const unsigned mul = *((unsigned *)private);
275 	v_t *d = (v_t *)dc;
276 	size_t i;
277 
278 	MUL_DEFINE();
279 
280 	for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
281 		LOAD(d + i, MUL_D);
282 		MUL(mul, MUL_D);
283 		STORE(d + i, MUL_D);
284 
285 		LOAD(d + i + MUL_STRIDE, MUL_D);
286 		MUL(mul, MUL_D);
287 		STORE(d + i + MUL_STRIDE, MUL_D);
288 	}
289 
290 	return (0);
291 }
292 
293 
/*
 * Syndrome generation/update macros
 *
 * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
 *
 * Arguments:
 *	D	registers holding the data that was already loaded
 *	T	scratch registers used for the parity/syndrome value
 *	t	memory location of the parity/syndrome value
 *
 * Each macro is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement (safe in unbraced if/else).
 */

/* t = t ^ D */
#define	P_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* t = MUL2(t) ^ D */
#define	Q_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	MUL2(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* t = MUL2(t), used where no data is available */
#define	Q_SYNDROME(T, t)		\
do {					\
	LOAD((t), T);			\
	MUL2(T);			\
	STORE((t), T);			\
} while (0)

/* t = MUL4(t) ^ D */
#define	R_D_SYNDROME(D, T, t)		\
do {					\
	LOAD((t), T);			\
	MUL4(T);			\
	XOR(D, T);			\
	STORE((t), T);			\
} while (0)

/* t = MUL4(t), used where no data is available */
#define	R_SYNDROME(T, t)		\
do {					\
	LOAD((t), T);			\
	MUL4(T);			\
	STORE((t), T);			\
} while (0)
335 
336 
337 /*
338  * PARITY CALCULATION
339  *
340  * Macros *_SYNDROME are used for parity/syndrome calculation.
341  * *_D_SYNDROME() macros are used to calculate syndrome between 0 and
342  * length of data column, and *_SYNDROME() macros are only for updating
343  * the parity/syndrome if data column is shorter.
344  *
 * P parity is calculated using raidz_copy() and raidz_add().
346  */
347 
348 /*
349  * Generate P parity (RAIDZ1)
350  *
351  * @rr	RAIDZ row
352  */
353 static raidz_inline void
354 raidz_generate_p_impl(raidz_row_t * const rr)
355 {
356 	size_t c;
357 	const size_t ncols = rr->rr_cols;
358 	const size_t psize = rr->rr_col[CODE_P].rc_size;
359 	abd_t *pabd = rr->rr_col[CODE_P].rc_abd;
360 	size_t size;
361 	abd_t *dabd;
362 
363 	raidz_math_begin();
364 
365 	/* start with first data column */
366 	raidz_copy(pabd, rr->rr_col[1].rc_abd, psize);
367 
368 	for (c = 2; c < ncols; c++) {
369 		dabd = rr->rr_col[c].rc_abd;
370 		size = rr->rr_col[c].rc_size;
371 
372 		/* add data column */
373 		raidz_add(pabd, dabd, size);
374 	}
375 
376 	raidz_math_end();
377 }
378 
379 
380 /*
381  * Generate PQ parity (RAIDZ2)
382  * The function is called per data column.
383  *
384  * @c		array of pointers to parity (code) columns
385  * @dc		pointer to data column
386  * @csize	size of parity columns
387  * @dsize	size of data column
388  */
389 static void
390 raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
391     const size_t dsize)
392 {
393 	v_t *p = (v_t *)c[0];
394 	v_t *q = (v_t *)c[1];
395 	const v_t *d = (const v_t *)dc;
396 	const v_t * const dend = d + (dsize / sizeof (v_t));
397 	const v_t * const qend = q + (csize / sizeof (v_t));
398 
399 	GEN_PQ_DEFINE();
400 
401 	MUL2_SETUP();
402 
403 	for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
404 	    q += GEN_PQ_STRIDE) {
405 		LOAD(d, GEN_PQ_D);
406 		P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
407 		Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
408 	}
409 	for (; q < qend; q += GEN_PQ_STRIDE) {
410 		Q_SYNDROME(GEN_PQ_C, q);
411 	}
412 }
413 
414 
415 /*
416  * Generate PQ parity (RAIDZ2)
417  *
418  * @rr	RAIDZ row
419  */
420 static raidz_inline void
421 raidz_generate_pq_impl(raidz_row_t * const rr)
422 {
423 	size_t c;
424 	const size_t ncols = rr->rr_cols;
425 	const size_t csize = rr->rr_col[CODE_P].rc_size;
426 	size_t dsize;
427 	abd_t *dabd;
428 	abd_t *cabds[] = {
429 		rr->rr_col[CODE_P].rc_abd,
430 		rr->rr_col[CODE_Q].rc_abd
431 	};
432 
433 	raidz_math_begin();
434 
435 	raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, csize);
436 	raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, csize);
437 
438 	for (c = 3; c < ncols; c++) {
439 		dabd = rr->rr_col[c].rc_abd;
440 		dsize = rr->rr_col[c].rc_size;
441 
442 		abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
443 		    raidz_gen_pq_add);
444 	}
445 
446 	raidz_math_end();
447 }
448 
449 
450 /*
451  * Generate PQR parity (RAIDZ3)
452  * The function is called per data column.
453  *
454  * @c		array of pointers to parity (code) columns
455  * @dc		pointer to data column
456  * @csize	size of parity columns
457  * @dsize	size of data column
458  */
459 static void
460 raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
461     const size_t dsize)
462 {
463 	v_t *p = (v_t *)c[0];
464 	v_t *q = (v_t *)c[1];
465 	v_t *r = (v_t *)c[CODE_R];
466 	const v_t *d = (const v_t *)dc;
467 	const v_t * const dend = d + (dsize / sizeof (v_t));
468 	const v_t * const qend = q + (csize / sizeof (v_t));
469 
470 	GEN_PQR_DEFINE();
471 
472 	MUL2_SETUP();
473 
474 	for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
475 	    q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
476 		LOAD(d, GEN_PQR_D);
477 		P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
478 		Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
479 		R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
480 	}
481 	for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
482 		Q_SYNDROME(GEN_PQR_C, q);
483 		R_SYNDROME(GEN_PQR_C, r);
484 	}
485 }
486 
487 
488 /*
489  * Generate PQR parity (RAIDZ2)
490  *
491  * @rr	RAIDZ row
492  */
493 static raidz_inline void
494 raidz_generate_pqr_impl(raidz_row_t * const rr)
495 {
496 	size_t c;
497 	const size_t ncols = rr->rr_cols;
498 	const size_t csize = rr->rr_col[CODE_P].rc_size;
499 	size_t dsize;
500 	abd_t *dabd;
501 	abd_t *cabds[] = {
502 		rr->rr_col[CODE_P].rc_abd,
503 		rr->rr_col[CODE_Q].rc_abd,
504 		rr->rr_col[CODE_R].rc_abd
505 	};
506 
507 	raidz_math_begin();
508 
509 	raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, csize);
510 	raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, csize);
511 	raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, csize);
512 
513 	for (c = 4; c < ncols; c++) {
514 		dabd = rr->rr_col[c].rc_abd;
515 		dsize = rr->rr_col[c].rc_size;
516 
517 		abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
518 		    raidz_gen_pqr_add);
519 	}
520 
521 	raidz_math_end();
522 }
523 
524 
525 /*
526  * DATA RECONSTRUCTION
527  *
528  * Data reconstruction process consists of two phases:
529  * 	- Syndrome calculation
530  * 	- Data reconstruction
531  *
532  * Syndrome is calculated by generating parity using available data columns
533  * and zeros in places of erasure. Existing parity is added to corresponding
534  * syndrome value to obtain the [P|Q|R]syn values from equation:
535  * 	P = Psyn + Dx + Dy + Dz
536  * 	Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
537  * 	R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
538  *
 * For data reconstruction phase, the corresponding equations are solved
 * for missing data (Dx, Dy, Dz). This generally involves multiplying known
 * symbols by a coefficient and adding them together. The multiplication
 * constant coefficients are calculated ahead of the operation in
 * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
544  *
545  * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big"
546  * and "short" columns.
547  * For this reason, reconstruction is performed in minimum of
 * two steps. First, from offset 0 to short_size, then from short_size to
 * big_size. Calculation functions REC_[*]_BLOCK() are implemented to work
550  * over both ranges. The split also enables removal of conditional expressions
551  * from loop bodies, improving throughput of SIMD implementations.
552  * For the best performance, all functions marked with raidz_inline attribute
553  * must be inlined by compiler.
554  *
555  *    parity          data
556  *    columns         columns
557  * <----------> <------------------>
558  *                   x       y  <----+ missing columns (x, y)
559  *                   |       |
560  * +---+---+---+---+-v-+---+-v-+---+   ^ 0
561  * |   |   |   |   |   |   |   |   |   |
562  * |   |   |   |   |   |   |   |   |   |
563  * | P | Q | R | D | D | D | D | D |   |
564  * |   |   |   | 0 | 1 | 2 | 3 | 4 |   |
565  * |   |   |   |   |   |   |   |   |   v
566  * |   |   |   |   |   +---+---+---+   ^ short_size
567  * |   |   |   |   |   |               |
568  * +---+---+---+---+---+               v big_size
569  * <------------------> <---------->
570  *      big columns     short columns
571  *
572  */
573 
574 
575 
576 
577 /*
578  * Reconstruct single data column using P parity
579  *
580  * @syn_method	raidz_add_abd()
581  * @rec_method	not applicable
582  *
583  * @rr		RAIDZ row
584  * @tgtidx	array of missing data indexes
585  */
586 static raidz_inline int
587 raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx)
588 {
589 	size_t c;
590 	const size_t firstdc = rr->rr_firstdatacol;
591 	const size_t ncols = rr->rr_cols;
592 	const size_t x = tgtidx[TARGET_X];
593 	const size_t xsize = rr->rr_col[x].rc_size;
594 	abd_t *xabd = rr->rr_col[x].rc_abd;
595 	size_t size;
596 	abd_t *dabd;
597 
598 	if (xabd == NULL)
599 		return (1 << CODE_P);
600 
601 	raidz_math_begin();
602 
603 	/* copy P into target */
604 	raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, xsize);
605 
606 	/* generate p_syndrome */
607 	for (c = firstdc; c < ncols; c++) {
608 		if (c == x)
609 			continue;
610 
611 		dabd = rr->rr_col[c].rc_abd;
612 		size = MIN(rr->rr_col[c].rc_size, xsize);
613 
614 		raidz_add(xabd, dabd, size);
615 	}
616 
617 	raidz_math_end();
618 
619 	return (1 << CODE_P);
620 }
621 
622 
623 /*
624  * Generate Q syndrome (Qsyn)
625  *
626  * @xc		array of pointers to syndrome columns
627  * @dc		data column (NULL if missing)
628  * @xsize	size of syndrome columns
629  * @dsize	size of data column (0 if missing)
630  */
631 static void
632 raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
633     const size_t dsize)
634 {
635 	v_t *x = (v_t *)xc[TARGET_X];
636 	const v_t *d = (const v_t *)dc;
637 	const v_t * const dend = d + (dsize / sizeof (v_t));
638 	const v_t * const xend = x + (xsize / sizeof (v_t));
639 
640 	SYN_Q_DEFINE();
641 
642 	MUL2_SETUP();
643 
644 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
645 		LOAD(d, SYN_Q_D);
646 		Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
647 	}
648 	for (; x < xend; x += SYN_STRIDE) {
649 		Q_SYNDROME(SYN_Q_X, x);
650 	}
651 }
652 
653 
654 /*
655  * Reconstruct single data column using Q parity
656  *
657  * @syn_method	raidz_add_abd()
658  * @rec_method	raidz_mul_abd_cb()
659  *
660  * @rr		RAIDZ row
661  * @tgtidx	array of missing data indexes
662  */
663 static raidz_inline int
664 raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx)
665 {
666 	size_t c;
667 	size_t dsize;
668 	abd_t *dabd;
669 	const size_t firstdc = rr->rr_firstdatacol;
670 	const size_t ncols = rr->rr_cols;
671 	const size_t x = tgtidx[TARGET_X];
672 	abd_t *xabd = rr->rr_col[x].rc_abd;
673 	const size_t xsize = rr->rr_col[x].rc_size;
674 	abd_t *tabds[] = { xabd };
675 
676 	if (xabd == NULL)
677 		return (1 << CODE_Q);
678 
679 	unsigned coeff[MUL_CNT];
680 	raidz_rec_q_coeff(rr, tgtidx, coeff);
681 
682 	raidz_math_begin();
683 
684 	/* Start with first data column if present */
685 	if (firstdc != x) {
686 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
687 	} else {
688 		raidz_zero(xabd, xsize);
689 	}
690 
691 	/* generate q_syndrome */
692 	for (c = firstdc+1; c < ncols; c++) {
693 		if (c == x) {
694 			dabd = NULL;
695 			dsize = 0;
696 		} else {
697 			dabd = rr->rr_col[c].rc_abd;
698 			dsize = rr->rr_col[c].rc_size;
699 		}
700 
701 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
702 		    raidz_syn_q_abd);
703 	}
704 
705 	/* add Q to the syndrome */
706 	raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, xsize);
707 
708 	/* transform the syndrome */
709 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff);
710 
711 	raidz_math_end();
712 
713 	return (1 << CODE_Q);
714 }
715 
716 
717 /*
718  * Generate R syndrome (Rsyn)
719  *
720  * @xc		array of pointers to syndrome columns
721  * @dc		data column (NULL if missing)
722  * @tsize	size of syndrome columns
723  * @dsize	size of data column (0 if missing)
724  */
725 static void
726 raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
727     const size_t dsize)
728 {
729 	v_t *x = (v_t *)xc[TARGET_X];
730 	const v_t *d = (const v_t *)dc;
731 	const v_t * const dend = d + (dsize / sizeof (v_t));
732 	const v_t * const xend = x + (tsize / sizeof (v_t));
733 
734 	SYN_R_DEFINE();
735 
736 	MUL2_SETUP();
737 
738 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
739 		LOAD(d, SYN_R_D);
740 		R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
741 	}
742 	for (; x < xend; x += SYN_STRIDE) {
743 		R_SYNDROME(SYN_R_X, x);
744 	}
745 }
746 
747 
748 /*
749  * Reconstruct single data column using R parity
750  *
751  * @syn_method	raidz_add_abd()
752  * @rec_method	raidz_mul_abd_cb()
753  *
754  * @rr		RAIDZ rr
755  * @tgtidx	array of missing data indexes
756  */
757 static raidz_inline int
758 raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx)
759 {
760 	size_t c;
761 	size_t dsize;
762 	abd_t *dabd;
763 	const size_t firstdc = rr->rr_firstdatacol;
764 	const size_t ncols = rr->rr_cols;
765 	const size_t x = tgtidx[TARGET_X];
766 	const size_t xsize = rr->rr_col[x].rc_size;
767 	abd_t *xabd = rr->rr_col[x].rc_abd;
768 	abd_t *tabds[] = { xabd };
769 
770 	if (xabd == NULL)
771 		return (1 << CODE_R);
772 
773 	unsigned coeff[MUL_CNT];
774 	raidz_rec_r_coeff(rr, tgtidx, coeff);
775 
776 	raidz_math_begin();
777 
778 	/* Start with first data column if present */
779 	if (firstdc != x) {
780 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
781 	} else {
782 		raidz_zero(xabd, xsize);
783 	}
784 
785 
786 	/* generate q_syndrome */
787 	for (c = firstdc+1; c < ncols; c++) {
788 		if (c == x) {
789 			dabd = NULL;
790 			dsize = 0;
791 		} else {
792 			dabd = rr->rr_col[c].rc_abd;
793 			dsize = rr->rr_col[c].rc_size;
794 		}
795 
796 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
797 		    raidz_syn_r_abd);
798 	}
799 
800 	/* add R to the syndrome */
801 	raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, xsize);
802 
803 	/* transform the syndrome */
804 	abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
805 
806 	raidz_math_end();
807 
808 	return (1 << CODE_R);
809 }
810 
811 
812 /*
813  * Generate P and Q syndromes
814  *
815  * @xc		array of pointers to syndrome columns
816  * @dc		data column (NULL if missing)
817  * @tsize	size of syndrome columns
818  * @dsize	size of data column (0 if missing)
819  */
820 static void
821 raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
822     const size_t dsize)
823 {
824 	v_t *x = (v_t *)tc[TARGET_X];
825 	v_t *y = (v_t *)tc[TARGET_Y];
826 	const v_t *d = (const v_t *)dc;
827 	const v_t * const dend = d + (dsize / sizeof (v_t));
828 	const v_t * const yend = y + (tsize / sizeof (v_t));
829 
830 	SYN_PQ_DEFINE();
831 
832 	MUL2_SETUP();
833 
834 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
835 		LOAD(d, SYN_PQ_D);
836 		P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
837 		Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
838 	}
839 	for (; y < yend; y += SYN_STRIDE) {
840 		Q_SYNDROME(SYN_PQ_X, y);
841 	}
842 }
843 
844 /*
845  * Reconstruct data using PQ parity and PQ syndromes
846  *
847  * @tc		syndrome/result columns
848  * @tsize	size of syndrome/result columns
849  * @c		parity columns
850  * @mul		array of multiplication constants
851  */
852 static void
853 raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
854     const unsigned *mul)
855 {
856 	v_t *x = (v_t *)tc[TARGET_X];
857 	v_t *y = (v_t *)tc[TARGET_Y];
858 	const v_t * const xend = x + (tsize / sizeof (v_t));
859 	const v_t *p = (v_t *)c[CODE_P];
860 	const v_t *q = (v_t *)c[CODE_Q];
861 
862 	REC_PQ_DEFINE();
863 
864 	for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
865 	    p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
866 		LOAD(x, REC_PQ_X);
867 		LOAD(y, REC_PQ_Y);
868 
869 		XOR_ACC(p, REC_PQ_X);
870 		XOR_ACC(q, REC_PQ_Y);
871 
872 		/* Save Pxy */
873 		COPY(REC_PQ_X,  REC_PQ_T);
874 
875 		/* Calc X */
876 		MUL(mul[MUL_PQ_X], REC_PQ_X);
877 		MUL(mul[MUL_PQ_Y], REC_PQ_Y);
878 		XOR(REC_PQ_Y,  REC_PQ_X);
879 		STORE(x, REC_PQ_X);
880 
881 		/* Calc Y */
882 		XOR(REC_PQ_T,  REC_PQ_X);
883 		STORE(y, REC_PQ_X);
884 	}
885 }
886 
887 
888 /*
889  * Reconstruct two data columns using PQ parity
890  *
891  * @syn_method	raidz_syn_pq_abd()
892  * @rec_method	raidz_rec_pq_abd()
893  *
894  * @rr		RAIDZ row
895  * @tgtidx	array of missing data indexes
896  */
897 static raidz_inline int
898 raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx)
899 {
900 	size_t c;
901 	size_t dsize;
902 	abd_t *dabd;
903 	const size_t firstdc = rr->rr_firstdatacol;
904 	const size_t ncols = rr->rr_cols;
905 	const size_t x = tgtidx[TARGET_X];
906 	const size_t y = tgtidx[TARGET_Y];
907 	const size_t xsize = rr->rr_col[x].rc_size;
908 	const size_t ysize = rr->rr_col[y].rc_size;
909 	abd_t *xabd = rr->rr_col[x].rc_abd;
910 	abd_t *yabd = rr->rr_col[y].rc_abd;
911 	abd_t *tabds[2] = { xabd, yabd };
912 	abd_t *cabds[] = {
913 		rr->rr_col[CODE_P].rc_abd,
914 		rr->rr_col[CODE_Q].rc_abd
915 	};
916 
917 	if (xabd == NULL)
918 		return ((1 << CODE_P) | (1 << CODE_Q));
919 
920 	unsigned coeff[MUL_CNT];
921 	raidz_rec_pq_coeff(rr, tgtidx, coeff);
922 
923 	/*
924 	 * Check if some of targets is shorter then others
925 	 * In this case, shorter target needs to be replaced with
926 	 * new buffer so that syndrome can be calculated.
927 	 */
928 	if (ysize < xsize) {
929 		yabd = abd_alloc(xsize, B_FALSE);
930 		tabds[1] = yabd;
931 	}
932 
933 	raidz_math_begin();
934 
935 	/* Start with first data column if present */
936 	if (firstdc != x) {
937 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
938 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
939 	} else {
940 		raidz_zero(xabd, xsize);
941 		raidz_zero(yabd, xsize);
942 	}
943 
944 	/* generate q_syndrome */
945 	for (c = firstdc+1; c < ncols; c++) {
946 		if (c == x || c == y) {
947 			dabd = NULL;
948 			dsize = 0;
949 		} else {
950 			dabd = rr->rr_col[c].rc_abd;
951 			dsize = rr->rr_col[c].rc_size;
952 		}
953 
954 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
955 		    raidz_syn_pq_abd);
956 	}
957 
958 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
959 
960 	/* Copy shorter targets back to the original abd buffer */
961 	if (ysize < xsize)
962 		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
963 
964 	raidz_math_end();
965 
966 	if (ysize < xsize)
967 		abd_free(yabd);
968 
969 	return ((1 << CODE_P) | (1 << CODE_Q));
970 }
971 
972 
973 /*
974  * Generate P and R syndromes
975  *
976  * @xc		array of pointers to syndrome columns
977  * @dc		data column (NULL if missing)
978  * @tsize	size of syndrome columns
979  * @dsize	size of data column (0 if missing)
980  */
981 static void
982 raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
983     const size_t dsize)
984 {
985 	v_t *x = (v_t *)c[TARGET_X];
986 	v_t *y = (v_t *)c[TARGET_Y];
987 	const v_t *d = (const v_t *)dc;
988 	const v_t * const dend = d + (dsize / sizeof (v_t));
989 	const v_t * const yend = y + (tsize / sizeof (v_t));
990 
991 	SYN_PR_DEFINE();
992 
993 	MUL2_SETUP();
994 
995 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
996 		LOAD(d, SYN_PR_D);
997 		P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
998 		R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
999 	}
1000 	for (; y < yend; y += SYN_STRIDE) {
1001 		R_SYNDROME(SYN_PR_X, y);
1002 	}
1003 }
1004 
1005 /*
1006  * Reconstruct data using PR parity and PR syndromes
1007  *
1008  * @tc		syndrome/result columns
1009  * @tsize	size of syndrome/result columns
1010  * @c		parity columns
1011  * @mul		array of multiplication constants
1012  */
1013 static void
1014 raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
1015     const unsigned *mul)
1016 {
1017 	v_t *x = (v_t *)t[TARGET_X];
1018 	v_t *y = (v_t *)t[TARGET_Y];
1019 	const v_t * const xend = x + (tsize / sizeof (v_t));
1020 	const v_t *p = (v_t *)c[CODE_P];
1021 	const v_t *q = (v_t *)c[CODE_Q];
1022 
1023 	REC_PR_DEFINE();
1024 
1025 	for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
1026 	    p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
1027 		LOAD(x, REC_PR_X);
1028 		LOAD(y, REC_PR_Y);
1029 		XOR_ACC(p, REC_PR_X);
1030 		XOR_ACC(q, REC_PR_Y);
1031 
1032 		/* Save Pxy */
1033 		COPY(REC_PR_X,  REC_PR_T);
1034 
1035 		/* Calc X */
1036 		MUL(mul[MUL_PR_X], REC_PR_X);
1037 		MUL(mul[MUL_PR_Y], REC_PR_Y);
1038 		XOR(REC_PR_Y,  REC_PR_X);
1039 		STORE(x, REC_PR_X);
1040 
1041 		/* Calc Y */
1042 		XOR(REC_PR_T,  REC_PR_X);
1043 		STORE(y, REC_PR_X);
1044 	}
1045 }
1046 
1047 
1048 /*
1049  * Reconstruct two data columns using PR parity
1050  *
1051  * @syn_method	raidz_syn_pr_abd()
1052  * @rec_method	raidz_rec_pr_abd()
1053  *
1054  * @rr		RAIDZ row
1055  * @tgtidx	array of missing data indexes
1056  */
1057 static raidz_inline int
1058 raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx)
1059 {
1060 	size_t c;
1061 	size_t dsize;
1062 	abd_t *dabd;
1063 	const size_t firstdc = rr->rr_firstdatacol;
1064 	const size_t ncols = rr->rr_cols;
1065 	const size_t x = tgtidx[0];
1066 	const size_t y = tgtidx[1];
1067 	const size_t xsize = rr->rr_col[x].rc_size;
1068 	const size_t ysize = rr->rr_col[y].rc_size;
1069 	abd_t *xabd = rr->rr_col[x].rc_abd;
1070 	abd_t *yabd = rr->rr_col[y].rc_abd;
1071 	abd_t *tabds[2] = { xabd, yabd };
1072 	abd_t *cabds[] = {
1073 		rr->rr_col[CODE_P].rc_abd,
1074 		rr->rr_col[CODE_R].rc_abd
1075 	};
1076 
1077 	if (xabd == NULL)
1078 		return ((1 << CODE_P) | (1 << CODE_R));
1079 
1080 	unsigned coeff[MUL_CNT];
1081 	raidz_rec_pr_coeff(rr, tgtidx, coeff);
1082 
1083 	/*
1084 	 * Check if some of targets are shorter then others.
1085 	 * They need to be replaced with a new buffer so that syndrome can
1086 	 * be calculated on full length.
1087 	 */
1088 	if (ysize < xsize) {
1089 		yabd = abd_alloc(xsize, B_FALSE);
1090 		tabds[1] = yabd;
1091 	}
1092 
1093 	raidz_math_begin();
1094 
1095 	/* Start with first data column if present */
1096 	if (firstdc != x) {
1097 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
1098 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
1099 	} else {
1100 		raidz_zero(xabd, xsize);
1101 		raidz_zero(yabd, xsize);
1102 	}
1103 
1104 	/* generate q_syndrome */
1105 	for (c = firstdc+1; c < ncols; c++) {
1106 		if (c == x || c == y) {
1107 			dabd = NULL;
1108 			dsize = 0;
1109 		} else {
1110 			dabd = rr->rr_col[c].rc_abd;
1111 			dsize = rr->rr_col[c].rc_size;
1112 		}
1113 
1114 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
1115 		    raidz_syn_pr_abd);
1116 	}
1117 
1118 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
1119 
1120 	/*
1121 	 * Copy shorter targets back to the original abd buffer
1122 	 */
1123 	if (ysize < xsize)
1124 		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
1125 
1126 	raidz_math_end();
1127 
1128 	if (ysize < xsize)
1129 		abd_free(yabd);
1130 
1131 	return ((1 << CODE_P) | (1 << CODE_R));
1132 }
1133 
1134 
1135 /*
1136  * Generate Q and R syndromes
1137  *
1138  * @xc		array of pointers to syndrome columns
1139  * @dc		data column (NULL if missing)
1140  * @tsize	size of syndrome columns
1141  * @dsize	size of data column (0 if missing)
1142  */
1143 static void
1144 raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
1145     const size_t dsize)
1146 {
1147 	v_t *x = (v_t *)c[TARGET_X];
1148 	v_t *y = (v_t *)c[TARGET_Y];
1149 	const v_t * const xend = x + (tsize / sizeof (v_t));
1150 	const v_t *d = (const v_t *)dc;
1151 	const v_t * const dend = d + (dsize / sizeof (v_t));
1152 
1153 	SYN_QR_DEFINE();
1154 
1155 	MUL2_SETUP();
1156 
1157 	for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
1158 		LOAD(d, SYN_PQ_D);
1159 		Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
1160 		R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
1161 	}
1162 	for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
1163 		Q_SYNDROME(SYN_QR_X, x);
1164 		R_SYNDROME(SYN_QR_X, y);
1165 	}
1166 }
1167 
1168 
1169 /*
1170  * Reconstruct data using QR parity and QR syndromes
1171  *
1172  * @tc		syndrome/result columns
1173  * @tsize	size of syndrome/result columns
1174  * @c		parity columns
1175  * @mul		array of multiplication constants
1176  */
1177 static void
1178 raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
1179     const unsigned *mul)
1180 {
1181 	v_t *x = (v_t *)t[TARGET_X];
1182 	v_t *y = (v_t *)t[TARGET_Y];
1183 	const v_t * const xend = x + (tsize / sizeof (v_t));
1184 	const v_t *p = (v_t *)c[CODE_P];
1185 	const v_t *q = (v_t *)c[CODE_Q];
1186 
1187 	REC_QR_DEFINE();
1188 
1189 	for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
1190 	    p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
1191 		LOAD(x, REC_QR_X);
1192 		LOAD(y, REC_QR_Y);
1193 
1194 		XOR_ACC(p, REC_QR_X);
1195 		XOR_ACC(q, REC_QR_Y);
1196 
1197 		/* Save Pxy */
1198 		COPY(REC_QR_X,  REC_QR_T);
1199 
1200 		/* Calc X */
1201 		MUL(mul[MUL_QR_XQ], REC_QR_X);	/* X = Q * xqm */
1202 		XOR(REC_QR_Y, REC_QR_X);	/* X = R ^ X   */
1203 		MUL(mul[MUL_QR_X], REC_QR_X);	/* X = X * xm  */
1204 		STORE(x, REC_QR_X);
1205 
1206 		/* Calc Y */
1207 		MUL(mul[MUL_QR_YQ], REC_QR_T);	/* X = Q * xqm */
1208 		XOR(REC_QR_Y, REC_QR_T);	/* X = R ^ X   */
1209 		MUL(mul[MUL_QR_Y], REC_QR_T);	/* X = X * xm  */
1210 		STORE(y, REC_QR_T);
1211 	}
1212 }
1213 
1214 
1215 /*
1216  * Reconstruct two data columns using QR parity
1217  *
1218  * @syn_method	raidz_syn_qr_abd()
1219  * @rec_method	raidz_rec_qr_abd()
1220  *
1221  * @rr		RAIDZ row
1222  * @tgtidx	array of missing data indexes
1223  */
1224 static raidz_inline int
1225 raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx)
1226 {
1227 	size_t c;
1228 	size_t dsize;
1229 	abd_t *dabd;
1230 	const size_t firstdc = rr->rr_firstdatacol;
1231 	const size_t ncols = rr->rr_cols;
1232 	const size_t x = tgtidx[TARGET_X];
1233 	const size_t y = tgtidx[TARGET_Y];
1234 	const size_t xsize = rr->rr_col[x].rc_size;
1235 	const size_t ysize = rr->rr_col[y].rc_size;
1236 	abd_t *xabd = rr->rr_col[x].rc_abd;
1237 	abd_t *yabd = rr->rr_col[y].rc_abd;
1238 	abd_t *tabds[2] = { xabd, yabd };
1239 	abd_t *cabds[] = {
1240 		rr->rr_col[CODE_Q].rc_abd,
1241 		rr->rr_col[CODE_R].rc_abd
1242 	};
1243 
1244 	if (xabd == NULL)
1245 		return ((1 << CODE_Q) | (1 << CODE_R));
1246 
1247 	unsigned coeff[MUL_CNT];
1248 	raidz_rec_qr_coeff(rr, tgtidx, coeff);
1249 
1250 	/*
1251 	 * Check if some of targets is shorter then others
1252 	 * In this case, shorter target needs to be replaced with
1253 	 * new buffer so that syndrome can be calculated.
1254 	 */
1255 	if (ysize < xsize) {
1256 		yabd = abd_alloc(xsize, B_FALSE);
1257 		tabds[1] = yabd;
1258 	}
1259 
1260 	raidz_math_begin();
1261 
1262 	/* Start with first data column if present */
1263 	if (firstdc != x) {
1264 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
1265 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
1266 	} else {
1267 		raidz_zero(xabd, xsize);
1268 		raidz_zero(yabd, xsize);
1269 	}
1270 
1271 	/* generate q_syndrome */
1272 	for (c = firstdc+1; c < ncols; c++) {
1273 		if (c == x || c == y) {
1274 			dabd = NULL;
1275 			dsize = 0;
1276 		} else {
1277 			dabd = rr->rr_col[c].rc_abd;
1278 			dsize = rr->rr_col[c].rc_size;
1279 		}
1280 
1281 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
1282 		    raidz_syn_qr_abd);
1283 	}
1284 
1285 	abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
1286 
1287 	/*
1288 	 * Copy shorter targets back to the original abd buffer
1289 	 */
1290 	if (ysize < xsize)
1291 		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
1292 
1293 	raidz_math_end();
1294 
1295 	if (ysize < xsize)
1296 		abd_free(yabd);
1297 
1298 
1299 	return ((1 << CODE_Q) | (1 << CODE_R));
1300 }
1301 
1302 
1303 /*
1304  * Generate P, Q, and R syndromes
1305  *
1306  * @xc		array of pointers to syndrome columns
1307  * @dc		data column (NULL if missing)
1308  * @tsize	size of syndrome columns
1309  * @dsize	size of data column (0 if missing)
1310  */
1311 static void
1312 raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
1313     const size_t dsize)
1314 {
1315 	v_t *x = (v_t *)c[TARGET_X];
1316 	v_t *y = (v_t *)c[TARGET_Y];
1317 	v_t *z = (v_t *)c[TARGET_Z];
1318 	const v_t * const yend = y + (tsize / sizeof (v_t));
1319 	const v_t *d = (const v_t *)dc;
1320 	const v_t * const dend = d + (dsize / sizeof (v_t));
1321 
1322 	SYN_PQR_DEFINE();
1323 
1324 	MUL2_SETUP();
1325 
1326 	for (; d < dend;  d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
1327 	    z += SYN_STRIDE) {
1328 		LOAD(d, SYN_PQR_D);
1329 		P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x)
1330 		Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
1331 		R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
1332 	}
1333 	for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
1334 		Q_SYNDROME(SYN_PQR_X, y);
1335 		R_SYNDROME(SYN_PQR_X, z);
1336 	}
1337 }
1338 
1339 
1340 /*
1341  * Reconstruct data using PRQ parity and PQR syndromes
1342  *
1343  * @tc		syndrome/result columns
1344  * @tsize	size of syndrome/result columns
1345  * @c		parity columns
1346  * @mul		array of multiplication constants
1347  */
1348 static void
1349 raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
1350     const unsigned * const mul)
1351 {
1352 	v_t *x = (v_t *)t[TARGET_X];
1353 	v_t *y = (v_t *)t[TARGET_Y];
1354 	v_t *z = (v_t *)t[TARGET_Z];
1355 	const v_t * const xend = x + (tsize / sizeof (v_t));
1356 	const v_t *p = (v_t *)c[CODE_P];
1357 	const v_t *q = (v_t *)c[CODE_Q];
1358 	const v_t *r = (v_t *)c[CODE_R];
1359 
1360 	REC_PQR_DEFINE();
1361 
1362 	for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
1363 	    z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
1364 	    r += REC_PQR_STRIDE) {
1365 		LOAD(x, REC_PQR_X);
1366 		LOAD(y, REC_PQR_Y);
1367 		LOAD(z, REC_PQR_Z);
1368 
1369 		XOR_ACC(p, REC_PQR_X);
1370 		XOR_ACC(q, REC_PQR_Y);
1371 		XOR_ACC(r, REC_PQR_Z);
1372 
1373 		/* Save Pxyz and Qxyz */
1374 		COPY(REC_PQR_X, REC_PQR_XS);
1375 		COPY(REC_PQR_Y, REC_PQR_YS);
1376 
1377 		/* Calc X */
1378 		MUL(mul[MUL_PQR_XP], REC_PQR_X);	/* Xp = Pxyz * xp   */
1379 		MUL(mul[MUL_PQR_XQ], REC_PQR_Y);	/* Xq = Qxyz * xq   */
1380 		XOR(REC_PQR_Y, REC_PQR_X);
1381 		MUL(mul[MUL_PQR_XR], REC_PQR_Z);	/* Xr = Rxyz * xr   */
1382 		XOR(REC_PQR_Z, REC_PQR_X);		/* X = Xp + Xq + Xr */
1383 		STORE(x, REC_PQR_X);
1384 
1385 		/* Calc Y */
1386 		XOR(REC_PQR_X, REC_PQR_XS); 		/* Pyz = Pxyz + X */
1387 		MUL(mul[MUL_PQR_YU], REC_PQR_X);  	/* Xq = X * upd_q */
1388 		XOR(REC_PQR_X, REC_PQR_YS); 		/* Qyz = Qxyz + Xq */
1389 		COPY(REC_PQR_XS, REC_PQR_X);		/* restore Pyz */
1390 		MUL(mul[MUL_PQR_YP], REC_PQR_X);	/* Yp = Pyz * yp */
1391 		MUL(mul[MUL_PQR_YQ], REC_PQR_YS);	/* Yq = Qyz * yq */
1392 		XOR(REC_PQR_X, REC_PQR_YS); 		/* Y = Yp + Yq */
1393 		STORE(y, REC_PQR_YS);
1394 
1395 		/* Calc Z */
1396 		XOR(REC_PQR_XS, REC_PQR_YS);		/* Z = Pz = Pyz + Y */
1397 		STORE(z, REC_PQR_YS);
1398 	}
1399 }
1400 
1401 
1402 /*
1403  * Reconstruct three data columns using PQR parity
1404  *
1405  * @syn_method	raidz_syn_pqr_abd()
1406  * @rec_method	raidz_rec_pqr_abd()
1407  *
1408  * @rr		RAIDZ row
1409  * @tgtidx	array of missing data indexes
1410  */
1411 static raidz_inline int
1412 raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx)
1413 {
1414 	size_t c;
1415 	size_t dsize;
1416 	abd_t *dabd;
1417 	const size_t firstdc = rr->rr_firstdatacol;
1418 	const size_t ncols = rr->rr_cols;
1419 	const size_t x = tgtidx[TARGET_X];
1420 	const size_t y = tgtidx[TARGET_Y];
1421 	const size_t z = tgtidx[TARGET_Z];
1422 	const size_t xsize = rr->rr_col[x].rc_size;
1423 	const size_t ysize = rr->rr_col[y].rc_size;
1424 	const size_t zsize = rr->rr_col[z].rc_size;
1425 	abd_t *xabd = rr->rr_col[x].rc_abd;
1426 	abd_t *yabd = rr->rr_col[y].rc_abd;
1427 	abd_t *zabd = rr->rr_col[z].rc_abd;
1428 	abd_t *tabds[] = { xabd, yabd, zabd };
1429 	abd_t *cabds[] = {
1430 		rr->rr_col[CODE_P].rc_abd,
1431 		rr->rr_col[CODE_Q].rc_abd,
1432 		rr->rr_col[CODE_R].rc_abd
1433 	};
1434 
1435 	if (xabd == NULL)
1436 		return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1437 
1438 	unsigned coeff[MUL_CNT];
1439 	raidz_rec_pqr_coeff(rr, tgtidx, coeff);
1440 
1441 	/*
1442 	 * Check if some of targets is shorter then others
1443 	 * In this case, shorter target needs to be replaced with
1444 	 * new buffer so that syndrome can be calculated.
1445 	 */
1446 	if (ysize < xsize) {
1447 		yabd = abd_alloc(xsize, B_FALSE);
1448 		tabds[1] = yabd;
1449 	}
1450 	if (zsize < xsize) {
1451 		zabd = abd_alloc(xsize, B_FALSE);
1452 		tabds[2] = zabd;
1453 	}
1454 
1455 	raidz_math_begin();
1456 
1457 	/* Start with first data column if present */
1458 	if (firstdc != x) {
1459 		raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize);
1460 		raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize);
1461 		raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, xsize);
1462 	} else {
1463 		raidz_zero(xabd, xsize);
1464 		raidz_zero(yabd, xsize);
1465 		raidz_zero(zabd, xsize);
1466 	}
1467 
1468 	/* generate q_syndrome */
1469 	for (c = firstdc+1; c < ncols; c++) {
1470 		if (c == x || c == y || c == z) {
1471 			dabd = NULL;
1472 			dsize = 0;
1473 		} else {
1474 			dabd = rr->rr_col[c].rc_abd;
1475 			dsize = rr->rr_col[c].rc_size;
1476 		}
1477 
1478 		abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
1479 		    raidz_syn_pqr_abd);
1480 	}
1481 
1482 	abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
1483 
1484 	/*
1485 	 * Copy shorter targets back to the original abd buffer
1486 	 */
1487 	if (ysize < xsize)
1488 		raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize);
1489 	if (zsize < xsize)
1490 		raidz_copy(rr->rr_col[z].rc_abd, zabd, zsize);
1491 
1492 	raidz_math_end();
1493 
1494 	if (ysize < xsize)
1495 		abd_free(yabd);
1496 	if (zsize < xsize)
1497 		abd_free(zabd);
1498 
1499 	return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
1500 }
1501 
1502 #endif /* _VDEV_RAIDZ_MATH_IMPL_H */
1503