xref: /linux/lib/raid/raid6/x86/recov_avx2.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2012 Intel Corporation
4  * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
5  */
6 
7 #include <linux/mm.h>
8 #include <linux/raid/pq.h>
9 #include <asm/fpu/api.h>
10 #include "algos.h"
11 
/*
 * Recover two failed data blocks (indices faila and failb in ptrs) using
 * AVX2 nibble-table lookups.
 *
 * disks: number of entries in ptrs; ptrs[disks-2] is used as P and
 *        ptrs[disks-1] as Q.
 * bytes: length of each buffer.  The loop below consumes 64 bytes per
 *        iteration on x86-64 (32 otherwise) and only stops when the count
 *        reaches exactly zero, so bytes must be a multiple of that step.
 * faila, failb: indices of the failed data blocks.  failb - faila is used
 *        as a table index, so presumably faila < failb (confirm against
 *        the callers).
 * ptrs:  buffer pointer table; four entries are swapped temporarily and
 *        restored before the vector loop runs.
 *
 * The results are written into the buffers at ptrs[faila] and ptrs[failb];
 * the vector loop only reads P and Q.  Loads and stores use vmovdqa (the
 * aligned form), so the buffers are presumably 32-byte aligned.
 */
12 static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
13 		int failb, void **ptrs)
14 {
15 	u8 *p, *q, *dp, *dq;
16 	const u8 *pbmul;	/* P multiplier table for B data */
17 	const u8 *qmul;		/* Q multiplier table (for both) */
18 	const u8 x0f = 0x0f;	/* low-nibble mask, broadcast into ymm7 below */
19 
20 	p = (u8 *)ptrs[disks-2];
21 	q = (u8 *)ptrs[disks-1];
22 
23 	/* Compute syndrome with zero for the missing data pages
24 	   Use the dead data pages as temporary storage for
25 	   delta p and delta q */
26 	dp = (u8 *)ptrs[faila];
27 	ptrs[faila] = page_address(ZERO_PAGE(0));
28 	ptrs[disks-2] = dp;
29 	dq = (u8 *)ptrs[failb];
30 	ptrs[failb] = page_address(ZERO_PAGE(0));
31 	ptrs[disks-1] = dq;
32 
33 	raid6_gen_syndrome(disks, bytes, ptrs);
34 
35 	/* Restore pointer table */
36 	ptrs[faila]   = dp;
37 	ptrs[failb]   = dq;
38 	ptrs[disks-2] = p;
39 	ptrs[disks-1] = q;
40 
41 	/* Now, pick the proper data tables */
	/*
	 * Each table is read as two 16-byte halves in the loop: bytes 0..15
	 * are indexed by the low nibble of a data byte and bytes 16..31 by
	 * its high nibble; the two lookups are XORed together.
	 */
42 	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
43 	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
44 		raid6_gfexp[failb]]];
45 
46 	kernel_fpu_begin();
47 
48 	/* ymm7 = x0f[32]: 0x0f in every byte, used as the nibble mask */
49 	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
50 
51 	while (bytes) {
52 #ifdef CONFIG_X86_64
		/* Two 32-byte vectors per iteration, using ymm0-ymm15. */
53 		asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
54 		asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
55 		asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
56 		asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
57 		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
58 		asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
59 		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
60 		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
61 
62 		/*
63 		 * 1 = dq[0]  ^ q[0]
64 		 * 9 = dq[32] ^ q[32]
65 		 * 0 = dp[0]  ^ p[0]
66 		 * 8 = dp[32] ^ p[32]
67 		 */
68 
69 		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
70 		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
71 
		/*
		 * vpsraw shifts 16-bit words, so bits cross byte boundaries;
		 * the vpand with ymm7 afterwards drops them, leaving each
		 * byte's high nibble in 3/12 and its low nibble in 1/9.
		 * vpshufb then uses those nibbles as indices into the
		 * 16-byte tables held in 4 (low half) and 5 (high half).
		 */
72 		asm volatile("vpsraw $4, %ymm1, %ymm3");
73 		asm volatile("vpsraw $4, %ymm9, %ymm12");
74 		asm volatile("vpand %ymm7, %ymm1, %ymm1");
75 		asm volatile("vpand %ymm7, %ymm9, %ymm9");
76 		asm volatile("vpand %ymm7, %ymm3, %ymm3");
77 		asm volatile("vpand %ymm7, %ymm12, %ymm12");
78 		asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
79 		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
80 		asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
81 		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
82 		asm volatile("vpxor %ymm14, %ymm15, %ymm15");
83 		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
84 
85 		/*
86 		 * 5 = qx[0]
87 		 * 15 = qx[32]
88 		 */
89 
		/*
		 * Same nibble lookup with the pbmul table on 0/8 (dp ^ p).
		 * The masked low nibbles go to 3/14 so that 0 and 8 stay
		 * intact for the final XOR further down.
		 */
90 		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
91 		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
92 		asm volatile("vpsraw $4, %ymm0, %ymm2");
93 		asm volatile("vpsraw $4, %ymm8, %ymm6");
94 		asm volatile("vpand %ymm7, %ymm0, %ymm3");
95 		asm volatile("vpand %ymm7, %ymm8, %ymm14");
96 		asm volatile("vpand %ymm7, %ymm2, %ymm2");
97 		asm volatile("vpand %ymm7, %ymm6, %ymm6");
98 		asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
99 		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
100 		asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
101 		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
102 		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
103 		asm volatile("vpxor %ymm12, %ymm13, %ymm13");
104 
105 		/*
106 		 * 1  = pbmul[px[0]]
107 		 * 13 = pbmul[px[32]]
108 		 */
109 		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
110 		asm volatile("vpxor %ymm15, %ymm13, %ymm13");
111 
112 		/*
113 		 * 1  = db[0]  = DQ[0]
114 		 * 13 = db[32] = DQ[32]
115 		 */
116 		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
117 		asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
		/* 0/8 still hold dp ^ p; XOR with db gives the dp result */
118 		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
119 		asm volatile("vpxor %ymm13, %ymm8, %ymm8");
120 
121 		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
122 		asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
123 
124 		bytes -= 64;
125 		p += 64;
126 		q += 64;
127 		dp += 64;
128 		dq += 64;
129 #else
		/* One 32-byte vector per iteration, using only ymm0-ymm7. */
130 		asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
131 		asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
132 		asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
133 		asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
134 
135 		/* 1 = dq ^ q;  0 = dp ^ p */
136 
137 		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
138 		asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
139 
140 		/*
141 		 * 1 = dq ^ q
142 		 * 3 = (dq ^ q) >> 4
143 		 */
144 		asm volatile("vpsraw $4, %ymm1, %ymm3");
145 		asm volatile("vpand %ymm7, %ymm1, %ymm1");
146 		asm volatile("vpand %ymm7, %ymm3, %ymm3");
147 		asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
148 		asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
149 		asm volatile("vpxor %ymm4, %ymm5, %ymm5");
150 
151 		/* 5 = qx */
152 
153 		asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
154 		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
155 
156 		asm volatile("vpsraw $4, %ymm0, %ymm2");
157 		asm volatile("vpand %ymm7, %ymm0, %ymm3");
158 		asm volatile("vpand %ymm7, %ymm2, %ymm2");
159 		asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
160 		asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
161 		asm volatile("vpxor %ymm4, %ymm1, %ymm1");
162 
163 		/* 1 = pbmul[px] */
164 		asm volatile("vpxor %ymm5, %ymm1, %ymm1");
165 		/* 1 = db = DQ */
166 		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
167 
		/* 0 still holds dp ^ p; XOR with db gives the dp result */
168 		asm volatile("vpxor %ymm1, %ymm0, %ymm0");
169 		asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
170 
171 		bytes -= 32;
172 		p += 32;
173 		q += 32;
174 		dp += 32;
175 		dq += 32;
176 #endif
177 	}
178 
179 	kernel_fpu_end();
180 }
181 
/*
 * Recover one failed data block (index faila in ptrs) together with P,
 * using AVX2 nibble-table lookups.
 *
 * disks: number of entries in ptrs; ptrs[disks-2] is used as P and
 *        ptrs[disks-1] as Q.
 * bytes: length of each buffer.  The loop below consumes 64 bytes per
 *        iteration on x86-64 (32 otherwise) and only stops when the count
 *        reaches exactly zero, so bytes must be a multiple of that step.
 * faila: index of the failed data block.
 * ptrs:  buffer pointer table; two entries are swapped temporarily and
 *        restored before the vector loop runs.
 *
 * The vector loop overwrites both the buffer at ptrs[faila] and the P
 * buffer; Q is only read.  Loads and stores use vmovdqa (the aligned
 * form), so the buffers are presumably 32-byte aligned.
 */
182 static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
183 		void **ptrs)
184 {
185 	u8 *p, *q, *dq;
186 	const u8 *qmul;		/* Q multiplier table */
187 	const u8 x0f = 0x0f;	/* low-nibble mask, broadcast into ymm7 below */
188 
189 	p = (u8 *)ptrs[disks-2];
190 	q = (u8 *)ptrs[disks-1];
191 
192 	/* Compute syndrome with zero for the missing data page
193 	   Use the dead data page as temporary storage for delta q */
	/*
	 * Only the Q slot is redirected; ptrs[disks-2] still points at the
	 * real P buffer while raid6_gen_syndrome() runs.
	 */
194 	dq = (u8 *)ptrs[faila];
195 	ptrs[faila] = page_address(ZERO_PAGE(0));
196 	ptrs[disks-1] = dq;
197 
198 	raid6_gen_syndrome(disks, bytes, ptrs);
199 
200 	/* Restore pointer table */
201 	ptrs[faila]   = dq;
202 	ptrs[disks-1] = q;
203 
204 	/* Now, pick the proper data tables */
205 	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
206 
207 	kernel_fpu_begin();
208 
	/* ymm7 = 0x0f in every byte, used as the nibble mask */
209 	asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
210 
211 	while (bytes) {
212 #ifdef CONFIG_X86_64
		/* Two 32-byte vectors per iteration. */
213 		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
214 		asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
215 		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
216 		asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
217 
218 		/*
219 		 * 3 = q[0] ^ dq[0]
220 		 * 8 = q[32] ^ dq[32]
221 		 */
		/*
		 * 0/13 = low-nibble half of the table (qmul[0..15]),
		 * 1/14 = high-nibble half (qmul[16..31]); one copy of each
		 * per vector because vpshufb below overwrites its table.
		 */
222 		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
223 		asm volatile("vmovapd %ymm0, %ymm13");
224 		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
225 		asm volatile("vmovapd %ymm1, %ymm14");
226 
		/*
		 * vpsraw shifts 16-bit words, so bits cross byte boundaries;
		 * the vpand with ymm7 afterwards drops them, leaving each
		 * byte's high nibble in 6/12 and its low nibble in 3/8.
		 */
227 		asm volatile("vpsraw $4, %ymm3, %ymm6");
228 		asm volatile("vpsraw $4, %ymm8, %ymm12");
229 		asm volatile("vpand %ymm7, %ymm3, %ymm3");
230 		asm volatile("vpand %ymm7, %ymm8, %ymm8");
231 		asm volatile("vpand %ymm7, %ymm6, %ymm6");
232 		asm volatile("vpand %ymm7, %ymm12, %ymm12");
233 		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
234 		asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
235 		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
236 		asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
237 		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
238 		asm volatile("vpxor %ymm13, %ymm14, %ymm14");
239 
240 		/*
241 		 * 1  = qmul[q[0]  ^ dq[0]]
242 		 * 14 = qmul[q[32] ^ dq[32]]
243 		 */
244 		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
245 		asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
246 		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
247 		asm volatile("vpxor %ymm14, %ymm12, %ymm12");
248 
249 		/*
250 		 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
251 		 * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
252 		 */
253 
		/* Store the table result to dq and the updated value to p */
254 		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
255 		asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
256 		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
257 		asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
258 
259 		bytes -= 64;
260 		p += 64;
261 		q += 64;
262 		dq += 64;
263 #else
		/* One 32-byte vector per iteration, using only ymm0-ymm7. */
264 		asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
265 		asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
266 
267 		/* 3 = q ^ dq */
268 
269 		asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
270 		asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
271 
		/* 6 = high nibbles, 3 = low nibbles (after masking) */
272 		asm volatile("vpsraw $4, %ymm3, %ymm6");
273 		asm volatile("vpand %ymm7, %ymm3, %ymm3");
274 		asm volatile("vpand %ymm7, %ymm6, %ymm6");
275 		asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
276 		asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
277 		asm volatile("vpxor %ymm0, %ymm1, %ymm1");
278 
279 		/* 1 = qmul[q ^ dq] */
280 
281 		asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
282 		asm volatile("vpxor %ymm1, %ymm2, %ymm2");
283 
284 		/* 2 = p ^ qmul[q ^ dq] */
285 
286 		asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
287 		asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
288 
289 		bytes -= 32;
290 		p += 32;
291 		q += 32;
292 		dq += 32;
293 #endif
294 	}
295 
296 	kernel_fpu_end();
297 }
298 
/*
 * Recovery method table for the AVX2 routines in this file: .data2 handles
 * two failed data blocks, .datap one failed data block plus P.
 * The name follows the loop width used above: on x86-64 each iteration
 * handles two 32-byte vectors ("avx2x2"), otherwise one ("avx2x1").
 */
299 const struct raid6_recov_calls raid6_recov_avx2 = {
300 	.data2 = raid6_2data_recov_avx2,
301 	.datap = raid6_datap_recov_avx2,
302 #ifdef CONFIG_X86_64
303 	.name = "avx2x2",
304 #else
305 	.name = "avx2x1",
306 #endif
307 };
308