// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "x86.h"
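
/*
 * Scalar sketch of the two-disk recovery implemented below, modeled on
 * the generic lib/raid6/recov.c.  With P'/Q' the syndromes recomputed
 * over the surviving disks (failed blocks zeroed), px = P ^ P' and
 * qx = (Q ^ Q') * (g^a + g^b)^-1, each reconstructed byte is:
 *
 *	db = pbmul[px] ^ qx;	D_b, where pbmul[] multiplies by
 *				g^a * (g^a + g^b)^-1 = (g^(b-a) + 1)^-1
 *	da = db ^ px;		D_a, since D_a ^ D_b = px
 *
 * The assembly below evaluates exactly this, 64 bytes per zmm register,
 * using vpshufb nibble lookups for the GF(2^8) constant multiplications.
 */

/*
 * Beyond AVX512F, the byte-granular zmm instructions used here
 * (vpbroadcastb, vpshufb, vpsraw) require AVX512BW and vbroadcasti64x2
 * requires AVX512DQ; the remaining bits mirror the feature checks of
 * the matching AVX-512 syndrome routines.
 */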
static int raid6_has_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);
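	/* dp and dq now hold the syndromes P' and Q' computed with the failed blocks zeroed */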

	/* Restore pointer table */
	ptrs[faila]   = dp;
	ptrs[failb]   = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];
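
	/*
	 * raid6_vgfmul[c] packs two 16-byte vpshufb tables for the GF(2^8)
	 * constant c: bytes 0..15 give the product for a byte's low nibble,
	 * bytes 16..31 for its high nibble, so one constant multiply costs
	 * two shuffles plus an XOR.
	 */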

	kernel_fpu_begin();

	/* zmm7 = x0f repeated in all 64 bytes, for nibble masking */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
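
	/*
	 * On x86-64 each loop iteration reconstructs 128 bytes as two
	 * interleaved 64-byte zmm streams (hence the "avx512x2" name);
	 * 32-bit builds can only address zmm0-zmm7, so they process one
	 * 64-byte stream per iteration ("avx512x1").
	 */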

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0] ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0] ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));
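
		/*
		 * zmm4/zmm5 = the low/high-nibble qmul tables copied into
		 * every 128-bit lane, since vpshufb only shuffles within
		 * a lane
		 */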

		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1 = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13,%1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));
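
		/*
		 * 0 = da = DQ ^ px[0]
		 * 8 = da[64] = DQ[64] ^ px[64]
		 */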

		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m" (*dq), "m" (*dp));

		/* 1 = dq ^ q; 0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = (dq ^ q) >> 4
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);
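	/* gen_syndrome() rewrote p in place with P' and filled dq with Q' */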

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
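
	/*
	 * qmul[] multiplies by g^(-faila), so below D_a = qmul[Q ^ Q'],
	 * and the true P is then P' ^ D_a (cf. the scalar recovery in
	 * lib/raid6/recov.c).
	 */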

	kernel_fpu_begin();
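
	/* zmm7 = x0f repeated in all 64 bytes, for nibble masking */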
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));
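
		/*
		 * zmm13/zmm14 duplicate the two qmul tables: each 64-byte
		 * stream needs its own copy because the vpshufb lookups
		 * below overwrite their table register.
		 */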

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1 = qmul[q[0] ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12,%3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}
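
/*
 * Boot-time selection picks the valid recovery implementation with the
 * highest priority, so 3 here outranks the AVX2 and SSSE3 variants.
 */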
const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",
#else
	.name = "avx512x1",
#endif
	.priority = 3,
};