// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAID-6 syndrome calculation using RISC-V vector instructions
 *
 * Copyright 2024 Institute of Software, CAS.
 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
 *
 * Based on neon.uc:
 *	Copyright 2002-2004 H. Peter Anvin
 */

#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/simd.h>
#include <linux/raid/pq.h>
#include <linux/types.h>
#include "rvv.h"

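/*
 * riscv_v_vsize is the size of the whole vector register file
 * (VLENB * 32 registers), so NSIZE works out to VLENB: one vector
 * register's worth of bytes, which is the stride each unrolled lane
 * below advances per register.
 */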
#define NSIZE	(riscv_v_vsize / 32) /* NSIZE = vlenb */

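/* Runtime probe: these routines are only usable when the CPU implements V. */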
static int rvv_has_vector(void)
{
	return has_vector();
}

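/*
 * Generate P/Q over all data disks: P is the plain XOR of the data
 * blocks, Q is the Reed-Solomon syndrome, built by repeatedly
 * multiplying the running sum by 2 in GF(2^8) and XOR-ing in the next
 * disk's data.  Each pass of the d-loop covers NSIZE bytes per
 * unrolled lane.
 */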
static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli t0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
	);
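	/*
	 * vsetvli with rs1=x0 and rd!=x0 requests VLMAX elements, so with
	 * SEW=8 and LMUL=1 vl becomes VLENB and every vle8.v/vse8.v below
	 * moves exactly NSIZE bytes.
	 */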

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vle8.v v1, (%[wp0])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
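			/*
			 * Multiply wq by 2 in GF(2^8): vsra.vi by 7 smears each
			 * byte's top bit into a 0x00/0xff mask (MASK), vand.vx
			 * keeps 0x1d where that bit was set, vsll.vi shifts each
			 * byte left by one (SHLBYTE), and the XOR folds the
			 * reduction back in before the next data block is added.
			 */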
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vse8.v v0, (%[wp0])\n"
			      "vse8.v v1, (%[wq0])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

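/*
 * Update (XOR into) an existing P/Q pair for the data disks in
 * [start, stop].  Disks above 'stop' are unchanged and contribute
 * nothing to the delta, so they are skipped entirely ("right side").
 * Disks below 'start' are also unchanged, but each one still scales
 * the running Q by 2 in GF(2^8), hence the multiply-only loop with no
 * data load ("left side").
 */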
static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli t0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vle8.v v1, (%[wp0])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v1, v3, v2\n"
				      ".option pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v2, (%[wp0])\n"
			      "vle8.v v3, (%[wq0])\n"
			      "vxor.vv v2, v2, v0\n"
			      "vxor.vv v3, v3, v1\n"
			      "vse8.v v2, (%[wp0])\n"
			      "vse8.v v3, (%[wq0])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

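/*
 * 2-way unrolled variants: the same algorithm as above, but two
 * independent NSIZE lanes (v0-v3 and v4-v7) are kept in flight per
 * iteration.
 */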
static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli t0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vle8.v v1, (%[wp0])\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vle8.v v5, (%[wp1])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vse8.v v0, (%[wp0])\n"
			      "vse8.v v1, (%[wq0])\n"
			      "vse8.v v4, (%[wp1])\n"
			      "vse8.v v5, (%[wq1])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli t0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vle8.v v1, (%[wp0])\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vle8.v v5, (%[wp1])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v1, v3, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v5, v7, v6\n"
				      ".option pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v2, (%[wp0])\n"
			      "vle8.v v3, (%[wq0])\n"
			      "vxor.vv v2, v2, v0\n"
			      "vxor.vv v3, v3, v1\n"
			      "vse8.v v2, (%[wp0])\n"
			      "vse8.v v3, (%[wq0])\n"

			      "vle8.v v6, (%[wp1])\n"
			      "vle8.v v7, (%[wq1])\n"
			      "vxor.vv v6, v6, v4\n"
			      "vxor.vv v7, v7, v5\n"
			      "vse8.v v6, (%[wp1])\n"
			      "vse8.v v7, (%[wq1])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

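/*
 * 4-way unrolled variants: four independent lanes (v0-v15), four NSIZE
 * chunks of each block per iteration.
 */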
static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli t0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vle8.v v1, (%[wp0])\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vle8.v v5, (%[wp1])\n"
			      "vle8.v v8, (%[wp2])\n"
			      "vle8.v v9, (%[wp2])\n"
			      "vle8.v v12, (%[wp3])\n"
			      "vle8.v v13, (%[wp3])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			      [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			      [wp3]"r"(&dptr[z0][d + 3 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v11, v11, v10\n"
				      "vle8.v v10, (%[wd2])\n"
				      "vxor.vv v9, v11, v10\n"
				      "vxor.vv v8, v8, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v15, v15, v14\n"
				      "vle8.v v14, (%[wd3])\n"
				      "vxor.vv v13, v15, v14\n"
				      "vxor.vv v12, v12, v14\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				      [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vse8.v v0, (%[wp0])\n"
			      "vse8.v v1, (%[wq0])\n"
			      "vse8.v v4, (%[wp1])\n"
			      "vse8.v v5, (%[wq1])\n"
			      "vse8.v v8, (%[wp2])\n"
			      "vse8.v v9, (%[wq2])\n"
			      "vse8.v v12, (%[wp3])\n"
			      "vse8.v v13, (%[wq3])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1]),
			      [wp2]"r"(&p[d + NSIZE * 2]),
			      [wq2]"r"(&q[d + NSIZE * 2]),
			      [wp3]"r"(&p[d + NSIZE * 3]),
			      [wq3]"r"(&q[d + NSIZE * 3])
		);
	}
}

static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli t0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vle8.v v1, (%[wp0])\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vle8.v v5, (%[wp1])\n"
			      "vle8.v v8, (%[wp2])\n"
			      "vle8.v v9, (%[wp2])\n"
			      "vle8.v v12, (%[wp3])\n"
			      "vle8.v v13, (%[wp3])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			      [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			      [wp3]"r"(&dptr[z0][d + 3 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v11, v11, v10\n"
				      "vle8.v v10, (%[wd2])\n"
				      "vxor.vv v9, v11, v10\n"
				      "vxor.vv v8, v8, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v15, v15, v14\n"
				      "vle8.v v14, (%[wd3])\n"
				      "vxor.vv v13, v15, v14\n"
				      "vxor.vv v12, v12, v14\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				      [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v1, v3, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v5, v7, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v9, v11, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v13, v15, v14\n"
				      ".option pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v2, (%[wp0])\n"
			      "vle8.v v3, (%[wq0])\n"
			      "vxor.vv v2, v2, v0\n"
			      "vxor.vv v3, v3, v1\n"
			      "vse8.v v2, (%[wp0])\n"
			      "vse8.v v3, (%[wq0])\n"

			      "vle8.v v6, (%[wp1])\n"
			      "vle8.v v7, (%[wq1])\n"
			      "vxor.vv v6, v6, v4\n"
			      "vxor.vv v7, v7, v5\n"
			      "vse8.v v6, (%[wp1])\n"
			      "vse8.v v7, (%[wq1])\n"

			      "vle8.v v10, (%[wp2])\n"
			      "vle8.v v11, (%[wq2])\n"
			      "vxor.vv v10, v10, v8\n"
			      "vxor.vv v11, v11, v9\n"
			      "vse8.v v10, (%[wp2])\n"
			      "vse8.v v11, (%[wq2])\n"

			      "vle8.v v14, (%[wp3])\n"
			      "vle8.v v15, (%[wq3])\n"
			      "vxor.vv v14, v14, v12\n"
			      "vxor.vv v15, v15, v13\n"
			      "vse8.v v14, (%[wp3])\n"
			      "vse8.v v15, (%[wq3])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1]),
			      [wp2]"r"(&p[d + NSIZE * 2]),
			      [wq2]"r"(&q[d + NSIZE * 2]),
			      [wp3]"r"(&p[d + NSIZE * 3]),
			      [wq3]"r"(&q[d + NSIZE * 3])
		);
	}
}

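/*
 * 8-way unrolled variants: eight lanes per iteration, which uses the
 * entire vector register file (v0-v31).
 */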
static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	unsigned long d;
	int z, z0;
	u8 *p, *q;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli t0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vle8.v v1, (%[wp0])\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vle8.v v5, (%[wp1])\n"
			      "vle8.v v8, (%[wp2])\n"
			      "vle8.v v9, (%[wp2])\n"
			      "vle8.v v12, (%[wp3])\n"
			      "vle8.v v13, (%[wp3])\n"
			      "vle8.v v16, (%[wp4])\n"
			      "vle8.v v17, (%[wp4])\n"
			      "vle8.v v20, (%[wp5])\n"
			      "vle8.v v21, (%[wp5])\n"
			      "vle8.v v24, (%[wp6])\n"
			      "vle8.v v25, (%[wp6])\n"
			      "vle8.v v28, (%[wp7])\n"
			      "vle8.v v29, (%[wp7])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			      [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			      [wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
			      [wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
			      [wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
			      [wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
			      [wp7]"r"(&dptr[z0][d + 7 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v11, v11, v10\n"
				      "vle8.v v10, (%[wd2])\n"
				      "vxor.vv v9, v11, v10\n"
				      "vxor.vv v8, v8, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v15, v15, v14\n"
				      "vle8.v v14, (%[wd3])\n"
				      "vxor.vv v13, v15, v14\n"
				      "vxor.vv v12, v12, v14\n"

				      "vsra.vi v18, v17, 7\n"
				      "vsll.vi v19, v17, 1\n"
				      "vand.vx v18, v18, %[x1d]\n"
				      "vxor.vv v19, v19, v18\n"
				      "vle8.v v18, (%[wd4])\n"
				      "vxor.vv v17, v19, v18\n"
				      "vxor.vv v16, v16, v18\n"

				      "vsra.vi v22, v21, 7\n"
				      "vsll.vi v23, v21, 1\n"
				      "vand.vx v22, v22, %[x1d]\n"
				      "vxor.vv v23, v23, v22\n"
				      "vle8.v v22, (%[wd5])\n"
				      "vxor.vv v21, v23, v22\n"
				      "vxor.vv v20, v20, v22\n"

				      "vsra.vi v26, v25, 7\n"
				      "vsll.vi v27, v25, 1\n"
				      "vand.vx v26, v26, %[x1d]\n"
				      "vxor.vv v27, v27, v26\n"
				      "vle8.v v26, (%[wd6])\n"
				      "vxor.vv v25, v27, v26\n"
				      "vxor.vv v24, v24, v26\n"

				      "vsra.vi v30, v29, 7\n"
				      "vsll.vi v31, v29, 1\n"
				      "vand.vx v30, v30, %[x1d]\n"
				      "vxor.vv v31, v31, v30\n"
				      "vle8.v v30, (%[wd7])\n"
				      "vxor.vv v29, v31, v30\n"
				      "vxor.vv v28, v28, v30\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				      [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				      [wd4]"r"(&dptr[z][d + 4 * NSIZE]),
				      [wd5]"r"(&dptr[z][d + 5 * NSIZE]),
				      [wd6]"r"(&dptr[z][d + 6 * NSIZE]),
				      [wd7]"r"(&dptr[z][d + 7 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vse8.v v0, (%[wp0])\n"
			      "vse8.v v1, (%[wq0])\n"
			      "vse8.v v4, (%[wp1])\n"
			      "vse8.v v5, (%[wq1])\n"
			      "vse8.v v8, (%[wp2])\n"
			      "vse8.v v9, (%[wq2])\n"
			      "vse8.v v12, (%[wp3])\n"
			      "vse8.v v13, (%[wq3])\n"
			      "vse8.v v16, (%[wp4])\n"
			      "vse8.v v17, (%[wq4])\n"
			      "vse8.v v20, (%[wp5])\n"
			      "vse8.v v21, (%[wq5])\n"
			      "vse8.v v24, (%[wp6])\n"
			      "vse8.v v25, (%[wq6])\n"
			      "vse8.v v28, (%[wp7])\n"
			      "vse8.v v29, (%[wq7])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1]),
			      [wp2]"r"(&p[d + NSIZE * 2]),
			      [wq2]"r"(&q[d + NSIZE * 2]),
			      [wp3]"r"(&p[d + NSIZE * 3]),
			      [wq3]"r"(&q[d + NSIZE * 3]),
			      [wp4]"r"(&p[d + NSIZE * 4]),
			      [wq4]"r"(&q[d + NSIZE * 4]),
			      [wp5]"r"(&p[d + NSIZE * 5]),
			      [wq5]"r"(&q[d + NSIZE * 5]),
			      [wp6]"r"(&p[d + NSIZE * 6]),
			      [wq6]"r"(&q[d + NSIZE * 6]),
			      [wp7]"r"(&p[d + NSIZE * 7]),
			      [wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		      ".option arch,+v\n"
		      "vsetvli t0, x0, e8, m1, ta, ma\n"
		      ".option pop\n"
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v0, (%[wp0])\n"
			      "vle8.v v1, (%[wp0])\n"
			      "vle8.v v4, (%[wp1])\n"
			      "vle8.v v5, (%[wp1])\n"
			      "vle8.v v8, (%[wp2])\n"
			      "vle8.v v9, (%[wp2])\n"
			      "vle8.v v12, (%[wp3])\n"
			      "vle8.v v13, (%[wp3])\n"
			      "vle8.v v16, (%[wp4])\n"
			      "vle8.v v17, (%[wp4])\n"
			      "vle8.v v20, (%[wp5])\n"
			      "vle8.v v21, (%[wp5])\n"
			      "vle8.v v24, (%[wp6])\n"
			      "vle8.v v25, (%[wp6])\n"
			      "vle8.v v28, (%[wp7])\n"
			      "vle8.v v29, (%[wp7])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			      [wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			      [wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			      [wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
			      [wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
			      [wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
			      [wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
			      [wp7]"r"(&dptr[z0][d + 7 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v3, v3, v2\n"
				      "vle8.v v2, (%[wd0])\n"
				      "vxor.vv v1, v3, v2\n"
				      "vxor.vv v0, v0, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v7, v7, v6\n"
				      "vle8.v v6, (%[wd1])\n"
				      "vxor.vv v5, v7, v6\n"
				      "vxor.vv v4, v4, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v11, v11, v10\n"
				      "vle8.v v10, (%[wd2])\n"
				      "vxor.vv v9, v11, v10\n"
				      "vxor.vv v8, v8, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v15, v15, v14\n"
				      "vle8.v v14, (%[wd3])\n"
				      "vxor.vv v13, v15, v14\n"
				      "vxor.vv v12, v12, v14\n"

				      "vsra.vi v18, v17, 7\n"
				      "vsll.vi v19, v17, 1\n"
				      "vand.vx v18, v18, %[x1d]\n"
				      "vxor.vv v19, v19, v18\n"
				      "vle8.v v18, (%[wd4])\n"
				      "vxor.vv v17, v19, v18\n"
				      "vxor.vv v16, v16, v18\n"

				      "vsra.vi v22, v21, 7\n"
				      "vsll.vi v23, v21, 1\n"
				      "vand.vx v22, v22, %[x1d]\n"
				      "vxor.vv v23, v23, v22\n"
				      "vle8.v v22, (%[wd5])\n"
				      "vxor.vv v21, v23, v22\n"
				      "vxor.vv v20, v20, v22\n"

				      "vsra.vi v26, v25, 7\n"
				      "vsll.vi v27, v25, 1\n"
				      "vand.vx v26, v26, %[x1d]\n"
				      "vxor.vv v27, v27, v26\n"
				      "vle8.v v26, (%[wd6])\n"
				      "vxor.vv v25, v27, v26\n"
				      "vxor.vv v24, v24, v26\n"

				      "vsra.vi v30, v29, 7\n"
				      "vsll.vi v31, v29, 1\n"
				      "vand.vx v30, v30, %[x1d]\n"
				      "vxor.vv v31, v31, v30\n"
				      "vle8.v v30, (%[wd7])\n"
				      "vxor.vv v29, v31, v30\n"
				      "vxor.vv v28, v28, v30\n"
				      ".option pop\n"
				      : :
				      [wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				      [wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				      [wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				      [wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				      [wd4]"r"(&dptr[z][d + 4 * NSIZE]),
				      [wd5]"r"(&dptr[z][d + 5 * NSIZE]),
				      [wd6]"r"(&dptr[z][d + 6 * NSIZE]),
				      [wd7]"r"(&dptr[z][d + 7 * NSIZE]),
				      [x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				      ".option arch,+v\n"
				      "vsra.vi v2, v1, 7\n"
				      "vsll.vi v3, v1, 1\n"
				      "vand.vx v2, v2, %[x1d]\n"
				      "vxor.vv v1, v3, v2\n"

				      "vsra.vi v6, v5, 7\n"
				      "vsll.vi v7, v5, 1\n"
				      "vand.vx v6, v6, %[x1d]\n"
				      "vxor.vv v5, v7, v6\n"

				      "vsra.vi v10, v9, 7\n"
				      "vsll.vi v11, v9, 1\n"
				      "vand.vx v10, v10, %[x1d]\n"
				      "vxor.vv v9, v11, v10\n"

				      "vsra.vi v14, v13, 7\n"
				      "vsll.vi v15, v13, 1\n"
				      "vand.vx v14, v14, %[x1d]\n"
				      "vxor.vv v13, v15, v14\n"

				      "vsra.vi v18, v17, 7\n"
				      "vsll.vi v19, v17, 1\n"
				      "vand.vx v18, v18, %[x1d]\n"
				      "vxor.vv v17, v19, v18\n"

				      "vsra.vi v22, v21, 7\n"
				      "vsll.vi v23, v21, 1\n"
				      "vand.vx v22, v22, %[x1d]\n"
				      "vxor.vv v21, v23, v22\n"

				      "vsra.vi v26, v25, 7\n"
				      "vsll.vi v27, v25, 1\n"
				      "vand.vx v26, v26, %[x1d]\n"
				      "vxor.vv v25, v27, v26\n"

				      "vsra.vi v30, v29, 7\n"
				      "vsll.vi v31, v29, 1\n"
				      "vand.vx v30, v30, %[x1d]\n"
				      "vxor.vv v29, v31, v30\n"
				      ".option pop\n"
				      : :
				      [x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 * v16:wp4, v17:wq4, v18:p4, v19:q4
		 * v20:wp5, v21:wq5, v22:p5, v23:q5
		 * v24:wp6, v25:wq6, v26:p6, v27:q6
		 * v28:wp7, v29:wq7, v30:p7, v31:q7
		 */
		asm volatile (".option push\n"
			      ".option arch,+v\n"
			      "vle8.v v2, (%[wp0])\n"
			      "vle8.v v3, (%[wq0])\n"
			      "vxor.vv v2, v2, v0\n"
			      "vxor.vv v3, v3, v1\n"
			      "vse8.v v2, (%[wp0])\n"
			      "vse8.v v3, (%[wq0])\n"

			      "vle8.v v6, (%[wp1])\n"
			      "vle8.v v7, (%[wq1])\n"
			      "vxor.vv v6, v6, v4\n"
			      "vxor.vv v7, v7, v5\n"
			      "vse8.v v6, (%[wp1])\n"
			      "vse8.v v7, (%[wq1])\n"

			      "vle8.v v10, (%[wp2])\n"
			      "vle8.v v11, (%[wq2])\n"
			      "vxor.vv v10, v10, v8\n"
			      "vxor.vv v11, v11, v9\n"
			      "vse8.v v10, (%[wp2])\n"
			      "vse8.v v11, (%[wq2])\n"

			      "vle8.v v14, (%[wp3])\n"
			      "vle8.v v15, (%[wq3])\n"
			      "vxor.vv v14, v14, v12\n"
			      "vxor.vv v15, v15, v13\n"
			      "vse8.v v14, (%[wp3])\n"
			      "vse8.v v15, (%[wq3])\n"

			      "vle8.v v18, (%[wp4])\n"
			      "vle8.v v19, (%[wq4])\n"
			      "vxor.vv v18, v18, v16\n"
			      "vxor.vv v19, v19, v17\n"
			      "vse8.v v18, (%[wp4])\n"
			      "vse8.v v19, (%[wq4])\n"

			      "vle8.v v22, (%[wp5])\n"
			      "vle8.v v23, (%[wq5])\n"
			      "vxor.vv v22, v22, v20\n"
			      "vxor.vv v23, v23, v21\n"
			      "vse8.v v22, (%[wp5])\n"
			      "vse8.v v23, (%[wq5])\n"

			      "vle8.v v26, (%[wp6])\n"
			      "vle8.v v27, (%[wq6])\n"
			      "vxor.vv v26, v26, v24\n"
			      "vxor.vv v27, v27, v25\n"
			      "vse8.v v26, (%[wp6])\n"
			      "vse8.v v27, (%[wq6])\n"

			      "vle8.v v30, (%[wp7])\n"
			      "vle8.v v31, (%[wq7])\n"
			      "vxor.vv v30, v30, v28\n"
			      "vxor.vv v31, v31, v29\n"
			      "vse8.v v30, (%[wp7])\n"
			      "vse8.v v31, (%[wq7])\n"
			      ".option pop\n"
			      : :
			      [wp0]"r"(&p[d + NSIZE * 0]),
			      [wq0]"r"(&q[d + NSIZE * 0]),
			      [wp1]"r"(&p[d + NSIZE * 1]),
			      [wq1]"r"(&q[d + NSIZE * 1]),
			      [wp2]"r"(&p[d + NSIZE * 2]),
			      [wq2]"r"(&q[d + NSIZE * 2]),
			      [wp3]"r"(&p[d + NSIZE * 3]),
			      [wq3]"r"(&q[d + NSIZE * 3]),
			      [wp4]"r"(&p[d + NSIZE * 4]),
			      [wq4]"r"(&q[d + NSIZE * 4]),
			      [wp5]"r"(&p[d + NSIZE * 5]),
			      [wq5]"r"(&q[d + NSIZE * 5]),
			      [wp6]"r"(&p[d + NSIZE * 6]),
			      [wq6]"r"(&q[d + NSIZE * 6]),
			      [wp7]"r"(&p[d + NSIZE * 7]),
			      [wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

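/*
 * Emit the raid6_calls glue (see RAID6_RVV_WRAPPER() in rvv.h) that
 * exposes the 1-, 2-, 4- and 8-way routines above to the RAID-6
 * algorithm selection code.
 */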
RAID6_RVV_WRAPPER(1);
RAID6_RVV_WRAPPER(2);
RAID6_RVV_WRAPPER(4);
RAID6_RVV_WRAPPER(8);