// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAID-6 syndrome calculation using RISC-V vector instructions
 *
 * Copyright 2024 Institute of Software, CAS.
 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
 *
 * Based on neon.uc:
 *	Copyright 2002-2004 H. Peter Anvin
 */

#include <asm/simd.h>
#include <asm/vector.h>
#include <crypto/internal/simd.h>
#include <linux/raid/pq.h>
#include <linux/types.h>
#include "rvv.h"

#define NSIZE	(riscv_v_vsize / 32) /* NSIZE = vlenb */

static int rvv_has_vector(void)
{
	return has_vector();
}

static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vle8.v v1, (%[wp0])\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		for (z = z0 - 1 ; z >= 0 ; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vse8.v v0, (%[wp0])\n"
			"vse8.v v1, (%[wq0])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0 ; d < bytes ; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vle8.v v1, (%[wp0])\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
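			/*
			 * The vector sequence below (repeated in every
			 * unrolled variant in this file) implements the
			 * GF(2^8) multiply-by-2 from the reference code above:
			 * vsra.vi by 7 turns each byte into 0x00 or 0xff from
			 * its top bit, vand.vx keeps the reduction polynomial
			 * 0x1d only where that bit was set, vsll.vi by 1
			 * doubles the byte, and the vxor folds the reduction
			 * back in.  The data block is then loaded and XORed
			 * into both the running Q (wq) and P (wp) accumulators.
			 */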
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v1, v3, v2\n"
				".option pop\n"
				: :
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v2, (%[wp0])\n"
			"vle8.v v3, (%[wq0])\n"
			"vxor.vv v2, v2, v0\n"
			"vxor.vv v3, v3, v1\n"
			"vse8.v v2, (%[wp0])\n"
			"vse8.v v3, (%[wq0])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vle8.v v1, (%[wp0])\n"
			"vle8.v v4, (%[wp1])\n"
			"vle8.v v5, (%[wp1])\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			[wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v7, v7, v6\n"
				"vle8.v v6, (%[wd1])\n"
				"vxor.vv v5, v7, v6\n"
				"vxor.vv v4, v4, v6\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vse8.v v0, (%[wp0])\n"
			"vse8.v v1, (%[wq0])\n"
			"vse8.v v4, (%[wp1])\n"
			"vse8.v v5, (%[wq1])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0]),
			[wp1]"r"(&p[d + NSIZE * 1]),
			[wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

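	/*
	 * Only data disks start..stop take part here: wp becomes the XOR of
	 * those blocks and wq their partial Q syndrome.  z0 = stop seeds the
	 * accumulators from the highest participating disk, the "left side"
	 * loop below supplies the remaining 2^z coefficients for the skipped
	 * lower disks, and both results are XORed into the existing P/Q
	 * blocks rather than overwriting them.
	 */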
	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vle8.v v1, (%[wp0])\n"
			"vle8.v v4, (%[wp1])\n"
			"vle8.v v5, (%[wp1])\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			[wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v7, v7, v6\n"
				"vle8.v v6, (%[wd1])\n"
				"vxor.vv v5, v7, v6\n"
				"vxor.vv v4, v4, v6\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v1, v3, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v5, v7, v6\n"
				".option pop\n"
				: :
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v2, (%[wp0])\n"
			"vle8.v v3, (%[wq0])\n"
			"vxor.vv v2, v2, v0\n"
			"vxor.vv v3, v3, v1\n"
			"vse8.v v2, (%[wp0])\n"
			"vse8.v v3, (%[wq0])\n"

			"vle8.v v6, (%[wp1])\n"
			"vle8.v v7, (%[wq1])\n"
			"vxor.vv v6, v6, v4\n"
			"vxor.vv v7, v7, v5\n"
			"vse8.v v6, (%[wp1])\n"
			"vse8.v v7, (%[wq1])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0]),
			[wp1]"r"(&p[d + NSIZE * 1]),
			[wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
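	/*
	 * Each outer-loop iteration covers NSIZE * 4 bytes: four independent
	 * (wp, wq) accumulator pairs are kept live, which lets the vle8.v
	 * loads of one lane overlap with the arithmetic of the others.
	 */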
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vle8.v v1, (%[wp0])\n"
			"vle8.v v4, (%[wp1])\n"
			"vle8.v v5, (%[wp1])\n"
			"vle8.v v8, (%[wp2])\n"
			"vle8.v v9, (%[wp2])\n"
			"vle8.v v12, (%[wp3])\n"
			"vle8.v v13, (%[wp3])\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			[wp3]"r"(&dptr[z0][d + 3 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v7, v7, v6\n"
				"vle8.v v6, (%[wd1])\n"
				"vxor.vv v5, v7, v6\n"
				"vxor.vv v4, v4, v6\n"

				"vsra.vi v10, v9, 7\n"
				"vsll.vi v11, v9, 1\n"
				"vand.vx v10, v10, %[x1d]\n"
				"vxor.vv v11, v11, v10\n"
				"vle8.v v10, (%[wd2])\n"
				"vxor.vv v9, v11, v10\n"
				"vxor.vv v8, v8, v10\n"

				"vsra.vi v14, v13, 7\n"
				"vsll.vi v15, v13, 1\n"
				"vand.vx v14, v14, %[x1d]\n"
				"vxor.vv v15, v15, v14\n"
				"vle8.v v14, (%[wd3])\n"
				"vxor.vv v13, v15, v14\n"
				"vxor.vv v12, v12, v14\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vse8.v v0, (%[wp0])\n"
			"vse8.v v1, (%[wq0])\n"
			"vse8.v v4, (%[wp1])\n"
			"vse8.v v5, (%[wq1])\n"
			"vse8.v v8, (%[wp2])\n"
			"vse8.v v9, (%[wq2])\n"
			"vse8.v v12, (%[wp3])\n"
			"vse8.v v13, (%[wq3])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0]),
			[wp1]"r"(&p[d + NSIZE * 1]),
			[wq1]"r"(&q[d + NSIZE * 1]),
			[wp2]"r"(&p[d + NSIZE * 2]),
			[wq2]"r"(&q[d + NSIZE * 2]),
			[wp3]"r"(&p[d + NSIZE * 3]),
			[wq3]"r"(&q[d + NSIZE * 3])
		);
	}
}

static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
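		/*
		 * Both accumulators of each lane are seeded with the same
		 * block, hence the pairs of vle8.v from one address below:
		 * wp starts as a plain copy and wq as the Q contribution of
		 * the highest participating disk.
		 */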
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vle8.v v1, (%[wp0])\n"
			"vle8.v v4, (%[wp1])\n"
			"vle8.v v5, (%[wp1])\n"
			"vle8.v v8, (%[wp2])\n"
			"vle8.v v9, (%[wp2])\n"
			"vle8.v v12, (%[wp3])\n"
			"vle8.v v13, (%[wp3])\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			[wp3]"r"(&dptr[z0][d + 3 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v7, v7, v6\n"
				"vle8.v v6, (%[wd1])\n"
				"vxor.vv v5, v7, v6\n"
				"vxor.vv v4, v4, v6\n"

				"vsra.vi v10, v9, 7\n"
				"vsll.vi v11, v9, 1\n"
				"vand.vx v10, v10, %[x1d]\n"
				"vxor.vv v11, v11, v10\n"
				"vle8.v v10, (%[wd2])\n"
				"vxor.vv v9, v11, v10\n"
				"vxor.vv v8, v8, v10\n"

				"vsra.vi v14, v13, 7\n"
				"vsll.vi v15, v13, 1\n"
				"vand.vx v14, v14, %[x1d]\n"
				"vxor.vv v15, v15, v14\n"
				"vle8.v v14, (%[wd3])\n"
				"vxor.vv v13, v15, v14\n"
				"vxor.vv v12, v12, v14\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v1, v3, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v5, v7, v6\n"

				"vsra.vi v10, v9, 7\n"
				"vsll.vi v11, v9, 1\n"
				"vand.vx v10, v10, %[x1d]\n"
				"vxor.vv v9, v11, v10\n"

				"vsra.vi v14, v13, 7\n"
				"vsll.vi v15, v13, 1\n"
				"vand.vx v14, v14, %[x1d]\n"
				"vxor.vv v13, v15, v14\n"
				".option pop\n"
				: :
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 */
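		/*
		 * Unlike the gen_syndrome paths, the result is merged
		 * read-modify-write style: load the current P/Q blocks,
		 * XOR in the accumulated deltas, and store them back.
		 */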
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v2, (%[wp0])\n"
			"vle8.v v3, (%[wq0])\n"
			"vxor.vv v2, v2, v0\n"
			"vxor.vv v3, v3, v1\n"
			"vse8.v v2, (%[wp0])\n"
			"vse8.v v3, (%[wq0])\n"

			"vle8.v v6, (%[wp1])\n"
			"vle8.v v7, (%[wq1])\n"
			"vxor.vv v6, v6, v4\n"
			"vxor.vv v7, v7, v5\n"
			"vse8.v v6, (%[wp1])\n"
			"vse8.v v7, (%[wq1])\n"

			"vle8.v v10, (%[wp2])\n"
			"vle8.v v11, (%[wq2])\n"
			"vxor.vv v10, v10, v8\n"
			"vxor.vv v11, v11, v9\n"
			"vse8.v v10, (%[wp2])\n"
			"vse8.v v11, (%[wq2])\n"

			"vle8.v v14, (%[wp3])\n"
			"vle8.v v15, (%[wq3])\n"
			"vxor.vv v14, v14, v12\n"
			"vxor.vv v15, v15, v13\n"
			"vse8.v v14, (%[wp3])\n"
			"vse8.v v15, (%[wq3])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0]),
			[wp1]"r"(&p[d + NSIZE * 1]),
			[wq1]"r"(&q[d + NSIZE * 1]),
			[wp2]"r"(&p[d + NSIZE * 2]),
			[wq2]"r"(&q[d + NSIZE * 2]),
			[wp3]"r"(&p[d + NSIZE * 3]),
			[wq3]"r"(&q[d + NSIZE * 3])
		);
	}
}

static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vle8.v v1, (%[wp0])\n"
			"vle8.v v4, (%[wp1])\n"
			"vle8.v v5, (%[wp1])\n"
			"vle8.v v8, (%[wp2])\n"
			"vle8.v v9, (%[wp2])\n"
			"vle8.v v12, (%[wp3])\n"
			"vle8.v v13, (%[wp3])\n"
			"vle8.v v16, (%[wp4])\n"
			"vle8.v v17, (%[wp4])\n"
			"vle8.v v20, (%[wp5])\n"
			"vle8.v v21, (%[wp5])\n"
			"vle8.v v24, (%[wp6])\n"
			"vle8.v v25, (%[wp6])\n"
			"vle8.v v28, (%[wp7])\n"
			"vle8.v v29, (%[wp7])\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			[wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
			[wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
			[wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
			[wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
			[wp7]"r"(&dptr[z0][d + 7 * NSIZE])
		);

		for (z = z0 - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
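			/*
			 * Eight interleaved copies of the multiply-by-2 and
			 * accumulate sequence; with LMUL=1 this unroll factor
			 * uses all 32 vector registers (v0-v31).
			 */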
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v7, v7, v6\n"
				"vle8.v v6, (%[wd1])\n"
				"vxor.vv v5, v7, v6\n"
				"vxor.vv v4, v4, v6\n"

				"vsra.vi v10, v9, 7\n"
				"vsll.vi v11, v9, 1\n"
				"vand.vx v10, v10, %[x1d]\n"
				"vxor.vv v11, v11, v10\n"
				"vle8.v v10, (%[wd2])\n"
				"vxor.vv v9, v11, v10\n"
				"vxor.vv v8, v8, v10\n"

				"vsra.vi v14, v13, 7\n"
				"vsll.vi v15, v13, 1\n"
				"vand.vx v14, v14, %[x1d]\n"
				"vxor.vv v15, v15, v14\n"
				"vle8.v v14, (%[wd3])\n"
				"vxor.vv v13, v15, v14\n"
				"vxor.vv v12, v12, v14\n"

				"vsra.vi v18, v17, 7\n"
				"vsll.vi v19, v17, 1\n"
				"vand.vx v18, v18, %[x1d]\n"
				"vxor.vv v19, v19, v18\n"
				"vle8.v v18, (%[wd4])\n"
				"vxor.vv v17, v19, v18\n"
				"vxor.vv v16, v16, v18\n"

				"vsra.vi v22, v21, 7\n"
				"vsll.vi v23, v21, 1\n"
				"vand.vx v22, v22, %[x1d]\n"
				"vxor.vv v23, v23, v22\n"
				"vle8.v v22, (%[wd5])\n"
				"vxor.vv v21, v23, v22\n"
				"vxor.vv v20, v20, v22\n"

				"vsra.vi v26, v25, 7\n"
				"vsll.vi v27, v25, 1\n"
				"vand.vx v26, v26, %[x1d]\n"
				"vxor.vv v27, v27, v26\n"
				"vle8.v v26, (%[wd6])\n"
				"vxor.vv v25, v27, v26\n"
				"vxor.vv v24, v24, v26\n"

				"vsra.vi v30, v29, 7\n"
				"vsll.vi v31, v29, 1\n"
				"vand.vx v30, v30, %[x1d]\n"
				"vxor.vv v31, v31, v30\n"
				"vle8.v v30, (%[wd7])\n"
				"vxor.vv v29, v31, v30\n"
				"vxor.vv v28, v28, v30\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				[wd4]"r"(&dptr[z][d + 4 * NSIZE]),
				[wd5]"r"(&dptr[z][d + 5 * NSIZE]),
				[wd6]"r"(&dptr[z][d + 6 * NSIZE]),
				[wd7]"r"(&dptr[z][d + 7 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vse8.v v0, (%[wp0])\n"
			"vse8.v v1, (%[wq0])\n"
			"vse8.v v4, (%[wp1])\n"
			"vse8.v v5, (%[wq1])\n"
			"vse8.v v8, (%[wp2])\n"
			"vse8.v v9, (%[wq2])\n"
			"vse8.v v12, (%[wp3])\n"
			"vse8.v v13, (%[wq3])\n"
			"vse8.v v16, (%[wp4])\n"
			"vse8.v v17, (%[wq4])\n"
			"vse8.v v20, (%[wp5])\n"
			"vse8.v v21, (%[wq5])\n"
			"vse8.v v24, (%[wp6])\n"
			"vse8.v v25, (%[wq6])\n"
			"vse8.v v28, (%[wp7])\n"
			"vse8.v v29, (%[wq7])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0]),
			[wp1]"r"(&p[d + NSIZE * 1]),
			[wq1]"r"(&q[d + NSIZE * 1]),
			[wp2]"r"(&p[d + NSIZE * 2]),
			[wq2]"r"(&q[d + NSIZE * 2]),
			[wp3]"r"(&p[d + NSIZE * 3]),
			[wq3]"r"(&q[d + NSIZE * 3]),
			[wp4]"r"(&p[d + NSIZE * 4]),
			[wq4]"r"(&q[d + NSIZE * 4]),
			[wp5]"r"(&p[d + NSIZE * 5]),
			[wq5]"r"(&q[d + NSIZE * 5]),
			[wp6]"r"(&p[d + NSIZE * 6]),
			[wq6]"r"(&q[d + NSIZE * 6]),
			[wp7]"r"(&p[d + NSIZE * 7]),
			[wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vle8.v v1, (%[wp0])\n"
			"vle8.v v4, (%[wp1])\n"
			"vle8.v v5, (%[wp1])\n"
			"vle8.v v8, (%[wp2])\n"
			"vle8.v v9, (%[wp2])\n"
			"vle8.v v12, (%[wp3])\n"
			"vle8.v v13, (%[wp3])\n"
			"vle8.v v16, (%[wp4])\n"
			"vle8.v v17, (%[wp4])\n"
			"vle8.v v20, (%[wp5])\n"
			"vle8.v v21, (%[wp5])\n"
			"vle8.v v24, (%[wp6])\n"
			"vle8.v v25, (%[wp6])\n"
			"vle8.v v28, (%[wp7])\n"
			"vle8.v v29, (%[wp7])\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			[wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
			[wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
			[wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
			[wp6]"r"(&dptr[z0][d + 6 * NSIZE]),
			[wp7]"r"(&dptr[z0][d + 7 * NSIZE])
		);

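		/*
		 * The z loops below walk from the highest disk downward,
		 * Horner style: each step doubles the running wq in GF(2^8)
		 * before XORing in the next data block, so disk z ends up
		 * weighted by 2^z in the Q syndrome.
		 */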
		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v7, v7, v6\n"
				"vle8.v v6, (%[wd1])\n"
				"vxor.vv v5, v7, v6\n"
				"vxor.vv v4, v4, v6\n"

				"vsra.vi v10, v9, 7\n"
				"vsll.vi v11, v9, 1\n"
				"vand.vx v10, v10, %[x1d]\n"
				"vxor.vv v11, v11, v10\n"
				"vle8.v v10, (%[wd2])\n"
				"vxor.vv v9, v11, v10\n"
				"vxor.vv v8, v8, v10\n"

				"vsra.vi v14, v13, 7\n"
				"vsll.vi v15, v13, 1\n"
				"vand.vx v14, v14, %[x1d]\n"
				"vxor.vv v15, v15, v14\n"
				"vle8.v v14, (%[wd3])\n"
				"vxor.vv v13, v15, v14\n"
				"vxor.vv v12, v12, v14\n"

				"vsra.vi v18, v17, 7\n"
				"vsll.vi v19, v17, 1\n"
				"vand.vx v18, v18, %[x1d]\n"
				"vxor.vv v19, v19, v18\n"
				"vle8.v v18, (%[wd4])\n"
				"vxor.vv v17, v19, v18\n"
				"vxor.vv v16, v16, v18\n"

				"vsra.vi v22, v21, 7\n"
				"vsll.vi v23, v21, 1\n"
				"vand.vx v22, v22, %[x1d]\n"
				"vxor.vv v23, v23, v22\n"
				"vle8.v v22, (%[wd5])\n"
				"vxor.vv v21, v23, v22\n"
				"vxor.vv v20, v20, v22\n"

				"vsra.vi v26, v25, 7\n"
				"vsll.vi v27, v25, 1\n"
				"vand.vx v26, v26, %[x1d]\n"
				"vxor.vv v27, v27, v26\n"
				"vle8.v v26, (%[wd6])\n"
				"vxor.vv v25, v27, v26\n"
				"vxor.vv v24, v24, v26\n"

				"vsra.vi v30, v29, 7\n"
				"vsll.vi v31, v29, 1\n"
				"vand.vx v30, v30, %[x1d]\n"
				"vxor.vv v31, v31, v30\n"
				"vle8.v v30, (%[wd7])\n"
				"vxor.vv v29, v31, v30\n"
				"vxor.vv v28, v28, v30\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				[wd4]"r"(&dptr[z][d + 4 * NSIZE]),
				[wd5]"r"(&dptr[z][d + 5 * NSIZE]),
				[wd6]"r"(&dptr[z][d + 6 * NSIZE]),
				[wd7]"r"(&dptr[z][d + 7 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
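			/*
			 * Disks below 'start' are unchanged, so nothing is
			 * loaded here: each remaining position only multiplies
			 * wq by 2 to keep the Q coefficients aligned, while wp
			 * is left untouched.
			 */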
"vand.vx v18, v18, %[x1d]\n" 1104 "vxor.vv v17, v19, v18\n" 1105 1106 "vsra.vi v22, v21, 7\n" 1107 "vsll.vi v23, v21, 1\n" 1108 "vand.vx v22, v22, %[x1d]\n" 1109 "vxor.vv v21, v23, v22\n" 1110 1111 "vsra.vi v26, v25, 7\n" 1112 "vsll.vi v27, v25, 1\n" 1113 "vand.vx v26, v26, %[x1d]\n" 1114 "vxor.vv v25, v27, v26\n" 1115 1116 "vsra.vi v30, v29, 7\n" 1117 "vsll.vi v31, v29, 1\n" 1118 "vand.vx v30, v30, %[x1d]\n" 1119 "vxor.vv v29, v31, v30\n" 1120 ".option pop\n" 1121 : : 1122 [x1d]"r"(0x1d) 1123 ); 1124 } 1125 1126 /* 1127 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 1128 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 1129 * v0:wp0, v1:wq0, v2:p0, v3:q0 1130 * v4:wp1, v5:wq1, v6:p1, v7:q1 1131 * v8:wp2, v9:wq2, v10:p2, v11:q2 1132 * v12:wp3, v13:wq3, v14:p3, v15:q3 1133 * v16:wp4, v17:wq4, v18:p4, v19:q4 1134 * v20:wp5, v21:wq5, v22:p5, v23:q5 1135 * v24:wp6, v25:wq6, v26:p6, v27:q6 1136 * v28:wp7, v29:wq7, v30:p7, v31:q7 1137 */ 1138 asm volatile (".option push\n" 1139 ".option arch,+v\n" 1140 "vle8.v v2, (%[wp0])\n" 1141 "vle8.v v3, (%[wq0])\n" 1142 "vxor.vv v2, v2, v0\n" 1143 "vxor.vv v3, v3, v1\n" 1144 "vse8.v v2, (%[wp0])\n" 1145 "vse8.v v3, (%[wq0])\n" 1146 1147 "vle8.v v6, (%[wp1])\n" 1148 "vle8.v v7, (%[wq1])\n" 1149 "vxor.vv v6, v6, v4\n" 1150 "vxor.vv v7, v7, v5\n" 1151 "vse8.v v6, (%[wp1])\n" 1152 "vse8.v v7, (%[wq1])\n" 1153 1154 "vle8.v v10, (%[wp2])\n" 1155 "vle8.v v11, (%[wq2])\n" 1156 "vxor.vv v10, v10, v8\n" 1157 "vxor.vv v11, v11, v9\n" 1158 "vse8.v v10, (%[wp2])\n" 1159 "vse8.v v11, (%[wq2])\n" 1160 1161 "vle8.v v14, (%[wp3])\n" 1162 "vle8.v v15, (%[wq3])\n" 1163 "vxor.vv v14, v14, v12\n" 1164 "vxor.vv v15, v15, v13\n" 1165 "vse8.v v14, (%[wp3])\n" 1166 "vse8.v v15, (%[wq3])\n" 1167 1168 "vle8.v v18, (%[wp4])\n" 1169 "vle8.v v19, (%[wq4])\n" 1170 "vxor.vv v18, v18, v16\n" 1171 "vxor.vv v19, v19, v17\n" 1172 "vse8.v v18, (%[wp4])\n" 1173 "vse8.v v19, (%[wq4])\n" 1174 1175 "vle8.v v22, (%[wp5])\n" 1176 "vle8.v v23, (%[wq5])\n" 1177 "vxor.vv v22, v22, v20\n" 1178 "vxor.vv v23, v23, v21\n" 1179 "vse8.v v22, (%[wp5])\n" 1180 "vse8.v v23, (%[wq5])\n" 1181 1182 "vle8.v v26, (%[wp6])\n" 1183 "vle8.v v27, (%[wq6])\n" 1184 "vxor.vv v26, v26, v24\n" 1185 "vxor.vv v27, v27, v25\n" 1186 "vse8.v v26, (%[wp6])\n" 1187 "vse8.v v27, (%[wq6])\n" 1188 1189 "vle8.v v30, (%[wp7])\n" 1190 "vle8.v v31, (%[wq7])\n" 1191 "vxor.vv v30, v30, v28\n" 1192 "vxor.vv v31, v31, v29\n" 1193 "vse8.v v30, (%[wp7])\n" 1194 "vse8.v v31, (%[wq7])\n" 1195 ".option pop\n" 1196 : : 1197 [wp0]"r"(&p[d + NSIZE * 0]), 1198 [wq0]"r"(&q[d + NSIZE * 0]), 1199 [wp1]"r"(&p[d + NSIZE * 1]), 1200 [wq1]"r"(&q[d + NSIZE * 1]), 1201 [wp2]"r"(&p[d + NSIZE * 2]), 1202 [wq2]"r"(&q[d + NSIZE * 2]), 1203 [wp3]"r"(&p[d + NSIZE * 3]), 1204 [wq3]"r"(&q[d + NSIZE * 3]), 1205 [wp4]"r"(&p[d + NSIZE * 4]), 1206 [wq4]"r"(&q[d + NSIZE * 4]), 1207 [wp5]"r"(&p[d + NSIZE * 5]), 1208 [wq5]"r"(&q[d + NSIZE * 5]), 1209 [wp6]"r"(&p[d + NSIZE * 6]), 1210 [wq6]"r"(&q[d + NSIZE * 6]), 1211 [wp7]"r"(&p[d + NSIZE * 7]), 1212 [wq7]"r"(&q[d + NSIZE * 7]) 1213 ); 1214 } 1215 } 1216 1217 RAID6_RVV_WRAPPER(1); 1218 RAID6_RVV_WRAPPER(2); 1219 RAID6_RVV_WRAPPER(4); 1220 RAID6_RVV_WRAPPER(8); 1221