1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * RAID-6 syndrome calculation using RISC-V vector instructions 4 * 5 * Copyright 2024 Institute of Software, CAS. 6 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn> 7 * 8 * Based on neon.uc: 9 * Copyright 2002-2004 H. Peter Anvin 10 */ 11 12 #include "rvv.h" 13 14 #ifdef __riscv_vector 15 #error "This code must be built without compiler support for vector" 16 #endif 17 18 static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) 19 { 20 u8 **dptr = (u8 **)ptrs; 21 u8 *p, *q; 22 unsigned long vl, d, nsize; 23 int z, z0; 24 25 z0 = disks - 3; /* Highest data disk */ 26 p = dptr[z0 + 1]; /* XOR parity */ 27 q = dptr[z0 + 2]; /* RS syndrome */ 28 29 asm volatile (".option push\n" 30 ".option arch,+v\n" 31 "vsetvli %0, x0, e8, m1, ta, ma\n" 32 ".option pop\n" 33 : "=&r" (vl) 34 ); 35 36 nsize = vl; 37 38 /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ 39 for (d = 0; d < bytes; d += nsize * 1) { 40 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 41 asm volatile (".option push\n" 42 ".option arch,+v\n" 43 "vle8.v v0, (%[wp0])\n" 44 "vmv.v.v v1, v0\n" 45 ".option pop\n" 46 : : 47 [wp0]"r"(&dptr[z0][d + 0 * nsize]) 48 ); 49 50 for (z = z0 - 1 ; z >= 0 ; z--) { 51 /* 52 * w2$$ = MASK(wq$$); 53 * w1$$ = SHLBYTE(wq$$); 54 * w2$$ &= NBYTES(0x1d); 55 * w1$$ ^= w2$$; 56 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 57 * wq$$ = w1$$ ^ wd$$; 58 * wp$$ ^= wd$$; 59 */ 60 asm volatile (".option push\n" 61 ".option arch,+v\n" 62 "vsra.vi v2, v1, 7\n" 63 "vsll.vi v3, v1, 1\n" 64 "vand.vx v2, v2, %[x1d]\n" 65 "vxor.vv v3, v3, v2\n" 66 "vle8.v v2, (%[wd0])\n" 67 "vxor.vv v1, v3, v2\n" 68 "vxor.vv v0, v0, v2\n" 69 ".option pop\n" 70 : : 71 [wd0]"r"(&dptr[z][d + 0 * nsize]), 72 [x1d]"r"(0x1d) 73 ); 74 } 75 76 /* 77 * *(unative_t *)&p[d+NSIZE*$$] = wp$$; 78 * *(unative_t *)&q[d+NSIZE*$$] = wq$$; 79 */ 80 asm volatile (".option push\n" 81 ".option arch,+v\n" 82 "vse8.v v0, (%[wp0])\n" 83 "vse8.v v1, (%[wq0])\n" 84 ".option pop\n" 85 : : 86 [wp0]"r"(&p[d + nsize * 0]), 87 [wq0]"r"(&q[d + nsize * 0]) 88 ); 89 } 90 } 91 92 static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, 93 unsigned long bytes, void **ptrs) 94 { 95 u8 **dptr = (u8 **)ptrs; 96 u8 *p, *q; 97 unsigned long vl, d, nsize; 98 int z, z0; 99 100 z0 = stop; /* P/Q right side optimization */ 101 p = dptr[disks - 2]; /* XOR parity */ 102 q = dptr[disks - 1]; /* RS syndrome */ 103 104 asm volatile (".option push\n" 105 ".option arch,+v\n" 106 "vsetvli %0, x0, e8, m1, ta, ma\n" 107 ".option pop\n" 108 : "=&r" (vl) 109 ); 110 111 nsize = vl; 112 113 /* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */ 114 for (d = 0 ; d < bytes ; d += nsize * 1) { 115 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 116 asm volatile (".option push\n" 117 ".option arch,+v\n" 118 "vle8.v v0, (%[wp0])\n" 119 "vmv.v.v v1, v0\n" 120 ".option pop\n" 121 : : 122 [wp0]"r"(&dptr[z0][d + 0 * nsize]) 123 ); 124 125 /* P/Q data pages */ 126 for (z = z0 - 1; z >= start; z--) { 127 /* 128 * w2$$ = MASK(wq$$); 129 * w1$$ = SHLBYTE(wq$$); 130 * w2$$ &= NBYTES(0x1d); 131 * w1$$ ^= w2$$; 132 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 133 * wq$$ = w1$$ ^ wd$$; 134 * wp$$ ^= wd$$; 135 */ 136 asm volatile (".option push\n" 137 ".option arch,+v\n" 138 "vsra.vi v2, v1, 7\n" 139 "vsll.vi v3, v1, 1\n" 140 "vand.vx v2, v2, %[x1d]\n" 141 "vxor.vv v3, v3, v2\n" 142 "vle8.v v2, (%[wd0])\n" 143 "vxor.vv v1, v3, v2\n" 144 "vxor.vv v0, v0, v2\n" 145 ".option pop\n" 146 : : 147 [wd0]"r"(&dptr[z][d + 0 * nsize]), 148 [x1d]"r"(0x1d) 149 ); 150 } 151 152 /* P/Q left side optimization */ 153 for (z = start - 1; z >= 0; z--) { 154 /* 155 * w2$$ = MASK(wq$$); 156 * w1$$ = SHLBYTE(wq$$); 157 * w2$$ &= NBYTES(0x1d); 158 * wq$$ = w1$$ ^ w2$$; 159 */ 160 asm volatile (".option push\n" 161 ".option arch,+v\n" 162 "vsra.vi v2, v1, 7\n" 163 "vsll.vi v3, v1, 1\n" 164 "vand.vx v2, v2, %[x1d]\n" 165 "vxor.vv v1, v3, v2\n" 166 ".option pop\n" 167 : : 168 [x1d]"r"(0x1d) 169 ); 170 } 171 172 /* 173 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 174 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 175 * v0:wp0, v1:wq0, v2:p0, v3:q0 176 */ 177 asm volatile (".option push\n" 178 ".option arch,+v\n" 179 "vle8.v v2, (%[wp0])\n" 180 "vle8.v v3, (%[wq0])\n" 181 "vxor.vv v2, v2, v0\n" 182 "vxor.vv v3, v3, v1\n" 183 "vse8.v v2, (%[wp0])\n" 184 "vse8.v v3, (%[wq0])\n" 185 ".option pop\n" 186 : : 187 [wp0]"r"(&p[d + nsize * 0]), 188 [wq0]"r"(&q[d + nsize * 0]) 189 ); 190 } 191 } 192 193 static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) 194 { 195 u8 **dptr = (u8 **)ptrs; 196 u8 *p, *q; 197 unsigned long vl, d, nsize; 198 int z, z0; 199 200 z0 = disks - 3; /* Highest data disk */ 201 p = dptr[z0 + 1]; /* XOR parity */ 202 q = dptr[z0 + 2]; /* RS syndrome */ 203 204 asm volatile (".option push\n" 205 ".option arch,+v\n" 206 "vsetvli %0, x0, e8, m1, ta, ma\n" 207 ".option pop\n" 208 : "=&r" (vl) 209 ); 210 211 nsize = vl; 212 213 /* 214 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 215 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 216 */ 217 for (d = 0; d < bytes; d += nsize * 2) { 218 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 219 asm volatile (".option push\n" 220 ".option arch,+v\n" 221 "vle8.v v0, (%[wp0])\n" 222 "vmv.v.v v1, v0\n" 223 "vle8.v v4, (%[wp1])\n" 224 "vmv.v.v v5, v4\n" 225 ".option pop\n" 226 : : 227 [wp0]"r"(&dptr[z0][d + 0 * nsize]), 228 [wp1]"r"(&dptr[z0][d + 1 * nsize]) 229 ); 230 231 for (z = z0 - 1; z >= 0; z--) { 232 /* 233 * w2$$ = MASK(wq$$); 234 * w1$$ = SHLBYTE(wq$$); 235 * w2$$ &= NBYTES(0x1d); 236 * w1$$ ^= w2$$; 237 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 238 * wq$$ = w1$$ ^ wd$$; 239 * wp$$ ^= wd$$; 240 */ 241 asm volatile (".option push\n" 242 ".option arch,+v\n" 243 "vsra.vi v2, v1, 7\n" 244 "vsll.vi v3, v1, 1\n" 245 "vand.vx v2, v2, %[x1d]\n" 246 "vxor.vv v3, v3, v2\n" 247 "vle8.v v2, (%[wd0])\n" 248 "vxor.vv v1, v3, v2\n" 249 "vxor.vv v0, v0, v2\n" 250 251 "vsra.vi v6, v5, 7\n" 252 "vsll.vi v7, v5, 1\n" 253 "vand.vx v6, v6, %[x1d]\n" 254 "vxor.vv v7, v7, v6\n" 255 "vle8.v v6, (%[wd1])\n" 256 "vxor.vv v5, v7, v6\n" 257 "vxor.vv v4, v4, v6\n" 258 ".option pop\n" 259 : : 260 [wd0]"r"(&dptr[z][d + 0 * nsize]), 261 [wd1]"r"(&dptr[z][d + 1 * nsize]), 262 [x1d]"r"(0x1d) 263 ); 264 } 265 266 /* 267 * *(unative_t *)&p[d+NSIZE*$$] = wp$$; 268 * *(unative_t *)&q[d+NSIZE*$$] = wq$$; 269 */ 270 asm volatile (".option push\n" 271 ".option arch,+v\n" 272 "vse8.v v0, (%[wp0])\n" 273 "vse8.v v1, (%[wq0])\n" 274 "vse8.v v4, (%[wp1])\n" 275 "vse8.v v5, (%[wq1])\n" 276 ".option pop\n" 277 : : 278 [wp0]"r"(&p[d + nsize * 0]), 279 [wq0]"r"(&q[d + nsize * 0]), 280 [wp1]"r"(&p[d + nsize * 1]), 281 [wq1]"r"(&q[d + nsize * 1]) 282 ); 283 } 284 } 285 286 static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, 287 unsigned long bytes, void **ptrs) 288 { 289 u8 **dptr = (u8 **)ptrs; 290 u8 *p, *q; 291 unsigned long vl, d, nsize; 292 int z, z0; 293 294 z0 = stop; /* P/Q right side optimization */ 295 p = dptr[disks - 2]; /* XOR parity */ 296 q = dptr[disks - 1]; /* RS syndrome */ 297 298 asm volatile (".option push\n" 299 ".option arch,+v\n" 300 "vsetvli %0, x0, e8, m1, ta, ma\n" 301 ".option pop\n" 302 : "=&r" (vl) 303 ); 304 305 nsize = vl; 306 307 /* 308 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 309 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 310 */ 311 for (d = 0; d < bytes; d += nsize * 2) { 312 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 313 asm volatile (".option push\n" 314 ".option arch,+v\n" 315 "vle8.v v0, (%[wp0])\n" 316 "vmv.v.v v1, v0\n" 317 "vle8.v v4, (%[wp1])\n" 318 "vmv.v.v v5, v4\n" 319 ".option pop\n" 320 : : 321 [wp0]"r"(&dptr[z0][d + 0 * nsize]), 322 [wp1]"r"(&dptr[z0][d + 1 * nsize]) 323 ); 324 325 /* P/Q data pages */ 326 for (z = z0 - 1; z >= start; z--) { 327 /* 328 * w2$$ = MASK(wq$$); 329 * w1$$ = SHLBYTE(wq$$); 330 * w2$$ &= NBYTES(0x1d); 331 * w1$$ ^= w2$$; 332 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 333 * wq$$ = w1$$ ^ wd$$; 334 * wp$$ ^= wd$$; 335 */ 336 asm volatile (".option push\n" 337 ".option arch,+v\n" 338 "vsra.vi v2, v1, 7\n" 339 "vsll.vi v3, v1, 1\n" 340 "vand.vx v2, v2, %[x1d]\n" 341 "vxor.vv v3, v3, v2\n" 342 "vle8.v v2, (%[wd0])\n" 343 "vxor.vv v1, v3, v2\n" 344 "vxor.vv v0, v0, v2\n" 345 346 "vsra.vi v6, v5, 7\n" 347 "vsll.vi v7, v5, 1\n" 348 "vand.vx v6, v6, %[x1d]\n" 349 "vxor.vv v7, v7, v6\n" 350 "vle8.v v6, (%[wd1])\n" 351 "vxor.vv v5, v7, v6\n" 352 "vxor.vv v4, v4, v6\n" 353 ".option pop\n" 354 : : 355 [wd0]"r"(&dptr[z][d + 0 * nsize]), 356 [wd1]"r"(&dptr[z][d + 1 * nsize]), 357 [x1d]"r"(0x1d) 358 ); 359 } 360 361 /* P/Q left side optimization */ 362 for (z = start - 1; z >= 0; z--) { 363 /* 364 * w2$$ = MASK(wq$$); 365 * w1$$ = SHLBYTE(wq$$); 366 * w2$$ &= NBYTES(0x1d); 367 * wq$$ = w1$$ ^ w2$$; 368 */ 369 asm volatile (".option push\n" 370 ".option arch,+v\n" 371 "vsra.vi v2, v1, 7\n" 372 "vsll.vi v3, v1, 1\n" 373 "vand.vx v2, v2, %[x1d]\n" 374 "vxor.vv v1, v3, v2\n" 375 376 "vsra.vi v6, v5, 7\n" 377 "vsll.vi v7, v5, 1\n" 378 "vand.vx v6, v6, %[x1d]\n" 379 "vxor.vv v5, v7, v6\n" 380 ".option pop\n" 381 : : 382 [x1d]"r"(0x1d) 383 ); 384 } 385 386 /* 387 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 388 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 389 * v0:wp0, v1:wq0, v2:p0, v3:q0 390 * v4:wp1, v5:wq1, v6:p1, v7:q1 391 */ 392 asm volatile (".option push\n" 393 ".option arch,+v\n" 394 "vle8.v v2, (%[wp0])\n" 395 "vle8.v v3, (%[wq0])\n" 396 "vxor.vv v2, v2, v0\n" 397 "vxor.vv v3, v3, v1\n" 398 "vse8.v v2, (%[wp0])\n" 399 "vse8.v v3, (%[wq0])\n" 400 401 "vle8.v v6, (%[wp1])\n" 402 "vle8.v v7, (%[wq1])\n" 403 "vxor.vv v6, v6, v4\n" 404 "vxor.vv v7, v7, v5\n" 405 "vse8.v v6, (%[wp1])\n" 406 "vse8.v v7, (%[wq1])\n" 407 ".option pop\n" 408 : : 409 [wp0]"r"(&p[d + nsize * 0]), 410 [wq0]"r"(&q[d + nsize * 0]), 411 [wp1]"r"(&p[d + nsize * 1]), 412 [wq1]"r"(&q[d + nsize * 1]) 413 ); 414 } 415 } 416 417 static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) 418 { 419 u8 **dptr = (u8 **)ptrs; 420 u8 *p, *q; 421 unsigned long vl, d, nsize; 422 int z, z0; 423 424 z0 = disks - 3; /* Highest data disk */ 425 p = dptr[z0 + 1]; /* XOR parity */ 426 q = dptr[z0 + 2]; /* RS syndrome */ 427 428 asm volatile (".option push\n" 429 ".option arch,+v\n" 430 "vsetvli %0, x0, e8, m1, ta, ma\n" 431 ".option pop\n" 432 : "=&r" (vl) 433 ); 434 435 nsize = vl; 436 437 /* 438 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 439 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 440 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 441 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 442 */ 443 for (d = 0; d < bytes; d += nsize * 4) { 444 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 445 asm volatile (".option push\n" 446 ".option arch,+v\n" 447 "vle8.v v0, (%[wp0])\n" 448 "vmv.v.v v1, v0\n" 449 "vle8.v v4, (%[wp1])\n" 450 "vmv.v.v v5, v4\n" 451 "vle8.v v8, (%[wp2])\n" 452 "vmv.v.v v9, v8\n" 453 "vle8.v v12, (%[wp3])\n" 454 "vmv.v.v v13, v12\n" 455 ".option pop\n" 456 : : 457 [wp0]"r"(&dptr[z0][d + 0 * nsize]), 458 [wp1]"r"(&dptr[z0][d + 1 * nsize]), 459 [wp2]"r"(&dptr[z0][d + 2 * nsize]), 460 [wp3]"r"(&dptr[z0][d + 3 * nsize]) 461 ); 462 463 for (z = z0 - 1; z >= 0; z--) { 464 /* 465 * w2$$ = MASK(wq$$); 466 * w1$$ = SHLBYTE(wq$$); 467 * w2$$ &= NBYTES(0x1d); 468 * w1$$ ^= w2$$; 469 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 470 * wq$$ = w1$$ ^ wd$$; 471 * wp$$ ^= wd$$; 472 */ 473 asm volatile (".option push\n" 474 ".option arch,+v\n" 475 "vsra.vi v2, v1, 7\n" 476 "vsll.vi v3, v1, 1\n" 477 "vand.vx v2, v2, %[x1d]\n" 478 "vxor.vv v3, v3, v2\n" 479 "vle8.v v2, (%[wd0])\n" 480 "vxor.vv v1, v3, v2\n" 481 "vxor.vv v0, v0, v2\n" 482 483 "vsra.vi v6, v5, 7\n" 484 "vsll.vi v7, v5, 1\n" 485 "vand.vx v6, v6, %[x1d]\n" 486 "vxor.vv v7, v7, v6\n" 487 "vle8.v v6, (%[wd1])\n" 488 "vxor.vv v5, v7, v6\n" 489 "vxor.vv v4, v4, v6\n" 490 491 "vsra.vi v10, v9, 7\n" 492 "vsll.vi v11, v9, 1\n" 493 "vand.vx v10, v10, %[x1d]\n" 494 "vxor.vv v11, v11, v10\n" 495 "vle8.v v10, (%[wd2])\n" 496 "vxor.vv v9, v11, v10\n" 497 "vxor.vv v8, v8, v10\n" 498 499 "vsra.vi v14, v13, 7\n" 500 "vsll.vi v15, v13, 1\n" 501 "vand.vx v14, v14, %[x1d]\n" 502 "vxor.vv v15, v15, v14\n" 503 "vle8.v v14, (%[wd3])\n" 504 "vxor.vv v13, v15, v14\n" 505 "vxor.vv v12, v12, v14\n" 506 ".option pop\n" 507 : : 508 [wd0]"r"(&dptr[z][d + 0 * nsize]), 509 [wd1]"r"(&dptr[z][d + 1 * nsize]), 510 [wd2]"r"(&dptr[z][d + 2 * nsize]), 511 [wd3]"r"(&dptr[z][d + 3 * nsize]), 512 [x1d]"r"(0x1d) 513 ); 514 } 515 516 /* 517 * *(unative_t *)&p[d+NSIZE*$$] = wp$$; 518 * *(unative_t *)&q[d+NSIZE*$$] = wq$$; 519 */ 520 asm volatile (".option push\n" 521 ".option arch,+v\n" 522 "vse8.v v0, (%[wp0])\n" 523 "vse8.v v1, (%[wq0])\n" 524 "vse8.v v4, (%[wp1])\n" 525 "vse8.v v5, (%[wq1])\n" 526 "vse8.v v8, (%[wp2])\n" 527 "vse8.v v9, (%[wq2])\n" 528 "vse8.v v12, (%[wp3])\n" 529 "vse8.v v13, (%[wq3])\n" 530 ".option pop\n" 531 : : 532 [wp0]"r"(&p[d + nsize * 0]), 533 [wq0]"r"(&q[d + nsize * 0]), 534 [wp1]"r"(&p[d + nsize * 1]), 535 [wq1]"r"(&q[d + nsize * 1]), 536 [wp2]"r"(&p[d + nsize * 2]), 537 [wq2]"r"(&q[d + nsize * 2]), 538 [wp3]"r"(&p[d + nsize * 3]), 539 [wq3]"r"(&q[d + nsize * 3]) 540 ); 541 } 542 } 543 544 static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, 545 unsigned long bytes, void **ptrs) 546 { 547 u8 **dptr = (u8 **)ptrs; 548 u8 *p, *q; 549 unsigned long vl, d, nsize; 550 int z, z0; 551 552 z0 = stop; /* P/Q right side optimization */ 553 p = dptr[disks - 2]; /* XOR parity */ 554 q = dptr[disks - 1]; /* RS syndrome */ 555 556 asm volatile (".option push\n" 557 ".option arch,+v\n" 558 "vsetvli %0, x0, e8, m1, ta, ma\n" 559 ".option pop\n" 560 : "=&r" (vl) 561 ); 562 563 nsize = vl; 564 565 /* 566 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 567 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 568 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 569 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 570 */ 571 for (d = 0; d < bytes; d += nsize * 4) { 572 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 573 asm volatile (".option push\n" 574 ".option arch,+v\n" 575 "vle8.v v0, (%[wp0])\n" 576 "vmv.v.v v1, v0\n" 577 "vle8.v v4, (%[wp1])\n" 578 "vmv.v.v v5, v4\n" 579 "vle8.v v8, (%[wp2])\n" 580 "vmv.v.v v9, v8\n" 581 "vle8.v v12, (%[wp3])\n" 582 "vmv.v.v v13, v12\n" 583 ".option pop\n" 584 : : 585 [wp0]"r"(&dptr[z0][d + 0 * nsize]), 586 [wp1]"r"(&dptr[z0][d + 1 * nsize]), 587 [wp2]"r"(&dptr[z0][d + 2 * nsize]), 588 [wp3]"r"(&dptr[z0][d + 3 * nsize]) 589 ); 590 591 /* P/Q data pages */ 592 for (z = z0 - 1; z >= start; z--) { 593 /* 594 * w2$$ = MASK(wq$$); 595 * w1$$ = SHLBYTE(wq$$); 596 * w2$$ &= NBYTES(0x1d); 597 * w1$$ ^= w2$$; 598 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 599 * wq$$ = w1$$ ^ wd$$; 600 * wp$$ ^= wd$$; 601 */ 602 asm volatile (".option push\n" 603 ".option arch,+v\n" 604 "vsra.vi v2, v1, 7\n" 605 "vsll.vi v3, v1, 1\n" 606 "vand.vx v2, v2, %[x1d]\n" 607 "vxor.vv v3, v3, v2\n" 608 "vle8.v v2, (%[wd0])\n" 609 "vxor.vv v1, v3, v2\n" 610 "vxor.vv v0, v0, v2\n" 611 612 "vsra.vi v6, v5, 7\n" 613 "vsll.vi v7, v5, 1\n" 614 "vand.vx v6, v6, %[x1d]\n" 615 "vxor.vv v7, v7, v6\n" 616 "vle8.v v6, (%[wd1])\n" 617 "vxor.vv v5, v7, v6\n" 618 "vxor.vv v4, v4, v6\n" 619 620 "vsra.vi v10, v9, 7\n" 621 "vsll.vi v11, v9, 1\n" 622 "vand.vx v10, v10, %[x1d]\n" 623 "vxor.vv v11, v11, v10\n" 624 "vle8.v v10, (%[wd2])\n" 625 "vxor.vv v9, v11, v10\n" 626 "vxor.vv v8, v8, v10\n" 627 628 "vsra.vi v14, v13, 7\n" 629 "vsll.vi v15, v13, 1\n" 630 "vand.vx v14, v14, %[x1d]\n" 631 "vxor.vv v15, v15, v14\n" 632 "vle8.v v14, (%[wd3])\n" 633 "vxor.vv v13, v15, v14\n" 634 "vxor.vv v12, v12, v14\n" 635 ".option pop\n" 636 : : 637 [wd0]"r"(&dptr[z][d + 0 * nsize]), 638 [wd1]"r"(&dptr[z][d + 1 * nsize]), 639 [wd2]"r"(&dptr[z][d + 2 * nsize]), 640 [wd3]"r"(&dptr[z][d + 3 * nsize]), 641 [x1d]"r"(0x1d) 642 ); 643 } 644 645 /* P/Q left side optimization */ 646 for (z = start - 1; z >= 0; z--) { 647 /* 648 * w2$$ = MASK(wq$$); 649 * w1$$ = SHLBYTE(wq$$); 650 * w2$$ &= NBYTES(0x1d); 651 * wq$$ = w1$$ ^ w2$$; 652 */ 653 asm volatile (".option push\n" 654 ".option arch,+v\n" 655 "vsra.vi v2, v1, 7\n" 656 "vsll.vi v3, v1, 1\n" 657 "vand.vx v2, v2, %[x1d]\n" 658 "vxor.vv v1, v3, v2\n" 659 660 "vsra.vi v6, v5, 7\n" 661 "vsll.vi v7, v5, 1\n" 662 "vand.vx v6, v6, %[x1d]\n" 663 "vxor.vv v5, v7, v6\n" 664 665 "vsra.vi v10, v9, 7\n" 666 "vsll.vi v11, v9, 1\n" 667 "vand.vx v10, v10, %[x1d]\n" 668 "vxor.vv v9, v11, v10\n" 669 670 "vsra.vi v14, v13, 7\n" 671 "vsll.vi v15, v13, 1\n" 672 "vand.vx v14, v14, %[x1d]\n" 673 "vxor.vv v13, v15, v14\n" 674 ".option pop\n" 675 : : 676 [x1d]"r"(0x1d) 677 ); 678 } 679 680 /* 681 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 682 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 683 * v0:wp0, v1:wq0, v2:p0, v3:q0 684 * v4:wp1, v5:wq1, v6:p1, v7:q1 685 * v8:wp2, v9:wq2, v10:p2, v11:q2 686 * v12:wp3, v13:wq3, v14:p3, v15:q3 687 */ 688 asm volatile (".option push\n" 689 ".option arch,+v\n" 690 "vle8.v v2, (%[wp0])\n" 691 "vle8.v v3, (%[wq0])\n" 692 "vxor.vv v2, v2, v0\n" 693 "vxor.vv v3, v3, v1\n" 694 "vse8.v v2, (%[wp0])\n" 695 "vse8.v v3, (%[wq0])\n" 696 697 "vle8.v v6, (%[wp1])\n" 698 "vle8.v v7, (%[wq1])\n" 699 "vxor.vv v6, v6, v4\n" 700 "vxor.vv v7, v7, v5\n" 701 "vse8.v v6, (%[wp1])\n" 702 "vse8.v v7, (%[wq1])\n" 703 704 "vle8.v v10, (%[wp2])\n" 705 "vle8.v v11, (%[wq2])\n" 706 "vxor.vv v10, v10, v8\n" 707 "vxor.vv v11, v11, v9\n" 708 "vse8.v v10, (%[wp2])\n" 709 "vse8.v v11, (%[wq2])\n" 710 711 "vle8.v v14, (%[wp3])\n" 712 "vle8.v v15, (%[wq3])\n" 713 "vxor.vv v14, v14, v12\n" 714 "vxor.vv v15, v15, v13\n" 715 "vse8.v v14, (%[wp3])\n" 716 "vse8.v v15, (%[wq3])\n" 717 ".option pop\n" 718 : : 719 [wp0]"r"(&p[d + nsize * 0]), 720 [wq0]"r"(&q[d + nsize * 0]), 721 [wp1]"r"(&p[d + nsize * 1]), 722 [wq1]"r"(&q[d + nsize * 1]), 723 [wp2]"r"(&p[d + nsize * 2]), 724 [wq2]"r"(&q[d + nsize * 2]), 725 [wp3]"r"(&p[d + nsize * 3]), 726 [wq3]"r"(&q[d + nsize * 3]) 727 ); 728 } 729 } 730 731 static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) 732 { 733 u8 **dptr = (u8 **)ptrs; 734 u8 *p, *q; 735 unsigned long vl, d, nsize; 736 int z, z0; 737 738 z0 = disks - 3; /* Highest data disk */ 739 p = dptr[z0 + 1]; /* XOR parity */ 740 q = dptr[z0 + 2]; /* RS syndrome */ 741 742 asm volatile (".option push\n" 743 ".option arch,+v\n" 744 "vsetvli %0, x0, e8, m1, ta, ma\n" 745 ".option pop\n" 746 : "=&r" (vl) 747 ); 748 749 nsize = vl; 750 751 /* 752 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 753 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 754 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 755 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 756 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14 757 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15 758 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 759 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 760 */ 761 for (d = 0; d < bytes; d += nsize * 8) { 762 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 763 asm volatile (".option push\n" 764 ".option arch,+v\n" 765 "vle8.v v0, (%[wp0])\n" 766 "vmv.v.v v1, v0\n" 767 "vle8.v v4, (%[wp1])\n" 768 "vmv.v.v v5, v4\n" 769 "vle8.v v8, (%[wp2])\n" 770 "vmv.v.v v9, v8\n" 771 "vle8.v v12, (%[wp3])\n" 772 "vmv.v.v v13, v12\n" 773 "vle8.v v16, (%[wp4])\n" 774 "vmv.v.v v17, v16\n" 775 "vle8.v v20, (%[wp5])\n" 776 "vmv.v.v v21, v20\n" 777 "vle8.v v24, (%[wp6])\n" 778 "vmv.v.v v25, v24\n" 779 "vle8.v v28, (%[wp7])\n" 780 "vmv.v.v v29, v28\n" 781 ".option pop\n" 782 : : 783 [wp0]"r"(&dptr[z0][d + 0 * nsize]), 784 [wp1]"r"(&dptr[z0][d + 1 * nsize]), 785 [wp2]"r"(&dptr[z0][d + 2 * nsize]), 786 [wp3]"r"(&dptr[z0][d + 3 * nsize]), 787 [wp4]"r"(&dptr[z0][d + 4 * nsize]), 788 [wp5]"r"(&dptr[z0][d + 5 * nsize]), 789 [wp6]"r"(&dptr[z0][d + 6 * nsize]), 790 [wp7]"r"(&dptr[z0][d + 7 * nsize]) 791 ); 792 793 for (z = z0 - 1; z >= 0; z--) { 794 /* 795 * w2$$ = MASK(wq$$); 796 * w1$$ = SHLBYTE(wq$$); 797 * w2$$ &= NBYTES(0x1d); 798 * w1$$ ^= w2$$; 799 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 800 * wq$$ = w1$$ ^ wd$$; 801 * wp$$ ^= wd$$; 802 */ 803 asm volatile (".option push\n" 804 ".option arch,+v\n" 805 "vsra.vi v2, v1, 7\n" 806 "vsll.vi v3, v1, 1\n" 807 "vand.vx v2, v2, %[x1d]\n" 808 "vxor.vv v3, v3, v2\n" 809 "vle8.v v2, (%[wd0])\n" 810 "vxor.vv v1, v3, v2\n" 811 "vxor.vv v0, v0, v2\n" 812 813 "vsra.vi v6, v5, 7\n" 814 "vsll.vi v7, v5, 1\n" 815 "vand.vx v6, v6, %[x1d]\n" 816 "vxor.vv v7, v7, v6\n" 817 "vle8.v v6, (%[wd1])\n" 818 "vxor.vv v5, v7, v6\n" 819 "vxor.vv v4, v4, v6\n" 820 821 "vsra.vi v10, v9, 7\n" 822 "vsll.vi v11, v9, 1\n" 823 "vand.vx v10, v10, %[x1d]\n" 824 "vxor.vv v11, v11, v10\n" 825 "vle8.v v10, (%[wd2])\n" 826 "vxor.vv v9, v11, v10\n" 827 "vxor.vv v8, v8, v10\n" 828 829 "vsra.vi v14, v13, 7\n" 830 "vsll.vi v15, v13, 1\n" 831 "vand.vx v14, v14, %[x1d]\n" 832 "vxor.vv v15, v15, v14\n" 833 "vle8.v v14, (%[wd3])\n" 834 "vxor.vv v13, v15, v14\n" 835 "vxor.vv v12, v12, v14\n" 836 837 "vsra.vi v18, v17, 7\n" 838 "vsll.vi v19, v17, 1\n" 839 "vand.vx v18, v18, %[x1d]\n" 840 "vxor.vv v19, v19, v18\n" 841 "vle8.v v18, (%[wd4])\n" 842 "vxor.vv v17, v19, v18\n" 843 "vxor.vv v16, v16, v18\n" 844 845 "vsra.vi v22, v21, 7\n" 846 "vsll.vi v23, v21, 1\n" 847 "vand.vx v22, v22, %[x1d]\n" 848 "vxor.vv v23, v23, v22\n" 849 "vle8.v v22, (%[wd5])\n" 850 "vxor.vv v21, v23, v22\n" 851 "vxor.vv v20, v20, v22\n" 852 853 "vsra.vi v26, v25, 7\n" 854 "vsll.vi v27, v25, 1\n" 855 "vand.vx v26, v26, %[x1d]\n" 856 "vxor.vv v27, v27, v26\n" 857 "vle8.v v26, (%[wd6])\n" 858 "vxor.vv v25, v27, v26\n" 859 "vxor.vv v24, v24, v26\n" 860 861 "vsra.vi v30, v29, 7\n" 862 "vsll.vi v31, v29, 1\n" 863 "vand.vx v30, v30, %[x1d]\n" 864 "vxor.vv v31, v31, v30\n" 865 "vle8.v v30, (%[wd7])\n" 866 "vxor.vv v29, v31, v30\n" 867 "vxor.vv v28, v28, v30\n" 868 ".option pop\n" 869 : : 870 [wd0]"r"(&dptr[z][d + 0 * nsize]), 871 [wd1]"r"(&dptr[z][d + 1 * nsize]), 872 [wd2]"r"(&dptr[z][d + 2 * nsize]), 873 [wd3]"r"(&dptr[z][d + 3 * nsize]), 874 [wd4]"r"(&dptr[z][d + 4 * nsize]), 875 [wd5]"r"(&dptr[z][d + 5 * nsize]), 876 [wd6]"r"(&dptr[z][d + 6 * nsize]), 877 [wd7]"r"(&dptr[z][d + 7 * nsize]), 878 [x1d]"r"(0x1d) 879 ); 880 } 881 882 /* 883 * *(unative_t *)&p[d+NSIZE*$$] = wp$$; 884 * *(unative_t *)&q[d+NSIZE*$$] = wq$$; 885 */ 886 asm volatile (".option push\n" 887 ".option arch,+v\n" 888 "vse8.v v0, (%[wp0])\n" 889 "vse8.v v1, (%[wq0])\n" 890 "vse8.v v4, (%[wp1])\n" 891 "vse8.v v5, (%[wq1])\n" 892 "vse8.v v8, (%[wp2])\n" 893 "vse8.v v9, (%[wq2])\n" 894 "vse8.v v12, (%[wp3])\n" 895 "vse8.v v13, (%[wq3])\n" 896 "vse8.v v16, (%[wp4])\n" 897 "vse8.v v17, (%[wq4])\n" 898 "vse8.v v20, (%[wp5])\n" 899 "vse8.v v21, (%[wq5])\n" 900 "vse8.v v24, (%[wp6])\n" 901 "vse8.v v25, (%[wq6])\n" 902 "vse8.v v28, (%[wp7])\n" 903 "vse8.v v29, (%[wq7])\n" 904 ".option pop\n" 905 : : 906 [wp0]"r"(&p[d + nsize * 0]), 907 [wq0]"r"(&q[d + nsize * 0]), 908 [wp1]"r"(&p[d + nsize * 1]), 909 [wq1]"r"(&q[d + nsize * 1]), 910 [wp2]"r"(&p[d + nsize * 2]), 911 [wq2]"r"(&q[d + nsize * 2]), 912 [wp3]"r"(&p[d + nsize * 3]), 913 [wq3]"r"(&q[d + nsize * 3]), 914 [wp4]"r"(&p[d + nsize * 4]), 915 [wq4]"r"(&q[d + nsize * 4]), 916 [wp5]"r"(&p[d + nsize * 5]), 917 [wq5]"r"(&q[d + nsize * 5]), 918 [wp6]"r"(&p[d + nsize * 6]), 919 [wq6]"r"(&q[d + nsize * 6]), 920 [wp7]"r"(&p[d + nsize * 7]), 921 [wq7]"r"(&q[d + nsize * 7]) 922 ); 923 } 924 } 925 926 static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, 927 unsigned long bytes, void **ptrs) 928 { 929 u8 **dptr = (u8 **)ptrs; 930 u8 *p, *q; 931 unsigned long vl, d, nsize; 932 int z, z0; 933 934 z0 = stop; /* P/Q right side optimization */ 935 p = dptr[disks - 2]; /* XOR parity */ 936 q = dptr[disks - 1]; /* RS syndrome */ 937 938 asm volatile (".option push\n" 939 ".option arch,+v\n" 940 "vsetvli %0, x0, e8, m1, ta, ma\n" 941 ".option pop\n" 942 : "=&r" (vl) 943 ); 944 945 nsize = vl; 946 947 /* 948 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 949 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 950 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 951 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 952 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14 953 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15 954 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 955 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 956 */ 957 for (d = 0; d < bytes; d += nsize * 8) { 958 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 959 asm volatile (".option push\n" 960 ".option arch,+v\n" 961 "vle8.v v0, (%[wp0])\n" 962 "vmv.v.v v1, v0\n" 963 "vle8.v v4, (%[wp1])\n" 964 "vmv.v.v v5, v4\n" 965 "vle8.v v8, (%[wp2])\n" 966 "vmv.v.v v9, v8\n" 967 "vle8.v v12, (%[wp3])\n" 968 "vmv.v.v v13, v12\n" 969 "vle8.v v16, (%[wp4])\n" 970 "vmv.v.v v17, v16\n" 971 "vle8.v v20, (%[wp5])\n" 972 "vmv.v.v v21, v20\n" 973 "vle8.v v24, (%[wp6])\n" 974 "vmv.v.v v25, v24\n" 975 "vle8.v v28, (%[wp7])\n" 976 "vmv.v.v v29, v28\n" 977 ".option pop\n" 978 : : 979 [wp0]"r"(&dptr[z0][d + 0 * nsize]), 980 [wp1]"r"(&dptr[z0][d + 1 * nsize]), 981 [wp2]"r"(&dptr[z0][d + 2 * nsize]), 982 [wp3]"r"(&dptr[z0][d + 3 * nsize]), 983 [wp4]"r"(&dptr[z0][d + 4 * nsize]), 984 [wp5]"r"(&dptr[z0][d + 5 * nsize]), 985 [wp6]"r"(&dptr[z0][d + 6 * nsize]), 986 [wp7]"r"(&dptr[z0][d + 7 * nsize]) 987 ); 988 989 /* P/Q data pages */ 990 for (z = z0 - 1; z >= start; z--) { 991 /* 992 * w2$$ = MASK(wq$$); 993 * w1$$ = SHLBYTE(wq$$); 994 * w2$$ &= NBYTES(0x1d); 995 * w1$$ ^= w2$$; 996 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 997 * wq$$ = w1$$ ^ wd$$; 998 * wp$$ ^= wd$$; 999 */ 1000 asm volatile (".option push\n" 1001 ".option arch,+v\n" 1002 "vsra.vi v2, v1, 7\n" 1003 "vsll.vi v3, v1, 1\n" 1004 "vand.vx v2, v2, %[x1d]\n" 1005 "vxor.vv v3, v3, v2\n" 1006 "vle8.v v2, (%[wd0])\n" 1007 "vxor.vv v1, v3, v2\n" 1008 "vxor.vv v0, v0, v2\n" 1009 1010 "vsra.vi v6, v5, 7\n" 1011 "vsll.vi v7, v5, 1\n" 1012 "vand.vx v6, v6, %[x1d]\n" 1013 "vxor.vv v7, v7, v6\n" 1014 "vle8.v v6, (%[wd1])\n" 1015 "vxor.vv v5, v7, v6\n" 1016 "vxor.vv v4, v4, v6\n" 1017 1018 "vsra.vi v10, v9, 7\n" 1019 "vsll.vi v11, v9, 1\n" 1020 "vand.vx v10, v10, %[x1d]\n" 1021 "vxor.vv v11, v11, v10\n" 1022 "vle8.v v10, (%[wd2])\n" 1023 "vxor.vv v9, v11, v10\n" 1024 "vxor.vv v8, v8, v10\n" 1025 1026 "vsra.vi v14, v13, 7\n" 1027 "vsll.vi v15, v13, 1\n" 1028 "vand.vx v14, v14, %[x1d]\n" 1029 "vxor.vv v15, v15, v14\n" 1030 "vle8.v v14, (%[wd3])\n" 1031 "vxor.vv v13, v15, v14\n" 1032 "vxor.vv v12, v12, v14\n" 1033 1034 "vsra.vi v18, v17, 7\n" 1035 "vsll.vi v19, v17, 1\n" 1036 "vand.vx v18, v18, %[x1d]\n" 1037 "vxor.vv v19, v19, v18\n" 1038 "vle8.v v18, (%[wd4])\n" 1039 "vxor.vv v17, v19, v18\n" 1040 "vxor.vv v16, v16, v18\n" 1041 1042 "vsra.vi v22, v21, 7\n" 1043 "vsll.vi v23, v21, 1\n" 1044 "vand.vx v22, v22, %[x1d]\n" 1045 "vxor.vv v23, v23, v22\n" 1046 "vle8.v v22, (%[wd5])\n" 1047 "vxor.vv v21, v23, v22\n" 1048 "vxor.vv v20, v20, v22\n" 1049 1050 "vsra.vi v26, v25, 7\n" 1051 "vsll.vi v27, v25, 1\n" 1052 "vand.vx v26, v26, %[x1d]\n" 1053 "vxor.vv v27, v27, v26\n" 1054 "vle8.v v26, (%[wd6])\n" 1055 "vxor.vv v25, v27, v26\n" 1056 "vxor.vv v24, v24, v26\n" 1057 1058 "vsra.vi v30, v29, 7\n" 1059 "vsll.vi v31, v29, 1\n" 1060 "vand.vx v30, v30, %[x1d]\n" 1061 "vxor.vv v31, v31, v30\n" 1062 "vle8.v v30, (%[wd7])\n" 1063 "vxor.vv v29, v31, v30\n" 1064 "vxor.vv v28, v28, v30\n" 1065 ".option pop\n" 1066 : : 1067 [wd0]"r"(&dptr[z][d + 0 * nsize]), 1068 [wd1]"r"(&dptr[z][d + 1 * nsize]), 1069 [wd2]"r"(&dptr[z][d + 2 * nsize]), 1070 [wd3]"r"(&dptr[z][d + 3 * nsize]), 1071 [wd4]"r"(&dptr[z][d + 4 * nsize]), 1072 [wd5]"r"(&dptr[z][d + 5 * nsize]), 1073 [wd6]"r"(&dptr[z][d + 6 * nsize]), 1074 [wd7]"r"(&dptr[z][d + 7 * nsize]), 1075 [x1d]"r"(0x1d) 1076 ); 1077 } 1078 1079 /* P/Q left side optimization */ 1080 for (z = start - 1; z >= 0; z--) { 1081 /* 1082 * w2$$ = MASK(wq$$); 1083 * w1$$ = SHLBYTE(wq$$); 1084 * w2$$ &= NBYTES(0x1d); 1085 * wq$$ = w1$$ ^ w2$$; 1086 */ 1087 asm volatile (".option push\n" 1088 ".option arch,+v\n" 1089 "vsra.vi v2, v1, 7\n" 1090 "vsll.vi v3, v1, 1\n" 1091 "vand.vx v2, v2, %[x1d]\n" 1092 "vxor.vv v1, v3, v2\n" 1093 1094 "vsra.vi v6, v5, 7\n" 1095 "vsll.vi v7, v5, 1\n" 1096 "vand.vx v6, v6, %[x1d]\n" 1097 "vxor.vv v5, v7, v6\n" 1098 1099 "vsra.vi v10, v9, 7\n" 1100 "vsll.vi v11, v9, 1\n" 1101 "vand.vx v10, v10, %[x1d]\n" 1102 "vxor.vv v9, v11, v10\n" 1103 1104 "vsra.vi v14, v13, 7\n" 1105 "vsll.vi v15, v13, 1\n" 1106 "vand.vx v14, v14, %[x1d]\n" 1107 "vxor.vv v13, v15, v14\n" 1108 1109 "vsra.vi v18, v17, 7\n" 1110 "vsll.vi v19, v17, 1\n" 1111 "vand.vx v18, v18, %[x1d]\n" 1112 "vxor.vv v17, v19, v18\n" 1113 1114 "vsra.vi v22, v21, 7\n" 1115 "vsll.vi v23, v21, 1\n" 1116 "vand.vx v22, v22, %[x1d]\n" 1117 "vxor.vv v21, v23, v22\n" 1118 1119 "vsra.vi v26, v25, 7\n" 1120 "vsll.vi v27, v25, 1\n" 1121 "vand.vx v26, v26, %[x1d]\n" 1122 "vxor.vv v25, v27, v26\n" 1123 1124 "vsra.vi v30, v29, 7\n" 1125 "vsll.vi v31, v29, 1\n" 1126 "vand.vx v30, v30, %[x1d]\n" 1127 "vxor.vv v29, v31, v30\n" 1128 ".option pop\n" 1129 : : 1130 [x1d]"r"(0x1d) 1131 ); 1132 } 1133 1134 /* 1135 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 1136 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 1137 * v0:wp0, v1:wq0, v2:p0, v3:q0 1138 * v4:wp1, v5:wq1, v6:p1, v7:q1 1139 * v8:wp2, v9:wq2, v10:p2, v11:q2 1140 * v12:wp3, v13:wq3, v14:p3, v15:q3 1141 * v16:wp4, v17:wq4, v18:p4, v19:q4 1142 * v20:wp5, v21:wq5, v22:p5, v23:q5 1143 * v24:wp6, v25:wq6, v26:p6, v27:q6 1144 * v28:wp7, v29:wq7, v30:p7, v31:q7 1145 */ 1146 asm volatile (".option push\n" 1147 ".option arch,+v\n" 1148 "vle8.v v2, (%[wp0])\n" 1149 "vle8.v v3, (%[wq0])\n" 1150 "vxor.vv v2, v2, v0\n" 1151 "vxor.vv v3, v3, v1\n" 1152 "vse8.v v2, (%[wp0])\n" 1153 "vse8.v v3, (%[wq0])\n" 1154 1155 "vle8.v v6, (%[wp1])\n" 1156 "vle8.v v7, (%[wq1])\n" 1157 "vxor.vv v6, v6, v4\n" 1158 "vxor.vv v7, v7, v5\n" 1159 "vse8.v v6, (%[wp1])\n" 1160 "vse8.v v7, (%[wq1])\n" 1161 1162 "vle8.v v10, (%[wp2])\n" 1163 "vle8.v v11, (%[wq2])\n" 1164 "vxor.vv v10, v10, v8\n" 1165 "vxor.vv v11, v11, v9\n" 1166 "vse8.v v10, (%[wp2])\n" 1167 "vse8.v v11, (%[wq2])\n" 1168 1169 "vle8.v v14, (%[wp3])\n" 1170 "vle8.v v15, (%[wq3])\n" 1171 "vxor.vv v14, v14, v12\n" 1172 "vxor.vv v15, v15, v13\n" 1173 "vse8.v v14, (%[wp3])\n" 1174 "vse8.v v15, (%[wq3])\n" 1175 1176 "vle8.v v18, (%[wp4])\n" 1177 "vle8.v v19, (%[wq4])\n" 1178 "vxor.vv v18, v18, v16\n" 1179 "vxor.vv v19, v19, v17\n" 1180 "vse8.v v18, (%[wp4])\n" 1181 "vse8.v v19, (%[wq4])\n" 1182 1183 "vle8.v v22, (%[wp5])\n" 1184 "vle8.v v23, (%[wq5])\n" 1185 "vxor.vv v22, v22, v20\n" 1186 "vxor.vv v23, v23, v21\n" 1187 "vse8.v v22, (%[wp5])\n" 1188 "vse8.v v23, (%[wq5])\n" 1189 1190 "vle8.v v26, (%[wp6])\n" 1191 "vle8.v v27, (%[wq6])\n" 1192 "vxor.vv v26, v26, v24\n" 1193 "vxor.vv v27, v27, v25\n" 1194 "vse8.v v26, (%[wp6])\n" 1195 "vse8.v v27, (%[wq6])\n" 1196 1197 "vle8.v v30, (%[wp7])\n" 1198 "vle8.v v31, (%[wq7])\n" 1199 "vxor.vv v30, v30, v28\n" 1200 "vxor.vv v31, v31, v29\n" 1201 "vse8.v v30, (%[wp7])\n" 1202 "vse8.v v31, (%[wq7])\n" 1203 ".option pop\n" 1204 : : 1205 [wp0]"r"(&p[d + nsize * 0]), 1206 [wq0]"r"(&q[d + nsize * 0]), 1207 [wp1]"r"(&p[d + nsize * 1]), 1208 [wq1]"r"(&q[d + nsize * 1]), 1209 [wp2]"r"(&p[d + nsize * 2]), 1210 [wq2]"r"(&q[d + nsize * 2]), 1211 [wp3]"r"(&p[d + nsize * 3]), 1212 [wq3]"r"(&q[d + nsize * 3]), 1213 [wp4]"r"(&p[d + nsize * 4]), 1214 [wq4]"r"(&q[d + nsize * 4]), 1215 [wp5]"r"(&p[d + nsize * 5]), 1216 [wq5]"r"(&q[d + nsize * 5]), 1217 [wp6]"r"(&p[d + nsize * 6]), 1218 [wq6]"r"(&q[d + nsize * 6]), 1219 [wp7]"r"(&p[d + nsize * 7]), 1220 [wq7]"r"(&q[d + nsize * 7]) 1221 ); 1222 } 1223 } 1224 1225 RAID6_RVV_WRAPPER(1); 1226 RAID6_RVV_WRAPPER(2); 1227 RAID6_RVV_WRAPPER(4); 1228 RAID6_RVV_WRAPPER(8); 1229