// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RAID-6 syndrome calculation using RISC-V vector instructions
 *
 * Copyright 2024 Institute of Software, CAS.
 * Author: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
 *
 * Based on neon.uc:
 *	Copyright 2002-2004 H. Peter Anvin
 */

#include <asm/vector.h>
#include <linux/raid/pq.h>
#include "rvv.h"

#define NSIZE	(riscv_v_vsize / 32) /* NSIZE = vlenb */

static int rvv_has_vector(void)
{
	return has_vector();
}

static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0; d < bytes; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vmv.v.v v1, v0\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		for (z = z0 - 1 ; z >= 0 ; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vse8.v v0, (%[wp0])\n"
			"vse8.v v1, (%[wq0])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0])
		);
	}
}

static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/* v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 */
	for (d = 0 ; d < bytes ; d += NSIZE * 1) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vmv.v.v v1, v0\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"
				".option pop\n"
				: :
[wd0]"r"(&dptr[z][d + 0 * NSIZE]), 149 [x1d]"r"(0x1d) 150 ); 151 } 152 153 /* P/Q left side optimization */ 154 for (z = start - 1; z >= 0; z--) { 155 /* 156 * w2$$ = MASK(wq$$); 157 * w1$$ = SHLBYTE(wq$$); 158 * w2$$ &= NBYTES(0x1d); 159 * wq$$ = w1$$ ^ w2$$; 160 */ 161 asm volatile (".option push\n" 162 ".option arch,+v\n" 163 "vsra.vi v2, v1, 7\n" 164 "vsll.vi v3, v1, 1\n" 165 "vand.vx v2, v2, %[x1d]\n" 166 "vxor.vv v1, v3, v2\n" 167 ".option pop\n" 168 : : 169 [x1d]"r"(0x1d) 170 ); 171 } 172 173 /* 174 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 175 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 176 * v0:wp0, v1:wq0, v2:p0, v3:q0 177 */ 178 asm volatile (".option push\n" 179 ".option arch,+v\n" 180 "vle8.v v2, (%[wp0])\n" 181 "vle8.v v3, (%[wq0])\n" 182 "vxor.vv v2, v2, v0\n" 183 "vxor.vv v3, v3, v1\n" 184 "vse8.v v2, (%[wp0])\n" 185 "vse8.v v3, (%[wq0])\n" 186 ".option pop\n" 187 : : 188 [wp0]"r"(&p[d + NSIZE * 0]), 189 [wq0]"r"(&q[d + NSIZE * 0]) 190 ); 191 } 192 } 193 194 static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) 195 { 196 u8 **dptr = (u8 **)ptrs; 197 u8 *p, *q; 198 unsigned long vl, d; 199 int z, z0; 200 201 z0 = disks - 3; /* Highest data disk */ 202 p = dptr[z0 + 1]; /* XOR parity */ 203 q = dptr[z0 + 2]; /* RS syndrome */ 204 205 asm volatile (".option push\n" 206 ".option arch,+v\n" 207 "vsetvli %0, x0, e8, m1, ta, ma\n" 208 ".option pop\n" 209 : "=&r" (vl) 210 ); 211 212 /* 213 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 214 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 215 */ 216 for (d = 0; d < bytes; d += NSIZE * 2) { 217 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 218 asm volatile (".option push\n" 219 ".option arch,+v\n" 220 "vle8.v v0, (%[wp0])\n" 221 "vmv.v.v v1, v0\n" 222 "vle8.v v4, (%[wp1])\n" 223 "vmv.v.v v5, v4\n" 224 ".option pop\n" 225 : : 226 [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 227 [wp1]"r"(&dptr[z0][d + 1 * NSIZE]) 228 ); 229 230 for (z = z0 - 1; z >= 0; z--) { 231 /* 232 * w2$$ = MASK(wq$$); 233 * w1$$ = SHLBYTE(wq$$); 234 * w2$$ &= NBYTES(0x1d); 235 * w1$$ ^= w2$$; 236 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 237 * wq$$ = w1$$ ^ wd$$; 238 * wp$$ ^= wd$$; 239 */ 240 asm volatile (".option push\n" 241 ".option arch,+v\n" 242 "vsra.vi v2, v1, 7\n" 243 "vsll.vi v3, v1, 1\n" 244 "vand.vx v2, v2, %[x1d]\n" 245 "vxor.vv v3, v3, v2\n" 246 "vle8.v v2, (%[wd0])\n" 247 "vxor.vv v1, v3, v2\n" 248 "vxor.vv v0, v0, v2\n" 249 250 "vsra.vi v6, v5, 7\n" 251 "vsll.vi v7, v5, 1\n" 252 "vand.vx v6, v6, %[x1d]\n" 253 "vxor.vv v7, v7, v6\n" 254 "vle8.v v6, (%[wd1])\n" 255 "vxor.vv v5, v7, v6\n" 256 "vxor.vv v4, v4, v6\n" 257 ".option pop\n" 258 : : 259 [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 260 [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 261 [x1d]"r"(0x1d) 262 ); 263 } 264 265 /* 266 * *(unative_t *)&p[d+NSIZE*$$] = wp$$; 267 * *(unative_t *)&q[d+NSIZE*$$] = wq$$; 268 */ 269 asm volatile (".option push\n" 270 ".option arch,+v\n" 271 "vse8.v v0, (%[wp0])\n" 272 "vse8.v v1, (%[wq0])\n" 273 "vse8.v v4, (%[wp1])\n" 274 "vse8.v v5, (%[wq1])\n" 275 ".option pop\n" 276 : : 277 [wp0]"r"(&p[d + NSIZE * 0]), 278 [wq0]"r"(&q[d + NSIZE * 0]), 279 [wp1]"r"(&p[d + NSIZE * 1]), 280 [wq1]"r"(&q[d + NSIZE * 1]) 281 ); 282 } 283 } 284 285 static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, 286 unsigned long bytes, void **ptrs) 287 { 288 u8 **dptr = (u8 **)ptrs; 289 u8 *p, *q; 290 unsigned long vl, d; 291 int z, z0; 292 293 z0 = stop; /* P/Q right side optimization */ 294 p = dptr[disks - 2]; /* XOR parity */ 295 q = dptr[disks - 1]; /* RS syndrome */ 296 
	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 */
	for (d = 0; d < bytes; d += NSIZE * 2) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vmv.v.v v1, v0\n"
			"vle8.v v4, (%[wp1])\n"
			"vmv.v.v v5, v4\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			[wp1]"r"(&dptr[z0][d + 1 * NSIZE])
		);

		/* P/Q data pages */
		for (z = z0 - 1; z >= start; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * w1$$ ^= w2$$;
			 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
			 * wq$$ = w1$$ ^ wd$$;
			 * wp$$ ^= wd$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v3, v3, v2\n"
				"vle8.v v2, (%[wd0])\n"
				"vxor.vv v1, v3, v2\n"
				"vxor.vv v0, v0, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v7, v7, v6\n"
				"vle8.v v6, (%[wd1])\n"
				"vxor.vv v5, v7, v6\n"
				"vxor.vv v4, v4, v6\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/* P/Q left side optimization */
		for (z = start - 1; z >= 0; z--) {
			/*
			 * w2$$ = MASK(wq$$);
			 * w1$$ = SHLBYTE(wq$$);
			 * w2$$ &= NBYTES(0x1d);
			 * wq$$ = w1$$ ^ w2$$;
			 */
			asm volatile (".option push\n"
				".option arch,+v\n"
				"vsra.vi v2, v1, 7\n"
				"vsll.vi v3, v1, 1\n"
				"vand.vx v2, v2, %[x1d]\n"
				"vxor.vv v1, v3, v2\n"

				"vsra.vi v6, v5, 7\n"
				"vsll.vi v7, v5, 1\n"
				"vand.vx v6, v6, %[x1d]\n"
				"vxor.vv v5, v7, v6\n"
				".option pop\n"
				: :
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v2, (%[wp0])\n"
			"vle8.v v3, (%[wq0])\n"
			"vxor.vv v2, v2, v0\n"
			"vxor.vv v3, v3, v1\n"
			"vse8.v v2, (%[wp0])\n"
			"vse8.v v3, (%[wq0])\n"

			"vle8.v v6, (%[wp1])\n"
			"vle8.v v7, (%[wq1])\n"
			"vxor.vv v6, v6, v4\n"
			"vxor.vv v7, v7, v5\n"
			"vse8.v v6, (%[wp1])\n"
			"vse8.v v7, (%[wq1])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0]),
			[wp1]"r"(&p[d + NSIZE * 1]),
			[wq1]"r"(&q[d + NSIZE * 1])
		);
	}
}

static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 */
	for (d = 0; d < bytes; d += NSIZE * 4) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
(".option push\n" 441 ".option arch,+v\n" 442 "vle8.v v0, (%[wp0])\n" 443 "vmv.v.v v1, v0\n" 444 "vle8.v v4, (%[wp1])\n" 445 "vmv.v.v v5, v4\n" 446 "vle8.v v8, (%[wp2])\n" 447 "vmv.v.v v9, v8\n" 448 "vle8.v v12, (%[wp3])\n" 449 "vmv.v.v v13, v12\n" 450 ".option pop\n" 451 : : 452 [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 453 [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), 454 [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), 455 [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) 456 ); 457 458 for (z = z0 - 1; z >= 0; z--) { 459 /* 460 * w2$$ = MASK(wq$$); 461 * w1$$ = SHLBYTE(wq$$); 462 * w2$$ &= NBYTES(0x1d); 463 * w1$$ ^= w2$$; 464 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 465 * wq$$ = w1$$ ^ wd$$; 466 * wp$$ ^= wd$$; 467 */ 468 asm volatile (".option push\n" 469 ".option arch,+v\n" 470 "vsra.vi v2, v1, 7\n" 471 "vsll.vi v3, v1, 1\n" 472 "vand.vx v2, v2, %[x1d]\n" 473 "vxor.vv v3, v3, v2\n" 474 "vle8.v v2, (%[wd0])\n" 475 "vxor.vv v1, v3, v2\n" 476 "vxor.vv v0, v0, v2\n" 477 478 "vsra.vi v6, v5, 7\n" 479 "vsll.vi v7, v5, 1\n" 480 "vand.vx v6, v6, %[x1d]\n" 481 "vxor.vv v7, v7, v6\n" 482 "vle8.v v6, (%[wd1])\n" 483 "vxor.vv v5, v7, v6\n" 484 "vxor.vv v4, v4, v6\n" 485 486 "vsra.vi v10, v9, 7\n" 487 "vsll.vi v11, v9, 1\n" 488 "vand.vx v10, v10, %[x1d]\n" 489 "vxor.vv v11, v11, v10\n" 490 "vle8.v v10, (%[wd2])\n" 491 "vxor.vv v9, v11, v10\n" 492 "vxor.vv v8, v8, v10\n" 493 494 "vsra.vi v14, v13, 7\n" 495 "vsll.vi v15, v13, 1\n" 496 "vand.vx v14, v14, %[x1d]\n" 497 "vxor.vv v15, v15, v14\n" 498 "vle8.v v14, (%[wd3])\n" 499 "vxor.vv v13, v15, v14\n" 500 "vxor.vv v12, v12, v14\n" 501 ".option pop\n" 502 : : 503 [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 504 [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 505 [wd2]"r"(&dptr[z][d + 2 * NSIZE]), 506 [wd3]"r"(&dptr[z][d + 3 * NSIZE]), 507 [x1d]"r"(0x1d) 508 ); 509 } 510 511 /* 512 * *(unative_t *)&p[d+NSIZE*$$] = wp$$; 513 * *(unative_t *)&q[d+NSIZE*$$] = wq$$; 514 */ 515 asm volatile (".option push\n" 516 ".option arch,+v\n" 517 "vse8.v v0, (%[wp0])\n" 518 "vse8.v v1, (%[wq0])\n" 519 "vse8.v v4, (%[wp1])\n" 520 "vse8.v v5, (%[wq1])\n" 521 "vse8.v v8, (%[wp2])\n" 522 "vse8.v v9, (%[wq2])\n" 523 "vse8.v v12, (%[wp3])\n" 524 "vse8.v v13, (%[wq3])\n" 525 ".option pop\n" 526 : : 527 [wp0]"r"(&p[d + NSIZE * 0]), 528 [wq0]"r"(&q[d + NSIZE * 0]), 529 [wp1]"r"(&p[d + NSIZE * 1]), 530 [wq1]"r"(&q[d + NSIZE * 1]), 531 [wp2]"r"(&p[d + NSIZE * 2]), 532 [wq2]"r"(&q[d + NSIZE * 2]), 533 [wp3]"r"(&p[d + NSIZE * 3]), 534 [wq3]"r"(&q[d + NSIZE * 3]) 535 ); 536 } 537 } 538 539 static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, 540 unsigned long bytes, void **ptrs) 541 { 542 u8 **dptr = (u8 **)ptrs; 543 u8 *p, *q; 544 unsigned long vl, d; 545 int z, z0; 546 547 z0 = stop; /* P/Q right side optimization */ 548 p = dptr[disks - 2]; /* XOR parity */ 549 q = dptr[disks - 1]; /* RS syndrome */ 550 551 asm volatile (".option push\n" 552 ".option arch,+v\n" 553 "vsetvli %0, x0, e8, m1, ta, ma\n" 554 ".option pop\n" 555 : "=&r" (vl) 556 ); 557 558 /* 559 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 560 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 561 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 562 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 563 */ 564 for (d = 0; d < bytes; d += NSIZE * 4) { 565 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 566 asm volatile (".option push\n" 567 ".option arch,+v\n" 568 "vle8.v v0, (%[wp0])\n" 569 "vmv.v.v v1, v0\n" 570 "vle8.v v4, (%[wp1])\n" 571 "vmv.v.v v5, v4\n" 572 "vle8.v v8, (%[wp2])\n" 573 "vmv.v.v v9, v8\n" 574 "vle8.v v12, (%[wp3])\n" 575 "vmv.v.v v13, v12\n" 576 ".option pop\n" 577 : : 
578 [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 579 [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), 580 [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), 581 [wp3]"r"(&dptr[z0][d + 3 * NSIZE]) 582 ); 583 584 /* P/Q data pages */ 585 for (z = z0 - 1; z >= start; z--) { 586 /* 587 * w2$$ = MASK(wq$$); 588 * w1$$ = SHLBYTE(wq$$); 589 * w2$$ &= NBYTES(0x1d); 590 * w1$$ ^= w2$$; 591 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 592 * wq$$ = w1$$ ^ wd$$; 593 * wp$$ ^= wd$$; 594 */ 595 asm volatile (".option push\n" 596 ".option arch,+v\n" 597 "vsra.vi v2, v1, 7\n" 598 "vsll.vi v3, v1, 1\n" 599 "vand.vx v2, v2, %[x1d]\n" 600 "vxor.vv v3, v3, v2\n" 601 "vle8.v v2, (%[wd0])\n" 602 "vxor.vv v1, v3, v2\n" 603 "vxor.vv v0, v0, v2\n" 604 605 "vsra.vi v6, v5, 7\n" 606 "vsll.vi v7, v5, 1\n" 607 "vand.vx v6, v6, %[x1d]\n" 608 "vxor.vv v7, v7, v6\n" 609 "vle8.v v6, (%[wd1])\n" 610 "vxor.vv v5, v7, v6\n" 611 "vxor.vv v4, v4, v6\n" 612 613 "vsra.vi v10, v9, 7\n" 614 "vsll.vi v11, v9, 1\n" 615 "vand.vx v10, v10, %[x1d]\n" 616 "vxor.vv v11, v11, v10\n" 617 "vle8.v v10, (%[wd2])\n" 618 "vxor.vv v9, v11, v10\n" 619 "vxor.vv v8, v8, v10\n" 620 621 "vsra.vi v14, v13, 7\n" 622 "vsll.vi v15, v13, 1\n" 623 "vand.vx v14, v14, %[x1d]\n" 624 "vxor.vv v15, v15, v14\n" 625 "vle8.v v14, (%[wd3])\n" 626 "vxor.vv v13, v15, v14\n" 627 "vxor.vv v12, v12, v14\n" 628 ".option pop\n" 629 : : 630 [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 631 [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 632 [wd2]"r"(&dptr[z][d + 2 * NSIZE]), 633 [wd3]"r"(&dptr[z][d + 3 * NSIZE]), 634 [x1d]"r"(0x1d) 635 ); 636 } 637 638 /* P/Q left side optimization */ 639 for (z = start - 1; z >= 0; z--) { 640 /* 641 * w2$$ = MASK(wq$$); 642 * w1$$ = SHLBYTE(wq$$); 643 * w2$$ &= NBYTES(0x1d); 644 * wq$$ = w1$$ ^ w2$$; 645 */ 646 asm volatile (".option push\n" 647 ".option arch,+v\n" 648 "vsra.vi v2, v1, 7\n" 649 "vsll.vi v3, v1, 1\n" 650 "vand.vx v2, v2, %[x1d]\n" 651 "vxor.vv v1, v3, v2\n" 652 653 "vsra.vi v6, v5, 7\n" 654 "vsll.vi v7, v5, 1\n" 655 "vand.vx v6, v6, %[x1d]\n" 656 "vxor.vv v5, v7, v6\n" 657 658 "vsra.vi v10, v9, 7\n" 659 "vsll.vi v11, v9, 1\n" 660 "vand.vx v10, v10, %[x1d]\n" 661 "vxor.vv v9, v11, v10\n" 662 663 "vsra.vi v14, v13, 7\n" 664 "vsll.vi v15, v13, 1\n" 665 "vand.vx v14, v14, %[x1d]\n" 666 "vxor.vv v13, v15, v14\n" 667 ".option pop\n" 668 : : 669 [x1d]"r"(0x1d) 670 ); 671 } 672 673 /* 674 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; 675 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; 676 * v0:wp0, v1:wq0, v2:p0, v3:q0 677 * v4:wp1, v5:wq1, v6:p1, v7:q1 678 * v8:wp2, v9:wq2, v10:p2, v11:q2 679 * v12:wp3, v13:wq3, v14:p3, v15:q3 680 */ 681 asm volatile (".option push\n" 682 ".option arch,+v\n" 683 "vle8.v v2, (%[wp0])\n" 684 "vle8.v v3, (%[wq0])\n" 685 "vxor.vv v2, v2, v0\n" 686 "vxor.vv v3, v3, v1\n" 687 "vse8.v v2, (%[wp0])\n" 688 "vse8.v v3, (%[wq0])\n" 689 690 "vle8.v v6, (%[wp1])\n" 691 "vle8.v v7, (%[wq1])\n" 692 "vxor.vv v6, v6, v4\n" 693 "vxor.vv v7, v7, v5\n" 694 "vse8.v v6, (%[wp1])\n" 695 "vse8.v v7, (%[wq1])\n" 696 697 "vle8.v v10, (%[wp2])\n" 698 "vle8.v v11, (%[wq2])\n" 699 "vxor.vv v10, v10, v8\n" 700 "vxor.vv v11, v11, v9\n" 701 "vse8.v v10, (%[wp2])\n" 702 "vse8.v v11, (%[wq2])\n" 703 704 "vle8.v v14, (%[wp3])\n" 705 "vle8.v v15, (%[wq3])\n" 706 "vxor.vv v14, v14, v12\n" 707 "vxor.vv v15, v15, v13\n" 708 "vse8.v v14, (%[wp3])\n" 709 "vse8.v v15, (%[wq3])\n" 710 ".option pop\n" 711 : : 712 [wp0]"r"(&p[d + NSIZE * 0]), 713 [wq0]"r"(&q[d + NSIZE * 0]), 714 [wp1]"r"(&p[d + NSIZE * 1]), 715 [wq1]"r"(&q[d + NSIZE * 1]), 716 [wp2]"r"(&p[d + NSIZE * 2]), 717 [wq2]"r"(&q[d + NSIZE * 2]), 718 
[wp3]"r"(&p[d + NSIZE * 3]), 719 [wq3]"r"(&q[d + NSIZE * 3]) 720 ); 721 } 722 } 723 724 static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) 725 { 726 u8 **dptr = (u8 **)ptrs; 727 u8 *p, *q; 728 unsigned long vl, d; 729 int z, z0; 730 731 z0 = disks - 3; /* Highest data disk */ 732 p = dptr[z0 + 1]; /* XOR parity */ 733 q = dptr[z0 + 2]; /* RS syndrome */ 734 735 asm volatile (".option push\n" 736 ".option arch,+v\n" 737 "vsetvli %0, x0, e8, m1, ta, ma\n" 738 ".option pop\n" 739 : "=&r" (vl) 740 ); 741 742 /* 743 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10 744 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11 745 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12 746 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13 747 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14 748 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15 749 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16 750 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17 751 */ 752 for (d = 0; d < bytes; d += NSIZE * 8) { 753 /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */ 754 asm volatile (".option push\n" 755 ".option arch,+v\n" 756 "vle8.v v0, (%[wp0])\n" 757 "vmv.v.v v1, v0\n" 758 "vle8.v v4, (%[wp1])\n" 759 "vmv.v.v v5, v4\n" 760 "vle8.v v8, (%[wp2])\n" 761 "vmv.v.v v9, v8\n" 762 "vle8.v v12, (%[wp3])\n" 763 "vmv.v.v v13, v12\n" 764 "vle8.v v16, (%[wp4])\n" 765 "vmv.v.v v17, v16\n" 766 "vle8.v v20, (%[wp5])\n" 767 "vmv.v.v v21, v20\n" 768 "vle8.v v24, (%[wp6])\n" 769 "vmv.v.v v25, v24\n" 770 "vle8.v v28, (%[wp7])\n" 771 "vmv.v.v v29, v28\n" 772 ".option pop\n" 773 : : 774 [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), 775 [wp1]"r"(&dptr[z0][d + 1 * NSIZE]), 776 [wp2]"r"(&dptr[z0][d + 2 * NSIZE]), 777 [wp3]"r"(&dptr[z0][d + 3 * NSIZE]), 778 [wp4]"r"(&dptr[z0][d + 4 * NSIZE]), 779 [wp5]"r"(&dptr[z0][d + 5 * NSIZE]), 780 [wp6]"r"(&dptr[z0][d + 6 * NSIZE]), 781 [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) 782 ); 783 784 for (z = z0 - 1; z >= 0; z--) { 785 /* 786 * w2$$ = MASK(wq$$); 787 * w1$$ = SHLBYTE(wq$$); 788 * w2$$ &= NBYTES(0x1d); 789 * w1$$ ^= w2$$; 790 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 791 * wq$$ = w1$$ ^ wd$$; 792 * wp$$ ^= wd$$; 793 */ 794 asm volatile (".option push\n" 795 ".option arch,+v\n" 796 "vsra.vi v2, v1, 7\n" 797 "vsll.vi v3, v1, 1\n" 798 "vand.vx v2, v2, %[x1d]\n" 799 "vxor.vv v3, v3, v2\n" 800 "vle8.v v2, (%[wd0])\n" 801 "vxor.vv v1, v3, v2\n" 802 "vxor.vv v0, v0, v2\n" 803 804 "vsra.vi v6, v5, 7\n" 805 "vsll.vi v7, v5, 1\n" 806 "vand.vx v6, v6, %[x1d]\n" 807 "vxor.vv v7, v7, v6\n" 808 "vle8.v v6, (%[wd1])\n" 809 "vxor.vv v5, v7, v6\n" 810 "vxor.vv v4, v4, v6\n" 811 812 "vsra.vi v10, v9, 7\n" 813 "vsll.vi v11, v9, 1\n" 814 "vand.vx v10, v10, %[x1d]\n" 815 "vxor.vv v11, v11, v10\n" 816 "vle8.v v10, (%[wd2])\n" 817 "vxor.vv v9, v11, v10\n" 818 "vxor.vv v8, v8, v10\n" 819 820 "vsra.vi v14, v13, 7\n" 821 "vsll.vi v15, v13, 1\n" 822 "vand.vx v14, v14, %[x1d]\n" 823 "vxor.vv v15, v15, v14\n" 824 "vle8.v v14, (%[wd3])\n" 825 "vxor.vv v13, v15, v14\n" 826 "vxor.vv v12, v12, v14\n" 827 828 "vsra.vi v18, v17, 7\n" 829 "vsll.vi v19, v17, 1\n" 830 "vand.vx v18, v18, %[x1d]\n" 831 "vxor.vv v19, v19, v18\n" 832 "vle8.v v18, (%[wd4])\n" 833 "vxor.vv v17, v19, v18\n" 834 "vxor.vv v16, v16, v18\n" 835 836 "vsra.vi v22, v21, 7\n" 837 "vsll.vi v23, v21, 1\n" 838 "vand.vx v22, v22, %[x1d]\n" 839 "vxor.vv v23, v23, v22\n" 840 "vle8.v v22, (%[wd5])\n" 841 "vxor.vv v21, v23, v22\n" 842 "vxor.vv v20, v20, v22\n" 843 844 "vsra.vi v26, v25, 7\n" 845 "vsll.vi v27, v25, 1\n" 846 "vand.vx v26, v26, %[x1d]\n" 847 "vxor.vv v27, v27, v26\n" 848 "vle8.v v26, (%[wd6])\n" 849 "vxor.vv 
				"vxor.vv v24, v24, v26\n"

				"vsra.vi v30, v29, 7\n"
				"vsll.vi v31, v29, 1\n"
				"vand.vx v30, v30, %[x1d]\n"
				"vxor.vv v31, v31, v30\n"
				"vle8.v v30, (%[wd7])\n"
				"vxor.vv v29, v31, v30\n"
				"vxor.vv v28, v28, v30\n"
				".option pop\n"
				: :
				[wd0]"r"(&dptr[z][d + 0 * NSIZE]),
				[wd1]"r"(&dptr[z][d + 1 * NSIZE]),
				[wd2]"r"(&dptr[z][d + 2 * NSIZE]),
				[wd3]"r"(&dptr[z][d + 3 * NSIZE]),
				[wd4]"r"(&dptr[z][d + 4 * NSIZE]),
				[wd5]"r"(&dptr[z][d + 5 * NSIZE]),
				[wd6]"r"(&dptr[z][d + 6 * NSIZE]),
				[wd7]"r"(&dptr[z][d + 7 * NSIZE]),
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] = wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] = wq$$;
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vse8.v v0, (%[wp0])\n"
			"vse8.v v1, (%[wq0])\n"
			"vse8.v v4, (%[wp1])\n"
			"vse8.v v5, (%[wq1])\n"
			"vse8.v v8, (%[wp2])\n"
			"vse8.v v9, (%[wq2])\n"
			"vse8.v v12, (%[wp3])\n"
			"vse8.v v13, (%[wq3])\n"
			"vse8.v v16, (%[wp4])\n"
			"vse8.v v17, (%[wq4])\n"
			"vse8.v v20, (%[wp5])\n"
			"vse8.v v21, (%[wq5])\n"
			"vse8.v v24, (%[wp6])\n"
			"vse8.v v25, (%[wq6])\n"
			"vse8.v v28, (%[wp7])\n"
			"vse8.v v29, (%[wq7])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0]),
			[wp1]"r"(&p[d + NSIZE * 1]),
			[wq1]"r"(&q[d + NSIZE * 1]),
			[wp2]"r"(&p[d + NSIZE * 2]),
			[wq2]"r"(&q[d + NSIZE * 2]),
			[wp3]"r"(&p[d + NSIZE * 3]),
			[wq3]"r"(&q[d + NSIZE * 3]),
			[wp4]"r"(&p[d + NSIZE * 4]),
			[wq4]"r"(&q[d + NSIZE * 4]),
			[wp5]"r"(&p[d + NSIZE * 5]),
			[wq5]"r"(&q[d + NSIZE * 5]),
			[wp6]"r"(&p[d + NSIZE * 6]),
			[wq6]"r"(&q[d + NSIZE * 6]),
			[wp7]"r"(&p[d + NSIZE * 7]),
			[wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop,
					 unsigned long bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	unsigned long vl, d;
	int z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks - 2];	/* XOR parity */
	q = dptr[disks - 1];	/* RS syndrome */

	asm volatile (".option push\n"
		".option arch,+v\n"
		"vsetvli %0, x0, e8, m1, ta, ma\n"
		".option pop\n"
		: "=&r" (vl)
	);

	/*
	 * v0:wp0, v1:wq0, v2:wd0/w20, v3:w10
	 * v4:wp1, v5:wq1, v6:wd1/w21, v7:w11
	 * v8:wp2, v9:wq2, v10:wd2/w22, v11:w12
	 * v12:wp3, v13:wq3, v14:wd3/w23, v15:w13
	 * v16:wp4, v17:wq4, v18:wd4/w24, v19:w14
	 * v20:wp5, v21:wq5, v22:wd5/w25, v23:w15
	 * v24:wp6, v25:wq6, v26:wd6/w26, v27:w16
	 * v28:wp7, v29:wq7, v30:wd7/w27, v31:w17
	 */
	for (d = 0; d < bytes; d += NSIZE * 8) {
		/* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v0, (%[wp0])\n"
			"vmv.v.v v1, v0\n"
			"vle8.v v4, (%[wp1])\n"
			"vmv.v.v v5, v4\n"
			"vle8.v v8, (%[wp2])\n"
			"vmv.v.v v9, v8\n"
			"vle8.v v12, (%[wp3])\n"
			"vmv.v.v v13, v12\n"
			"vle8.v v16, (%[wp4])\n"
			"vmv.v.v v17, v16\n"
			"vle8.v v20, (%[wp5])\n"
			"vmv.v.v v21, v20\n"
			"vle8.v v24, (%[wp6])\n"
			"vmv.v.v v25, v24\n"
			"vle8.v v28, (%[wp7])\n"
			"vmv.v.v v29, v28\n"
			".option pop\n"
			: :
			[wp0]"r"(&dptr[z0][d + 0 * NSIZE]),
			[wp1]"r"(&dptr[z0][d + 1 * NSIZE]),
			[wp2]"r"(&dptr[z0][d + 2 * NSIZE]),
			[wp3]"r"(&dptr[z0][d + 3 * NSIZE]),
			[wp4]"r"(&dptr[z0][d + 4 * NSIZE]),
			[wp5]"r"(&dptr[z0][d + 5 * NSIZE]),
[wp6]"r"(&dptr[z0][d + 6 * NSIZE]), 975 [wp7]"r"(&dptr[z0][d + 7 * NSIZE]) 976 ); 977 978 /* P/Q data pages */ 979 for (z = z0 - 1; z >= start; z--) { 980 /* 981 * w2$$ = MASK(wq$$); 982 * w1$$ = SHLBYTE(wq$$); 983 * w2$$ &= NBYTES(0x1d); 984 * w1$$ ^= w2$$; 985 * wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; 986 * wq$$ = w1$$ ^ wd$$; 987 * wp$$ ^= wd$$; 988 */ 989 asm volatile (".option push\n" 990 ".option arch,+v\n" 991 "vsra.vi v2, v1, 7\n" 992 "vsll.vi v3, v1, 1\n" 993 "vand.vx v2, v2, %[x1d]\n" 994 "vxor.vv v3, v3, v2\n" 995 "vle8.v v2, (%[wd0])\n" 996 "vxor.vv v1, v3, v2\n" 997 "vxor.vv v0, v0, v2\n" 998 999 "vsra.vi v6, v5, 7\n" 1000 "vsll.vi v7, v5, 1\n" 1001 "vand.vx v6, v6, %[x1d]\n" 1002 "vxor.vv v7, v7, v6\n" 1003 "vle8.v v6, (%[wd1])\n" 1004 "vxor.vv v5, v7, v6\n" 1005 "vxor.vv v4, v4, v6\n" 1006 1007 "vsra.vi v10, v9, 7\n" 1008 "vsll.vi v11, v9, 1\n" 1009 "vand.vx v10, v10, %[x1d]\n" 1010 "vxor.vv v11, v11, v10\n" 1011 "vle8.v v10, (%[wd2])\n" 1012 "vxor.vv v9, v11, v10\n" 1013 "vxor.vv v8, v8, v10\n" 1014 1015 "vsra.vi v14, v13, 7\n" 1016 "vsll.vi v15, v13, 1\n" 1017 "vand.vx v14, v14, %[x1d]\n" 1018 "vxor.vv v15, v15, v14\n" 1019 "vle8.v v14, (%[wd3])\n" 1020 "vxor.vv v13, v15, v14\n" 1021 "vxor.vv v12, v12, v14\n" 1022 1023 "vsra.vi v18, v17, 7\n" 1024 "vsll.vi v19, v17, 1\n" 1025 "vand.vx v18, v18, %[x1d]\n" 1026 "vxor.vv v19, v19, v18\n" 1027 "vle8.v v18, (%[wd4])\n" 1028 "vxor.vv v17, v19, v18\n" 1029 "vxor.vv v16, v16, v18\n" 1030 1031 "vsra.vi v22, v21, 7\n" 1032 "vsll.vi v23, v21, 1\n" 1033 "vand.vx v22, v22, %[x1d]\n" 1034 "vxor.vv v23, v23, v22\n" 1035 "vle8.v v22, (%[wd5])\n" 1036 "vxor.vv v21, v23, v22\n" 1037 "vxor.vv v20, v20, v22\n" 1038 1039 "vsra.vi v26, v25, 7\n" 1040 "vsll.vi v27, v25, 1\n" 1041 "vand.vx v26, v26, %[x1d]\n" 1042 "vxor.vv v27, v27, v26\n" 1043 "vle8.v v26, (%[wd6])\n" 1044 "vxor.vv v25, v27, v26\n" 1045 "vxor.vv v24, v24, v26\n" 1046 1047 "vsra.vi v30, v29, 7\n" 1048 "vsll.vi v31, v29, 1\n" 1049 "vand.vx v30, v30, %[x1d]\n" 1050 "vxor.vv v31, v31, v30\n" 1051 "vle8.v v30, (%[wd7])\n" 1052 "vxor.vv v29, v31, v30\n" 1053 "vxor.vv v28, v28, v30\n" 1054 ".option pop\n" 1055 : : 1056 [wd0]"r"(&dptr[z][d + 0 * NSIZE]), 1057 [wd1]"r"(&dptr[z][d + 1 * NSIZE]), 1058 [wd2]"r"(&dptr[z][d + 2 * NSIZE]), 1059 [wd3]"r"(&dptr[z][d + 3 * NSIZE]), 1060 [wd4]"r"(&dptr[z][d + 4 * NSIZE]), 1061 [wd5]"r"(&dptr[z][d + 5 * NSIZE]), 1062 [wd6]"r"(&dptr[z][d + 6 * NSIZE]), 1063 [wd7]"r"(&dptr[z][d + 7 * NSIZE]), 1064 [x1d]"r"(0x1d) 1065 ); 1066 } 1067 1068 /* P/Q left side optimization */ 1069 for (z = start - 1; z >= 0; z--) { 1070 /* 1071 * w2$$ = MASK(wq$$); 1072 * w1$$ = SHLBYTE(wq$$); 1073 * w2$$ &= NBYTES(0x1d); 1074 * wq$$ = w1$$ ^ w2$$; 1075 */ 1076 asm volatile (".option push\n" 1077 ".option arch,+v\n" 1078 "vsra.vi v2, v1, 7\n" 1079 "vsll.vi v3, v1, 1\n" 1080 "vand.vx v2, v2, %[x1d]\n" 1081 "vxor.vv v1, v3, v2\n" 1082 1083 "vsra.vi v6, v5, 7\n" 1084 "vsll.vi v7, v5, 1\n" 1085 "vand.vx v6, v6, %[x1d]\n" 1086 "vxor.vv v5, v7, v6\n" 1087 1088 "vsra.vi v10, v9, 7\n" 1089 "vsll.vi v11, v9, 1\n" 1090 "vand.vx v10, v10, %[x1d]\n" 1091 "vxor.vv v9, v11, v10\n" 1092 1093 "vsra.vi v14, v13, 7\n" 1094 "vsll.vi v15, v13, 1\n" 1095 "vand.vx v14, v14, %[x1d]\n" 1096 "vxor.vv v13, v15, v14\n" 1097 1098 "vsra.vi v18, v17, 7\n" 1099 "vsll.vi v19, v17, 1\n" 1100 "vand.vx v18, v18, %[x1d]\n" 1101 "vxor.vv v17, v19, v18\n" 1102 1103 "vsra.vi v22, v21, 7\n" 1104 "vsll.vi v23, v21, 1\n" 1105 "vand.vx v22, v22, %[x1d]\n" 1106 "vxor.vv v21, v23, v22\n" 1107 1108 "vsra.vi v26, 
				"vsll.vi v27, v25, 1\n"
				"vand.vx v26, v26, %[x1d]\n"
				"vxor.vv v25, v27, v26\n"

				"vsra.vi v30, v29, 7\n"
				"vsll.vi v31, v29, 1\n"
				"vand.vx v30, v30, %[x1d]\n"
				"vxor.vv v29, v31, v30\n"
				".option pop\n"
				: :
				[x1d]"r"(0x1d)
			);
		}

		/*
		 * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
		 * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
		 * v0:wp0, v1:wq0, v2:p0, v3:q0
		 * v4:wp1, v5:wq1, v6:p1, v7:q1
		 * v8:wp2, v9:wq2, v10:p2, v11:q2
		 * v12:wp3, v13:wq3, v14:p3, v15:q3
		 * v16:wp4, v17:wq4, v18:p4, v19:q4
		 * v20:wp5, v21:wq5, v22:p5, v23:q5
		 * v24:wp6, v25:wq6, v26:p6, v27:q6
		 * v28:wp7, v29:wq7, v30:p7, v31:q7
		 */
		asm volatile (".option push\n"
			".option arch,+v\n"
			"vle8.v v2, (%[wp0])\n"
			"vle8.v v3, (%[wq0])\n"
			"vxor.vv v2, v2, v0\n"
			"vxor.vv v3, v3, v1\n"
			"vse8.v v2, (%[wp0])\n"
			"vse8.v v3, (%[wq0])\n"

			"vle8.v v6, (%[wp1])\n"
			"vle8.v v7, (%[wq1])\n"
			"vxor.vv v6, v6, v4\n"
			"vxor.vv v7, v7, v5\n"
			"vse8.v v6, (%[wp1])\n"
			"vse8.v v7, (%[wq1])\n"

			"vle8.v v10, (%[wp2])\n"
			"vle8.v v11, (%[wq2])\n"
			"vxor.vv v10, v10, v8\n"
			"vxor.vv v11, v11, v9\n"
			"vse8.v v10, (%[wp2])\n"
			"vse8.v v11, (%[wq2])\n"

			"vle8.v v14, (%[wp3])\n"
			"vle8.v v15, (%[wq3])\n"
			"vxor.vv v14, v14, v12\n"
			"vxor.vv v15, v15, v13\n"
			"vse8.v v14, (%[wp3])\n"
			"vse8.v v15, (%[wq3])\n"

			"vle8.v v18, (%[wp4])\n"
			"vle8.v v19, (%[wq4])\n"
			"vxor.vv v18, v18, v16\n"
			"vxor.vv v19, v19, v17\n"
			"vse8.v v18, (%[wp4])\n"
			"vse8.v v19, (%[wq4])\n"

			"vle8.v v22, (%[wp5])\n"
			"vle8.v v23, (%[wq5])\n"
			"vxor.vv v22, v22, v20\n"
			"vxor.vv v23, v23, v21\n"
			"vse8.v v22, (%[wp5])\n"
			"vse8.v v23, (%[wq5])\n"

			"vle8.v v26, (%[wp6])\n"
			"vle8.v v27, (%[wq6])\n"
			"vxor.vv v26, v26, v24\n"
			"vxor.vv v27, v27, v25\n"
			"vse8.v v26, (%[wp6])\n"
			"vse8.v v27, (%[wq6])\n"

			"vle8.v v30, (%[wp7])\n"
			"vle8.v v31, (%[wq7])\n"
			"vxor.vv v30, v30, v28\n"
			"vxor.vv v31, v31, v29\n"
			"vse8.v v30, (%[wp7])\n"
			"vse8.v v31, (%[wq7])\n"
			".option pop\n"
			: :
			[wp0]"r"(&p[d + NSIZE * 0]),
			[wq0]"r"(&q[d + NSIZE * 0]),
			[wp1]"r"(&p[d + NSIZE * 1]),
			[wq1]"r"(&q[d + NSIZE * 1]),
			[wp2]"r"(&p[d + NSIZE * 2]),
			[wq2]"r"(&q[d + NSIZE * 2]),
			[wp3]"r"(&p[d + NSIZE * 3]),
			[wq3]"r"(&q[d + NSIZE * 3]),
			[wp4]"r"(&p[d + NSIZE * 4]),
			[wq4]"r"(&q[d + NSIZE * 4]),
			[wp5]"r"(&p[d + NSIZE * 5]),
			[wq5]"r"(&q[d + NSIZE * 5]),
			[wp6]"r"(&p[d + NSIZE * 6]),
			[wq6]"r"(&q[d + NSIZE * 6]),
			[wp7]"r"(&p[d + NSIZE * 7]),
			[wq7]"r"(&q[d + NSIZE * 7])
		);
	}
}

RAID6_RVV_WRAPPER(1);
RAID6_RVV_WRAPPER(2);
RAID6_RVV_WRAPPER(4);
RAID6_RVV_WRAPPER(8);
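
/*
 * Reference note: the vsra.vi/vsll.vi/vand.vx/vxor.vv sequences above are a
 * vectorised form of the GF(2^8) multiply-by-two (generator {02}, reduction
 * polynomial 0x11d) spelled out by the MASK()/SHLBYTE()/NBYTES(0x1d) steps
 * quoted in the comments, roughly equivalent to this scalar sketch (for
 * illustration only, not used by this file):
 *
 *	static inline u8 gf256_mul2(u8 x)
 *	{
 *		return (x << 1) ^ ((x & 0x80) ? 0x1d : 0);
 *	}
 */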