1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX) 4 * 5 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 6 * 7 * Originally based on recov_avx2.c and recov_ssse3.c: 8 * 9 * Copyright (C) 2012 Intel Corporation 10 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com> 11 */ 12 13 #include <linux/mm.h> 14 #include <linux/raid/pq.h> 15 #include <asm/cpu-features.h> 16 #include <asm/fpu.h> 17 #include "algos.h" 18 19 /* 20 * Unlike with the syndrome calculation algorithms, there's no boot-time 21 * selection of recovery algorithms by benchmarking, so we have to specify 22 * the priorities and hope the future cores will all have decent vector 23 * support (i.e. no LASX slower than LSX, or even scalar code). 24 */ 25 26 #ifdef CONFIG_CPU_HAS_LSX 27 static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, 28 int failb, void **ptrs) 29 { 30 u8 *p, *q, *dp, *dq; 31 const u8 *pbmul; /* P multiplier table for B data */ 32 const u8 *qmul; /* Q multiplier table (for both) */ 33 34 p = (u8 *)ptrs[disks - 2]; 35 q = (u8 *)ptrs[disks - 1]; 36 37 /* 38 * Compute syndrome with zero for the missing data pages 39 * Use the dead data pages as temporary storage for 40 * delta p and delta q 41 */ 42 dp = (u8 *)ptrs[faila]; 43 ptrs[faila] = page_address(ZERO_PAGE(0)); 44 ptrs[disks - 2] = dp; 45 dq = (u8 *)ptrs[failb]; 46 ptrs[failb] = page_address(ZERO_PAGE(0)); 47 ptrs[disks - 1] = dq; 48 49 raid6_gen_syndrome(disks, bytes, ptrs); 50 51 /* Restore pointer table */ 52 ptrs[faila] = dp; 53 ptrs[failb] = dq; 54 ptrs[disks - 2] = p; 55 ptrs[disks - 1] = q; 56 57 /* Now, pick the proper data tables */ 58 pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; 59 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; 60 61 kernel_fpu_begin(); 62 63 /* 64 * vr20, vr21: qmul 65 * vr22, vr23: pbmul 66 */ 67 asm volatile("vld $vr20, %0" : : "m" (qmul[0])); 68 asm volatile("vld $vr21, %0" : : "m" (qmul[16])); 69 asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); 70 asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); 71 72 while (bytes) { 73 /* vr4 - vr7: Q */ 74 asm volatile("vld $vr4, %0" : : "m" (q[0])); 75 asm volatile("vld $vr5, %0" : : "m" (q[16])); 76 asm volatile("vld $vr6, %0" : : "m" (q[32])); 77 asm volatile("vld $vr7, %0" : : "m" (q[48])); 78 /* vr4 - vr7: Q + Qxy */ 79 asm volatile("vld $vr8, %0" : : "m" (dq[0])); 80 asm volatile("vld $vr9, %0" : : "m" (dq[16])); 81 asm volatile("vld $vr10, %0" : : "m" (dq[32])); 82 asm volatile("vld $vr11, %0" : : "m" (dq[48])); 83 asm volatile("vxor.v $vr4, $vr4, $vr8"); 84 asm volatile("vxor.v $vr5, $vr5, $vr9"); 85 asm volatile("vxor.v $vr6, $vr6, $vr10"); 86 asm volatile("vxor.v $vr7, $vr7, $vr11"); 87 /* vr0 - vr3: P */ 88 asm volatile("vld $vr0, %0" : : "m" (p[0])); 89 asm volatile("vld $vr1, %0" : : "m" (p[16])); 90 asm volatile("vld $vr2, %0" : : "m" (p[32])); 91 asm volatile("vld $vr3, %0" : : "m" (p[48])); 92 /* vr0 - vr3: P + Pxy */ 93 asm volatile("vld $vr8, %0" : : "m" (dp[0])); 94 asm volatile("vld $vr9, %0" : : "m" (dp[16])); 95 asm volatile("vld $vr10, %0" : : "m" (dp[32])); 96 asm volatile("vld $vr11, %0" : : "m" (dp[48])); 97 asm volatile("vxor.v $vr0, $vr0, $vr8"); 98 asm volatile("vxor.v $vr1, $vr1, $vr9"); 99 asm volatile("vxor.v $vr2, $vr2, $vr10"); 100 asm volatile("vxor.v $vr3, $vr3, $vr11"); 101 102 /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */ 103 asm volatile("vsrli.b $vr8, $vr4, 4"); 104 asm volatile("vsrli.b $vr9, $vr5, 4"); 105 asm volatile("vsrli.b $vr10, $vr6, 4"); 106 asm volatile("vsrli.b $vr11, $vr7, 4"); 107 /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */ 108 asm volatile("vandi.b $vr4, $vr4, 0x0f"); 109 asm volatile("vandi.b $vr5, $vr5, 0x0f"); 110 asm volatile("vandi.b $vr6, $vr6, 0x0f"); 111 asm volatile("vandi.b $vr7, $vr7, 0x0f"); 112 /* lookup from qmul[0] */ 113 asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4"); 114 asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5"); 115 asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6"); 116 asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7"); 117 /* lookup from qmul[16] */ 118 asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8"); 119 asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9"); 120 asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10"); 121 asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11"); 122 /* vr16 - vr19: B(Q + Qxy) */ 123 asm volatile("vxor.v $vr16, $vr8, $vr4"); 124 asm volatile("vxor.v $vr17, $vr9, $vr5"); 125 asm volatile("vxor.v $vr18, $vr10, $vr6"); 126 asm volatile("vxor.v $vr19, $vr11, $vr7"); 127 128 /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */ 129 asm volatile("vsrli.b $vr4, $vr0, 4"); 130 asm volatile("vsrli.b $vr5, $vr1, 4"); 131 asm volatile("vsrli.b $vr6, $vr2, 4"); 132 asm volatile("vsrli.b $vr7, $vr3, 4"); 133 /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */ 134 asm volatile("vandi.b $vr12, $vr0, 0x0f"); 135 asm volatile("vandi.b $vr13, $vr1, 0x0f"); 136 asm volatile("vandi.b $vr14, $vr2, 0x0f"); 137 asm volatile("vandi.b $vr15, $vr3, 0x0f"); 138 /* lookup from pbmul[0] */ 139 asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12"); 140 asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13"); 141 asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14"); 142 asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15"); 143 /* lookup from pbmul[16] */ 144 asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4"); 145 asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5"); 146 asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6"); 147 asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7"); 148 /* vr4 - vr7: A(P + Pxy) */ 149 asm volatile("vxor.v $vr4, $vr4, $vr12"); 150 asm volatile("vxor.v $vr5, $vr5, $vr13"); 151 asm volatile("vxor.v $vr6, $vr6, $vr14"); 152 asm volatile("vxor.v $vr7, $vr7, $vr15"); 153 154 /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */ 155 asm volatile("vxor.v $vr4, $vr4, $vr16"); 156 asm volatile("vxor.v $vr5, $vr5, $vr17"); 157 asm volatile("vxor.v $vr6, $vr6, $vr18"); 158 asm volatile("vxor.v $vr7, $vr7, $vr19"); 159 asm volatile("vst $vr4, %0" : "=m" (dq[0])); 160 asm volatile("vst $vr5, %0" : "=m" (dq[16])); 161 asm volatile("vst $vr6, %0" : "=m" (dq[32])); 162 asm volatile("vst $vr7, %0" : "=m" (dq[48])); 163 164 /* vr0 - vr3: P + Pxy + Dx = Dy */ 165 asm volatile("vxor.v $vr0, $vr0, $vr4"); 166 asm volatile("vxor.v $vr1, $vr1, $vr5"); 167 asm volatile("vxor.v $vr2, $vr2, $vr6"); 168 asm volatile("vxor.v $vr3, $vr3, $vr7"); 169 asm volatile("vst $vr0, %0" : "=m" (dp[0])); 170 asm volatile("vst $vr1, %0" : "=m" (dp[16])); 171 asm volatile("vst $vr2, %0" : "=m" (dp[32])); 172 asm volatile("vst $vr3, %0" : "=m" (dp[48])); 173 174 bytes -= 64; 175 p += 64; 176 q += 64; 177 dp += 64; 178 dq += 64; 179 } 180 181 kernel_fpu_end(); 182 } 183 184 static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, 185 void **ptrs) 186 { 187 u8 *p, *q, *dq; 188 const u8 *qmul; /* Q multiplier table */ 189 190 p = (u8 *)ptrs[disks - 2]; 191 q = (u8 *)ptrs[disks - 1]; 192 193 /* 194 * Compute syndrome with zero for the missing data page 195 * Use the dead data page as temporary storage for delta q 196 */ 197 dq = (u8 *)ptrs[faila]; 198 ptrs[faila] = page_address(ZERO_PAGE(0)); 199 ptrs[disks - 1] = dq; 200 201 raid6_gen_syndrome(disks, bytes, ptrs); 202 203 /* Restore pointer table */ 204 ptrs[faila] = dq; 205 ptrs[disks - 1] = q; 206 207 /* Now, pick the proper data tables */ 208 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 209 210 kernel_fpu_begin(); 211 212 /* vr22, vr23: qmul */ 213 asm volatile("vld $vr22, %0" : : "m" (qmul[0])); 214 asm volatile("vld $vr23, %0" : : "m" (qmul[16])); 215 216 while (bytes) { 217 /* vr0 - vr3: P + Dx */ 218 asm volatile("vld $vr0, %0" : : "m" (p[0])); 219 asm volatile("vld $vr1, %0" : : "m" (p[16])); 220 asm volatile("vld $vr2, %0" : : "m" (p[32])); 221 asm volatile("vld $vr3, %0" : : "m" (p[48])); 222 /* vr4 - vr7: Qx */ 223 asm volatile("vld $vr4, %0" : : "m" (dq[0])); 224 asm volatile("vld $vr5, %0" : : "m" (dq[16])); 225 asm volatile("vld $vr6, %0" : : "m" (dq[32])); 226 asm volatile("vld $vr7, %0" : : "m" (dq[48])); 227 /* vr4 - vr7: Q + Qx */ 228 asm volatile("vld $vr8, %0" : : "m" (q[0])); 229 asm volatile("vld $vr9, %0" : : "m" (q[16])); 230 asm volatile("vld $vr10, %0" : : "m" (q[32])); 231 asm volatile("vld $vr11, %0" : : "m" (q[48])); 232 asm volatile("vxor.v $vr4, $vr4, $vr8"); 233 asm volatile("vxor.v $vr5, $vr5, $vr9"); 234 asm volatile("vxor.v $vr6, $vr6, $vr10"); 235 asm volatile("vxor.v $vr7, $vr7, $vr11"); 236 237 /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */ 238 asm volatile("vsrli.b $vr8, $vr4, 4"); 239 asm volatile("vsrli.b $vr9, $vr5, 4"); 240 asm volatile("vsrli.b $vr10, $vr6, 4"); 241 asm volatile("vsrli.b $vr11, $vr7, 4"); 242 /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */ 243 asm volatile("vandi.b $vr4, $vr4, 0x0f"); 244 asm volatile("vandi.b $vr5, $vr5, 0x0f"); 245 asm volatile("vandi.b $vr6, $vr6, 0x0f"); 246 asm volatile("vandi.b $vr7, $vr7, 0x0f"); 247 /* lookup from qmul[0] */ 248 asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4"); 249 asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5"); 250 asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6"); 251 asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7"); 252 /* lookup from qmul[16] */ 253 asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8"); 254 asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9"); 255 asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10"); 256 asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11"); 257 /* vr4 - vr7: qmul(Q + Qx) = Dx */ 258 asm volatile("vxor.v $vr4, $vr4, $vr8"); 259 asm volatile("vxor.v $vr5, $vr5, $vr9"); 260 asm volatile("vxor.v $vr6, $vr6, $vr10"); 261 asm volatile("vxor.v $vr7, $vr7, $vr11"); 262 asm volatile("vst $vr4, %0" : "=m" (dq[0])); 263 asm volatile("vst $vr5, %0" : "=m" (dq[16])); 264 asm volatile("vst $vr6, %0" : "=m" (dq[32])); 265 asm volatile("vst $vr7, %0" : "=m" (dq[48])); 266 267 /* vr0 - vr3: P + Dx + Dx = P */ 268 asm volatile("vxor.v $vr0, $vr0, $vr4"); 269 asm volatile("vxor.v $vr1, $vr1, $vr5"); 270 asm volatile("vxor.v $vr2, $vr2, $vr6"); 271 asm volatile("vxor.v $vr3, $vr3, $vr7"); 272 asm volatile("vst $vr0, %0" : "=m" (p[0])); 273 asm volatile("vst $vr1, %0" : "=m" (p[16])); 274 asm volatile("vst $vr2, %0" : "=m" (p[32])); 275 asm volatile("vst $vr3, %0" : "=m" (p[48])); 276 277 bytes -= 64; 278 p += 64; 279 q += 64; 280 dq += 64; 281 } 282 283 kernel_fpu_end(); 284 } 285 286 const struct raid6_recov_calls raid6_recov_lsx = { 287 .data2 = raid6_2data_recov_lsx, 288 .datap = raid6_datap_recov_lsx, 289 .name = "lsx", 290 }; 291 #endif /* CONFIG_CPU_HAS_LSX */ 292 293 #ifdef CONFIG_CPU_HAS_LASX 294 static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, 295 int failb, void **ptrs) 296 { 297 u8 *p, *q, *dp, *dq; 298 const u8 *pbmul; /* P multiplier table for B data */ 299 const u8 *qmul; /* Q multiplier table (for both) */ 300 301 p = (u8 *)ptrs[disks - 2]; 302 q = (u8 *)ptrs[disks - 1]; 303 304 /* 305 * Compute syndrome with zero for the missing data pages 306 * Use the dead data pages as temporary storage for 307 * delta p and delta q 308 */ 309 dp = (u8 *)ptrs[faila]; 310 ptrs[faila] = page_address(ZERO_PAGE(0)); 311 ptrs[disks - 2] = dp; 312 dq = (u8 *)ptrs[failb]; 313 ptrs[failb] = page_address(ZERO_PAGE(0)); 314 ptrs[disks - 1] = dq; 315 316 raid6_gen_syndrome(disks, bytes, ptrs); 317 318 /* Restore pointer table */ 319 ptrs[faila] = dp; 320 ptrs[failb] = dq; 321 ptrs[disks - 2] = p; 322 ptrs[disks - 1] = q; 323 324 /* Now, pick the proper data tables */ 325 pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]]; 326 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; 327 328 kernel_fpu_begin(); 329 330 /* 331 * xr20, xr21: qmul 332 * xr22, xr23: pbmul 333 */ 334 asm volatile("vld $vr20, %0" : : "m" (qmul[0])); 335 asm volatile("vld $vr21, %0" : : "m" (qmul[16])); 336 asm volatile("vld $vr22, %0" : : "m" (pbmul[0])); 337 asm volatile("vld $vr23, %0" : : "m" (pbmul[16])); 338 asm volatile("xvreplve0.q $xr20, $xr20"); 339 asm volatile("xvreplve0.q $xr21, $xr21"); 340 asm volatile("xvreplve0.q $xr22, $xr22"); 341 asm volatile("xvreplve0.q $xr23, $xr23"); 342 343 while (bytes) { 344 /* xr0, xr1: Q */ 345 asm volatile("xvld $xr0, %0" : : "m" (q[0])); 346 asm volatile("xvld $xr1, %0" : : "m" (q[32])); 347 /* xr0, xr1: Q + Qxy */ 348 asm volatile("xvld $xr4, %0" : : "m" (dq[0])); 349 asm volatile("xvld $xr5, %0" : : "m" (dq[32])); 350 asm volatile("xvxor.v $xr0, $xr0, $xr4"); 351 asm volatile("xvxor.v $xr1, $xr1, $xr5"); 352 /* xr2, xr3: P */ 353 asm volatile("xvld $xr2, %0" : : "m" (p[0])); 354 asm volatile("xvld $xr3, %0" : : "m" (p[32])); 355 /* xr2, xr3: P + Pxy */ 356 asm volatile("xvld $xr4, %0" : : "m" (dp[0])); 357 asm volatile("xvld $xr5, %0" : : "m" (dp[32])); 358 asm volatile("xvxor.v $xr2, $xr2, $xr4"); 359 asm volatile("xvxor.v $xr3, $xr3, $xr5"); 360 361 /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */ 362 asm volatile("xvsrli.b $xr4, $xr0, 4"); 363 asm volatile("xvsrli.b $xr5, $xr1, 4"); 364 /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */ 365 asm volatile("xvandi.b $xr0, $xr0, 0x0f"); 366 asm volatile("xvandi.b $xr1, $xr1, 0x0f"); 367 /* lookup from qmul[0] */ 368 asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0"); 369 asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1"); 370 /* lookup from qmul[16] */ 371 asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4"); 372 asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5"); 373 /* xr6, xr7: B(Q + Qxy) */ 374 asm volatile("xvxor.v $xr6, $xr4, $xr0"); 375 asm volatile("xvxor.v $xr7, $xr5, $xr1"); 376 377 /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */ 378 asm volatile("xvsrli.b $xr4, $xr2, 4"); 379 asm volatile("xvsrli.b $xr5, $xr3, 4"); 380 /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */ 381 asm volatile("xvandi.b $xr0, $xr2, 0x0f"); 382 asm volatile("xvandi.b $xr1, $xr3, 0x0f"); 383 /* lookup from pbmul[0] */ 384 asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0"); 385 asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1"); 386 /* lookup from pbmul[16] */ 387 asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); 388 asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); 389 /* xr0, xr1: A(P + Pxy) */ 390 asm volatile("xvxor.v $xr0, $xr0, $xr4"); 391 asm volatile("xvxor.v $xr1, $xr1, $xr5"); 392 393 /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */ 394 asm volatile("xvxor.v $xr0, $xr0, $xr6"); 395 asm volatile("xvxor.v $xr1, $xr1, $xr7"); 396 397 /* xr2, xr3: P + Pxy + Dx = Dy */ 398 asm volatile("xvxor.v $xr2, $xr2, $xr0"); 399 asm volatile("xvxor.v $xr3, $xr3, $xr1"); 400 401 asm volatile("xvst $xr0, %0" : "=m" (dq[0])); 402 asm volatile("xvst $xr1, %0" : "=m" (dq[32])); 403 asm volatile("xvst $xr2, %0" : "=m" (dp[0])); 404 asm volatile("xvst $xr3, %0" : "=m" (dp[32])); 405 406 bytes -= 64; 407 p += 64; 408 q += 64; 409 dp += 64; 410 dq += 64; 411 } 412 413 kernel_fpu_end(); 414 } 415 416 static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, 417 void **ptrs) 418 { 419 u8 *p, *q, *dq; 420 const u8 *qmul; /* Q multiplier table */ 421 422 p = (u8 *)ptrs[disks - 2]; 423 q = (u8 *)ptrs[disks - 1]; 424 425 /* 426 * Compute syndrome with zero for the missing data page 427 * Use the dead data page as temporary storage for delta q 428 */ 429 dq = (u8 *)ptrs[faila]; 430 ptrs[faila] = page_address(ZERO_PAGE(0)); 431 ptrs[disks - 1] = dq; 432 433 raid6_gen_syndrome(disks, bytes, ptrs); 434 435 /* Restore pointer table */ 436 ptrs[faila] = dq; 437 ptrs[disks - 1] = q; 438 439 /* Now, pick the proper data tables */ 440 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 441 442 kernel_fpu_begin(); 443 444 /* xr22, xr23: qmul */ 445 asm volatile("vld $vr22, %0" : : "m" (qmul[0])); 446 asm volatile("xvreplve0.q $xr22, $xr22"); 447 asm volatile("vld $vr23, %0" : : "m" (qmul[16])); 448 asm volatile("xvreplve0.q $xr23, $xr23"); 449 450 while (bytes) { 451 /* xr0, xr1: P + Dx */ 452 asm volatile("xvld $xr0, %0" : : "m" (p[0])); 453 asm volatile("xvld $xr1, %0" : : "m" (p[32])); 454 /* xr2, xr3: Qx */ 455 asm volatile("xvld $xr2, %0" : : "m" (dq[0])); 456 asm volatile("xvld $xr3, %0" : : "m" (dq[32])); 457 /* xr2, xr3: Q + Qx */ 458 asm volatile("xvld $xr4, %0" : : "m" (q[0])); 459 asm volatile("xvld $xr5, %0" : : "m" (q[32])); 460 asm volatile("xvxor.v $xr2, $xr2, $xr4"); 461 asm volatile("xvxor.v $xr3, $xr3, $xr5"); 462 463 /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */ 464 asm volatile("xvsrli.b $xr4, $xr2, 4"); 465 asm volatile("xvsrli.b $xr5, $xr3, 4"); 466 /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */ 467 asm volatile("xvandi.b $xr2, $xr2, 0x0f"); 468 asm volatile("xvandi.b $xr3, $xr3, 0x0f"); 469 /* lookup from qmul[0] */ 470 asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2"); 471 asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3"); 472 /* lookup from qmul[16] */ 473 asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4"); 474 asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5"); 475 /* xr2, xr3: qmul(Q + Qx) = Dx */ 476 asm volatile("xvxor.v $xr2, $xr2, $xr4"); 477 asm volatile("xvxor.v $xr3, $xr3, $xr5"); 478 479 /* xr0, xr1: P + Dx + Dx = P */ 480 asm volatile("xvxor.v $xr0, $xr0, $xr2"); 481 asm volatile("xvxor.v $xr1, $xr1, $xr3"); 482 483 asm volatile("xvst $xr2, %0" : "=m" (dq[0])); 484 asm volatile("xvst $xr3, %0" : "=m" (dq[32])); 485 asm volatile("xvst $xr0, %0" : "=m" (p[0])); 486 asm volatile("xvst $xr1, %0" : "=m" (p[32])); 487 488 bytes -= 64; 489 p += 64; 490 q += 64; 491 dq += 64; 492 } 493 494 kernel_fpu_end(); 495 } 496 497 const struct raid6_recov_calls raid6_recov_lasx = { 498 .data2 = raid6_2data_recov_lasx, 499 .datap = raid6_datap_recov_lasx, 500 .name = "lasx", 501 }; 502 #endif /* CONFIG_CPU_HAS_LASX */ 503