// SPDX-License-Identifier: GPL-2.0-only
/*
 * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
 *
 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
 *
 * Originally based on recov_avx2.c and recov_ssse3.c:
 *
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */

#include <linux/raid/pq.h>
#include "loongarch.h"

/*
 * Unlike with the syndrome calculation algorithms, there's no boot-time
 * selection of recovery algorithms by benchmarking, so we have to specify
 * the priorities and hope the future cores will all have decent vector
 * support (i.e. no LASX slower than LSX, or even scalar code).
 */

#ifdef CONFIG_CPU_HAS_LSX
static int raid6_has_lsx(void)
{
	return cpu_has_lsx;
}

static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
				  int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks - 2] = p;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/*
	 * vr20, vr21: qmul
	 * vr22, vr23: pbmul
	 */
	asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
	asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));

	while (bytes) {
		/* vr4 - vr7: Q */
		asm volatile("vld $vr4, %0" : : "m" (q[0]));
		asm volatile("vld $vr5, %0" : : "m" (q[16]));
		asm volatile("vld $vr6, %0" : : "m" (q[32]));
		asm volatile("vld $vr7, %0" : : "m" (q[48]));
		/* vr4 - vr7: Q + Qxy */
		asm volatile("vld $vr8, %0" : : "m" (dq[0]));
		asm volatile("vld $vr9, %0" : : "m" (dq[16]));
		asm volatile("vld $vr10, %0" : : "m" (dq[32]));
		asm volatile("vld $vr11, %0" : : "m" (dq[48]));
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");
		/* vr0 - vr3: P */
		asm volatile("vld $vr0, %0" : : "m" (p[0]));
		asm volatile("vld $vr1, %0" : : "m" (p[16]));
		asm volatile("vld $vr2, %0" : : "m" (p[32]));
		asm volatile("vld $vr3, %0" : : "m" (p[48]));
		/* vr0 - vr3: P + Pxy */
		asm volatile("vld $vr8, %0" : : "m" (dp[0]));
		asm volatile("vld $vr9, %0" : : "m" (dp[16]));
		asm volatile("vld $vr10, %0" : : "m" (dp[32]));
		asm volatile("vld $vr11, %0" : : "m" (dp[48]));
		asm volatile("vxor.v $vr0, $vr0, $vr8");
		asm volatile("vxor.v $vr1, $vr1, $vr9");
		asm volatile("vxor.v $vr2, $vr2, $vr10");
		asm volatile("vxor.v $vr3, $vr3, $vr11");

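		/*
		 * The rest of the loop reconstructs Dx and Dy from the values
		 * just computed.  A scalar sketch of the whole per-byte
		 * computation (in the spirit of the generic lib/raid6/recov.c
		 * code, whose pbmul/qmul point at byte-indexed raid6_gfmul
		 * tables rather than the nibble tables used here):
		 *
		 *	px = p[i] ^ dp[i];                    P + Pxy
		 *	db = pbmul[px] ^ qmul[q[i] ^ dq[i]];  Dx
		 *	dq[i] = db;
		 *	dp[i] = db ^ px;                      Dy
		 *
		 * Each GF(2^8) multiplication by a constant is done 4 bits at
		 * a time: the low and high nibbles of every byte index the two
		 * 16-byte halves of the corresponding raid6_vgfmul table
		 * (loaded into vr20 - vr23 above), and the partial products
		 * are XORed together.  vshuf.b provides the 16-way byte lookup.
		 */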
		/* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
		asm volatile("vsrli.b $vr8, $vr4, 4");
		asm volatile("vsrli.b $vr9, $vr5, 4");
		asm volatile("vsrli.b $vr10, $vr6, 4");
		asm volatile("vsrli.b $vr11, $vr7, 4");
		/* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
		asm volatile("vandi.b $vr4, $vr4, 0x0f");
		asm volatile("vandi.b $vr5, $vr5, 0x0f");
		asm volatile("vandi.b $vr6, $vr6, 0x0f");
		asm volatile("vandi.b $vr7, $vr7, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
		asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
		asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
		asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
		/* lookup from qmul[16] */
		asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
		asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
		asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
		asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
		/* vr16 - vr19: B(Q + Qxy) */
		asm volatile("vxor.v $vr16, $vr8, $vr4");
		asm volatile("vxor.v $vr17, $vr9, $vr5");
		asm volatile("vxor.v $vr18, $vr10, $vr6");
		asm volatile("vxor.v $vr19, $vr11, $vr7");

		/* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
		asm volatile("vsrli.b $vr4, $vr0, 4");
		asm volatile("vsrli.b $vr5, $vr1, 4");
		asm volatile("vsrli.b $vr6, $vr2, 4");
		asm volatile("vsrli.b $vr7, $vr3, 4");
		/* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
		asm volatile("vandi.b $vr12, $vr0, 0x0f");
		asm volatile("vandi.b $vr13, $vr1, 0x0f");
		asm volatile("vandi.b $vr14, $vr2, 0x0f");
		asm volatile("vandi.b $vr15, $vr3, 0x0f");
		/* lookup from pbmul[0] */
		asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
		asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
		asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
		asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
		/* lookup from pbmul[16] */
		asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
		asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
		asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
		asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
		/* vr4 - vr7: A(P + Pxy) */
		asm volatile("vxor.v $vr4, $vr4, $vr12");
		asm volatile("vxor.v $vr5, $vr5, $vr13");
		asm volatile("vxor.v $vr6, $vr6, $vr14");
		asm volatile("vxor.v $vr7, $vr7, $vr15");

		/* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
		asm volatile("vxor.v $vr4, $vr4, $vr16");
		asm volatile("vxor.v $vr5, $vr5, $vr17");
		asm volatile("vxor.v $vr6, $vr6, $vr18");
		asm volatile("vxor.v $vr7, $vr7, $vr19");
		asm volatile("vst $vr4, %0" : "=m" (dq[0]));
		asm volatile("vst $vr5, %0" : "=m" (dq[16]));
		asm volatile("vst $vr6, %0" : "=m" (dq[32]));
		asm volatile("vst $vr7, %0" : "=m" (dq[48]));

		/* vr0 - vr3: P + Pxy + Dx = Dy */
		asm volatile("vxor.v $vr0, $vr0, $vr4");
		asm volatile("vxor.v $vr1, $vr1, $vr5");
		asm volatile("vxor.v $vr2, $vr2, $vr6");
		asm volatile("vxor.v $vr3, $vr3, $vr7");
		asm volatile("vst $vr0, %0" : "=m" (dp[0]));
		asm volatile("vst $vr1, %0" : "=m" (dp[16]));
		asm volatile("vst $vr2, %0" : "=m" (dp[32]));
		asm volatile("vst $vr3, %0" : "=m" (dp[48]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
	}

	kernel_fpu_end();
}

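/*
 * Recover one data block plus the P block, using the intact Q.  After
 * gen_syndrome() runs with the missing data page zeroed, the p buffer
 * holds P + Dx and dq holds Qx, so per 64-byte chunk the loop computes
 *
 *	Dx = qmul * (Q + Qx)	(qmul multiplies by the inverse of
 *				 g^faila, nibble-wise as in the
 *				 two-data case)
 *	P  = (P + Dx) + Dx
 */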
static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
				  void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* vr22, vr23: qmul */
	asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (qmul[16]));

	while (bytes) {
		/* vr0 - vr3: P + Dx */
		asm volatile("vld $vr0, %0" : : "m" (p[0]));
		asm volatile("vld $vr1, %0" : : "m" (p[16]));
		asm volatile("vld $vr2, %0" : : "m" (p[32]));
		asm volatile("vld $vr3, %0" : : "m" (p[48]));
		/* vr4 - vr7: Qx */
		asm volatile("vld $vr4, %0" : : "m" (dq[0]));
		asm volatile("vld $vr5, %0" : : "m" (dq[16]));
		asm volatile("vld $vr6, %0" : : "m" (dq[32]));
		asm volatile("vld $vr7, %0" : : "m" (dq[48]));
		/* vr4 - vr7: Q + Qx */
		asm volatile("vld $vr8, %0" : : "m" (q[0]));
		asm volatile("vld $vr9, %0" : : "m" (q[16]));
		asm volatile("vld $vr10, %0" : : "m" (q[32]));
		asm volatile("vld $vr11, %0" : : "m" (q[48]));
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");

		/* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
		asm volatile("vsrli.b $vr8, $vr4, 4");
		asm volatile("vsrli.b $vr9, $vr5, 4");
		asm volatile("vsrli.b $vr10, $vr6, 4");
		asm volatile("vsrli.b $vr11, $vr7, 4");
		/* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
		asm volatile("vandi.b $vr4, $vr4, 0x0f");
		asm volatile("vandi.b $vr5, $vr5, 0x0f");
		asm volatile("vandi.b $vr6, $vr6, 0x0f");
		asm volatile("vandi.b $vr7, $vr7, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
		asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
		asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
		asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
		/* lookup from qmul[16] */
		asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
		asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
		asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
		asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
		/* vr4 - vr7: qmul(Q + Qx) = Dx */
		asm volatile("vxor.v $vr4, $vr4, $vr8");
		asm volatile("vxor.v $vr5, $vr5, $vr9");
		asm volatile("vxor.v $vr6, $vr6, $vr10");
		asm volatile("vxor.v $vr7, $vr7, $vr11");
		asm volatile("vst $vr4, %0" : "=m" (dq[0]));
		asm volatile("vst $vr5, %0" : "=m" (dq[16]));
		asm volatile("vst $vr6, %0" : "=m" (dq[32]));
		asm volatile("vst $vr7, %0" : "=m" (dq[48]));

		/* vr0 - vr3: P + Dx + Dx = P */
		asm volatile("vxor.v $vr0, $vr0, $vr4");
		asm volatile("vxor.v $vr1, $vr1, $vr5");
		asm volatile("vxor.v $vr2, $vr2, $vr6");
		asm volatile("vxor.v $vr3, $vr3, $vr7");
		asm volatile("vst $vr0, %0" : "=m" (p[0]));
		asm volatile("vst $vr1, %0" : "=m" (p[16]));
		asm volatile("vst $vr2, %0" : "=m" (p[32]));
		asm volatile("vst $vr3, %0" : "=m" (p[48]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
	}

	kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_lsx = {
	.data2 = raid6_2data_recov_lsx,
	.datap = raid6_datap_recov_lsx,
	.valid = raid6_has_lsx,
	.name = "lsx",
	.priority = 1,
};
#endif /* CONFIG_CPU_HAS_LSX */

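/*
 * The LASX variants below are the same algorithms widened to 256-bit
 * vectors, handling each 64-byte chunk in two registers instead of
 * four.  The 16-byte lookup tables are loaded with vld and broadcast
 * to both 128-bit lanes with xvreplve0.q, since xvshuf.b (like the
 * other LASX shuffles) looks up bytes within each lane independently.
 */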
#ifdef CONFIG_CPU_HAS_LASX
static int raid6_has_lasx(void)
{
	return cpu_has_lasx;
}

static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
				   int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data pages
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q
	 */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks - 2] = p;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];

	kernel_fpu_begin();

	/*
	 * xr20, xr21: qmul
	 * xr22, xr23: pbmul
	 */
	asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
	asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
	asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
	asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
	asm volatile("xvreplve0.q $xr20, $xr20");
	asm volatile("xvreplve0.q $xr21, $xr21");
	asm volatile("xvreplve0.q $xr22, $xr22");
	asm volatile("xvreplve0.q $xr23, $xr23");

	while (bytes) {
		/* xr0, xr1: Q */
		asm volatile("xvld $xr0, %0" : : "m" (q[0]));
		asm volatile("xvld $xr1, %0" : : "m" (q[32]));
		/* xr0, xr1: Q + Qxy */
		asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
		asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
		asm volatile("xvxor.v $xr0, $xr0, $xr4");
		asm volatile("xvxor.v $xr1, $xr1, $xr5");
		/* xr2, xr3: P */
		asm volatile("xvld $xr2, %0" : : "m" (p[0]));
		asm volatile("xvld $xr3, %0" : : "m" (p[32]));
		/* xr2, xr3: P + Pxy */
		asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
		asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");

		/* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
		asm volatile("xvsrli.b $xr4, $xr0, 4");
		asm volatile("xvsrli.b $xr5, $xr1, 4");
		/* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
		asm volatile("xvandi.b $xr0, $xr0, 0x0f");
		asm volatile("xvandi.b $xr1, $xr1, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
		asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
		/* lookup from qmul[16] */
		asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
		asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
		/* xr6, xr7: B(Q + Qxy) */
		asm volatile("xvxor.v $xr6, $xr4, $xr0");
		asm volatile("xvxor.v $xr7, $xr5, $xr1");

		/* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
		asm volatile("xvsrli.b $xr4, $xr2, 4");
		asm volatile("xvsrli.b $xr5, $xr3, 4");
		/* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
		asm volatile("xvandi.b $xr0, $xr2, 0x0f");
		asm volatile("xvandi.b $xr1, $xr3, 0x0f");
		/* lookup from pbmul[0] */
		asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
		asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
		/* lookup from pbmul[16] */
		asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
		asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
		/* xr0, xr1: A(P + Pxy) */
		asm volatile("xvxor.v $xr0, $xr0, $xr4");
		asm volatile("xvxor.v $xr1, $xr1, $xr5");

		/* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
		asm volatile("xvxor.v $xr0, $xr0, $xr6");
		asm volatile("xvxor.v $xr1, $xr1, $xr7");

		/* xr2, xr3: P + Pxy + Dx = Dy */
		asm volatile("xvxor.v $xr2, $xr2, $xr0");
		asm volatile("xvxor.v $xr3, $xr3, $xr1");

		asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
		asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
		asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
		asm volatile("xvst $xr3, %0" : "=m" (dp[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
	}

	kernel_fpu_end();
}

static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
				   void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */

	p = (u8 *)ptrs[disks - 2];
	q = (u8 *)ptrs[disks - 1];

	/*
	 * Compute syndrome with zero for the missing data page
	 * Use the dead data page as temporary storage for delta q
	 */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks - 1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks - 1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	kernel_fpu_begin();

	/* xr22, xr23: qmul */
	asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
	asm volatile("xvreplve0.q $xr22, $xr22");
	asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
	asm volatile("xvreplve0.q $xr23, $xr23");

	while (bytes) {
		/* xr0, xr1: P + Dx */
		asm volatile("xvld $xr0, %0" : : "m" (p[0]));
		asm volatile("xvld $xr1, %0" : : "m" (p[32]));
		/* xr2, xr3: Qx */
		asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
		asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
		/* xr2, xr3: Q + Qx */
		asm volatile("xvld $xr4, %0" : : "m" (q[0]));
		asm volatile("xvld $xr5, %0" : : "m" (q[32]));
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");

		/* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
		asm volatile("xvsrli.b $xr4, $xr2, 4");
		asm volatile("xvsrli.b $xr5, $xr3, 4");
		/* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
		asm volatile("xvandi.b $xr2, $xr2, 0x0f");
		asm volatile("xvandi.b $xr3, $xr3, 0x0f");
		/* lookup from qmul[0] */
		asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
		asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
		/* lookup from qmul[16] */
		asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
		asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
		/* xr2, xr3: qmul(Q + Qx) = Dx */
		asm volatile("xvxor.v $xr2, $xr2, $xr4");
		asm volatile("xvxor.v $xr3, $xr3, $xr5");

		/* xr0, xr1: P + Dx + Dx = P */
		asm volatile("xvxor.v $xr0, $xr0, $xr2");
		asm volatile("xvxor.v $xr1, $xr1, $xr3");

		asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
		asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
		asm volatile("xvst $xr0, %0" : "=m" (p[0]));
		asm volatile("xvst $xr1, %0" : "=m" (p[32]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
	}

	kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_lasx = {
	.data2 = raid6_2data_recov_lasx,
	.datap = raid6_datap_recov_lasx,
	.valid = raid6_has_lasx,
	.name = "lasx",
	.priority = 2,
};
#endif /* CONFIG_CPU_HAS_LASX */