// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 *
 * Conventions shared by every routine in this file:
 *
 *  - ptrs[] is indexed by disk number: entries 0 .. disks-3 are read as
 *    data, entry disks-2 is the XOR parity (P) buffer and entry disks-1
 *    is the RS syndrome (Q) buffer.
 *
 *  - Buffers are walked in 32-byte (one ymm register) pieces using the
 *    aligned vmovdqa/vmovntdq forms, and the loops have no tail handling.
 *    Callers are therefore assumed to pass 32-byte aligned buffers and a
 *    byte count that is a multiple of the per-iteration stride (32, 64 or
 *    128 depending on the variant) -- TODO confirm against the callers.
 *
 *  - ymm0 holds 0x1d in every byte.  The recurring four-instruction
 *    sequence
 *
 *        vpcmpgtb Q,zero,mask ; vpaddb Q,Q,Q ;
 *        vpand ymm0,mask,mask ; vpxor mask,Q,Q
 *
 *    doubles every byte of Q and XORs 0x1d into exactly those bytes whose
 *    top bit was set before the doubling (the signed compare "0 > byte"
 *    yields 0xff for such bytes).
 *
 *  - Disks are processed from the highest index down to 0.  Each step
 *    applies the sequence above to the Q accumulator and then XORs the
 *    next disk's data into both the P and the Q accumulator.
 *
 *  - The asm statements hand values to each other through ymm registers
 *    that are not declared as operands or clobbers, so the code depends
 *    on the statements staying in exactly this order.  All ymm use is
 *    bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */

#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
#include "algos.h"

/*
 * 32 bytes of 0x1d, aligned to 32 bytes so that they can be loaded into
 * ymm0 with a single vmovdqa.
 */
static const struct raid6_avx2_constants {
	u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

/*
 * Plain AVX2 implementation
 *
 * raid6_avx21_gen_syndrome - compute P and Q from scratch, 32 bytes per
 * outer-loop iteration.
 * @disks: total number of entries in @ptrs (data disks + P + Q)
 * @bytes: number of bytes to process in each buffer
 * @ptrs:  per-disk buffer pointers, laid out as described at the top of
 *         this file
 *
 * Register use: ymm0 = 0x1d bytes, ymm3 = zero, ymm2 = P accumulator,
 * ymm4 = Q accumulator, ymm5 = reduction mask, ymm6 = data that was loaded
 * one disk ahead of where it is consumed.
 *
 * dptr[z0-1] is read unconditionally, so at least two data disks
 * (disks >= 4) are required.
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */

	for (d = 0; d < bytes; d += 32) {
		/* Seed P and Q with the highest data disk. */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
		/* Preload the next lower disk; it is consumed one step later. */
		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/* Double Q with the 0x1d reduction ... */
			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			/* ... then fold the previously loaded disk into P and Q. */
			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
			/* Load disk z for the next step. */
			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
		}
		/* Final step: fold in the disk still pending in ymm6. */
		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
		asm volatile("vpand %ymm0,%ymm5,%ymm5");
		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm4,%ymm4");

		/*
		 * Write P and Q with non-temporal stores.  The accumulators are
		 * cleared afterwards; note that both are also overwritten at the
		 * top of the next iteration.
		 */
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
	}

	/* Fence after the non-temporal stores, before leaving the FPU section. */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

/*
 * raid6_avx21_xor_syndrome - fold data disks @start..@stop into the
 * existing P and Q, 32 bytes per outer-loop iteration.
 * @disks: total number of entries in @ptrs (data disks + P + Q)
 * @start: lowest data disk index whose contents are XORed in
 * @stop:  highest data disk index whose contents are XORed in
 * @bytes: number of bytes to process in each buffer
 * @ptrs:  per-disk buffer pointers, laid out as described at the top of
 *         this file
 *
 * Disks above @stop are never read.  For disks below @start the Q
 * accumulator is still doubled once per disk, but no data is loaded.  The
 * accumulated values are XORed with the current contents of P and Q and
 * written back in place.
 *
 * Register use: ymm0 = 0x1d bytes, ymm2 = P accumulator, ymm4 = Q
 * accumulator, ymm5 = zero/mask scratch and data load.
 */
static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 32) {
		/* Q starts as disk @stop; P starts as old P ^ disk @stop. */
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			/* ymm5 is zeroed here because it doubles as the data register. */
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			/* Only double Q; disks below @start contribute no data. */
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		/* Merge the accumulated delta into the existing Q. */
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

/*
 * Method table for the plain (32 bytes per iteration) variant.
 * NOTE(review): struct raid6_calls is declared outside this file
 * (presumably in algos.h); only the members set here are visible.
 */
const struct raid6_calls raid6_avx2x1 = {
	.gen_syndrome	= raid6_avx21_gen_syndrome,
	.xor_syndrome	= raid6_avx21_xor_syndrome,
	.name		= "avx2x1",
};

/*
 * Unrolled-by-2 AVX2 implementation
 *
 * raid6_avx22_gen_syndrome - compute P and Q from scratch, 64 bytes (two
 * ymm lanes) per outer-loop iteration.
 * @disks: total number of entries in @ptrs (data disks + P + Q)
 * @bytes: number of bytes to process in each buffer
 * @ptrs:  per-disk buffer pointers, laid out as described at the top of
 *         this file
 *
 * Register use: ymm0 = 0x1d bytes, ymm1 = zero, ymm2/ymm3 = P accumulators,
 * ymm4/ymm6 = Q accumulators, ymm5/ymm7 = reduction mask, then data.
 */
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for (d = 0; d < bytes; d += 64) {
		/* Seed both lanes of P and Q with the highest data disk. */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
		asm volatile("vmovdqa %ymm2,%ymm4");	/* Q[0] */
		asm volatile("vmovdqa %ymm3,%ymm6");	/* Q[1] */
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			/* Double Q with the 0x1d reduction, both lanes interleaved. */
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			/* Load disk z and fold it into P and Q. */
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		/* Write P and Q with non-temporal stores. */
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
	}

	/* Fence after the non-temporal stores, before leaving the FPU section. */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

/*
 * raid6_avx22_xor_syndrome - fold data disks @start..@stop into the
 * existing P and Q, 64 bytes (two ymm lanes) per outer-loop iteration.
 * @disks: total number of entries in @ptrs (data disks + P + Q)
 * @start: lowest data disk index whose contents are XORed in
 * @stop:  highest data disk index whose contents are XORed in
 * @bytes: number of bytes to process in each buffer
 * @ptrs:  per-disk buffer pointers, laid out as described at the top of
 *         this file
 *
 * Same scheme as raid6_avx21_xor_syndrome() with every instruction issued
 * once per lane.  Register use: ymm0 = 0x1d bytes, ymm2/ymm3 = P
 * accumulators, ymm4/ymm6 = Q accumulators, ymm5/ymm7 = zero/mask scratch
 * and data load.
 */
static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		/* Q starts as disk @stop; P starts as old P ^ disk @stop. */
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			/* ymm5/ymm7 are zeroed because they double as data registers. */
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			/* Only double Q; disks below @start contribute no data. */
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		/* Merge the accumulated delta into the existing Q. */
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

/*
 * Method table for the unrolled-by-2 (64 bytes per iteration) variant.
 * NOTE(review): struct raid6_calls is declared outside this file
 * (presumably in algos.h); only the members set here are visible.
 */
const struct raid6_calls raid6_avx2x2 = {
	.gen_syndrome	= raid6_avx22_gen_syndrome,
	.xor_syndrome	= raid6_avx22_xor_syndrome,
	.name		= "avx2x2",
};

/*
 * The unrolled-by-4 variant below uses ymm10-ymm15.  Presumably that is why
 * it is only built for CONFIG_X86_64 (registers above ymm7 need 64-bit
 * mode).
 */
#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX2 implementation
 *
 * raid6_avx24_gen_syndrome - compute P and Q from scratch, 128 bytes (four
 * ymm lanes) per outer-loop iteration.
 * @disks: total number of entries in @ptrs (data disks + P + Q)
 * @bytes: number of bytes to process in each buffer
 * @ptrs:  per-disk buffer pointers, laid out as described at the top of
 *         this file
 *
 * Unlike the narrower variants, the accumulators start at zero and the
 * highest data disk goes through the same loop body as all the others
 * (the first doubling then operates on zero).  The accumulators are
 * cleared again after every store so the next outer iteration starts from
 * zero as well.
 *
 * Register use: ymm0 = 0x1d bytes, ymm1 = zero,
 * ymm2/ymm3/ymm10/ymm11 = P accumulators,
 * ymm4/ymm6/ymm12/ymm14 = Q accumulators,
 * ymm5/ymm7/ymm13/ymm15 = reduction mask, then data.
 */
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */

	for (d = 0; d < bytes; d += 128) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
			/* Double Q with the 0x1d reduction, four lanes interleaved. */
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			/* Load disk z and fold it into P and Q. */
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		/*
		 * Store each accumulator with a non-temporal store and reset it
		 * to zero for the next 128-byte chunk.
		 */
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
	}

	/* Fence after the non-temporal stores, before leaving the FPU section. */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

/*
 * raid6_avx24_xor_syndrome - fold data disks @start..@stop into the
 * existing P and Q, 128 bytes (four ymm lanes) per outer-loop iteration.
 * @disks: total number of entries in @ptrs (data disks + P + Q)
 * @start: lowest data disk index whose contents are XORed in
 * @stop:  highest data disk index whose contents are XORed in
 * @bytes: number of bytes to process in each buffer
 * @ptrs:  per-disk buffer pointers, laid out as described at the top of
 *         this file
 *
 * Same scheme as the narrower xor_syndrome variants, with two visible
 * differences: data and Q are prefetched (at 64-byte spacing), and P and Q
 * are written back with vmovntdq rather than vmovdqa.
 *
 * Register use: ymm0 = 0x1d bytes,
 * ymm2/ymm3/ymm10/ymm11 = P accumulators,
 * ymm4/ymm6/ymm12/ymm14 = Q accumulators,
 * ymm5/ymm7/ymm13/ymm15 = zero/mask scratch and data load.
 */
static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		/* Q starts as disk @stop; P starts as old P ^ disk @stop. */
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
		asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
		asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		asm volatile("vpxor %ymm12,%ymm10,%ymm10");
		asm volatile("vpxor %ymm14,%ymm11,%ymm11");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
			/* Scratch registers are zeroed; they double as data registers. */
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13"
				     :: "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15"
				     :: "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		/* Start fetching Q; it is read after the left-side loop. */
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+64]));
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			/* Only double Q; disks below @start contribute no data. */
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		/* Write back P (already merged with the old P above). */
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		/* Merge the accumulated delta into the existing Q and write it back. */
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
		asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
	}
	/* Fence after the non-temporal stores, before leaving the FPU section. */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

/*
 * Method table for the unrolled-by-4 (128 bytes per iteration) variant.
 * NOTE(review): struct raid6_calls is declared outside this file
 * (presumably in algos.h); only the members set here are visible.
 */
const struct raid6_calls raid6_avx2x4 = {
	.gen_syndrome	= raid6_avx24_gen_syndrome,
	.xor_syndrome	= raid6_avx24_xor_syndrome,
	.name		= "avx2x4",
};
#endif /* CONFIG_X86_64 */