1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2012 Intel Corporation 4 * Author: Yuanhan Liu <yuanhan.liu@linux.intel.com> 5 * 6 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved 7 * 8 * AVX2 implementation of RAID-6 syndrome functions 9 */ 10 11 #include <asm/cpufeature.h> 12 #include <asm/fpu/api.h> 13 #include "algos.h" 14 15 static const struct raid6_avx2_constants { 16 u64 x1d[4]; 17 } raid6_avx2_constants __aligned(32) = { 18 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 19 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, 20 }; 21 22 /* 23 * Plain AVX2 implementation 24 */ 25 static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) 26 { 27 u8 **dptr = (u8 **)ptrs; 28 u8 *p, *q; 29 int d, z, z0; 30 31 z0 = disks - 3; /* Highest data disk */ 32 p = dptr[z0+1]; /* XOR parity */ 33 q = dptr[z0+2]; /* RS syndrome */ 34 35 kernel_fpu_begin(); 36 37 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 38 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */ 39 40 for (d = 0; d < bytes; d += 32) { 41 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 42 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ 43 asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); 44 asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */ 45 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d])); 46 for (z = z0-2; z >= 0; z--) { 47 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 48 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); 49 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 50 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 51 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 52 asm volatile("vpxor %ymm6,%ymm2,%ymm2"); 53 asm volatile("vpxor %ymm6,%ymm4,%ymm4"); 54 asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d])); 55 } 56 asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5"); 57 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 58 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 59 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 60 asm volatile("vpxor %ymm6,%ymm2,%ymm2"); 61 asm volatile("vpxor %ymm6,%ymm4,%ymm4"); 62 63 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 64 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); 65 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 66 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); 67 } 68 69 asm volatile("sfence" : : : "memory"); 70 kernel_fpu_end(); 71 } 72 73 static void raid6_avx21_xor_syndrome(int disks, int start, int stop, 74 size_t bytes, void **ptrs) 75 { 76 u8 **dptr = (u8 **)ptrs; 77 u8 *p, *q; 78 int d, z, z0; 79 80 z0 = stop; /* P/Q right side optimization */ 81 p = dptr[disks-2]; /* XOR parity */ 82 q = dptr[disks-1]; /* RS syndrome */ 83 84 kernel_fpu_begin(); 85 86 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 87 88 for (d = 0 ; d < bytes ; d += 32) { 89 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); 90 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); 91 asm volatile("vpxor %ymm4,%ymm2,%ymm2"); 92 /* P/Q data pages */ 93 for (z = z0-1 ; z >= start ; z--) { 94 asm volatile("vpxor %ymm5,%ymm5,%ymm5"); 95 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); 96 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 97 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 98 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 99 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); 100 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 101 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 102 } 103 /* P/Q left side optimization */ 104 for (z = start-1 ; z >= 0 ; z--) { 105 asm volatile("vpxor %ymm5,%ymm5,%ymm5"); 106 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); 107 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 108 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 109 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 110 } 111 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); 112 /* Don't use movntdq for r/w memory area < cache line */ 113 asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d])); 114 asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d])); 115 } 116 117 asm volatile("sfence" : : : "memory"); 118 kernel_fpu_end(); 119 } 120 121 const struct raid6_calls raid6_avx2x1 = { 122 .gen_syndrome = raid6_avx21_gen_syndrome, 123 .xor_syndrome = raid6_avx21_xor_syndrome, 124 .name = "avx2x1", 125 }; 126 127 /* 128 * Unrolled-by-2 AVX2 implementation 129 */ 130 static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) 131 { 132 u8 **dptr = (u8 **)ptrs; 133 u8 *p, *q; 134 int d, z, z0; 135 136 z0 = disks - 3; /* Highest data disk */ 137 p = dptr[z0+1]; /* XOR parity */ 138 q = dptr[z0+2]; /* RS syndrome */ 139 140 kernel_fpu_begin(); 141 142 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 143 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ 144 145 /* We uniformly assume a single prefetch covers at least 32 bytes */ 146 for (d = 0; d < bytes; d += 64) { 147 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); 148 asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32])); 149 asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */ 150 asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */ 151 asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */ 152 asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */ 153 for (z = z0-1; z >= 0; z--) { 154 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 155 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); 156 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); 157 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); 158 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 159 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 160 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 161 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 162 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 163 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 164 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); 165 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); 166 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 167 asm volatile("vpxor %ymm7,%ymm3,%ymm3"); 168 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 169 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 170 } 171 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 172 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); 173 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 174 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); 175 } 176 177 asm volatile("sfence" : : : "memory"); 178 kernel_fpu_end(); 179 } 180 181 static void raid6_avx22_xor_syndrome(int disks, int start, int stop, 182 size_t bytes, void **ptrs) 183 { 184 u8 **dptr = (u8 **)ptrs; 185 u8 *p, *q; 186 int d, z, z0; 187 188 z0 = stop; /* P/Q right side optimization */ 189 p = dptr[disks-2]; /* XOR parity */ 190 q = dptr[disks-1]; /* RS syndrome */ 191 192 kernel_fpu_begin(); 193 194 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 195 196 for (d = 0 ; d < bytes ; d += 64) { 197 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); 198 asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32])); 199 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); 200 asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32])); 201 asm volatile("vpxor %ymm4,%ymm2,%ymm2"); 202 asm volatile("vpxor %ymm6,%ymm3,%ymm3"); 203 /* P/Q data pages */ 204 for (z = z0-1 ; z >= start ; z--) { 205 asm volatile("vpxor %ymm5,%ymm5,%ymm5"); 206 asm volatile("vpxor %ymm7,%ymm7,%ymm7"); 207 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); 208 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); 209 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 210 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 211 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 212 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 213 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 214 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 215 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); 216 asm volatile("vmovdqa %0,%%ymm7" 217 :: "m" (dptr[z][d+32])); 218 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 219 asm volatile("vpxor %ymm7,%ymm3,%ymm3"); 220 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 221 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 222 } 223 /* P/Q left side optimization */ 224 for (z = start-1 ; z >= 0 ; z--) { 225 asm volatile("vpxor %ymm5,%ymm5,%ymm5"); 226 asm volatile("vpxor %ymm7,%ymm7,%ymm7"); 227 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); 228 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); 229 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 230 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 231 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 232 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 233 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 234 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 235 } 236 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); 237 asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32])); 238 /* Don't use movntdq for r/w memory area < cache line */ 239 asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d])); 240 asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32])); 241 asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d])); 242 asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32])); 243 } 244 245 asm volatile("sfence" : : : "memory"); 246 kernel_fpu_end(); 247 } 248 249 const struct raid6_calls raid6_avx2x2 = { 250 .gen_syndrome = raid6_avx22_gen_syndrome, 251 .xor_syndrome = raid6_avx22_xor_syndrome, 252 .name = "avx2x2", 253 }; 254 255 #ifdef CONFIG_X86_64 256 257 /* 258 * Unrolled-by-4 AVX2 implementation 259 */ 260 static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) 261 { 262 u8 **dptr = (u8 **)ptrs; 263 u8 *p, *q; 264 int d, z, z0; 265 266 z0 = disks - 3; /* Highest data disk */ 267 p = dptr[z0+1]; /* XOR parity */ 268 q = dptr[z0+2]; /* RS syndrome */ 269 270 kernel_fpu_begin(); 271 272 asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0])); 273 asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */ 274 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); /* P[0] */ 275 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* P[1] */ 276 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); /* Q[0] */ 277 asm volatile("vpxor %ymm6,%ymm6,%ymm6"); /* Q[1] */ 278 asm volatile("vpxor %ymm10,%ymm10,%ymm10"); /* P[2] */ 279 asm volatile("vpxor %ymm11,%ymm11,%ymm11"); /* P[3] */ 280 asm volatile("vpxor %ymm12,%ymm12,%ymm12"); /* Q[2] */ 281 asm volatile("vpxor %ymm14,%ymm14,%ymm14"); /* Q[3] */ 282 283 for (d = 0; d < bytes; d += 128) { 284 for (z = z0; z >= 0; z--) { 285 asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); 286 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32])); 287 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64])); 288 asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96])); 289 asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5"); 290 asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7"); 291 asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13"); 292 asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15"); 293 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 294 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 295 asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); 296 asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); 297 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 298 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 299 asm volatile("vpand %ymm0,%ymm13,%ymm13"); 300 asm volatile("vpand %ymm0,%ymm15,%ymm15"); 301 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 302 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 303 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 304 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 305 asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d])); 306 asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32])); 307 asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64])); 308 asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96])); 309 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 310 asm volatile("vpxor %ymm7,%ymm3,%ymm3"); 311 asm volatile("vpxor %ymm13,%ymm10,%ymm10"); 312 asm volatile("vpxor %ymm15,%ymm11,%ymm11"); 313 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 314 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 315 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 316 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 317 } 318 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 319 asm volatile("vpxor %ymm2,%ymm2,%ymm2"); 320 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); 321 asm volatile("vpxor %ymm3,%ymm3,%ymm3"); 322 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); 323 asm volatile("vpxor %ymm10,%ymm10,%ymm10"); 324 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); 325 asm volatile("vpxor %ymm11,%ymm11,%ymm11"); 326 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 327 asm volatile("vpxor %ymm4,%ymm4,%ymm4"); 328 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); 329 asm volatile("vpxor %ymm6,%ymm6,%ymm6"); 330 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); 331 asm volatile("vpxor %ymm12,%ymm12,%ymm12"); 332 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); 333 asm volatile("vpxor %ymm14,%ymm14,%ymm14"); 334 } 335 336 asm volatile("sfence" : : : "memory"); 337 kernel_fpu_end(); 338 } 339 340 static void raid6_avx24_xor_syndrome(int disks, int start, int stop, 341 size_t bytes, void **ptrs) 342 { 343 u8 **dptr = (u8 **)ptrs; 344 u8 *p, *q; 345 int d, z, z0; 346 347 z0 = stop; /* P/Q right side optimization */ 348 p = dptr[disks-2]; /* XOR parity */ 349 q = dptr[disks-1]; /* RS syndrome */ 350 351 kernel_fpu_begin(); 352 353 asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0])); 354 355 for (d = 0 ; d < bytes ; d += 128) { 356 asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d])); 357 asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32])); 358 asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64])); 359 asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96])); 360 asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d])); 361 asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32])); 362 asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64])); 363 asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96])); 364 asm volatile("vpxor %ymm4,%ymm2,%ymm2"); 365 asm volatile("vpxor %ymm6,%ymm3,%ymm3"); 366 asm volatile("vpxor %ymm12,%ymm10,%ymm10"); 367 asm volatile("vpxor %ymm14,%ymm11,%ymm11"); 368 /* P/Q data pages */ 369 for (z = z0-1 ; z >= start ; z--) { 370 asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); 371 asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64])); 372 asm volatile("vpxor %ymm5,%ymm5,%ymm5"); 373 asm volatile("vpxor %ymm7,%ymm7,%ymm7"); 374 asm volatile("vpxor %ymm13,%ymm13,%ymm13"); 375 asm volatile("vpxor %ymm15,%ymm15,%ymm15"); 376 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); 377 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); 378 asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13"); 379 asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15"); 380 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 381 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 382 asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); 383 asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); 384 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 385 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 386 asm volatile("vpand %ymm0,%ymm13,%ymm13"); 387 asm volatile("vpand %ymm0,%ymm15,%ymm15"); 388 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 389 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 390 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 391 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 392 asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d])); 393 asm volatile("vmovdqa %0,%%ymm7" 394 :: "m" (dptr[z][d+32])); 395 asm volatile("vmovdqa %0,%%ymm13" 396 :: "m" (dptr[z][d+64])); 397 asm volatile("vmovdqa %0,%%ymm15" 398 :: "m" (dptr[z][d+96])); 399 asm volatile("vpxor %ymm5,%ymm2,%ymm2"); 400 asm volatile("vpxor %ymm7,%ymm3,%ymm3"); 401 asm volatile("vpxor %ymm13,%ymm10,%ymm10"); 402 asm volatile("vpxor %ymm15,%ymm11,%ymm11"); 403 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 404 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 405 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 406 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 407 } 408 asm volatile("prefetchnta %0" :: "m" (q[d])); 409 asm volatile("prefetchnta %0" :: "m" (q[d+64])); 410 /* P/Q left side optimization */ 411 for (z = start-1 ; z >= 0 ; z--) { 412 asm volatile("vpxor %ymm5,%ymm5,%ymm5"); 413 asm volatile("vpxor %ymm7,%ymm7,%ymm7"); 414 asm volatile("vpxor %ymm13,%ymm13,%ymm13"); 415 asm volatile("vpxor %ymm15,%ymm15,%ymm15"); 416 asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5"); 417 asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7"); 418 asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13"); 419 asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15"); 420 asm volatile("vpaddb %ymm4,%ymm4,%ymm4"); 421 asm volatile("vpaddb %ymm6,%ymm6,%ymm6"); 422 asm volatile("vpaddb %ymm12,%ymm12,%ymm12"); 423 asm volatile("vpaddb %ymm14,%ymm14,%ymm14"); 424 asm volatile("vpand %ymm0,%ymm5,%ymm5"); 425 asm volatile("vpand %ymm0,%ymm7,%ymm7"); 426 asm volatile("vpand %ymm0,%ymm13,%ymm13"); 427 asm volatile("vpand %ymm0,%ymm15,%ymm15"); 428 asm volatile("vpxor %ymm5,%ymm4,%ymm4"); 429 asm volatile("vpxor %ymm7,%ymm6,%ymm6"); 430 asm volatile("vpxor %ymm13,%ymm12,%ymm12"); 431 asm volatile("vpxor %ymm15,%ymm14,%ymm14"); 432 } 433 asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d])); 434 asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32])); 435 asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64])); 436 asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96])); 437 asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d])); 438 asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32])); 439 asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64])); 440 asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96])); 441 asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d])); 442 asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32])); 443 asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64])); 444 asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96])); 445 } 446 asm volatile("sfence" : : : "memory"); 447 kernel_fpu_end(); 448 } 449 450 const struct raid6_calls raid6_avx2x4 = { 451 .gen_syndrome = raid6_avx24_gen_syndrome, 452 .xor_syndrome = raid6_avx24_xor_syndrome, 453 .name = "avx2x4", 454 }; 455 #endif /* CONFIG_X86_64 */ 456