1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2016 Intel Corporation 4 * 5 * Author: Gayatri Kammela <gayatri.kammela@intel.com> 6 * Author: Megha Dey <megha.dey@linux.intel.com> 7 * 8 * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved 9 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved 10 * 11 * AVX512 implementation of RAID-6 syndrome functions 12 */ 13 14 #include <asm/cpufeature.h> 15 #include <asm/fpu/api.h> 16 #include "algos.h" 17 18 static const struct raid6_avx512_constants { 19 u64 x1d[8]; 20 } raid6_avx512_constants __aligned(512/8) = { 21 { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 22 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 23 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL, 24 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,}, 25 }; 26 27 static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs) 28 { 29 u8 **dptr = (u8 **)ptrs; 30 u8 *p, *q; 31 int d, z, z0; 32 33 z0 = disks - 3; /* Highest data disk */ 34 p = dptr[z0+1]; /* XOR parity */ 35 q = dptr[z0+2]; /* RS syndrome */ 36 37 kernel_fpu_begin(); 38 39 asm volatile("vmovdqa64 %0,%%zmm0\n\t" 40 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ 41 : 42 : "m" (raid6_avx512_constants.x1d[0])); 43 44 for (d = 0; d < bytes; d += 64) { 45 asm volatile("prefetchnta %0\n\t" 46 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ 47 "prefetchnta %1\n\t" 48 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ 49 "vmovdqa64 %1,%%zmm6" 50 : 51 : "m" (dptr[z0][d]), "m" (dptr[z0-1][d])); 52 for (z = z0-2; z >= 0; z--) { 53 asm volatile("prefetchnta %0\n\t" 54 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" 55 "vpmovm2b %%k1,%%zmm5\n\t" 56 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 57 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 58 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 59 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" 60 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" 61 "vmovdqa64 %0,%%zmm6" 62 : 63 : "m" (dptr[z][d])); 64 } 65 asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" 66 "vpmovm2b %%k1,%%zmm5\n\t" 67 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 68 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 69 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 70 "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t" 71 "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t" 72 "vmovntdq %%zmm2,%0\n\t" 73 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" 74 "vmovntdq %%zmm4,%1\n\t" 75 "vpxorq %%zmm4,%%zmm4,%%zmm4" 76 : 77 : "m" (p[d]), "m" (q[d])); 78 } 79 80 asm volatile("sfence" : : : "memory"); 81 kernel_fpu_end(); 82 } 83 84 static void raid6_avx5121_xor_syndrome(int disks, int start, int stop, 85 size_t bytes, void **ptrs) 86 { 87 u8 **dptr = (u8 **)ptrs; 88 u8 *p, *q; 89 int d, z, z0; 90 91 z0 = stop; /* P/Q right side optimization */ 92 p = dptr[disks-2]; /* XOR parity */ 93 q = dptr[disks-1]; /* RS syndrome */ 94 95 kernel_fpu_begin(); 96 97 asm volatile("vmovdqa64 %0,%%zmm0" 98 : : "m" (raid6_avx512_constants.x1d[0])); 99 100 for (d = 0 ; d < bytes ; d += 64) { 101 asm volatile("vmovdqa64 %0,%%zmm4\n\t" 102 "vmovdqa64 %1,%%zmm2\n\t" 103 "vpxorq %%zmm4,%%zmm2,%%zmm2" 104 : 105 : "m" (dptr[z0][d]), "m" (p[d])); 106 /* P/Q data pages */ 107 for (z = z0-1 ; z >= start ; z--) { 108 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 109 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 110 "vpmovm2b %%k1,%%zmm5\n\t" 111 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 112 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 113 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 114 "vmovdqa64 %0,%%zmm5\n\t" 115 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 116 "vpxorq %%zmm5,%%zmm4,%%zmm4" 117 : 118 : "m" (dptr[z][d])); 119 } 120 /* P/Q left side optimization */ 121 for (z = start-1 ; z >= 0 ; z--) { 122 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 123 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 124 "vpmovm2b %%k1,%%zmm5\n\t" 125 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 126 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 127 "vpxorq %%zmm5,%%zmm4,%%zmm4" 128 : 129 : ); 130 } 131 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" 132 /* Don't use movntdq for r/w memory area < cache line */ 133 "vmovdqa64 %%zmm4,%0\n\t" 134 "vmovdqa64 %%zmm2,%1" 135 : 136 : "m" (q[d]), "m" (p[d])); 137 } 138 139 asm volatile("sfence" : : : "memory"); 140 kernel_fpu_end(); 141 } 142 143 const struct raid6_calls raid6_avx512x1 = { 144 .gen_syndrome = raid6_avx5121_gen_syndrome, 145 .xor_syndrome = raid6_avx5121_xor_syndrome, 146 .name = "avx512x1", 147 }; 148 149 /* 150 * Unrolled-by-2 AVX512 implementation 151 */ 152 static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs) 153 { 154 u8 **dptr = (u8 **)ptrs; 155 u8 *p, *q; 156 int d, z, z0; 157 158 z0 = disks - 3; /* Highest data disk */ 159 p = dptr[z0+1]; /* XOR parity */ 160 q = dptr[z0+2]; /* RS syndrome */ 161 162 kernel_fpu_begin(); 163 164 asm volatile("vmovdqa64 %0,%%zmm0\n\t" 165 "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */ 166 : 167 : "m" (raid6_avx512_constants.x1d[0])); 168 169 /* We uniformly assume a single prefetch covers at least 64 bytes */ 170 for (d = 0; d < bytes; d += 128) { 171 asm volatile("prefetchnta %0\n\t" 172 "prefetchnta %1\n\t" 173 "vmovdqa64 %0,%%zmm2\n\t" /* P[0] */ 174 "vmovdqa64 %1,%%zmm3\n\t" /* P[1] */ 175 "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */ 176 "vmovdqa64 %%zmm3,%%zmm6" /* Q[1] */ 177 : 178 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64])); 179 for (z = z0-1; z >= 0; z--) { 180 asm volatile("prefetchnta %0\n\t" 181 "prefetchnta %1\n\t" 182 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" 183 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" 184 "vpmovm2b %%k1,%%zmm5\n\t" 185 "vpmovm2b %%k2,%%zmm7\n\t" 186 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 187 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 188 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 189 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 190 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 191 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 192 "vmovdqa64 %0,%%zmm5\n\t" 193 "vmovdqa64 %1,%%zmm7\n\t" 194 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 195 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" 196 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 197 "vpxorq %%zmm7,%%zmm6,%%zmm6" 198 : 199 : "m" (dptr[z][d]), "m" (dptr[z][d+64])); 200 } 201 asm volatile("vmovntdq %%zmm2,%0\n\t" 202 "vmovntdq %%zmm3,%1\n\t" 203 "vmovntdq %%zmm4,%2\n\t" 204 "vmovntdq %%zmm6,%3" 205 : 206 : "m" (p[d]), "m" (p[d+64]), "m" (q[d]), 207 "m" (q[d+64])); 208 } 209 210 asm volatile("sfence" : : : "memory"); 211 kernel_fpu_end(); 212 } 213 214 static void raid6_avx5122_xor_syndrome(int disks, int start, int stop, 215 size_t bytes, void **ptrs) 216 { 217 u8 **dptr = (u8 **)ptrs; 218 u8 *p, *q; 219 int d, z, z0; 220 221 z0 = stop; /* P/Q right side optimization */ 222 p = dptr[disks-2]; /* XOR parity */ 223 q = dptr[disks-1]; /* RS syndrome */ 224 225 kernel_fpu_begin(); 226 227 asm volatile("vmovdqa64 %0,%%zmm0" 228 : : "m" (raid6_avx512_constants.x1d[0])); 229 230 for (d = 0 ; d < bytes ; d += 128) { 231 asm volatile("vmovdqa64 %0,%%zmm4\n\t" 232 "vmovdqa64 %1,%%zmm6\n\t" 233 "vmovdqa64 %2,%%zmm2\n\t" 234 "vmovdqa64 %3,%%zmm3\n\t" 235 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" 236 "vpxorq %%zmm6,%%zmm3,%%zmm3" 237 : 238 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), 239 "m" (p[d]), "m" (p[d+64])); 240 /* P/Q data pages */ 241 for (z = z0-1 ; z >= start ; z--) { 242 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 243 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" 244 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 245 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" 246 "vpmovm2b %%k1,%%zmm5\n\t" 247 "vpmovm2b %%k2,%%zmm7\n\t" 248 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 249 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 250 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 251 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 252 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 253 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 254 "vmovdqa64 %0,%%zmm5\n\t" 255 "vmovdqa64 %1,%%zmm7\n\t" 256 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 257 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" 258 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 259 "vpxorq %%zmm7,%%zmm6,%%zmm6" 260 : 261 : "m" (dptr[z][d]), "m" (dptr[z][d+64])); 262 } 263 /* P/Q left side optimization */ 264 for (z = start-1 ; z >= 0 ; z--) { 265 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 266 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" 267 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 268 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" 269 "vpmovm2b %%k1,%%zmm5\n\t" 270 "vpmovm2b %%k2,%%zmm7\n\t" 271 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 272 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 273 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 274 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 275 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 276 "vpxorq %%zmm7,%%zmm6,%%zmm6" 277 : 278 : ); 279 } 280 asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t" 281 "vpxorq %1,%%zmm6,%%zmm6\n\t" 282 /* Don't use movntdq for r/w 283 * memory area < cache line 284 */ 285 "vmovdqa64 %%zmm4,%0\n\t" 286 "vmovdqa64 %%zmm6,%1\n\t" 287 "vmovdqa64 %%zmm2,%2\n\t" 288 "vmovdqa64 %%zmm3,%3" 289 : 290 : "m" (q[d]), "m" (q[d+64]), "m" (p[d]), 291 "m" (p[d+64])); 292 } 293 294 asm volatile("sfence" : : : "memory"); 295 kernel_fpu_end(); 296 } 297 298 const struct raid6_calls raid6_avx512x2 = { 299 .gen_syndrome = raid6_avx5122_gen_syndrome, 300 .xor_syndrome = raid6_avx5122_xor_syndrome, 301 .name = "avx512x2", 302 }; 303 304 #ifdef CONFIG_X86_64 305 306 /* 307 * Unrolled-by-4 AVX2 implementation 308 */ 309 static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs) 310 { 311 u8 **dptr = (u8 **)ptrs; 312 u8 *p, *q; 313 int d, z, z0; 314 315 z0 = disks - 3; /* Highest data disk */ 316 p = dptr[z0+1]; /* XOR parity */ 317 q = dptr[z0+2]; /* RS syndrome */ 318 319 kernel_fpu_begin(); 320 321 asm volatile("vmovdqa64 %0,%%zmm0\n\t" 322 "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t" /* Zero temp */ 323 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" /* P[0] */ 324 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" /* P[1] */ 325 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" /* Q[0] */ 326 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" /* Q[1] */ 327 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */ 328 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */ 329 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */ 330 "vpxorq %%zmm14,%%zmm14,%%zmm14" /* Q[3] */ 331 : 332 : "m" (raid6_avx512_constants.x1d[0])); 333 334 for (d = 0; d < bytes; d += 256) { 335 for (z = z0; z >= 0; z--) { 336 asm volatile("prefetchnta %0\n\t" 337 "prefetchnta %1\n\t" 338 "prefetchnta %2\n\t" 339 "prefetchnta %3\n\t" 340 "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t" 341 "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t" 342 "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t" 343 "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t" 344 "vpmovm2b %%k1,%%zmm5\n\t" 345 "vpmovm2b %%k2,%%zmm7\n\t" 346 "vpmovm2b %%k3,%%zmm13\n\t" 347 "vpmovm2b %%k4,%%zmm15\n\t" 348 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 349 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 350 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" 351 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" 352 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 353 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 354 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" 355 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" 356 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 357 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 358 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 359 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" 360 "vmovdqa64 %0,%%zmm5\n\t" 361 "vmovdqa64 %1,%%zmm7\n\t" 362 "vmovdqa64 %2,%%zmm13\n\t" 363 "vmovdqa64 %3,%%zmm15\n\t" 364 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 365 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" 366 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" 367 "vpxorq %%zmm15,%%zmm11,%%zmm11\n" 368 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 369 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 370 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 371 "vpxorq %%zmm15,%%zmm14,%%zmm14" 372 : 373 : "m" (dptr[z][d]), "m" (dptr[z][d+64]), 374 "m" (dptr[z][d+128]), "m" (dptr[z][d+192])); 375 } 376 asm volatile("vmovntdq %%zmm2,%0\n\t" 377 "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t" 378 "vmovntdq %%zmm3,%1\n\t" 379 "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t" 380 "vmovntdq %%zmm10,%2\n\t" 381 "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" 382 "vmovntdq %%zmm11,%3\n\t" 383 "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" 384 "vmovntdq %%zmm4,%4\n\t" 385 "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t" 386 "vmovntdq %%zmm6,%5\n\t" 387 "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t" 388 "vmovntdq %%zmm12,%6\n\t" 389 "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" 390 "vmovntdq %%zmm14,%7\n\t" 391 "vpxorq %%zmm14,%%zmm14,%%zmm14" 392 : 393 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), 394 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), 395 "m" (q[d+128]), "m" (q[d+192])); 396 } 397 398 asm volatile("sfence" : : : "memory"); 399 kernel_fpu_end(); 400 } 401 402 static void raid6_avx5124_xor_syndrome(int disks, int start, int stop, 403 size_t bytes, void **ptrs) 404 { 405 u8 **dptr = (u8 **)ptrs; 406 u8 *p, *q; 407 int d, z, z0; 408 409 z0 = stop; /* P/Q right side optimization */ 410 p = dptr[disks-2]; /* XOR parity */ 411 q = dptr[disks-1]; /* RS syndrome */ 412 413 kernel_fpu_begin(); 414 415 asm volatile("vmovdqa64 %0,%%zmm0" 416 :: "m" (raid6_avx512_constants.x1d[0])); 417 418 for (d = 0 ; d < bytes ; d += 256) { 419 asm volatile("vmovdqa64 %0,%%zmm4\n\t" 420 "vmovdqa64 %1,%%zmm6\n\t" 421 "vmovdqa64 %2,%%zmm12\n\t" 422 "vmovdqa64 %3,%%zmm14\n\t" 423 "vmovdqa64 %4,%%zmm2\n\t" 424 "vmovdqa64 %5,%%zmm3\n\t" 425 "vmovdqa64 %6,%%zmm10\n\t" 426 "vmovdqa64 %7,%%zmm11\n\t" 427 "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t" 428 "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t" 429 "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t" 430 "vpxorq %%zmm14,%%zmm11,%%zmm11" 431 : 432 : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]), 433 "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]), 434 "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), 435 "m" (p[d+192])); 436 /* P/Q data pages */ 437 for (z = z0-1 ; z >= start ; z--) { 438 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 439 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" 440 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" 441 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" 442 "prefetchnta %0\n\t" 443 "prefetchnta %2\n\t" 444 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 445 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" 446 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" 447 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" 448 "vpmovm2b %%k1,%%zmm5\n\t" 449 "vpmovm2b %%k2,%%zmm7\n\t" 450 "vpmovm2b %%k3,%%zmm13\n\t" 451 "vpmovm2b %%k4,%%zmm15\n\t" 452 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 453 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 454 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" 455 "vpaddb %%Zmm14,%%zmm14,%%zmm14\n\t" 456 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 457 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 458 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" 459 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" 460 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 461 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 462 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 463 "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t" 464 "vmovdqa64 %0,%%zmm5\n\t" 465 "vmovdqa64 %1,%%zmm7\n\t" 466 "vmovdqa64 %2,%%zmm13\n\t" 467 "vmovdqa64 %3,%%zmm15\n\t" 468 "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t" 469 "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t" 470 "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t" 471 "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t" 472 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 473 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 474 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 475 "vpxorq %%zmm15,%%zmm14,%%zmm14" 476 : 477 : "m" (dptr[z][d]), "m" (dptr[z][d+64]), 478 "m" (dptr[z][d+128]), 479 "m" (dptr[z][d+192])); 480 } 481 asm volatile("prefetchnta %0\n\t" 482 "prefetchnta %1\n\t" 483 : 484 : "m" (q[d]), "m" (q[d+128])); 485 /* P/Q left side optimization */ 486 for (z = start-1 ; z >= 0 ; z--) { 487 asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t" 488 "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t" 489 "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t" 490 "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t" 491 "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t" 492 "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t" 493 "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t" 494 "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t" 495 "vpmovm2b %%k1,%%zmm5\n\t" 496 "vpmovm2b %%k2,%%zmm7\n\t" 497 "vpmovm2b %%k3,%%zmm13\n\t" 498 "vpmovm2b %%k4,%%zmm15\n\t" 499 "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t" 500 "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t" 501 "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t" 502 "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t" 503 "vpandq %%zmm0,%%zmm5,%%zmm5\n\t" 504 "vpandq %%zmm0,%%zmm7,%%zmm7\n\t" 505 "vpandq %%zmm0,%%zmm13,%%zmm13\n\t" 506 "vpandq %%zmm0,%%zmm15,%%zmm15\n\t" 507 "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t" 508 "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t" 509 "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t" 510 "vpxorq %%zmm15,%%zmm14,%%zmm14" 511 : 512 : ); 513 } 514 asm volatile("vmovntdq %%zmm2,%0\n\t" 515 "vmovntdq %%zmm3,%1\n\t" 516 "vmovntdq %%zmm10,%2\n\t" 517 "vmovntdq %%zmm11,%3\n\t" 518 "vpxorq %4,%%zmm4,%%zmm4\n\t" 519 "vpxorq %5,%%zmm6,%%zmm6\n\t" 520 "vpxorq %6,%%zmm12,%%zmm12\n\t" 521 "vpxorq %7,%%zmm14,%%zmm14\n\t" 522 "vmovntdq %%zmm4,%4\n\t" 523 "vmovntdq %%zmm6,%5\n\t" 524 "vmovntdq %%zmm12,%6\n\t" 525 "vmovntdq %%zmm14,%7" 526 : 527 : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]), 528 "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]), 529 "m" (q[d+128]), "m" (q[d+192])); 530 } 531 asm volatile("sfence" : : : "memory"); 532 kernel_fpu_end(); 533 } 534 const struct raid6_calls raid6_avx512x4 = { 535 .gen_syndrome = raid6_avx5124_gen_syndrome, 536 .xor_syndrome = raid6_avx5124_xor_syndrome, 537 .name = "avx512x4", 538 }; 539 #endif 540