1 /* 2 * Copyright (C) 2016 Intel Corporation 3 * 4 * Author: Gayatri Kammela <gayatri.kammela@intel.com> 5 * Author: Megha Dey <megha.dey@linux.intel.com> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; version 2 10 * of the License. 11 * 12 */ 13 14 #ifdef CONFIG_AS_AVX512 15 16 #include <linux/raid/pq.h> 17 #include "x86.h" 18 19 static int raid6_has_avx512(void) 20 { 21 return boot_cpu_has(X86_FEATURE_AVX2) && 22 boot_cpu_has(X86_FEATURE_AVX) && 23 boot_cpu_has(X86_FEATURE_AVX512F) && 24 boot_cpu_has(X86_FEATURE_AVX512BW) && 25 boot_cpu_has(X86_FEATURE_AVX512VL) && 26 boot_cpu_has(X86_FEATURE_AVX512DQ); 27 } 28 29 static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, 30 int failb, void **ptrs) 31 { 32 u8 *p, *q, *dp, *dq; 33 const u8 *pbmul; /* P multiplier table for B data */ 34 const u8 *qmul; /* Q multiplier table (for both) */ 35 const u8 x0f = 0x0f; 36 37 p = (u8 *)ptrs[disks-2]; 38 q = (u8 *)ptrs[disks-1]; 39 40 /* 41 * Compute syndrome with zero for the missing data pages 42 * Use the dead data pages as temporary storage for 43 * delta p and delta q 44 */ 45 46 dp = (u8 *)ptrs[faila]; 47 ptrs[faila] = (void *)raid6_empty_zero_page; 48 ptrs[disks-2] = dp; 49 dq = (u8 *)ptrs[failb]; 50 ptrs[failb] = (void *)raid6_empty_zero_page; 51 ptrs[disks-1] = dq; 52 53 raid6_call.gen_syndrome(disks, bytes, ptrs); 54 55 /* Restore pointer table */ 56 ptrs[faila] = dp; 57 ptrs[failb] = dq; 58 ptrs[disks-2] = p; 59 ptrs[disks-1] = q; 60 61 /* Now, pick the proper data tables */ 62 pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; 63 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ 64 raid6_gfexp[failb]]]; 65 66 kernel_fpu_begin(); 67 68 /* zmm0 = x0f[16] */ 69 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); 70 71 while (bytes) { 72 #ifdef CONFIG_X86_64 73 asm volatile("vmovdqa64 %0, %%zmm1\n\t" 74 "vmovdqa64 %1, %%zmm9\n\t" 75 "vmovdqa64 %2, %%zmm0\n\t" 76 "vmovdqa64 %3, %%zmm8\n\t" 77 "vpxorq %4, %%zmm1, %%zmm1\n\t" 78 "vpxorq %5, %%zmm9, %%zmm9\n\t" 79 "vpxorq %6, %%zmm0, %%zmm0\n\t" 80 "vpxorq %7, %%zmm8, %%zmm8" 81 : 82 : "m" (q[0]), "m" (q[64]), "m" (p[0]), 83 "m" (p[64]), "m" (dq[0]), "m" (dq[64]), 84 "m" (dp[0]), "m" (dp[64])); 85 86 /* 87 * 1 = dq[0] ^ q[0] 88 * 9 = dq[64] ^ q[64] 89 * 0 = dp[0] ^ p[0] 90 * 8 = dp[64] ^ p[64] 91 */ 92 93 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" 94 "vbroadcasti64x2 %1, %%zmm5" 95 : 96 : "m" (qmul[0]), "m" (qmul[16])); 97 98 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" 99 "vpsraw $4, %%zmm9, %%zmm12\n\t" 100 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" 101 "vpandq %%zmm7, %%zmm9, %%zmm9\n\t" 102 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" 103 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" 104 "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t" 105 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" 106 "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t" 107 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" 108 "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t" 109 "vpxorq %%zmm4, %%zmm5, %%zmm5" 110 : 111 : ); 112 113 /* 114 * 5 = qx[0] 115 * 15 = qx[64] 116 */ 117 118 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" 119 "vbroadcasti64x2 %1, %%zmm1\n\t" 120 "vpsraw $4, %%zmm0, %%zmm2\n\t" 121 "vpsraw $4, %%zmm8, %%zmm6\n\t" 122 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" 123 "vpandq %%zmm7, %%zmm8, %%zmm14\n\t" 124 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" 125 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" 126 "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t" 127 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" 128 "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t" 129 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" 130 "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t" 131 "vpxorq %%zmm12, %%zmm13, %%zmm13" 132 : 133 : "m" (pbmul[0]), "m" (pbmul[16])); 134 135 /* 136 * 1 = pbmul[px[0]] 137 * 13 = pbmul[px[64]] 138 */ 139 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" 140 "vpxorq %%zmm15, %%zmm13, %%zmm13" 141 : 142 : ); 143 144 /* 145 * 1 = db = DQ 146 * 13 = db[64] = DQ[64] 147 */ 148 asm volatile("vmovdqa64 %%zmm1, %0\n\t" 149 "vmovdqa64 %%zmm13,%1\n\t" 150 "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" 151 "vpxorq %%zmm13, %%zmm8, %%zmm8" 152 : 153 : "m" (dq[0]), "m" (dq[64])); 154 155 asm volatile("vmovdqa64 %%zmm0, %0\n\t" 156 "vmovdqa64 %%zmm8, %1" 157 : 158 : "m" (dp[0]), "m" (dp[64])); 159 160 bytes -= 128; 161 p += 128; 162 q += 128; 163 dp += 128; 164 dq += 128; 165 #else 166 asm volatile("vmovdqa64 %0, %%zmm1\n\t" 167 "vmovdqa64 %1, %%zmm0\n\t" 168 "vpxorq %2, %%zmm1, %%zmm1\n\t" 169 "vpxorq %3, %%zmm0, %%zmm0" 170 : 171 : "m" (*q), "m" (*p), "m"(*dq), "m" (*dp)); 172 173 /* 1 = dq ^ q; 0 = dp ^ p */ 174 175 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" 176 "vbroadcasti64x2 %1, %%zmm5" 177 : 178 : "m" (qmul[0]), "m" (qmul[16])); 179 180 /* 181 * 1 = dq ^ q 182 * 3 = dq ^ p >> 4 183 */ 184 asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t" 185 "vpandq %%zmm7, %%zmm1, %%zmm1\n\t" 186 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" 187 "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t" 188 "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t" 189 "vpxorq %%zmm4, %%zmm5, %%zmm5" 190 : 191 : ); 192 193 /* 5 = qx */ 194 195 asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t" 196 "vbroadcasti64x2 %1, %%zmm1" 197 : 198 : "m" (pbmul[0]), "m" (pbmul[16])); 199 200 asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t" 201 "vpandq %%zmm7, %%zmm0, %%zmm3\n\t" 202 "vpandq %%zmm7, %%zmm2, %%zmm2\n\t" 203 "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t" 204 "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t" 205 "vpxorq %%zmm4, %%zmm1, %%zmm1" 206 : 207 : ); 208 209 /* 1 = pbmul[px] */ 210 asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t" 211 /* 1 = db = DQ */ 212 "vmovdqa64 %%zmm1, %0\n\t" 213 : 214 : "m" (dq[0])); 215 216 asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t" 217 "vmovdqa64 %%zmm0, %0" 218 : 219 : "m" (dp[0])); 220 221 bytes -= 64; 222 p += 64; 223 q += 64; 224 dp += 64; 225 dq += 64; 226 #endif 227 } 228 229 kernel_fpu_end(); 230 } 231 232 static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, 233 void **ptrs) 234 { 235 u8 *p, *q, *dq; 236 const u8 *qmul; /* Q multiplier table */ 237 const u8 x0f = 0x0f; 238 239 p = (u8 *)ptrs[disks-2]; 240 q = (u8 *)ptrs[disks-1]; 241 242 /* 243 * Compute syndrome with zero for the missing data page 244 * Use the dead data page as temporary storage for delta q 245 */ 246 247 dq = (u8 *)ptrs[faila]; 248 ptrs[faila] = (void *)raid6_empty_zero_page; 249 ptrs[disks-1] = dq; 250 251 raid6_call.gen_syndrome(disks, bytes, ptrs); 252 253 /* Restore pointer table */ 254 ptrs[faila] = dq; 255 ptrs[disks-1] = q; 256 257 /* Now, pick the proper data tables */ 258 qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; 259 260 kernel_fpu_begin(); 261 262 asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f)); 263 264 while (bytes) { 265 #ifdef CONFIG_X86_64 266 asm volatile("vmovdqa64 %0, %%zmm3\n\t" 267 "vmovdqa64 %1, %%zmm8\n\t" 268 "vpxorq %2, %%zmm3, %%zmm3\n\t" 269 "vpxorq %3, %%zmm8, %%zmm8" 270 : 271 : "m" (dq[0]), "m" (dq[64]), "m" (q[0]), 272 "m" (q[64])); 273 274 /* 275 * 3 = q[0] ^ dq[0] 276 * 8 = q[64] ^ dq[64] 277 */ 278 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" 279 "vmovapd %%zmm0, %%zmm13\n\t" 280 "vbroadcasti64x2 %1, %%zmm1\n\t" 281 "vmovapd %%zmm1, %%zmm14" 282 : 283 : "m" (qmul[0]), "m" (qmul[16])); 284 285 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" 286 "vpsraw $4, %%zmm8, %%zmm12\n\t" 287 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" 288 "vpandq %%zmm7, %%zmm8, %%zmm8\n\t" 289 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" 290 "vpandq %%zmm7, %%zmm12, %%zmm12\n\t" 291 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" 292 "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t" 293 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" 294 "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t" 295 "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t" 296 "vpxorq %%zmm13, %%zmm14, %%zmm14" 297 : 298 : ); 299 300 /* 301 * 1 = qmul[q[0] ^ dq[0]] 302 * 14 = qmul[q[64] ^ dq[64]] 303 */ 304 asm volatile("vmovdqa64 %0, %%zmm2\n\t" 305 "vmovdqa64 %1, %%zmm12\n\t" 306 "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t" 307 "vpxorq %%zmm14, %%zmm12, %%zmm12" 308 : 309 : "m" (p[0]), "m" (p[64])); 310 311 /* 312 * 2 = p[0] ^ qmul[q[0] ^ dq[0]] 313 * 12 = p[64] ^ qmul[q[64] ^ dq[64]] 314 */ 315 316 asm volatile("vmovdqa64 %%zmm1, %0\n\t" 317 "vmovdqa64 %%zmm14, %1\n\t" 318 "vmovdqa64 %%zmm2, %2\n\t" 319 "vmovdqa64 %%zmm12,%3" 320 : 321 : "m" (dq[0]), "m" (dq[64]), "m" (p[0]), 322 "m" (p[64])); 323 324 bytes -= 128; 325 p += 128; 326 q += 128; 327 dq += 128; 328 #else 329 asm volatile("vmovdqa64 %0, %%zmm3\n\t" 330 "vpxorq %1, %%zmm3, %%zmm3" 331 : 332 : "m" (dq[0]), "m" (q[0])); 333 334 /* 3 = q ^ dq */ 335 336 asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t" 337 "vbroadcasti64x2 %1, %%zmm1" 338 : 339 : "m" (qmul[0]), "m" (qmul[16])); 340 341 asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t" 342 "vpandq %%zmm7, %%zmm3, %%zmm3\n\t" 343 "vpandq %%zmm7, %%zmm6, %%zmm6\n\t" 344 "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t" 345 "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t" 346 "vpxorq %%zmm0, %%zmm1, %%zmm1" 347 : 348 : ); 349 350 /* 1 = qmul[q ^ dq] */ 351 352 asm volatile("vmovdqa64 %0, %%zmm2\n\t" 353 "vpxorq %%zmm1, %%zmm2, %%zmm2" 354 : 355 : "m" (p[0])); 356 357 /* 2 = p ^ qmul[q ^ dq] */ 358 359 asm volatile("vmovdqa64 %%zmm1, %0\n\t" 360 "vmovdqa64 %%zmm2, %1" 361 : 362 : "m" (dq[0]), "m" (p[0])); 363 364 bytes -= 64; 365 p += 64; 366 q += 64; 367 dq += 64; 368 #endif 369 } 370 371 kernel_fpu_end(); 372 } 373 374 const struct raid6_recov_calls raid6_recov_avx512 = { 375 .data2 = raid6_2data_recov_avx512, 376 .datap = raid6_datap_recov_avx512, 377 .valid = raid6_has_avx512, 378 #ifdef CONFIG_X86_64 379 .name = "avx512x2", 380 #else 381 .name = "avx512x1", 382 #endif 383 .priority = 3, 384 }; 385 386 #else 387 #warning "your version of binutils lacks AVX512 support" 388 #endif 389