/* -*- linux-c -*- --------------------------------------------------------
 *
 *   Copyright (C) 2016 Intel Corporation
 *
 *   Author: Gayatri Kammela <gayatri.kammela@intel.com>
 *   Author: Megha Dey <megha.dey@linux.intel.com>
 *
 *   Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * -----------------------------------------------------------------------
 */

/*
 * AVX512 implementation of RAID-6 syndrome functions
 *
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_avx512_constants {
	u64 x1d[8];
} raid6_avx512_constants __aligned(512) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

static int raid6_have_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"	/* P[0] */
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
			     "vmovdqa64 %1,%%zmm6"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
		/*
		 * For each remaining data disk, multiply the running Q by
		 * 0x02 in GF(2^8): double every byte (vpaddb) and XOR the
		 * 0x1d reduction constant into bytes whose top bit was set
		 * (vpcmpgtb against zero, vpmovm2b, vpandq).  Then XOR the
		 * disk data held in zmm6 into both P and Q and fetch the
		 * next disk's data.
		 */
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm6"
				     :
				     : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
			     "vpmovm2b %%k1,%%zmm5\n\t"
			     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm4,%1\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4"
			     :
			     : "m" (p[d]), "m" (q[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x1 = {
	raid6_avx5121_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_avx512,
	"avx512x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 AVX512 implementation
 */
static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1" /* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	/* We uniformly assume a single prefetch covers at least 64 bytes */
	for (d = 0; d < bytes; d += 128) {
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"	/* P[0] */
			     "vmovdqa64 %1,%%zmm3\n\t"	/* P[1] */
			     "vmovdqa64 %%zmm2,%%zmm4\n\t" /* Q[0] */
			     "vmovdqa64 %%zmm3,%%zmm6"	/* Q[1] */
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm4,%2\n\t"
			     "vmovntdq %%zmm6,%3"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
			       "m" (q[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x2 = {
	raid6_avx5122_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_avx512,
	"avx512x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX512 implementation
 */
static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"	/* Zero temp */
		     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"	/* P[0] */
		     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"	/* P[1] */
		     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"	/* Q[0] */
		     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"	/* Q[1] */
		     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t" /* P[2] */
		     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t" /* P[3] */
		     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t" /* Q[2] */
		     "vpxorq %%zmm14,%%zmm14,%%zmm14"	/* Q[3] */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 256) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "prefetchnta %2\n\t"
				     "prefetchnta %3\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vmovdqa64 %2,%%zmm13\n\t"
				     "vmovdqa64 %3,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
				     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
				       "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
			     "vmovntdq %%zmm14,%7\n\t"
			     "vpxorq %%zmm14,%%zmm14,%%zmm14"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x4 = {
	raid6_avx5124_gen_syndrome,
	NULL,			/* XOR not yet implemented */
	raid6_have_avx512,
	"avx512x4",
	1			/* Has cache hints */
};
#endif

#endif /* CONFIG_AS_AVX512 */
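
/*
 * For reference: per byte lane, the assembler above computes the same
 * recurrence as the generic C implementation (lib/raid6/int.uc).  P is the
 * plain XOR of all data disks; Q is the Reed-Solomon syndrome built by
 * repeatedly multiplying the running value by 0x02 in GF(2^8) (polynomial
 * 0x11d, hence the 0x1d reduction constant) and XORing in the next disk.
 * The scalar sketch below is illustrative only and excluded from the build;
 * the function name is made up for this example and is not part of the
 * raid6 API.
 */
#if 0
static void raid6_scalar_gen_syndrome_sketch(int disks, size_t bytes,
					     void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p = dptr[disks - 2];	/* XOR parity */
	u8 *q = dptr[disks - 1];	/* RS syndrome */
	size_t d;
	int z;

	for (d = 0; d < bytes; d++) {
		u8 wp = dptr[disks - 3][d];	/* running P, seeded with the highest data disk */
		u8 wq = wp;			/* running Q */

		for (z = disks - 4; z >= 0; z--) {
			/* wq *= 0x02 in GF(2^8): shift, reduce by 0x1d if the top bit was set */
			wq = (u8)((wq << 1) ^ ((wq & 0x80) ? 0x1d : 0));
			wp ^= dptr[z][d];
			wq ^= dptr[z][d];
		}
		p[d] = wp;
		q[d] = wq;
	}
}
#endif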