// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Intel Corporation
 */

/*
 * RAID-6 recovery routines implemented with SSSE3 inline assembly.
 *
 * Both routines apply a 32-byte multiplier table to every byte of a
 * buffer.  The first 16 table bytes are looked up with the low nibble of
 * each input byte and the second 16 table bytes with the high nibble
 * (both via pshufb); the two lookups are then XORed together.
 *
 * Register contents are carried from one asm statement to the next and no
 * clobbers are declared, so the order of the asm statements below is
 * significant.  Statements without operands are basic asm and therefore
 * spell registers with a single '%'; statements with operands are extended
 * asm and need '%%'.
 */

#include <linux/mm.h>
#include <linux/raid/pq.h>
#include <asm/fpu/api.h>
#include "algos.h"

/*
 * raid6_2data_recov_ssse3 - rebuild two data blocks from the remaining
 * blocks in the pointer table.
 *
 * disks: number of entries in ptrs; ptrs[disks-2] and ptrs[disks-1] are
 *        used as the p and q buffers.
 * bytes: length of every buffer.  The loop consumes 32 bytes per
 *        iteration on CONFIG_X86_64 and 16 bytes otherwise and only stops
 *        when the count reaches exactly zero, so bytes must be a multiple
 *        of that stride.
 * faila: index in ptrs of the first block to rebuild.
 * failb: index in ptrs of the second block to rebuild.
 *        NOTE(review): failb - faila is used directly as a table index,
 *        so callers presumably pass faila < failb - confirm.
 * ptrs:  table of buffer pointers.  It is modified temporarily and
 *        restored before the recovery loop runs.
 *
 * All buffers and the multiplier tables are accessed with movdqa, which
 * requires 16-byte aligned memory operands.
 *
 * On return ptrs[faila] and ptrs[failb] hold the rebuilt contents; the p
 * and q buffers are only read.
 */
static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila,
		int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	/* Mask that isolates the low nibble of every byte */
	static const u8 __aligned(16) x0f[16] = {
		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages.
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q: they are placed in the last two slots of
	 * the table while the zero page stands in for them as data.
	 */
	dp = (u8 *)ptrs[faila];
	ptrs[faila] = page_address(ZERO_PAGE(0));
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = page_address(ZERO_PAGE(0));
	ptrs[disks-1] = dq;

	raid6_gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];

	/* XMM registers are used from here until kernel_fpu_end() */
	kernel_fpu_begin();

	/* xmm7 = nibble mask, kept for the whole loop */
	asm volatile("movdqa %0,%%xmm7" : : "m" (x0f[0]));

#ifdef CONFIG_X86_64
	/*
	 * Keep the loop-invariant table halves in registers: xmm6 = low
	 * half of qmul, xmm14/xmm15 = low/high half of pbmul.  The high
	 * half of qmul is reloaded from memory on every iteration.
	 */
	asm volatile("movdqa %0,%%xmm6" : : "m" (qmul[0]));
	asm volatile("movdqa %0,%%xmm14" : : "m" (pbmul[0]));
	asm volatile("movdqa %0,%%xmm15" : : "m" (pbmul[16]));
#endif

	/* Now do it... */
	while (bytes) {
#ifdef CONFIG_X86_64
		/* xmm6, xmm14, xmm15 */

		/* Two 16-byte lanes per iteration: xmm0-7 and xmm8-15 */
		asm volatile("movdqa %0,%%xmm1" : : "m" (q[0]));
		asm volatile("movdqa %0,%%xmm9" : : "m" (q[16]));
		asm volatile("movdqa %0,%%xmm0" : : "m" (p[0]));
		asm volatile("movdqa %0,%%xmm8" : : "m" (p[16]));
		asm volatile("pxor %0,%%xmm1" : : "m" (dq[0]));
		asm volatile("pxor %0,%%xmm9" : : "m" (dq[16]));
		asm volatile("pxor %0,%%xmm0" : : "m" (dp[0]));
		asm volatile("pxor %0,%%xmm8" : : "m" (dp[16]));

		/* xmm0/8 = px */

		/*
		 * Apply qmul to xmm1/9 (q ^ dq): xmm3/11 receive the low
		 * nibbles, xmm1/9 the high nibbles, and each indexes its
		 * half of the table through pshufb.
		 */
		asm volatile("movdqa %xmm6,%xmm4");
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));
		asm volatile("movdqa %xmm6,%xmm12");
		asm volatile("movdqa %xmm5,%xmm13");
		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("movdqa %xmm9,%xmm11");
		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2/10 = px */
		asm volatile("movdqa %xmm8,%xmm10");
		asm volatile("psraw $4,%xmm1");
		asm volatile("psraw $4,%xmm9");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm11");
		asm volatile("pand %xmm7,%xmm1");
		asm volatile("pand %xmm7,%xmm9");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pshufb %xmm9,%xmm13");
		asm volatile("pxor %xmm4,%xmm5");
		asm volatile("pxor %xmm12,%xmm13");

		/* xmm5/13 = qx */

		/* Same nibble lookup, now applying pbmul to px in xmm2/10 */
		asm volatile("movdqa %xmm14,%xmm4");
		asm volatile("movdqa %xmm15,%xmm1");
		asm volatile("movdqa %xmm14,%xmm12");
		asm volatile("movdqa %xmm15,%xmm9");
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("movdqa %xmm10,%xmm11");
		asm volatile("psraw $4,%xmm2");
		asm volatile("psraw $4,%xmm10");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm11");
		asm volatile("pand %xmm7,%xmm2");
		asm volatile("pand %xmm7,%xmm10");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm11,%xmm12");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pshufb %xmm10,%xmm9");
		asm volatile("pxor %xmm4,%xmm1");
		asm volatile("pxor %xmm12,%xmm9");

		/* xmm1/9 = pbmul[px] */
		asm volatile("pxor %xmm5,%xmm1");
		asm volatile("pxor %xmm13,%xmm9");
		/* xmm1/9 = db = DQ */
		asm volatile("movdqa %%xmm1,%0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm9,%0" : "=m" (dq[16]));

		/* px ^ db is written to the faila buffer (dp) */
		asm volatile("pxor %xmm1,%xmm0");
		asm volatile("pxor %xmm9,%xmm8");
		asm volatile("movdqa %%xmm0,%0" : "=m" (dp[0]));
		asm volatile("movdqa %%xmm8,%0" : "=m" (dp[16]));

		bytes -= 32;
		p += 32;
		q += 32;
		dp += 32;
		dq += 32;
#else
		/*
		 * Single 16-byte lane per iteration using only xmm0-xmm7;
		 * both tables are loaded from memory on every pass.
		 */
		asm volatile("movdqa %0,%%xmm1" : : "m" (*q));
		asm volatile("movdqa %0,%%xmm0" : : "m" (*p));
		asm volatile("pxor %0,%%xmm1" : : "m" (*dq));
		asm volatile("pxor %0,%%xmm0" : : "m" (*dp));

		/* 1 = dq ^ q
		 * 0 = dp ^ p
		 */
		asm volatile("movdqa %0,%%xmm4" : : "m" (qmul[0]));
		asm volatile("movdqa %0,%%xmm5" : : "m" (qmul[16]));

		/* xmm3 = low nibbles, xmm1 = high nibbles of (dq ^ q) */
		asm volatile("movdqa %xmm1,%xmm3");
		asm volatile("psraw $4,%xmm1");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm1");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm1,%xmm5");
		asm volatile("pxor %xmm4,%xmm5");

		asm volatile("movdqa %xmm0,%xmm2"); /* xmm2 = px */

		/* xmm5 = qx */

		asm volatile("movdqa %0,%%xmm4" : : "m" (pbmul[0]));
		asm volatile("movdqa %0,%%xmm1" : : "m" (pbmul[16]));
		asm volatile("movdqa %xmm2,%xmm3");
		asm volatile("psraw $4,%xmm2");
		asm volatile("pand %xmm7,%xmm3");
		asm volatile("pand %xmm7,%xmm2");
		asm volatile("pshufb %xmm3,%xmm4");
		asm volatile("pshufb %xmm2,%xmm1");
		asm volatile("pxor %xmm4,%xmm1");

		/* xmm1 = pbmul[px] */
		asm volatile("pxor %xmm5,%xmm1");
		/* xmm1 = db = DQ */
		asm volatile("movdqa %%xmm1,%0" : "=m" (*dq));

		/* px ^ db is written to the faila buffer (dp) */
		asm volatile("pxor %xmm1,%xmm0");
		asm volatile("movdqa %%xmm0,%0" : "=m" (*dp));

		bytes -= 16;
		p += 16;
		q += 16;
		dp += 16;
		dq += 16;
#endif
	}

	kernel_fpu_end();
}


/*
 * raid6_datap_recov_ssse3 - rebuild one data block and rewrite the p
 * buffer.
 *
 * disks: number of entries in ptrs; ptrs[disks-2] and ptrs[disks-1] are
 *        used as the p and q buffers.
 * bytes: length of every buffer.  The loop consumes 32 bytes per
 *        iteration on CONFIG_X86_64 and 16 bytes otherwise and only stops
 *        when the count reaches exactly zero, so bytes must be a multiple
 *        of that stride.
 * faila: index in ptrs of the data block to rebuild.
 * ptrs:  table of buffer pointers.  It is modified temporarily and
 *        restored before the recovery loop runs.
 *
 * All buffers and the multiplier table are accessed with movdqa, which
 * requires 16-byte aligned memory operands.
 *
 * On return ptrs[faila] holds qmul[q ^ dq] and every byte of the p buffer
 * has been XORed with that value; the q buffer is only read.
 */
static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila,
		void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	/* Mask that isolates the low nibble of every byte */
	static const u8 __aligned(16) x0f[16] = {
		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
		 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f};

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page.
	 * Use the dead data page as temporary storage for delta q: it is
	 * placed in the last slot of the table while the zero page stands
	 * in for it as data.
	 */
	dq = (u8 *)ptrs[faila];
	ptrs[faila] = page_address(ZERO_PAGE(0));
	ptrs[disks-1] = dq;

	raid6_gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];

	/* XMM registers are used from here until kernel_fpu_end() */
	kernel_fpu_begin();

	/* xmm7 = nibble mask, kept for the whole loop */
	asm volatile("movdqa %0, %%xmm7" : : "m" (x0f[0]));

	while (bytes) {
#ifdef CONFIG_X86_64
		/*
		 * Two 16-byte lanes per iteration.  The loads for the
		 * second lane are interleaved with the arithmetic of the
		 * first one.
		 */
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm4" : : "m" (dq[16]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));

		/* xmm3 = q[0] ^ dq[0] */

		asm volatile("pxor %0, %%xmm4" : : "m" (q[16]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* xmm4 = q[16] ^ dq[16] */

		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %xmm4, %xmm8");

		/* xmm4 = xmm8 = q[16] ^ dq[16] */

		/* xmm6 = low nibbles, xmm3 = high nibbles of lane 0 */
		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("movdqa %0, %%xmm10" : : "m" (qmul[0]));
		asm volatile("pxor %xmm0, %xmm1");
		asm volatile("movdqa %0, %%xmm11" : : "m" (qmul[16]));

		/* xmm1 = qmul[q[0] ^ dq[0]] */

		/* xmm8 = low nibbles, xmm4 = high nibbles of lane 1 */
		asm volatile("psraw $4, %xmm4");
		asm volatile("pand %xmm7, %xmm8");
		asm volatile("pand %xmm7, %xmm4");
		asm volatile("pshufb %xmm8, %xmm10");
		asm volatile("pshufb %xmm4, %xmm11");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("pxor %xmm10, %xmm11");
		asm volatile("movdqa %0, %%xmm12" : : "m" (p[16]));

		/* xmm11 = qmul[q[16] ^ dq[16]] */

		asm volatile("pxor %xmm1, %xmm2");

		/* xmm2 = p[0] ^ qmul[q[0] ^ dq[0]] */

		asm volatile("pxor %xmm11, %xmm12");

		/* xmm12 = p[16] ^ qmul[q[16] ^ dq[16]] */

		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm11, %0" : "=m" (dq[16]));

		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));
		asm volatile("movdqa %%xmm12, %0" : "=m" (p[16]));

		bytes -= 32;
		p += 32;
		q += 32;
		dq += 32;

#else
		/* Single 16-byte lane per iteration using only xmm0-xmm7 */
		asm volatile("movdqa %0, %%xmm3" : : "m" (dq[0]));
		asm volatile("movdqa %0, %%xmm0" : : "m" (qmul[0]));
		asm volatile("pxor %0, %%xmm3" : : "m" (q[0]));
		asm volatile("movdqa %0, %%xmm1" : : "m" (qmul[16]));

		/* xmm3 = *q ^ *dq */

		/* xmm6 = low nibbles, xmm3 = high nibbles */
		asm volatile("movdqa %xmm3, %xmm6");
		asm volatile("movdqa %0, %%xmm2" : : "m" (p[0]));
		asm volatile("psraw $4, %xmm3");
		asm volatile("pand %xmm7, %xmm6");
		asm volatile("pand %xmm7, %xmm3");
		asm volatile("pshufb %xmm6, %xmm0");
		asm volatile("pshufb %xmm3, %xmm1");
		asm volatile("pxor %xmm0, %xmm1");

		/* xmm1 = qmul[*q ^ *dq] */

		asm volatile("pxor %xmm1, %xmm2");

		/* xmm2 = *p ^ qmul[*q ^ *dq] */

		asm volatile("movdqa %%xmm1, %0" : "=m" (dq[0]));
		asm volatile("movdqa %%xmm2, %0" : "=m" (p[0]));

		bytes -= 16;
		p += 16;
		q += 16;
		dq += 16;
#endif
	}

	kernel_fpu_end();
}

/*
 * Recovery method table for the SSSE3 implementation.  The name suffix
 * follows the number of 16-byte lanes the loops above process per
 * iteration: two on CONFIG_X86_64, one otherwise.
 */
const struct raid6_recov_calls raid6_recov_ssse3 = {
	.data2 = raid6_2data_recov_ssse3,
	.datap = raid6_datap_recov_ssse3,
#ifdef CONFIG_X86_64
	.name = "ssse3x2",
#else
	.name = "ssse3x1",
#endif
};