xref: /linux/lib/raid/raid6/x86/sse2.c (revision 30bf04bd13a58cd9b877589569aa0abd06f04e52)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2002 H. Peter Anvin - All Rights Reserved
4  *
5  * SSE-2 implementation of RAID-6 syndrome functions
6  */
7 
8 #include <asm/cpufeature.h>
9 #include <asm/fpu/api.h>
10 #include "algos.h"
11 
/*
 * Sixteen bytes of 0x1d, declared with 16-byte alignment.  The syndrome
 * routines below load it into %xmm0 with movdqa and AND it with a per-byte
 * 0x00/0xff mask, so that 0x1d is XORed only into bytes whose top bit was
 * set before they were doubled.
 */
12 static const struct raid6_sse_constants {
13 	u64 x1d[2];
14 } raid6_sse_constants  __attribute__((aligned(16))) = {
15 	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
16 };
17 
18 /*
19  * Plain SSE2 implementation
20  */
/*
 * Compute the P and Q blocks from scratch, 16 bytes per iteration.
 *
 * @disks: number of entries in @ptrs.  Entries 0..disks-3 are read as data,
 *	   entry disks-2 receives P and entry disks-1 receives Q.
 * @bytes: loop bound; buffers are consumed in 16-byte steps.
 * @ptrs:  array of buffer pointers, accessed as u8 *.
 *
 * dptr[z0-1] is read unconditionally, so this routine needs at least two
 * data buffers (disks >= 4).  Loads use movdqa and stores use movntdq, which
 * are aligned 16-byte accesses - presumably callers guarantee that alignment;
 * confirm before reusing this elsewhere.
 *
 * Register use: xmm0 = 0x1d mask, xmm2 = P, xmm4 = Q, xmm5 = scratch that
 * must be zero on entry to each doubling step, xmm6 = data block loaded one
 * buffer ahead of the step that consumes it.
 *
 * NOTE(review): 'd' is a signed int compared against the size_t 'bytes'.
 */
21 static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
22 {
23 	u8 **dptr = (u8 **)ptrs;
24 	u8 *p, *q;
25 	int d, z, z0;
26 
27 	z0 = disks - 3;		/* Highest data disk */
28 	p = dptr[z0+1];		/* XOR parity */
29 	q = dptr[z0+2];		/* RS syndrome */
30 
	/* XMM registers are used from here until kernel_fpu_end() */
31 	kernel_fpu_begin();
32 
33 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
34 	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
35 
36 	for ( d = 0 ; d < bytes ; d += 16 ) {
		/* Seed P and Q with the highest data buffer */
37 		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
38 		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
39 		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
40 		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		/* Pending data block: consumed by the next doubling step */
41 		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
42 		for ( z = z0-2 ; z >= 0 ; z-- ) {
43 			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/*
			 * Double Q per byte: xmm5 becomes 0xff in every byte
			 * whose top bit is set in Q (signed 0 > byte), Q is
			 * added to itself, and 0x1d is XORed into the
			 * flagged bytes.
			 */
44 			asm volatile("pcmpgtb %xmm4,%xmm5");
45 			asm volatile("paddb %xmm4,%xmm4");
46 			asm volatile("pand %xmm0,%xmm5");
47 			asm volatile("pxor %xmm5,%xmm4");
			/* Re-zero the scratch register for the next step */
48 			asm volatile("pxor %xmm5,%xmm5");
			/* Fold the pending data block into P and Q ... */
49 			asm volatile("pxor %xmm6,%xmm2");
50 			asm volatile("pxor %xmm6,%xmm4");
			/* ... and fetch the block for the following step */
51 			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
52 		}
		/*
		 * Final step: same doubling and fold for the block that is
		 * still pending in xmm6, with no further load.
		 */
53 		asm volatile("pcmpgtb %xmm4,%xmm5");
54 		asm volatile("paddb %xmm4,%xmm4");
55 		asm volatile("pand %xmm0,%xmm5");
56 		asm volatile("pxor %xmm5,%xmm4");
57 		asm volatile("pxor %xmm5,%xmm5");
58 		asm volatile("pxor %xmm6,%xmm2");
59 		asm volatile("pxor %xmm6,%xmm4");
60 
		/* Write P and Q with non-temporal stores, then clear them */
61 		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
62 		asm volatile("pxor %xmm2,%xmm2");
63 		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
64 		asm volatile("pxor %xmm4,%xmm4");
65 	}
66 
	/* Store fence after the movntdq stores, before leaving the FPU section */
67 	asm volatile("sfence" : : : "memory");
68 	kernel_fpu_end();
69 }
70 
71 
/*
 * XOR the contribution of data buffers start..stop into the existing P and
 * Q blocks, 16 bytes per iteration.
 *
 * @disks: used only to locate P (ptrs[disks-2]) and Q (ptrs[disks-1]).
 * @start: lowest data index whose buffer is read.
 * @stop:  highest data index whose buffer is read.
 * @bytes: loop bound; buffers are consumed in 16-byte steps.
 * @ptrs:  array of buffer pointers, accessed as u8 *.
 *
 * Buffers above @stop are never touched.  Buffers below @start are not read
 * either, but the Q accumulator is still doubled once for each of them.
 * P and Q are read, updated and written back in place with movdqa.
 *
 * Register use: xmm0 = 0x1d mask, xmm2 = P accumulator, xmm4 = Q
 * accumulator, xmm5 = scratch / data.
 *
 * NOTE(review): 'd' is a signed int compared against the size_t 'bytes'.
 */
72 static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
73 				     size_t bytes, void **ptrs)
74 {
75 	u8 **dptr = (u8 **)ptrs;
76 	u8 *p, *q;
77 	int d, z, z0;
78 
79 	z0 = stop;		/* P/Q right side optimization */
80 	p = dptr[disks-2];	/* XOR parity */
81 	q = dptr[disks-1];	/* RS syndrome */
82 
	/* XMM registers are used from here until kernel_fpu_end() */
83 	kernel_fpu_begin();
84 
85 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
86 
87 	for ( d = 0 ; d < bytes ; d += 16 ) {
		/* Q accumulator starts as buffer 'stop'; P = old P ^ that */
88 		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
89 		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
90 		asm volatile("pxor %xmm4,%xmm2");
91 		/* P/Q data pages */
92 		for ( z = z0-1 ; z >= start ; z-- ) {
			/*
			 * Double the Q accumulator per byte: flag bytes with
			 * the top bit set, add Q to itself, XOR 0x1d into the
			 * flagged bytes.
			 */
93 			asm volatile("pxor %xmm5,%xmm5");
94 			asm volatile("pcmpgtb %xmm4,%xmm5");
95 			asm volatile("paddb %xmm4,%xmm4");
96 			asm volatile("pand %xmm0,%xmm5");
97 			asm volatile("pxor %xmm5,%xmm4");
			/* Fold data buffer z into both accumulators */
98 			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
99 			asm volatile("pxor %xmm5,%xmm2");
100 			asm volatile("pxor %xmm5,%xmm4");
101 		}
102 		/* P/Q left side optimization */
103 		for ( z = start-1 ; z >= 0 ; z-- ) {
			/* Doubling only: no data is loaded for these indices */
104 			asm volatile("pxor %xmm5,%xmm5");
105 			asm volatile("pcmpgtb %xmm4,%xmm5");
106 			asm volatile("paddb %xmm4,%xmm4");
107 			asm volatile("pand %xmm0,%xmm5");
108 			asm volatile("pxor %xmm5,%xmm4");
109 		}
		/* Combine with the stored Q, then write Q and P back */
110 		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
111 		/* Don't use movntdq for r/w memory area < cache line */
112 		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
113 		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
114 	}
115 
116 	asm volatile("sfence" : : : "memory");
117 	kernel_fpu_end();
118 }
119 
/*
 * Non-static descriptor bundling the 16-bytes-per-iteration routines above
 * under the name "sse2x1".
 */
120 const struct raid6_calls raid6_sse2x1 = {
121 	.gen_syndrome	= raid6_sse21_gen_syndrome,
122 	.xor_syndrome	= raid6_sse21_xor_syndrome,
123 	.name		= "sse2x1",
124 };
125 
126 /*
127  * Unrolled-by-2 SSE2 implementation
128  */
/*
 * Compute the P and Q blocks from scratch, 32 bytes per iteration, as two
 * independent 16-byte lanes.
 *
 * @disks: number of entries in @ptrs.  Entries 0..disks-3 are read as data,
 *	   entry disks-2 receives P and entry disks-1 receives Q.
 * @bytes: loop bound; buffers are consumed in 32-byte steps.
 * @ptrs:  array of buffer pointers, accessed as u8 *.
 *
 * Register use: xmm0 = 0x1d mask.
 *   lane 0 (offset d):    xmm2 = P, xmm4 = Q, xmm5 = scratch / data
 *   lane 1 (offset d+16): xmm3 = P, xmm6 = Q, xmm7 = scratch / data
 * The scratch registers are zeroed once before the loop and again at the
 * end of every inner iteration, so each doubling step starts from zero.
 *
 * Loads use movdqa and stores use movntdq (aligned 16-byte accesses) -
 * presumably callers guarantee that alignment; confirm.
 *
 * NOTE(review): 'd' is a signed int compared against the size_t 'bytes'.
 */
129 static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
130 {
131 	u8 **dptr = (u8 **)ptrs;
132 	u8 *p, *q;
133 	int d, z, z0;
134 
135 	z0 = disks - 3;		/* Highest data disk */
136 	p = dptr[z0+1];		/* XOR parity */
137 	q = dptr[z0+2];		/* RS syndrome */
138 
	/* XMM registers are used from here until kernel_fpu_end() */
139 	kernel_fpu_begin();
140 
141 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
142 	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
143 	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
144 
145 	/* We uniformly assume a single prefetch covers at least 32 bytes */
146 	for ( d = 0 ; d < bytes ; d += 32 ) {
		/* Seed P and Q in both lanes with the highest data buffer */
147 		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
148 		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
149 		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
150 		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
151 		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
152 		for ( z = z0-1 ; z >= 0 ; z-- ) {
153 			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/*
			 * Double Q per byte in both lanes: flag bytes with
			 * the top bit set, add Q to itself, XOR 0x1d into the
			 * flagged bytes.
			 */
154 			asm volatile("pcmpgtb %xmm4,%xmm5");
155 			asm volatile("pcmpgtb %xmm6,%xmm7");
156 			asm volatile("paddb %xmm4,%xmm4");
157 			asm volatile("paddb %xmm6,%xmm6");
158 			asm volatile("pand %xmm0,%xmm5");
159 			asm volatile("pand %xmm0,%xmm7");
160 			asm volatile("pxor %xmm5,%xmm4");
161 			asm volatile("pxor %xmm7,%xmm6");
			/* Load data buffer z and fold it into P and Q */
162 			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
163 			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
164 			asm volatile("pxor %xmm5,%xmm2");
165 			asm volatile("pxor %xmm7,%xmm3");
166 			asm volatile("pxor %xmm5,%xmm4");
167 			asm volatile("pxor %xmm7,%xmm6");
			/* Re-zero the scratch registers for the next step */
168 			asm volatile("pxor %xmm5,%xmm5");
169 			asm volatile("pxor %xmm7,%xmm7");
170 		}
		/* Write P and Q for both lanes with non-temporal stores */
171 		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
172 		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
173 		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
174 		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
175 	}
176 
	/* Store fence after the movntdq stores, before leaving the FPU section */
177 	asm volatile("sfence" : : : "memory");
178 	kernel_fpu_end();
179 }
180 
/*
 * XOR the contribution of data buffers start..stop into the existing P and
 * Q blocks, 32 bytes per iteration, as two independent 16-byte lanes.
 *
 * @disks: used only to locate P (ptrs[disks-2]) and Q (ptrs[disks-1]).
 * @start: lowest data index whose buffer is read.
 * @stop:  highest data index whose buffer is read.
 * @bytes: loop bound; buffers are consumed in 32-byte steps.
 * @ptrs:  array of buffer pointers, accessed as u8 *.
 *
 * Buffers above @stop are never touched.  Buffers below @start are not read
 * either, but the Q accumulators are still doubled once for each of them.
 * P and Q are read, updated and written back in place with movdqa.
 *
 * Register use: xmm0 = 0x1d mask.
 *   lane 0 (offset d):    xmm2 = P, xmm4 = Q, xmm5 = scratch / data
 *   lane 1 (offset d+16): xmm3 = P, xmm6 = Q, xmm7 = scratch / data
 *
 * NOTE(review): 'd' is a signed int compared against the size_t 'bytes'.
 */
181 static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
182 				     size_t bytes, void **ptrs)
183 {
184 	u8 **dptr = (u8 **)ptrs;
185 	u8 *p, *q;
186 	int d, z, z0;
187 
188 	z0 = stop;		/* P/Q right side optimization */
189 	p = dptr[disks-2];	/* XOR parity */
190 	q = dptr[disks-1];	/* RS syndrome */
191 
	/* XMM registers are used from here until kernel_fpu_end() */
192 	kernel_fpu_begin();
193 
194 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
195 
196 	for ( d = 0 ; d < bytes ; d += 32 ) {
		/* Q accumulators start as buffer 'stop'; P = old P ^ that */
197 		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
198 		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
199 		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
200 		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
201 		asm volatile("pxor %xmm4,%xmm2");
202 		asm volatile("pxor %xmm6,%xmm3");
203 		/* P/Q data pages */
204 		for ( z = z0-1 ; z >= start ; z-- ) {
			/*
			 * Double the Q accumulators per byte: flag bytes with
			 * the top bit set, add Q to itself, XOR 0x1d into the
			 * flagged bytes.
			 */
205 			asm volatile("pxor %xmm5,%xmm5");
206 			asm volatile("pxor %xmm7,%xmm7");
207 			asm volatile("pcmpgtb %xmm4,%xmm5");
208 			asm volatile("pcmpgtb %xmm6,%xmm7");
209 			asm volatile("paddb %xmm4,%xmm4");
210 			asm volatile("paddb %xmm6,%xmm6");
211 			asm volatile("pand %xmm0,%xmm5");
212 			asm volatile("pand %xmm0,%xmm7");
213 			asm volatile("pxor %xmm5,%xmm4");
214 			asm volatile("pxor %xmm7,%xmm6");
			/* Fold data buffer z into both accumulators */
215 			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
216 			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
217 			asm volatile("pxor %xmm5,%xmm2");
218 			asm volatile("pxor %xmm7,%xmm3");
219 			asm volatile("pxor %xmm5,%xmm4");
220 			asm volatile("pxor %xmm7,%xmm6");
221 		}
222 		/* P/Q left side optimization */
223 		for ( z = start-1 ; z >= 0 ; z-- ) {
			/* Doubling only: no data is loaded for these indices */
224 			asm volatile("pxor %xmm5,%xmm5");
225 			asm volatile("pxor %xmm7,%xmm7");
226 			asm volatile("pcmpgtb %xmm4,%xmm5");
227 			asm volatile("pcmpgtb %xmm6,%xmm7");
228 			asm volatile("paddb %xmm4,%xmm4");
229 			asm volatile("paddb %xmm6,%xmm6");
230 			asm volatile("pand %xmm0,%xmm5");
231 			asm volatile("pand %xmm0,%xmm7");
232 			asm volatile("pxor %xmm5,%xmm4");
233 			asm volatile("pxor %xmm7,%xmm6");
234 		}
		/* Combine with the stored Q, then write Q and P back */
235 		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
236 		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
237 		/* Don't use movntdq for r/w memory area < cache line */
238 		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
239 		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
240 		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
241 		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
242 	}
243 
244 	asm volatile("sfence" : : : "memory");
245 	kernel_fpu_end();
246 }
247 
/*
 * Non-static descriptor bundling the 32-bytes-per-iteration routines above
 * under the name "sse2x2".
 */
248 const struct raid6_calls raid6_sse2x2 = {
249 	.gen_syndrome	= raid6_sse22_gen_syndrome,
250 	.xor_syndrome	= raid6_sse22_xor_syndrome,
251 	.name		= "sse2x2",
252 };
253 
254 #ifdef CONFIG_X86_64
255 
256 /*
257  * Unrolled-by-4 SSE2 implementation
258  */
/*
 * Compute the P and Q blocks from scratch, 64 bytes per iteration, as four
 * independent 16-byte lanes.  Uses xmm10-xmm15 in addition to the low
 * registers.
 *
 * @disks: number of entries in @ptrs.  Entries 0..disks-3 are read as data,
 *	   entry disks-2 receives P and entry disks-1 receives Q.
 * @bytes: loop bound; buffers are consumed in 64-byte steps.
 * @ptrs:  array of buffer pointers, accessed as u8 *.
 *
 * Register use: xmm0 = 0x1d mask.
 *   lane 0 (d):    xmm2  = P, xmm4  = Q, xmm5  = scratch / data
 *   lane 1 (d+16): xmm3  = P, xmm6  = Q, xmm7  = scratch / data
 *   lane 2 (d+32): xmm10 = P, xmm12 = Q, xmm13 = scratch / data
 *   lane 3 (d+48): xmm11 = P, xmm14 = Q, xmm15 = scratch / data
 *
 * Unlike the narrower variants, P and Q are not seeded from the highest
 * data buffer: all accumulators start at zero (cleared before the loop and
 * again after every store), and the inner loop runs over every data buffer
 * from z0 down to 0.
 *
 * Loads use movdqa and stores use movntdq (aligned 16-byte accesses) -
 * presumably callers guarantee that alignment; confirm.
 *
 * NOTE(review): 'd' is a signed int compared against the size_t 'bytes'.
 */
259 static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
260 {
261 	u8 **dptr = (u8 **)ptrs;
262 	u8 *p, *q;
263 	int d, z, z0;
264 
265 	z0 = disks - 3;		/* Highest data disk */
266 	p = dptr[z0+1];		/* XOR parity */
267 	q = dptr[z0+2];		/* RS syndrome */
268 
	/* XMM registers are used from here until kernel_fpu_end() */
269 	kernel_fpu_begin();
270 
271 	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
272 	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
273 	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
274 	asm volatile("pxor %xmm4,%xmm4"); 	/* Q[0] */
275 	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
276 	asm volatile("pxor %xmm6,%xmm6"); 	/* Q[1] */
277 	asm volatile("pxor %xmm7,%xmm7"); 	/* Zero temp */
278 	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
279 	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
280 	asm volatile("pxor %xmm12,%xmm12"); 	/* Q[2] */
281 	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
282 	asm volatile("pxor %xmm14,%xmm14"); 	/* Q[3] */
283 	asm volatile("pxor %xmm15,%xmm15"); 	/* Zero temp */
284 
285 	for ( d = 0 ; d < bytes ; d += 64 ) {
286 		for ( z = z0 ; z >= 0 ; z-- ) {
287 			/* The second prefetch seems to improve performance... */
288 			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
289 			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			/*
			 * Double Q per byte in all four lanes: flag bytes
			 * with the top bit set, add Q to itself, XOR 0x1d
			 * into the flagged bytes.  On the first pass Q is
			 * zero, so this leaves it zero.
			 */
290 			asm volatile("pcmpgtb %xmm4,%xmm5");
291 			asm volatile("pcmpgtb %xmm6,%xmm7");
292 			asm volatile("pcmpgtb %xmm12,%xmm13");
293 			asm volatile("pcmpgtb %xmm14,%xmm15");
294 			asm volatile("paddb %xmm4,%xmm4");
295 			asm volatile("paddb %xmm6,%xmm6");
296 			asm volatile("paddb %xmm12,%xmm12");
297 			asm volatile("paddb %xmm14,%xmm14");
298 			asm volatile("pand %xmm0,%xmm5");
299 			asm volatile("pand %xmm0,%xmm7");
300 			asm volatile("pand %xmm0,%xmm13");
301 			asm volatile("pand %xmm0,%xmm15");
302 			asm volatile("pxor %xmm5,%xmm4");
303 			asm volatile("pxor %xmm7,%xmm6");
304 			asm volatile("pxor %xmm13,%xmm12");
305 			asm volatile("pxor %xmm15,%xmm14");
			/* Load data buffer z and fold it into P and Q */
306 			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
307 			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
308 			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
309 			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
310 			asm volatile("pxor %xmm5,%xmm2");
311 			asm volatile("pxor %xmm7,%xmm3");
312 			asm volatile("pxor %xmm13,%xmm10");
313 			asm volatile("pxor %xmm15,%xmm11");
314 			asm volatile("pxor %xmm5,%xmm4");
315 			asm volatile("pxor %xmm7,%xmm6");
316 			asm volatile("pxor %xmm13,%xmm12");
317 			asm volatile("pxor %xmm15,%xmm14");
			/* Re-zero the scratch registers for the next step */
318 			asm volatile("pxor %xmm5,%xmm5");
319 			asm volatile("pxor %xmm7,%xmm7");
320 			asm volatile("pxor %xmm13,%xmm13");
321 			asm volatile("pxor %xmm15,%xmm15");
322 		}
		/*
		 * Write each accumulator with a non-temporal store and clear
		 * it straight away so the next 64-byte chunk starts from zero.
		 */
323 		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
324 		asm volatile("pxor %xmm2,%xmm2");
325 		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
326 		asm volatile("pxor %xmm3,%xmm3");
327 		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
328 		asm volatile("pxor %xmm10,%xmm10");
329 		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
330 		asm volatile("pxor %xmm11,%xmm11");
331 		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
332 		asm volatile("pxor %xmm4,%xmm4");
333 		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
334 		asm volatile("pxor %xmm6,%xmm6");
335 		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
336 		asm volatile("pxor %xmm12,%xmm12");
337 		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
338 		asm volatile("pxor %xmm14,%xmm14");
339 	}
340 
	/* Store fence after the movntdq stores, before leaving the FPU section */
341 	asm volatile("sfence" : : : "memory");
342 	kernel_fpu_end();
343 }
344 
/*
 * XOR the contribution of data buffers start..stop into the existing P and
 * Q blocks, 64 bytes per iteration, as four independent 16-byte lanes.
 * Uses xmm10-xmm15 in addition to the low registers.
 *
 * @disks: used only to locate P (ptrs[disks-2]) and Q (ptrs[disks-1]).
 * @start: lowest data index whose buffer is read.
 * @stop:  highest data index whose buffer is read.
 * @bytes: loop bound; buffers are consumed in 64-byte steps.
 * @ptrs:  array of buffer pointers, accessed as u8 *.
 *
 * Buffers above @stop are never touched.  Buffers below @start are not read
 * either, but the Q accumulators are still doubled once for each of them.
 * Unlike the 16- and 32-byte variants, the updated P and Q are written back
 * with movntdq rather than movdqa.
 *
 * Register use: xmm0 = 0x1d mask.
 *   lane 0 (d):    xmm2  = P, xmm4  = Q, xmm5  = scratch / data
 *   lane 1 (d+16): xmm3  = P, xmm6  = Q, xmm7  = scratch / data
 *   lane 2 (d+32): xmm10 = P, xmm12 = Q, xmm13 = scratch / data
 *   lane 3 (d+48): xmm11 = P, xmm14 = Q, xmm15 = scratch / data
 *
 * NOTE(review): 'd' is a signed int compared against the size_t 'bytes'.
 */
345 static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
346 				     size_t bytes, void **ptrs)
347 {
348 	u8 **dptr = (u8 **)ptrs;
349 	u8 *p, *q;
350 	int d, z, z0;
351 
352 	z0 = stop;		/* P/Q right side optimization */
353 	p = dptr[disks-2];	/* XOR parity */
354 	q = dptr[disks-1];	/* RS syndrome */
355 
	/* XMM registers are used from here until kernel_fpu_end() */
356 	kernel_fpu_begin();
357 
358 	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
359 
360 	for ( d = 0 ; d < bytes ; d += 64 ) {
		/* Q accumulators start as buffer 'stop'; P = old P ^ that */
361 		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
362 		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
363 		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
364 		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
365 		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
366 		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
367 		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
368 		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
369 		asm volatile("pxor %xmm4,%xmm2");
370 		asm volatile("pxor %xmm6,%xmm3");
371 		asm volatile("pxor %xmm12,%xmm10");
372 		asm volatile("pxor %xmm14,%xmm11");
373 		/* P/Q data pages */
374 		for ( z = z0-1 ; z >= start ; z-- ) {
375 			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
376 			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			/*
			 * Double the Q accumulators per byte: flag bytes with
			 * the top bit set, add Q to itself, XOR 0x1d into the
			 * flagged bytes.
			 */
377 			asm volatile("pxor %xmm5,%xmm5");
378 			asm volatile("pxor %xmm7,%xmm7");
379 			asm volatile("pxor %xmm13,%xmm13");
380 			asm volatile("pxor %xmm15,%xmm15");
381 			asm volatile("pcmpgtb %xmm4,%xmm5");
382 			asm volatile("pcmpgtb %xmm6,%xmm7");
383 			asm volatile("pcmpgtb %xmm12,%xmm13");
384 			asm volatile("pcmpgtb %xmm14,%xmm15");
385 			asm volatile("paddb %xmm4,%xmm4");
386 			asm volatile("paddb %xmm6,%xmm6");
387 			asm volatile("paddb %xmm12,%xmm12");
388 			asm volatile("paddb %xmm14,%xmm14");
389 			asm volatile("pand %xmm0,%xmm5");
390 			asm volatile("pand %xmm0,%xmm7");
391 			asm volatile("pand %xmm0,%xmm13");
392 			asm volatile("pand %xmm0,%xmm15");
393 			asm volatile("pxor %xmm5,%xmm4");
394 			asm volatile("pxor %xmm7,%xmm6");
395 			asm volatile("pxor %xmm13,%xmm12");
396 			asm volatile("pxor %xmm15,%xmm14");
			/* Fold data buffer z into both sets of accumulators */
397 			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
398 			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
399 			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
400 			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
401 			asm volatile("pxor %xmm5,%xmm2");
402 			asm volatile("pxor %xmm7,%xmm3");
403 			asm volatile("pxor %xmm13,%xmm10");
404 			asm volatile("pxor %xmm15,%xmm11");
405 			asm volatile("pxor %xmm5,%xmm4");
406 			asm volatile("pxor %xmm7,%xmm6");
407 			asm volatile("pxor %xmm13,%xmm12");
408 			asm volatile("pxor %xmm15,%xmm14");
409 		}
		/* Prefetch the stored Q, which is read after the loop below */
410 		asm volatile("prefetchnta %0" :: "m" (q[d]));
411 		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
412 		/* P/Q left side optimization */
413 		for ( z = start-1 ; z >= 0 ; z-- ) {
			/* Doubling only: no data is loaded for these indices */
414 			asm volatile("pxor %xmm5,%xmm5");
415 			asm volatile("pxor %xmm7,%xmm7");
416 			asm volatile("pxor %xmm13,%xmm13");
417 			asm volatile("pxor %xmm15,%xmm15");
418 			asm volatile("pcmpgtb %xmm4,%xmm5");
419 			asm volatile("pcmpgtb %xmm6,%xmm7");
420 			asm volatile("pcmpgtb %xmm12,%xmm13");
421 			asm volatile("pcmpgtb %xmm14,%xmm15");
422 			asm volatile("paddb %xmm4,%xmm4");
423 			asm volatile("paddb %xmm6,%xmm6");
424 			asm volatile("paddb %xmm12,%xmm12");
425 			asm volatile("paddb %xmm14,%xmm14");
426 			asm volatile("pand %xmm0,%xmm5");
427 			asm volatile("pand %xmm0,%xmm7");
428 			asm volatile("pand %xmm0,%xmm13");
429 			asm volatile("pand %xmm0,%xmm15");
430 			asm volatile("pxor %xmm5,%xmm4");
431 			asm volatile("pxor %xmm7,%xmm6");
432 			asm volatile("pxor %xmm13,%xmm12");
433 			asm volatile("pxor %xmm15,%xmm14");
434 		}
		/* Write the updated P back with non-temporal stores */
435 		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
436 		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
437 		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
438 		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		/* Combine with the stored Q, then write Q back the same way */
439 		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
440 		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
441 		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
442 		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
443 		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
444 		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
445 		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
446 		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
447 	}
	/* Store fence after the movntdq stores, before leaving the FPU section */
448 	asm volatile("sfence" : : : "memory");
449 	kernel_fpu_end();
450 }
451 
452 
/*
 * Non-static descriptor bundling the 64-bytes-per-iteration routines above
 * under the name "sse2x4".
 */
453 const struct raid6_calls raid6_sse2x4 = {
454 	.gen_syndrome	= raid6_sse24_gen_syndrome,
455 	.xor_syndrome	= raid6_sse24_xor_syndrome,
456 	.name		= "sse2x4",
457 };
458 
459 #endif /* CONFIG_X86_64 */
460