xref: /linux/lib/raid/raid6/x86/sse1.c (revision 30bf04bd13a58cd9b877589569aa0abd06f04e52)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2002 H. Peter Anvin - All Rights Reserved
4  *
5  * SSE-1/MMXEXT implementation of RAID-6 syndrome functions.
6  *
7  * This is really an MMX implementation, but it requires SSE-1 or AMD MMXEXT for
8  * prefetch support and a few other features.  The support for nontemporal
9  * memory accesses is enough to make this worthwhile as a separate
10  * implementation.
11  */
12 
13 #include <asm/cpufeature.h>
14 #include <asm/fpu/api.h>
15 #include "algos.h"
16 
17 /* Defined in raid6/mmx.c */
18 extern const struct raid6_mmx_constants {
19 	u64 x1d;
20 } raid6_mmx_constants;
21 
22 /*
23  * Plain SSE1 implementation
24  */
25 static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
26 {
27 	u8 **dptr = (u8 **)ptrs;
28 	u8 *p, *q;
29 	int d, z, z0;
30 
31 	z0 = disks - 3;		/* Highest data disk */
32 	p = dptr[z0+1];		/* XOR parity */
33 	q = dptr[z0+2];		/* RS syndrome */
34 
35 	kernel_fpu_begin();
36 
37 	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
38 	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
39 
40 	for ( d = 0 ; d < bytes ; d += 8 ) {
41 		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
42 		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
43 		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
44 		asm volatile("movq %mm2,%mm4");	/* Q[0] */
45 		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
46 		for ( z = z0-2 ; z >= 0 ; z-- ) {
47 			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
48 			asm volatile("pcmpgtb %mm4,%mm5");
49 			asm volatile("paddb %mm4,%mm4");
50 			asm volatile("pand %mm0,%mm5");
51 			asm volatile("pxor %mm5,%mm4");
52 			asm volatile("pxor %mm5,%mm5");
53 			asm volatile("pxor %mm6,%mm2");
54 			asm volatile("pxor %mm6,%mm4");
55 			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
56 		}
57 		asm volatile("pcmpgtb %mm4,%mm5");
58 		asm volatile("paddb %mm4,%mm4");
59 		asm volatile("pand %mm0,%mm5");
60 		asm volatile("pxor %mm5,%mm4");
61 		asm volatile("pxor %mm5,%mm5");
62 		asm volatile("pxor %mm6,%mm2");
63 		asm volatile("pxor %mm6,%mm4");
64 
65 		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
66 		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
67 	}
68 
69 	asm volatile("sfence" : : : "memory");
70 	kernel_fpu_end();
71 }
72 
73 const struct raid6_calls raid6_sse1x1 = {
74 	.gen_syndrome	= raid6_sse11_gen_syndrome,
75 	.name		= "sse1x1",
76 };
77 
78 /*
79  * Unrolled-by-2 SSE1 implementation
80  */
81 static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
82 {
83 	u8 **dptr = (u8 **)ptrs;
84 	u8 *p, *q;
85 	int d, z, z0;
86 
87 	z0 = disks - 3;		/* Highest data disk */
88 	p = dptr[z0+1];		/* XOR parity */
89 	q = dptr[z0+2];		/* RS syndrome */
90 
91 	kernel_fpu_begin();
92 
93 	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
94 	asm volatile("pxor %mm5,%mm5");	/* Zero temp */
95 	asm volatile("pxor %mm7,%mm7"); /* Zero temp */
96 
97 	/* We uniformly assume a single prefetch covers at least 16 bytes */
98 	for ( d = 0 ; d < bytes ; d += 16 ) {
99 		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
100 		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
101 		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
102 		asm volatile("movq %mm2,%mm4");	/* Q[0] */
103 		asm volatile("movq %mm3,%mm6"); /* Q[1] */
104 		for ( z = z0-1 ; z >= 0 ; z-- ) {
105 			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
106 			asm volatile("pcmpgtb %mm4,%mm5");
107 			asm volatile("pcmpgtb %mm6,%mm7");
108 			asm volatile("paddb %mm4,%mm4");
109 			asm volatile("paddb %mm6,%mm6");
110 			asm volatile("pand %mm0,%mm5");
111 			asm volatile("pand %mm0,%mm7");
112 			asm volatile("pxor %mm5,%mm4");
113 			asm volatile("pxor %mm7,%mm6");
114 			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
115 			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
116 			asm volatile("pxor %mm5,%mm2");
117 			asm volatile("pxor %mm7,%mm3");
118 			asm volatile("pxor %mm5,%mm4");
119 			asm volatile("pxor %mm7,%mm6");
120 			asm volatile("pxor %mm5,%mm5");
121 			asm volatile("pxor %mm7,%mm7");
122 		}
123 		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
124 		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
125 		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
126 		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
127 	}
128 
129 	asm volatile("sfence" : :: "memory");
130 	kernel_fpu_end();
131 }
132 
133 const struct raid6_calls raid6_sse1x2 = {
134 	.gen_syndrome	= raid6_sse12_gen_syndrome,
135 	.name		= "sse1x2",
136 };
137