xref: /linux/lib/raid/xor/powerpc/xor_vmx.c (revision 440d6635b20037bc9ad46b20817d7b61cef0fc1b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *
4  * Copyright (C) IBM Corporation, 2012
5  *
6  * Author: Anton Blanchard <anton@au.ibm.com>
7  */
8 
9 /*
10  * Sparse (as at v0.5.0) gets very, very confused by this file.
11  * Make it a bit simpler for it.
12  */
13 #include "xor_impl.h"
14 #if !defined(__CHECKER__)
15 #include <altivec.h>
16 #else
17 #define vec_xor(a, b) a ^ b
18 #define vector __attribute__((vector_size(16)))
19 #endif
20 
21 #include "xor_vmx.h"
22 
typedef vector signed char unative_t;

/*
 * Per-source working set: a vector pointer aliasing the V##_in function
 * argument plus four vector temporaries, so every loop iteration moves
 * 4 * sizeof(unative_t) = 64 bytes per source.
 * Note the cast also drops const from the read-only source pointers.
 */
#define DEFINE(V)				\
	unative_t *V = (unative_t *)V##_in;	\
	unative_t V##_0, V##_1, V##_2, V##_3

/* Fetch four consecutive vectors from V into its temporaries. */
#define LOAD(V)			\
	do {			\
		V##_0 = V[0];	\
		V##_1 = V[1];	\
		V##_2 = V[2];	\
		V##_3 = V[3];	\
	} while (0)

/* Write the four temporaries of V back out to memory. */
#define STORE(V)		\
	do {			\
		V[0] = V##_0;	\
		V[1] = V##_1;	\
		V[2] = V##_2;	\
		V[3] = V##_3;	\
	} while (0)

/* V1 ^= V2, applied to all four temporaries of each. */
#define XOR(V1, V2)					\
	do {						\
		V1##_0 = vec_xor(V1##_0, V2##_0);	\
		V1##_1 = vec_xor(V1##_1, V2##_1);	\
		V1##_2 = vec_xor(V1##_2, V2##_2);	\
		V1##_3 = vec_xor(V1##_3, V2##_3);	\
	} while (0)
52 
53 static void __xor_altivec_2(unsigned long bytes,
54 		unsigned long * __restrict v1_in,
55 		const unsigned long * __restrict v2_in)
56 {
57 	DEFINE(v1);
58 	DEFINE(v2);
59 	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
60 
61 	do {
62 		LOAD(v1);
63 		LOAD(v2);
64 		XOR(v1, v2);
65 		STORE(v1);
66 
67 		v1 += 4;
68 		v2 += 4;
69 	} while (--lines > 0);
70 }
71 
72 static void __xor_altivec_3(unsigned long bytes,
73 		unsigned long * __restrict v1_in,
74 		const unsigned long * __restrict v2_in,
75 		const unsigned long * __restrict v3_in)
76 {
77 	DEFINE(v1);
78 	DEFINE(v2);
79 	DEFINE(v3);
80 	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
81 
82 	do {
83 		LOAD(v1);
84 		LOAD(v2);
85 		LOAD(v3);
86 		XOR(v1, v2);
87 		XOR(v1, v3);
88 		STORE(v1);
89 
90 		v1 += 4;
91 		v2 += 4;
92 		v3 += 4;
93 	} while (--lines > 0);
94 }
95 
96 static void __xor_altivec_4(unsigned long bytes,
97 		unsigned long * __restrict v1_in,
98 		const unsigned long * __restrict v2_in,
99 		const unsigned long * __restrict v3_in,
100 		const unsigned long * __restrict v4_in)
101 {
102 	DEFINE(v1);
103 	DEFINE(v2);
104 	DEFINE(v3);
105 	DEFINE(v4);
106 	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
107 
108 	do {
109 		LOAD(v1);
110 		LOAD(v2);
111 		LOAD(v3);
112 		LOAD(v4);
113 		XOR(v1, v2);
114 		XOR(v3, v4);
115 		XOR(v1, v3);
116 		STORE(v1);
117 
118 		v1 += 4;
119 		v2 += 4;
120 		v3 += 4;
121 		v4 += 4;
122 	} while (--lines > 0);
123 }
124 
125 static void __xor_altivec_5(unsigned long bytes,
126 		unsigned long * __restrict v1_in,
127 		const unsigned long * __restrict v2_in,
128 		const unsigned long * __restrict v3_in,
129 		const unsigned long * __restrict v4_in,
130 		const unsigned long * __restrict v5_in)
131 {
132 	DEFINE(v1);
133 	DEFINE(v2);
134 	DEFINE(v3);
135 	DEFINE(v4);
136 	DEFINE(v5);
137 	unsigned long lines = bytes / (sizeof(unative_t)) / 4;
138 
139 	do {
140 		LOAD(v1);
141 		LOAD(v2);
142 		LOAD(v3);
143 		LOAD(v4);
144 		LOAD(v5);
145 		XOR(v1, v2);
146 		XOR(v3, v4);
147 		XOR(v1, v5);
148 		XOR(v1, v3);
149 		STORE(v1);
150 
151 		v1 += 4;
152 		v2 += 4;
153 		v3 += 4;
154 		v4 += 4;
155 		v5 += 4;
156 	} while (--lines > 0);
157 }
158 
/*
 * NOTE(review): __DO_XOR_BLOCKS comes from xor_impl.h (not visible in this
 * file); presumably it generates the "altivec_inner" xor-blocks entry
 * points that dispatch to the four static helpers above — confirm against
 * xor_impl.h.
 */
__DO_XOR_BLOCKS(altivec_inner, __xor_altivec_2, __xor_altivec_3,
		__xor_altivec_4, __xor_altivec_5);
161