xref: /freebsd/sys/contrib/openzfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
 */
25 
26 #include <sys/isa_defs.h>
27 
28 #if defined(__aarch64__)
29 
30 #include "vdev_raidz_math_aarch64_neon_common.h"
31 
/*
 * Per-operation configuration macros for the aarch64_neonx2 variant.
 *
 * For an operation FOO:
 *   FOO_STRIDE   - number of entries in FOO's index list.
 *   FOO_DEFINE() - the GEN_X_DEFINE_*() invocations covering those indices.
 *                  GEN_X_DEFINE_*() is not defined in this file; presumably
 *                  it comes from vdev_raidz_math_aarch64_neon_common.h and
 *                  declares the numbered working registers (confirm there).
 *   FOO_D        - comma-separated list of the indices FOO operates on.
 */

/* Not paired with a list in this file; presumably read by the shared impl. */
#define	SYN_STRIDE		4

/* ZERO: eight indices (0..7). */
#define	ZERO_STRIDE		8
#define	ZERO_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	ZERO_D			0, 1, 2, 3, 4, 5, 6, 7

/* COPY: same layout as ZERO. */
#define	COPY_STRIDE		8
#define	COPY_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	COPY_D			0, 1, 2, 3, 4, 5, 6, 7

/* ADD: same layout as ZERO. */
#define	ADD_STRIDE		8
#define	ADD_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()
#define	ADD_D			0, 1, 2, 3, 4, 5, 6, 7
54 
/*
 * Multiplication and parity-generation configuration.
 *
 * Each *_STRIDE equals the number of entries in the matching index lists
 * (*_D data, *_P parity, *_C presumably a second working set - confirm in
 * vdev_raidz_math_impl.h). Each *_DEFINE() expands to GEN_X_DEFINE_*()
 * invocations that are defined outside this file. The 16, 17 and 33_36
 * groups do not appear in any list here; presumably they are scratch or
 * constant registers used by the shared helpers.
 */

/* MUL: four data indices. */
#define	MUL_STRIDE		4
#define	MUL_DEFINE()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_33_36()
#define	MUL_D			0, 1, 2, 3

/* GEN_P: four parity indices. */
#define	GEN_P_DEFINE() \
	GEN_X_DEFINE_0_3() \
	GEN_X_DEFINE_33_36()
#define	GEN_P_STRIDE		4
#define	GEN_P_P			0, 1, 2, 3

/* GEN_PQ: D uses 0..3, C uses 4..7. */
#define	GEN_PQ_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	GEN_PQ_STRIDE		4
#define	GEN_PQ_D		0, 1, 2, 3
#define	GEN_PQ_C		4, 5, 6, 7

/* GEN_PQR: same layout as GEN_PQ. */
#define	GEN_PQR_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	GEN_PQR_STRIDE		4
#define	GEN_PQR_D		0, 1, 2, 3
#define	GEN_PQR_C		4, 5, 6, 7
88 
/*
 * SYN_Q / SYN_R configuration.
 *
 * Both use the same layout: *_D lists indices 0..3, *_X lists indices 4..7,
 * and *_STRIDE (4) is the length of each list. *_DEFINE() expands to
 * GEN_X_DEFINE_*() invocations defined outside this file; the 16, 17 and
 * 33_36 groups are not named in any list here (presumably scratch/constant
 * registers - confirm in the common NEON header).
 */
#define	SYN_Q_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_Q_STRIDE		4
#define	SYN_Q_D			0, 1, 2, 3
#define	SYN_Q_X			4, 5, 6, 7

#define	SYN_R_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_R_STRIDE		4
#define	SYN_R_D			0, 1, 2, 3
#define	SYN_R_X			4, 5, 6, 7
110 
/*
 * SYN_xx / REC_xx configuration for the PQ, PR and QR pairs.
 *
 * All three pairs share one layout:
 *   SYN_xx: D = 0..3, X = 4..7, stride 4.
 *   REC_xx: X = 0..3, Y = 4..7, T = 8, 9, 22, 23, stride 4.
 * *_STRIDE is the length of each list. *_DEFINE() expands to
 * GEN_X_DEFINE_*() invocations defined outside this file; the 16, 17 and
 * 33_36 groups are not named in any list here (presumably scratch/constant
 * registers - confirm in the common NEON header).
 */

/* PQ pair. */
#define	SYN_PQ_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PQ_STRIDE		4
#define	SYN_PQ_D		0, 1, 2, 3
#define	SYN_PQ_X		4, 5, 6, 7

#define	REC_PQ_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_PQ_STRIDE		4
#define	REC_PQ_X		0, 1, 2, 3
#define	REC_PQ_Y		4, 5, 6, 7
#define	REC_PQ_T		8, 9, 22, 23

/* PR pair. */
#define	SYN_PR_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PR_STRIDE		4
#define	SYN_PR_D		0, 1, 2, 3
#define	SYN_PR_X		4, 5, 6, 7

#define	REC_PR_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_PR_STRIDE		4
#define	REC_PR_X		0, 1, 2, 3
#define	REC_PR_Y		4, 5, 6, 7
#define	REC_PR_T		8, 9, 22, 23

/* QR pair. */
#define	SYN_QR_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_QR_STRIDE		4
#define	SYN_QR_D		0, 1, 2, 3
#define	SYN_QR_X		4, 5, 6, 7

#define	REC_QR_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_33_36()
#define	REC_QR_STRIDE		4
#define	REC_QR_X		0, 1, 2, 3
#define	REC_QR_Y		4, 5, 6, 7
#define	REC_QR_T		8, 9, 22, 23
179 
/*
 * SYN_PQR / REC_PQR configuration.
 *
 * SYN_PQR follows the four-wide layout (D = 0..3, X = 4..7, stride 4).
 * REC_PQR names five lists (X, Y, Z, XS, YS), so it is only two wide:
 * stride 2, with the lists taking indices 0..9 in pairs. *_DEFINE() expands
 * to GEN_X_DEFINE_*() invocations defined outside this file; the 16, 17,
 * 31, 32 and 33_36 groups are not named in any list here (presumably
 * scratch/constant registers - confirm in the common NEON header).
 */
#define	SYN_PQR_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_33_36()
#define	SYN_PQR_STRIDE		4
#define	SYN_PQR_D		0, 1, 2, 3
#define	SYN_PQR_X		4, 5, 6, 7

#define	REC_PQR_DEFINE() \
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()
#define	REC_PQR_STRIDE		2
#define	REC_PQR_X		0, 1
#define	REC_PQR_Y		2, 3
#define	REC_PQR_Z		4, 5
#define	REC_PQR_XS		6, 7
#define	REC_PQR_YS		8, 9
205 
206 #include <sys/vdev_raidz_impl.h>
207 #include "vdev_raidz_math_impl.h"
208 
/* Instantiate the parity-generation methods for this implementation. */
DEFINE_GEN_METHODS(aarch64_neonx2);
/*
 * If compiled with -O0, gcc doesn't do any stack frame coalescing
 * and -Wframe-larger-than=1024 is triggered in debug mode.
 *
 * Suppress that warning for the reconstruction methods only. The
 * diagnostic state is pushed first so that the pop below restores exactly
 * what was in effect before, instead of falling back to the command-line
 * settings as an unmatched pop would.
 */
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wframe-larger-than="
#endif
DEFINE_REC_METHODS(aarch64_neonx2);
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif
221 
222 static boolean_t
raidz_will_aarch64_neonx2_work(void)223 raidz_will_aarch64_neonx2_work(void)
224 {
225 	return (kfpu_allowed());
226 }
227 
228 const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
229 	.init = NULL,
230 	.fini = NULL,
231 	.gen = RAIDZ_GEN_METHODS(aarch64_neonx2),
232 	.rec = RAIDZ_REC_METHODS(aarch64_neonx2),
233 	.is_supported = &raidz_will_aarch64_neonx2_work,
234 	.name = "aarch64_neonx2"
235 };
236 
237 #endif /* defined(__aarch64__) */
238