xref: /freebsd/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
24  */
25 
26 #include <sys/zfs_context.h>
27 #include <sys/zio_checksum.h>
28 
29 #include "blake3_impl.h"
30 
31 static const blake3_impl_ops_t *const blake3_impls[] = {
32 	&blake3_generic_impl,
33 #if defined(__aarch64__) || \
34 	(defined(__x86_64) && defined(HAVE_SSE2)) || \
35 	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
36 	&blake3_sse2_impl,
37 #endif
38 #if defined(__aarch64__) || \
39 	(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
40 	(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
41 	&blake3_sse41_impl,
42 #endif
43 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
44 	&blake3_avx2_impl,
45 #endif
46 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
47 	&blake3_avx512_impl,
48 #endif
49 };
50 
51 /* this pointer holds current ops for implementation */
52 static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
53 
54 /* special implementation selections */
55 #define	IMPL_FASTEST	(UINT32_MAX)
56 #define	IMPL_CYCLE	(UINT32_MAX-1)
57 #define	IMPL_USER	(UINT32_MAX-2)
58 #define	IMPL_PARAM	(UINT32_MAX-3)
59 
60 #define	IMPL_READ(i) (*(volatile uint32_t *) &(i))
61 static uint32_t icp_blake3_impl = IMPL_FASTEST;
62 
63 #define	BLAKE3_IMPL_NAME_MAX	16
64 
65 /* id of fastest implementation */
66 static uint32_t blake3_fastest_id = 0;
67 
68 /* currently used id */
69 static uint32_t blake3_current_id = 0;
70 
71 /* id of module parameter (-1 == unused) */
72 static int blake3_param_id = -1;
73 
74 /* return number of supported implementations */
75 int
76 blake3_get_impl_count(void)
77 {
78 	static int impls = 0;
79 	int i;
80 
81 	if (impls)
82 		return (impls);
83 
84 	for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
85 		if (!blake3_impls[i]->is_supported()) continue;
86 		impls++;
87 	}
88 
89 	return (impls);
90 }
91 
92 /* return id of selected implementation */
93 int
94 blake3_get_impl_id(void)
95 {
96 	return (blake3_current_id);
97 }
98 
99 /* return name of selected implementation */
100 const char *
101 blake3_get_impl_name(void)
102 {
103 	return (blake3_selected_impl->name);
104 }
105 
106 /* setup id as fastest implementation */
107 void
108 blake3_set_impl_fastest(uint32_t id)
109 {
110 	blake3_fastest_id = id;
111 }
112 
113 /* set implementation by id */
114 void
115 blake3_set_impl_id(uint32_t id)
116 {
117 	int i, cid;
118 
119 	/* select fastest */
120 	if (id == IMPL_FASTEST)
121 		id = blake3_fastest_id;
122 
123 	/* select next or first */
124 	if (id == IMPL_CYCLE)
125 		id = (++blake3_current_id) % blake3_get_impl_count();
126 
127 	/* 0..N for the real impl */
128 	for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
129 		if (!blake3_impls[i]->is_supported()) continue;
130 		if (cid == id) {
131 			blake3_current_id = cid;
132 			blake3_selected_impl = blake3_impls[i];
133 			return;
134 		}
135 		cid++;
136 	}
137 }
138 
139 /* set implementation by name */
140 int
141 blake3_set_impl_name(const char *name)
142 {
143 	int i, cid;
144 
145 	if (strcmp(name, "fastest") == 0) {
146 		atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST);
147 		blake3_set_impl_id(IMPL_FASTEST);
148 		return (0);
149 	} else if (strcmp(name, "cycle") == 0) {
150 		atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE);
151 		blake3_set_impl_id(IMPL_CYCLE);
152 		return (0);
153 	}
154 
155 	for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
156 		if (!blake3_impls[i]->is_supported()) continue;
157 		if (strcmp(name, blake3_impls[i]->name) == 0) {
158 			if (icp_blake3_impl == IMPL_PARAM) {
159 				blake3_param_id = cid;
160 				return (0);
161 			}
162 			blake3_selected_impl = blake3_impls[i];
163 			blake3_current_id = cid;
164 			return (0);
165 		}
166 		cid++;
167 	}
168 
169 	return (-EINVAL);
170 }
171 
172 /* setup implementation */
173 void
174 blake3_setup_impl(void)
175 {
176 	switch (IMPL_READ(icp_blake3_impl)) {
177 	case IMPL_PARAM:
178 		blake3_set_impl_id(blake3_param_id);
179 		atomic_swap_32(&icp_blake3_impl, IMPL_USER);
180 		break;
181 	case IMPL_FASTEST:
182 		blake3_set_impl_id(IMPL_FASTEST);
183 		break;
184 	case IMPL_CYCLE:
185 		blake3_set_impl_id(IMPL_CYCLE);
186 		break;
187 	default:
188 		blake3_set_impl_id(blake3_current_id);
189 		break;
190 	}
191 }
192 
193 /* return selected implementation */
194 const blake3_impl_ops_t *
195 blake3_impl_get_ops(void)
196 {
197 	/* each call to ops will cycle */
198 	if (icp_blake3_impl == IMPL_CYCLE)
199 		blake3_set_impl_id(IMPL_CYCLE);
200 
201 	return (blake3_selected_impl);
202 }
203 
204 #if defined(_KERNEL)
205 void **blake3_per_cpu_ctx;
206 
207 void
208 blake3_per_cpu_ctx_init(void)
209 {
210 	/*
211 	 * Create "The Godfather" ptr to hold all blake3 ctx
212 	 */
213 	blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
214 	for (int i = 0; i < max_ncpus; i++) {
215 		blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
216 		    KM_SLEEP);
217 	}
218 }
219 
220 void
221 blake3_per_cpu_ctx_fini(void)
222 {
223 	for (int i = 0; i < max_ncpus; i++) {
224 		memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
225 		kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
226 	}
227 	memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
228 	kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
229 }
230 #endif
231 
232 #if defined(_KERNEL) && defined(__linux__)
233 static int
234 icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
235 {
236 	char req_name[BLAKE3_IMPL_NAME_MAX];
237 	size_t i;
238 
239 	/* sanitize input */
240 	i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
241 	if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
242 		return (-EINVAL);
243 
244 	strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
245 	while (i > 0 && isspace(req_name[i-1]))
246 		i--;
247 	req_name[i] = '\0';
248 
249 	atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
250 	return (blake3_set_impl_name(req_name));
251 }
252 
253 static int
254 icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp)
255 {
256 	int i, cid, cnt = 0;
257 	char *fmt;
258 
259 	/* cycling */
260 	fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle ";
261 	cnt += sprintf(buffer + cnt, fmt);
262 
263 	/* fastest one */
264 	fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest ";
265 	cnt += sprintf(buffer + cnt, fmt);
266 
267 	/* user selected */
268 	for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
269 		if (!blake3_impls[i]->is_supported()) continue;
270 		fmt = (icp_blake3_impl == IMPL_USER &&
271 		    cid == blake3_current_id) ? "[%s] " : "%s ";
272 		cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name);
273 		cid++;
274 	}
275 
276 	buffer[cnt] = 0;
277 
278 	return (cnt);
279 }
280 
281 module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get,
282     NULL, 0644);
283 MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation.");
284 #endif
285