xref: /freebsd/sys/contrib/openzfs/module/zfs/zfs_chksum.c (revision 5956d97f4b3204318ceb6aa9c77bd0bc6ea87a41)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
24  */
25 
26 #include <sys/types.h>
27 #include <sys/spa.h>
28 #include <sys/zio_checksum.h>
29 #include <sys/zfs_context.h>
30 #include <sys/zfs_chksum.h>
31 
32 #include <sys/blake3.h>
33 
/*
 * Handle of the "chksum_bench" kstat.  It is created and installed by
 * chksum_init() and deleted (and reset to NULL) by chksum_fini().
 */
static kstat_t *chksum_kstat = NULL;
35 
36 typedef struct {
37 	const char *name;
38 	const char *impl;
39 	uint64_t bs1k;
40 	uint64_t bs4k;
41 	uint64_t bs16k;
42 	uint64_t bs64k;
43 	uint64_t bs256k;
44 	uint64_t bs1m;
45 	uint64_t bs4m;
46 	zio_cksum_salt_t salt;
47 	zio_checksum_t *(func);
48 	zio_checksum_tmpl_init_t *(init);
49 	zio_checksum_tmpl_free_t *(free);
50 } chksum_stat_t;
51 
52 static int chksum_stat_cnt = 0;
53 static chksum_stat_t *chksum_stat_data = 0;
54 
55 /*
56  * i3-1005G1 test output:
57  *
58  * implementation     1k      4k     16k     64k    256k      1m      4m
59  * fletcher-4       5421   15001   26468   32555   34720   32801   18847
60  * edonr-generic    1196    1602    1761    1749    1762    1759    1751
61  * skein-generic     546     591     608     615     619     612     616
62  * sha256-generic    246     270     274     274     277     275     276
63  * sha256-avx        262     296     304     307     307     307     306
64  * sha256-sha-ni     769    1072    1172    1220    1219    1232    1228
65  * sha256-openssl    240     300     316     314     304     285     276
66  * sha512-generic    333     374     385     392     391     393     392
67  * sha512-openssl    353     441     467     476     472     467     426
68  * sha512-avx        362     444     473     475     479     476     478
69  * sha512-avx2       394     500     530     538     543     545     542
70  * blake3-generic    308     313     313     313     312     313     312
71  * blake3-sse2       402    1289    1423    1446    1432    1458    1413
72  * blake3-sse41      427    1470    1625    1704    1679    1607    1629
73  * blake3-avx2       428    1920    3095    3343    3356    3318    3204
74  * blake3-avx512     473    2687    4905    5836    5844    5643    5374
75  */
/*
 * Write the column header line of the "chksum_bench" kstat into buf.
 *
 * buf:  destination buffer
 * size: capacity of buf in bytes
 *
 * Always returns 0.
 *
 * snprintf() reports the length the text would have had, not the length
 * that was actually stored.  Every result is therefore checked against the
 * space that is left, and nothing more is appended once the output has been
 * truncated.  This keeps "buf + off" inside the buffer and prevents
 * "size - off" from wrapping around to a huge unsigned value.
 */
static int
chksum_stat_kstat_headers(char *buf, size_t size)
{
	static const char *const cols[] = {
		"1k", "4k", "16k", "64k", "256k", "1m", "4m"
	};
	const size_t ncols = sizeof (cols) / sizeof (cols[0]);
	size_t off, i;
	int len;

	len = snprintf(buf, size, "%-23s", "implementation");
	if (len < 0 || (size_t)len >= size)
		return (0);
	off = (size_t)len;

	for (i = 0; i < ncols; i++) {
		/* only the last column is followed by the line break */
		len = snprintf(buf + off, size - off, "%8s%s", cols[i],
		    (i + 1 == ncols) ? "\n" : "");
		if (len < 0 || (size_t)len >= size - off)
			break;
		off += (size_t)len;
	}

	return (0);
}
92 
93 static int
94 chksum_stat_kstat_data(char *buf, size_t size, void *data)
95 {
96 	chksum_stat_t *cs;
97 	ssize_t off = 0;
98 	char b[24];
99 
100 	cs = (chksum_stat_t *)data;
101 	snprintf(b, 23, "%s-%s", cs->name, cs->impl);
102 	off += snprintf(buf + off, size - off, "%-23s", b);
103 	off += snprintf(buf + off, size - off, "%8llu",
104 	    (u_longlong_t)cs->bs1k);
105 	off += snprintf(buf + off, size - off, "%8llu",
106 	    (u_longlong_t)cs->bs4k);
107 	off += snprintf(buf + off, size - off, "%8llu",
108 	    (u_longlong_t)cs->bs16k);
109 	off += snprintf(buf + off, size - off, "%8llu",
110 	    (u_longlong_t)cs->bs64k);
111 	off += snprintf(buf + off, size - off, "%8llu",
112 	    (u_longlong_t)cs->bs256k);
113 	off += snprintf(buf + off, size - off, "%8llu",
114 	    (u_longlong_t)cs->bs1m);
115 	(void) snprintf(buf + off, size - off, "%8llu\n",
116 	    (u_longlong_t)cs->bs4m);
117 
118 	return (0);
119 }
120 
121 static void *
122 chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
123 {
124 	if (n < chksum_stat_cnt)
125 		ksp->ks_private = (void *)(chksum_stat_data + n);
126 	else
127 		ksp->ks_private = NULL;
128 
129 	return (ksp->ks_private);
130 }
131 
132 static void
133 chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
134     uint64_t *result)
135 {
136 	hrtime_t start;
137 	uint64_t run_bw, run_time_ns, run_count = 0, size = 0;
138 	uint32_t l, loops = 0;
139 	zio_cksum_t zcp;
140 
141 	switch (round) {
142 	case 1: /* 1k */
143 		size = 1<<10; loops = 128; break;
144 	case 2: /* 2k */
145 		size = 1<<12; loops = 64; break;
146 	case 3: /* 4k */
147 		size = 1<<14; loops = 32; break;
148 	case 4: /* 16k */
149 		size = 1<<16; loops = 16; break;
150 	case 5: /* 256k */
151 		size = 1<<18; loops = 8; break;
152 	case 6: /* 1m */
153 		size = 1<<20; loops = 4; break;
154 	case 7: /* 4m */
155 		size = 1<<22; loops = 1; break;
156 	}
157 
158 	kpreempt_disable();
159 	start = gethrtime();
160 	do {
161 		for (l = 0; l < loops; l++, run_count++)
162 			cs->func(abd, size, ctx, &zcp);
163 
164 		run_time_ns = gethrtime() - start;
165 	} while (run_time_ns < MSEC2NSEC(1));
166 	kpreempt_enable();
167 
168 	run_bw = size * run_count * NANOSEC;
169 	run_bw /= run_time_ns;	/* B/s */
170 	*result = run_bw/1024/1024; /* MiB/s */
171 }
172 
173 static void
174 chksum_benchit(chksum_stat_t *cs)
175 {
176 	abd_t *abd;
177 	void *ctx = 0;
178 	void *salt = &cs->salt.zcs_bytes;
179 
180 	/* allocate test memory via default abd interface */
181 	abd = abd_alloc_linear(1<<22, B_FALSE);
182 	memset(salt, 0, sizeof (cs->salt.zcs_bytes));
183 	if (cs->init) {
184 		ctx = cs->init(&cs->salt);
185 	}
186 
187 	chksum_run(cs, abd, ctx, 1, &cs->bs1k);
188 	chksum_run(cs, abd, ctx, 2, &cs->bs4k);
189 	chksum_run(cs, abd, ctx, 3, &cs->bs16k);
190 	chksum_run(cs, abd, ctx, 4, &cs->bs64k);
191 	chksum_run(cs, abd, ctx, 5, &cs->bs256k);
192 	chksum_run(cs, abd, ctx, 6, &cs->bs1m);
193 	chksum_run(cs, abd, ctx, 7, &cs->bs4m);
194 
195 	/* free up temp memory */
196 	if (cs->free) {
197 		cs->free(ctx);
198 	}
199 	abd_free(abd);
200 }
201 
202 /*
203  * Initialize and benchmark all supported implementations.
204  */
205 static void
206 chksum_benchmark(void)
207 {
208 
209 #ifndef _KERNEL
210 	/* we need the benchmark only for the kernel module */
211 	return;
212 #endif
213 
214 	chksum_stat_t *cs;
215 	int cbid = 0, id;
216 	uint64_t max = 0;
217 
218 	/* space for the benchmark times */
219 	chksum_stat_cnt = 4;
220 	chksum_stat_cnt += blake3_get_impl_count();
221 	chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
222 	    sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
223 
224 	/* edonr */
225 	cs = &chksum_stat_data[cbid++];
226 	cs->init = abd_checksum_edonr_tmpl_init;
227 	cs->func = abd_checksum_edonr_native;
228 	cs->free = abd_checksum_edonr_tmpl_free;
229 	cs->name = "edonr";
230 	cs->impl = "generic";
231 	chksum_benchit(cs);
232 
233 	/* skein */
234 	cs = &chksum_stat_data[cbid++];
235 	cs->init = abd_checksum_skein_tmpl_init;
236 	cs->func = abd_checksum_skein_native;
237 	cs->free = abd_checksum_skein_tmpl_free;
238 	cs->name = "skein";
239 	cs->impl = "generic";
240 	chksum_benchit(cs);
241 
242 	/* sha256 */
243 	cs = &chksum_stat_data[cbid++];
244 	cs->init = 0;
245 	cs->func = abd_checksum_SHA256;
246 	cs->free = 0;
247 	cs->name = "sha256";
248 	cs->impl = "generic";
249 	chksum_benchit(cs);
250 
251 	/* sha512 */
252 	cs = &chksum_stat_data[cbid++];
253 	cs->init = 0;
254 	cs->func = abd_checksum_SHA512_native;
255 	cs->free = 0;
256 	cs->name = "sha512";
257 	cs->impl = "generic";
258 	chksum_benchit(cs);
259 
260 	/* blake3 */
261 	for (id = 0; id < blake3_get_impl_count(); id++) {
262 		blake3_set_impl_id(id);
263 		cs = &chksum_stat_data[cbid++];
264 		cs->init = abd_checksum_blake3_tmpl_init;
265 		cs->func = abd_checksum_blake3_native;
266 		cs->free = abd_checksum_blake3_tmpl_free;
267 		cs->name = "blake3";
268 		cs->impl = blake3_get_impl_name();
269 		chksum_benchit(cs);
270 		if (cs->bs256k > max) {
271 			max = cs->bs256k;
272 			blake3_set_impl_fastest(id);
273 		}
274 	}
275 }
276 
277 void
278 chksum_init(void)
279 {
280 #ifdef _KERNEL
281 	blake3_per_cpu_ctx_init();
282 #endif
283 
284 	/* Benchmark supported implementations */
285 	chksum_benchmark();
286 
287 	/* Install kstats for all implementations */
288 	chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc",
289 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
290 
291 	if (chksum_kstat != NULL) {
292 		chksum_kstat->ks_data = NULL;
293 		chksum_kstat->ks_ndata = UINT32_MAX;
294 		kstat_set_raw_ops(chksum_kstat,
295 		    chksum_stat_kstat_headers,
296 		    chksum_stat_kstat_data,
297 		    chksum_stat_kstat_addr);
298 		kstat_install(chksum_kstat);
299 	}
300 
301 	/* setup implementations */
302 	blake3_setup_impl();
303 }
304 
305 void
306 chksum_fini(void)
307 {
308 	if (chksum_kstat != NULL) {
309 		kstat_delete(chksum_kstat);
310 		chksum_kstat = NULL;
311 	}
312 
313 	if (chksum_stat_cnt) {
314 		kmem_free(chksum_stat_data,
315 		    sizeof (chksum_stat_t) * chksum_stat_cnt);
316 		chksum_stat_cnt = 0;
317 		chksum_stat_data = 0;
318 	}
319 
320 #ifdef _KERNEL
321 	blake3_per_cpu_ctx_fini();
322 #endif
323 }
324