11f1e2261SMartin Matuska /* 21f1e2261SMartin Matuska * CDDL HEADER START 31f1e2261SMartin Matuska * 41f1e2261SMartin Matuska * The contents of this file are subject to the terms of the 51f1e2261SMartin Matuska * Common Development and Distribution License (the "License"). 61f1e2261SMartin Matuska * You may not use this file except in compliance with the License. 71f1e2261SMartin Matuska * 81f1e2261SMartin Matuska * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 101f1e2261SMartin Matuska * See the License for the specific language governing permissions 111f1e2261SMartin Matuska * and limitations under the License. 121f1e2261SMartin Matuska * 131f1e2261SMartin Matuska * When distributing Covered Code, include this CDDL HEADER in each 141f1e2261SMartin Matuska * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151f1e2261SMartin Matuska * If applicable, add the following below this CDDL HEADER, with the 161f1e2261SMartin Matuska * fields enclosed by brackets "[]" replaced with your own identifying 171f1e2261SMartin Matuska * information: Portions Copyright [yyyy] [name of copyright owner] 181f1e2261SMartin Matuska * 191f1e2261SMartin Matuska * CDDL HEADER END 201f1e2261SMartin Matuska */ 211f1e2261SMartin Matuska 221f1e2261SMartin Matuska /* 23*271171e0SMartin Matuska * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> 241f1e2261SMartin Matuska */ 251f1e2261SMartin Matuska 261f1e2261SMartin Matuska #include <sys/types.h> 271f1e2261SMartin Matuska #include <sys/spa.h> 281f1e2261SMartin Matuska #include <sys/zio_checksum.h> 291f1e2261SMartin Matuska #include <sys/zfs_context.h> 301f1e2261SMartin Matuska #include <sys/zfs_chksum.h> 311f1e2261SMartin Matuska 321f1e2261SMartin Matuska #include <sys/blake3.h> 331f1e2261SMartin Matuska 34*271171e0SMartin Matuska /* limit benchmarking to max 256KiB, when EdonR is slower then this: */ 35*271171e0SMartin Matuska #define LIMIT_PERF_MBS 300 361f1e2261SMartin Matuska 371f1e2261SMartin Matuska typedef struct { 381f1e2261SMartin Matuska const char *name; 391f1e2261SMartin Matuska const char *impl; 401f1e2261SMartin Matuska uint64_t bs1k; 411f1e2261SMartin Matuska uint64_t bs4k; 421f1e2261SMartin Matuska uint64_t bs16k; 431f1e2261SMartin Matuska uint64_t bs64k; 441f1e2261SMartin Matuska uint64_t bs256k; 451f1e2261SMartin Matuska uint64_t bs1m; 461f1e2261SMartin Matuska uint64_t bs4m; 47*271171e0SMartin Matuska uint64_t bs16m; 481f1e2261SMartin Matuska zio_cksum_salt_t salt; 491f1e2261SMartin Matuska zio_checksum_t *(func); 501f1e2261SMartin Matuska zio_checksum_tmpl_init_t *(init); 511f1e2261SMartin Matuska zio_checksum_tmpl_free_t *(free); 521f1e2261SMartin Matuska } chksum_stat_t; 531f1e2261SMartin Matuska 541f1e2261SMartin Matuska static chksum_stat_t *chksum_stat_data = 0; 55*271171e0SMartin Matuska static int chksum_stat_cnt = 0; 56*271171e0SMartin Matuska static kstat_t *chksum_kstat = NULL; 571f1e2261SMartin Matuska 581f1e2261SMartin Matuska /* 591f1e2261SMartin Matuska * i3-1005G1 test output: 601f1e2261SMartin Matuska * 611f1e2261SMartin Matuska * implementation 1k 4k 16k 64k 256k 1m 4m 621f1e2261SMartin Matuska * fletcher-4 5421 15001 26468 32555 34720 32801 18847 631f1e2261SMartin Matuska * edonr-generic 1196 1602 1761 1749 1762 1759 1751 641f1e2261SMartin Matuska * skein-generic 546 591 608 615 619 612 616 651f1e2261SMartin Matuska * sha256-generic 246 270 274 274 277 275 276 661f1e2261SMartin Matuska * sha256-avx 262 296 304 307 307 307 306 671f1e2261SMartin Matuska * sha256-sha-ni 769 1072 1172 1220 1219 1232 1228 681f1e2261SMartin Matuska * sha256-openssl 240 300 316 314 304 285 276 691f1e2261SMartin Matuska * sha512-generic 333 374 385 392 391 393 392 701f1e2261SMartin Matuska * sha512-openssl 353 441 467 476 472 467 426 711f1e2261SMartin Matuska * sha512-avx 362 444 473 475 479 476 478 721f1e2261SMartin Matuska * sha512-avx2 394 500 530 538 543 545 542 731f1e2261SMartin Matuska * blake3-generic 308 313 313 313 312 313 312 741f1e2261SMartin Matuska * blake3-sse2 402 1289 1423 1446 1432 1458 1413 751f1e2261SMartin Matuska * blake3-sse41 427 1470 1625 1704 1679 1607 1629 761f1e2261SMartin Matuska * blake3-avx2 428 1920 3095 3343 3356 3318 3204 771f1e2261SMartin Matuska * blake3-avx512 473 2687 4905 5836 5844 5643 5374 781f1e2261SMartin Matuska */ 791f1e2261SMartin Matuska static int 80*271171e0SMartin Matuska chksum_kstat_headers(char *buf, size_t size) 811f1e2261SMartin Matuska { 821f1e2261SMartin Matuska ssize_t off = 0; 831f1e2261SMartin Matuska 841f1e2261SMartin Matuska off += snprintf(buf + off, size, "%-23s", "implementation"); 851f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8s", "1k"); 861f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8s", "4k"); 871f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8s", "16k"); 881f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8s", "64k"); 891f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8s", "256k"); 901f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8s", "1m"); 91*271171e0SMartin Matuska off += snprintf(buf + off, size - off, "%8s", "4m"); 92*271171e0SMartin Matuska (void) snprintf(buf + off, size - off, "%8s\n", "16m"); 931f1e2261SMartin Matuska 941f1e2261SMartin Matuska return (0); 951f1e2261SMartin Matuska } 961f1e2261SMartin Matuska 971f1e2261SMartin Matuska static int 98*271171e0SMartin Matuska chksum_kstat_data(char *buf, size_t size, void *data) 991f1e2261SMartin Matuska { 1001f1e2261SMartin Matuska chksum_stat_t *cs; 1011f1e2261SMartin Matuska ssize_t off = 0; 1021f1e2261SMartin Matuska char b[24]; 1031f1e2261SMartin Matuska 1041f1e2261SMartin Matuska cs = (chksum_stat_t *)data; 1051f1e2261SMartin Matuska snprintf(b, 23, "%s-%s", cs->name, cs->impl); 1061f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%-23s", b); 1071f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8llu", 1081f1e2261SMartin Matuska (u_longlong_t)cs->bs1k); 1091f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8llu", 1101f1e2261SMartin Matuska (u_longlong_t)cs->bs4k); 1111f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8llu", 1121f1e2261SMartin Matuska (u_longlong_t)cs->bs16k); 1131f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8llu", 1141f1e2261SMartin Matuska (u_longlong_t)cs->bs64k); 1151f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8llu", 1161f1e2261SMartin Matuska (u_longlong_t)cs->bs256k); 1171f1e2261SMartin Matuska off += snprintf(buf + off, size - off, "%8llu", 1181f1e2261SMartin Matuska (u_longlong_t)cs->bs1m); 119*271171e0SMartin Matuska off += snprintf(buf + off, size - off, "%8llu", 1201f1e2261SMartin Matuska (u_longlong_t)cs->bs4m); 121*271171e0SMartin Matuska (void) snprintf(buf + off, size - off, "%8llu\n", 122*271171e0SMartin Matuska (u_longlong_t)cs->bs16m); 1231f1e2261SMartin Matuska 1241f1e2261SMartin Matuska return (0); 1251f1e2261SMartin Matuska } 1261f1e2261SMartin Matuska 1271f1e2261SMartin Matuska static void * 128*271171e0SMartin Matuska chksum_kstat_addr(kstat_t *ksp, loff_t n) 1291f1e2261SMartin Matuska { 1301f1e2261SMartin Matuska if (n < chksum_stat_cnt) 1311f1e2261SMartin Matuska ksp->ks_private = (void *)(chksum_stat_data + n); 1321f1e2261SMartin Matuska else 1331f1e2261SMartin Matuska ksp->ks_private = NULL; 1341f1e2261SMartin Matuska 1351f1e2261SMartin Matuska return (ksp->ks_private); 1361f1e2261SMartin Matuska } 1371f1e2261SMartin Matuska 1381f1e2261SMartin Matuska static void 1391f1e2261SMartin Matuska chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round, 1401f1e2261SMartin Matuska uint64_t *result) 1411f1e2261SMartin Matuska { 1421f1e2261SMartin Matuska hrtime_t start; 1431f1e2261SMartin Matuska uint64_t run_bw, run_time_ns, run_count = 0, size = 0; 1441f1e2261SMartin Matuska uint32_t l, loops = 0; 1451f1e2261SMartin Matuska zio_cksum_t zcp; 1461f1e2261SMartin Matuska 1471f1e2261SMartin Matuska switch (round) { 1481f1e2261SMartin Matuska case 1: /* 1k */ 1491f1e2261SMartin Matuska size = 1<<10; loops = 128; break; 1501f1e2261SMartin Matuska case 2: /* 2k */ 1511f1e2261SMartin Matuska size = 1<<12; loops = 64; break; 1521f1e2261SMartin Matuska case 3: /* 4k */ 1531f1e2261SMartin Matuska size = 1<<14; loops = 32; break; 1541f1e2261SMartin Matuska case 4: /* 16k */ 1551f1e2261SMartin Matuska size = 1<<16; loops = 16; break; 1561f1e2261SMartin Matuska case 5: /* 256k */ 1571f1e2261SMartin Matuska size = 1<<18; loops = 8; break; 1581f1e2261SMartin Matuska case 6: /* 1m */ 1591f1e2261SMartin Matuska size = 1<<20; loops = 4; break; 1601f1e2261SMartin Matuska case 7: /* 4m */ 1611f1e2261SMartin Matuska size = 1<<22; loops = 1; break; 162*271171e0SMartin Matuska case 8: /* 16m */ 163*271171e0SMartin Matuska size = 1<<24; loops = 1; break; 1641f1e2261SMartin Matuska } 1651f1e2261SMartin Matuska 1661f1e2261SMartin Matuska kpreempt_disable(); 1671f1e2261SMartin Matuska start = gethrtime(); 1681f1e2261SMartin Matuska do { 1691f1e2261SMartin Matuska for (l = 0; l < loops; l++, run_count++) 1701f1e2261SMartin Matuska cs->func(abd, size, ctx, &zcp); 1711f1e2261SMartin Matuska 1721f1e2261SMartin Matuska run_time_ns = gethrtime() - start; 1731f1e2261SMartin Matuska } while (run_time_ns < MSEC2NSEC(1)); 1741f1e2261SMartin Matuska kpreempt_enable(); 1751f1e2261SMartin Matuska 1761f1e2261SMartin Matuska run_bw = size * run_count * NANOSEC; 1771f1e2261SMartin Matuska run_bw /= run_time_ns; /* B/s */ 1781f1e2261SMartin Matuska *result = run_bw/1024/1024; /* MiB/s */ 1791f1e2261SMartin Matuska } 1801f1e2261SMartin Matuska 181*271171e0SMartin Matuska #define LIMIT_INIT 0 182*271171e0SMartin Matuska #define LIMIT_NEEDED 1 183*271171e0SMartin Matuska #define LIMIT_NOLIMIT 2 184*271171e0SMartin Matuska 1851f1e2261SMartin Matuska static void 1861f1e2261SMartin Matuska chksum_benchit(chksum_stat_t *cs) 1871f1e2261SMartin Matuska { 1881f1e2261SMartin Matuska abd_t *abd; 1891f1e2261SMartin Matuska void *ctx = 0; 1901f1e2261SMartin Matuska void *salt = &cs->salt.zcs_bytes; 191*271171e0SMartin Matuska static int chksum_stat_limit = LIMIT_INIT; 1921f1e2261SMartin Matuska 1931f1e2261SMartin Matuska memset(salt, 0, sizeof (cs->salt.zcs_bytes)); 194*271171e0SMartin Matuska if (cs->init) 1951f1e2261SMartin Matuska ctx = cs->init(&cs->salt); 1961f1e2261SMartin Matuska 197*271171e0SMartin Matuska /* allocate test memory via abd linear interface */ 198*271171e0SMartin Matuska abd = abd_alloc_linear(1<<20, B_FALSE); 1991f1e2261SMartin Matuska chksum_run(cs, abd, ctx, 1, &cs->bs1k); 2001f1e2261SMartin Matuska chksum_run(cs, abd, ctx, 2, &cs->bs4k); 2011f1e2261SMartin Matuska chksum_run(cs, abd, ctx, 3, &cs->bs16k); 2021f1e2261SMartin Matuska chksum_run(cs, abd, ctx, 4, &cs->bs64k); 2031f1e2261SMartin Matuska chksum_run(cs, abd, ctx, 5, &cs->bs256k); 204*271171e0SMartin Matuska 205*271171e0SMartin Matuska /* check if we ran on a slow cpu */ 206*271171e0SMartin Matuska if (chksum_stat_limit == LIMIT_INIT) { 207*271171e0SMartin Matuska if (cs->bs1k < LIMIT_PERF_MBS) { 208*271171e0SMartin Matuska chksum_stat_limit = LIMIT_NEEDED; 209*271171e0SMartin Matuska } else { 210*271171e0SMartin Matuska chksum_stat_limit = LIMIT_NOLIMIT; 211*271171e0SMartin Matuska } 212*271171e0SMartin Matuska } 213*271171e0SMartin Matuska 214*271171e0SMartin Matuska /* skip benchmarks >= 1MiB when the CPU is to slow */ 215*271171e0SMartin Matuska if (chksum_stat_limit == LIMIT_NEEDED) 216*271171e0SMartin Matuska goto abort; 217*271171e0SMartin Matuska 2181f1e2261SMartin Matuska chksum_run(cs, abd, ctx, 6, &cs->bs1m); 219*271171e0SMartin Matuska abd_free(abd); 220*271171e0SMartin Matuska 221*271171e0SMartin Matuska /* allocate test memory via abd non linear interface */ 222*271171e0SMartin Matuska abd = abd_alloc(1<<24, B_FALSE); 2231f1e2261SMartin Matuska chksum_run(cs, abd, ctx, 7, &cs->bs4m); 224*271171e0SMartin Matuska chksum_run(cs, abd, ctx, 8, &cs->bs16m); 225*271171e0SMartin Matuska 226*271171e0SMartin Matuska abort: 227*271171e0SMartin Matuska abd_free(abd); 2281f1e2261SMartin Matuska 2291f1e2261SMartin Matuska /* free up temp memory */ 230*271171e0SMartin Matuska if (cs->free) 2311f1e2261SMartin Matuska cs->free(ctx); 2321f1e2261SMartin Matuska } 2331f1e2261SMartin Matuska 2341f1e2261SMartin Matuska /* 2351f1e2261SMartin Matuska * Initialize and benchmark all supported implementations. 2361f1e2261SMartin Matuska */ 2371f1e2261SMartin Matuska static void 2381f1e2261SMartin Matuska chksum_benchmark(void) 2391f1e2261SMartin Matuska { 2401f1e2261SMartin Matuska 2411f1e2261SMartin Matuska #ifndef _KERNEL 2421f1e2261SMartin Matuska /* we need the benchmark only for the kernel module */ 2431f1e2261SMartin Matuska return; 2441f1e2261SMartin Matuska #endif 2451f1e2261SMartin Matuska 2461f1e2261SMartin Matuska chksum_stat_t *cs; 2471f1e2261SMartin Matuska int cbid = 0, id; 2481f1e2261SMartin Matuska uint64_t max = 0; 2491f1e2261SMartin Matuska 2501f1e2261SMartin Matuska /* space for the benchmark times */ 2511f1e2261SMartin Matuska chksum_stat_cnt = 4; 2521f1e2261SMartin Matuska chksum_stat_cnt += blake3_get_impl_count(); 2531f1e2261SMartin Matuska chksum_stat_data = (chksum_stat_t *)kmem_zalloc( 2541f1e2261SMartin Matuska sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP); 2551f1e2261SMartin Matuska 256*271171e0SMartin Matuska /* edonr - needs to be the first one here (slow CPU check) */ 2571f1e2261SMartin Matuska cs = &chksum_stat_data[cbid++]; 2581f1e2261SMartin Matuska cs->init = abd_checksum_edonr_tmpl_init; 2591f1e2261SMartin Matuska cs->func = abd_checksum_edonr_native; 2601f1e2261SMartin Matuska cs->free = abd_checksum_edonr_tmpl_free; 2611f1e2261SMartin Matuska cs->name = "edonr"; 2621f1e2261SMartin Matuska cs->impl = "generic"; 2631f1e2261SMartin Matuska chksum_benchit(cs); 2641f1e2261SMartin Matuska 2651f1e2261SMartin Matuska /* skein */ 2661f1e2261SMartin Matuska cs = &chksum_stat_data[cbid++]; 2671f1e2261SMartin Matuska cs->init = abd_checksum_skein_tmpl_init; 2681f1e2261SMartin Matuska cs->func = abd_checksum_skein_native; 2691f1e2261SMartin Matuska cs->free = abd_checksum_skein_tmpl_free; 2701f1e2261SMartin Matuska cs->name = "skein"; 2711f1e2261SMartin Matuska cs->impl = "generic"; 2721f1e2261SMartin Matuska chksum_benchit(cs); 2731f1e2261SMartin Matuska 2741f1e2261SMartin Matuska /* sha256 */ 2751f1e2261SMartin Matuska cs = &chksum_stat_data[cbid++]; 2761f1e2261SMartin Matuska cs->init = 0; 2771f1e2261SMartin Matuska cs->func = abd_checksum_SHA256; 2781f1e2261SMartin Matuska cs->free = 0; 2791f1e2261SMartin Matuska cs->name = "sha256"; 2801f1e2261SMartin Matuska cs->impl = "generic"; 2811f1e2261SMartin Matuska chksum_benchit(cs); 2821f1e2261SMartin Matuska 2831f1e2261SMartin Matuska /* sha512 */ 2841f1e2261SMartin Matuska cs = &chksum_stat_data[cbid++]; 2851f1e2261SMartin Matuska cs->init = 0; 2861f1e2261SMartin Matuska cs->func = abd_checksum_SHA512_native; 2871f1e2261SMartin Matuska cs->free = 0; 2881f1e2261SMartin Matuska cs->name = "sha512"; 2891f1e2261SMartin Matuska cs->impl = "generic"; 2901f1e2261SMartin Matuska chksum_benchit(cs); 2911f1e2261SMartin Matuska 2921f1e2261SMartin Matuska /* blake3 */ 2931f1e2261SMartin Matuska for (id = 0; id < blake3_get_impl_count(); id++) { 2941f1e2261SMartin Matuska blake3_set_impl_id(id); 2951f1e2261SMartin Matuska cs = &chksum_stat_data[cbid++]; 2961f1e2261SMartin Matuska cs->init = abd_checksum_blake3_tmpl_init; 2971f1e2261SMartin Matuska cs->func = abd_checksum_blake3_native; 2981f1e2261SMartin Matuska cs->free = abd_checksum_blake3_tmpl_free; 2991f1e2261SMartin Matuska cs->name = "blake3"; 3001f1e2261SMartin Matuska cs->impl = blake3_get_impl_name(); 3011f1e2261SMartin Matuska chksum_benchit(cs); 3021f1e2261SMartin Matuska if (cs->bs256k > max) { 3031f1e2261SMartin Matuska max = cs->bs256k; 3041f1e2261SMartin Matuska blake3_set_impl_fastest(id); 3051f1e2261SMartin Matuska } 3061f1e2261SMartin Matuska } 3071f1e2261SMartin Matuska } 3081f1e2261SMartin Matuska 3091f1e2261SMartin Matuska void 3101f1e2261SMartin Matuska chksum_init(void) 3111f1e2261SMartin Matuska { 3121f1e2261SMartin Matuska #ifdef _KERNEL 3131f1e2261SMartin Matuska blake3_per_cpu_ctx_init(); 3141f1e2261SMartin Matuska #endif 3151f1e2261SMartin Matuska 3161f1e2261SMartin Matuska /* Benchmark supported implementations */ 3171f1e2261SMartin Matuska chksum_benchmark(); 3181f1e2261SMartin Matuska 3191f1e2261SMartin Matuska /* Install kstats for all implementations */ 3201f1e2261SMartin Matuska chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc", 3211f1e2261SMartin Matuska KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); 3221f1e2261SMartin Matuska 3231f1e2261SMartin Matuska if (chksum_kstat != NULL) { 3241f1e2261SMartin Matuska chksum_kstat->ks_data = NULL; 3251f1e2261SMartin Matuska chksum_kstat->ks_ndata = UINT32_MAX; 3261f1e2261SMartin Matuska kstat_set_raw_ops(chksum_kstat, 327*271171e0SMartin Matuska chksum_kstat_headers, 328*271171e0SMartin Matuska chksum_kstat_data, 329*271171e0SMartin Matuska chksum_kstat_addr); 3301f1e2261SMartin Matuska kstat_install(chksum_kstat); 3311f1e2261SMartin Matuska } 3321f1e2261SMartin Matuska 3331f1e2261SMartin Matuska /* setup implementations */ 3341f1e2261SMartin Matuska blake3_setup_impl(); 3351f1e2261SMartin Matuska } 3361f1e2261SMartin Matuska 3371f1e2261SMartin Matuska void 3381f1e2261SMartin Matuska chksum_fini(void) 3391f1e2261SMartin Matuska { 3401f1e2261SMartin Matuska if (chksum_kstat != NULL) { 3411f1e2261SMartin Matuska kstat_delete(chksum_kstat); 3421f1e2261SMartin Matuska chksum_kstat = NULL; 3431f1e2261SMartin Matuska } 3441f1e2261SMartin Matuska 3451f1e2261SMartin Matuska if (chksum_stat_cnt) { 3461f1e2261SMartin Matuska kmem_free(chksum_stat_data, 3471f1e2261SMartin Matuska sizeof (chksum_stat_t) * chksum_stat_cnt); 3481f1e2261SMartin Matuska chksum_stat_cnt = 0; 3491f1e2261SMartin Matuska chksum_stat_data = 0; 3501f1e2261SMartin Matuska } 3511f1e2261SMartin Matuska 3521f1e2261SMartin Matuska #ifdef _KERNEL 3531f1e2261SMartin Matuska blake3_per_cpu_ctx_fini(); 3541f1e2261SMartin Matuska #endif 3551f1e2261SMartin Matuska } 356