1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> 24 */ 25 26 #include <sys/zfs_context.h> 27 #include <sys/zio_checksum.h> 28 29 #include "blake3_impl.h" 30 31 static const blake3_ops_t *const blake3_impls[] = { 32 &blake3_generic_impl, 33 #if defined(__aarch64__) || \ 34 (defined(__x86_64) && defined(HAVE_SSE2)) || \ 35 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) 36 &blake3_sse2_impl, 37 #endif 38 #if defined(__aarch64__) || \ 39 (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ 40 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) 41 &blake3_sse41_impl, 42 #endif 43 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) 44 &blake3_avx2_impl, 45 #endif 46 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) 47 &blake3_avx512_impl, 48 #endif 49 }; 50 51 /* Select BLAKE3 implementation */ 52 #define IMPL_FASTEST (UINT32_MAX) 53 #define IMPL_CYCLE (UINT32_MAX - 1) 54 55 #define IMPL_READ(i) (*(volatile uint32_t *) &(i)) 56 57 /* Indicate that benchmark has been done */ 58 static boolean_t blake3_initialized = B_FALSE; 59 60 /* Implementation that contains the fastest methods */ 61 static blake3_ops_t blake3_fastest_impl = { 62 .name = "fastest" 63 }; 64 65 /* Hold all supported implementations */ 66 static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)]; 67 static uint32_t blake3_supp_impls_cnt = 0; 68 69 /* Currently selected implementation */ 70 static uint32_t blake3_impl_chosen = IMPL_FASTEST; 71 72 static struct blake3_impl_selector { 73 const char *name; 74 uint32_t sel; 75 } blake3_impl_selectors[] = { 76 { "cycle", IMPL_CYCLE }, 77 { "fastest", IMPL_FASTEST } 78 }; 79 80 /* check the supported implementations */ 81 static void blake3_impl_init(void) 82 { 83 int i, c; 84 85 /* init only once */ 86 if (likely(blake3_initialized)) 87 return; 88 89 /* move supported implementations into blake3_supp_impls */ 90 for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) { 91 const blake3_ops_t *impl = blake3_impls[i]; 92 93 if (impl->is_supported && impl->is_supported()) 94 blake3_supp_impls[c++] = impl; 95 } 96 blake3_supp_impls_cnt = c; 97 98 /* first init generic impl, may be changed via set_fastest() */ 99 memcpy(&blake3_fastest_impl, blake3_impls[0], 100 sizeof (blake3_fastest_impl)); 101 blake3_initialized = B_TRUE; 102 } 103 104 /* get number of supported implementations */ 105 uint32_t 106 blake3_impl_getcnt(void) 107 { 108 blake3_impl_init(); 109 return (blake3_supp_impls_cnt); 110 } 111 112 /* get id of selected implementation */ 113 uint32_t 114 blake3_impl_getid(void) 115 { 116 return (IMPL_READ(blake3_impl_chosen)); 117 } 118 119 /* get name of selected implementation */ 120 const char * 121 blake3_impl_getname(void) 122 { 123 uint32_t impl = IMPL_READ(blake3_impl_chosen); 124 125 blake3_impl_init(); 126 switch (impl) { 127 case IMPL_FASTEST: 128 return ("fastest"); 129 case IMPL_CYCLE: 130 return ("cycle"); 131 default: 132 return (blake3_supp_impls[impl]->name); 133 } 134 } 135 136 /* setup id as fastest implementation */ 137 void 138 blake3_impl_set_fastest(uint32_t id) 139 { 140 /* setup fastest impl */ 141 memcpy(&blake3_fastest_impl, blake3_supp_impls[id], 142 sizeof (blake3_fastest_impl)); 143 } 144 145 /* set implementation by id */ 146 void 147 blake3_impl_setid(uint32_t id) 148 { 149 blake3_impl_init(); 150 switch (id) { 151 case IMPL_FASTEST: 152 atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST); 153 break; 154 case IMPL_CYCLE: 155 atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE); 156 break; 157 default: 158 ASSERT3U(id, <, blake3_supp_impls_cnt); 159 atomic_swap_32(&blake3_impl_chosen, id); 160 break; 161 } 162 } 163 164 /* set implementation by name */ 165 int 166 blake3_impl_setname(const char *val) 167 { 168 uint32_t impl = IMPL_READ(blake3_impl_chosen); 169 size_t val_len; 170 int i, err = -EINVAL; 171 172 blake3_impl_init(); 173 val_len = strlen(val); 174 while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */ 175 val_len--; 176 177 /* check mandatory implementations */ 178 for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) { 179 const char *name = blake3_impl_selectors[i].name; 180 181 if (val_len == strlen(name) && 182 strncmp(val, name, val_len) == 0) { 183 impl = blake3_impl_selectors[i].sel; 184 err = 0; 185 break; 186 } 187 } 188 189 if (err != 0 && blake3_initialized) { 190 /* check all supported implementations */ 191 for (i = 0; i < blake3_supp_impls_cnt; i++) { 192 const char *name = blake3_supp_impls[i]->name; 193 194 if (val_len == strlen(name) && 195 strncmp(val, name, val_len) == 0) { 196 impl = i; 197 err = 0; 198 break; 199 } 200 } 201 } 202 203 if (err == 0) { 204 atomic_swap_32(&blake3_impl_chosen, impl); 205 } 206 207 return (err); 208 } 209 210 const blake3_ops_t * 211 blake3_impl_get_ops(void) 212 { 213 const blake3_ops_t *ops = NULL; 214 uint32_t impl = IMPL_READ(blake3_impl_chosen); 215 216 blake3_impl_init(); 217 switch (impl) { 218 case IMPL_FASTEST: 219 ASSERT(blake3_initialized); 220 ops = &blake3_fastest_impl; 221 break; 222 case IMPL_CYCLE: 223 /* Cycle through supported implementations */ 224 ASSERT(blake3_initialized); 225 ASSERT3U(blake3_supp_impls_cnt, >, 0); 226 static uint32_t cycle_count = 0; 227 uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt; 228 ops = blake3_supp_impls[idx]; 229 break; 230 default: 231 ASSERT3U(blake3_supp_impls_cnt, >, 0); 232 ASSERT3U(impl, <, blake3_supp_impls_cnt); 233 ops = blake3_supp_impls[impl]; 234 break; 235 } 236 237 ASSERT3P(ops, !=, NULL); 238 return (ops); 239 } 240 241 #if defined(_KERNEL) 242 243 void **blake3_per_cpu_ctx; 244 245 void 246 blake3_per_cpu_ctx_init(void) 247 { 248 /* 249 * Create "The Godfather" ptr to hold all blake3 ctx 250 */ 251 blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); 252 for (int i = 0; i < max_ncpus; i++) { 253 blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX), 254 KM_SLEEP); 255 } 256 257 /* init once in kernel mode */ 258 blake3_impl_init(); 259 } 260 261 void 262 blake3_per_cpu_ctx_fini(void) 263 { 264 for (int i = 0; i < max_ncpus; i++) { 265 memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX)); 266 kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX)); 267 } 268 memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *)); 269 kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *)); 270 } 271 272 #define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ") 273 274 #if defined(__linux__) 275 276 static int 277 blake3_param_get(char *buffer, zfs_kernel_param_t *unused) 278 { 279 const uint32_t impl = IMPL_READ(blake3_impl_chosen); 280 char *fmt; 281 int cnt = 0; 282 283 /* cycling */ 284 fmt = IMPL_FMT(impl, IMPL_CYCLE); 285 cnt += sprintf(buffer + cnt, fmt, "cycle"); 286 287 /* list fastest */ 288 fmt = IMPL_FMT(impl, IMPL_FASTEST); 289 cnt += sprintf(buffer + cnt, fmt, "fastest"); 290 291 /* list all supported implementations */ 292 for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) { 293 fmt = IMPL_FMT(impl, i); 294 cnt += sprintf(buffer + cnt, fmt, 295 blake3_supp_impls[i]->name); 296 } 297 298 return (cnt); 299 } 300 301 static int 302 blake3_param_set(const char *val, zfs_kernel_param_t *unused) 303 { 304 (void) unused; 305 return (blake3_impl_setname(val)); 306 } 307 308 #elif defined(__FreeBSD__) 309 310 #include <sys/sbuf.h> 311 312 static int 313 blake3_param(ZFS_MODULE_PARAM_ARGS) 314 { 315 int err; 316 317 if (req->newptr == NULL) { 318 const uint32_t impl = IMPL_READ(blake3_impl_chosen); 319 const int init_buflen = 64; 320 const char *fmt; 321 struct sbuf *s; 322 323 s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); 324 325 /* cycling */ 326 fmt = IMPL_FMT(impl, IMPL_CYCLE); 327 (void) sbuf_printf(s, fmt, "cycle"); 328 329 /* list fastest */ 330 fmt = IMPL_FMT(impl, IMPL_FASTEST); 331 (void) sbuf_printf(s, fmt, "fastest"); 332 333 /* list all supported implementations */ 334 for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) { 335 fmt = IMPL_FMT(impl, i); 336 (void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name); 337 } 338 339 err = sbuf_finish(s); 340 sbuf_delete(s); 341 342 return (err); 343 } 344 345 char buf[16]; 346 347 err = sysctl_handle_string(oidp, buf, sizeof (buf), req); 348 if (err) { 349 return (err); 350 } 351 352 return (-blake3_impl_setname(buf)); 353 } 354 #endif 355 356 #undef IMPL_FMT 357 358 ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl, 359 blake3_param_set, blake3_param_get, ZMOD_RW, \ 360 "Select BLAKE3 implementation."); 361 #endif 362