1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> 24 */ 25 26 #include <sys/zfs_context.h> 27 #include <sys/zio_checksum.h> 28 29 #include "blake3_impl.h" 30 31 static const blake3_ops_t *const blake3_impls[] = { 32 &blake3_generic_impl, 33 #if defined(__aarch64__) || \ 34 (defined(__x86_64) && defined(HAVE_SSE2)) || \ 35 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) 36 &blake3_sse2_impl, 37 #endif 38 #if defined(__aarch64__) || \ 39 (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ 40 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) 41 &blake3_sse41_impl, 42 #endif 43 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) 44 &blake3_avx2_impl, 45 #endif 46 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) 47 &blake3_avx512_impl, 48 #endif 49 }; 50 51 /* Select BLAKE3 implementation */ 52 #define IMPL_FASTEST (UINT32_MAX) 53 #define IMPL_CYCLE (UINT32_MAX - 1) 54 55 #define IMPL_READ(i) (*(volatile uint32_t *) &(i)) 56 57 /* Indicate that benchmark has been done */ 58 static boolean_t blake3_initialized = B_FALSE; 59 60 /* Implementation that contains the fastest methods */ 61 static blake3_ops_t blake3_fastest_impl = { 62 .name = "fastest" 63 }; 64 65 /* Hold all supported implementations */ 66 static const blake3_ops_t *blake3_supp_impls[ARRAY_SIZE(blake3_impls)]; 67 static uint32_t blake3_supp_impls_cnt = 0; 68 69 /* Currently selected implementation */ 70 static uint32_t blake3_impl_chosen = IMPL_FASTEST; 71 72 static struct blake3_impl_selector { 73 const char *name; 74 uint32_t sel; 75 } blake3_impl_selectors[] = { 76 { "cycle", IMPL_CYCLE }, 77 { "fastest", IMPL_FASTEST } 78 }; 79 80 /* check the supported implementations */ 81 static void blake3_impl_init(void) 82 { 83 int i, c; 84 85 /* init only once */ 86 if (likely(blake3_initialized)) 87 return; 88 89 /* move supported implementations into blake3_supp_impls */ 90 for (i = 0, c = 0; i < ARRAY_SIZE(blake3_impls); i++) { 91 const blake3_ops_t *impl = blake3_impls[i]; 92 93 if (impl->is_supported && impl->is_supported()) 94 blake3_supp_impls[c++] = impl; 95 } 96 blake3_supp_impls_cnt = c; 97 98 /* first init generic impl, may be changed via set_fastest() */ 99 memcpy(&blake3_fastest_impl, blake3_impls[0], 100 sizeof (blake3_fastest_impl)); 101 blake3_initialized = B_TRUE; 102 } 103 104 /* get number of supported implementations */ 105 uint32_t 106 blake3_impl_getcnt(void) 107 { 108 blake3_impl_init(); 109 return (blake3_supp_impls_cnt); 110 } 111 112 /* get id of selected implementation */ 113 uint32_t 114 blake3_impl_getid(void) 115 { 116 return (IMPL_READ(blake3_impl_chosen)); 117 } 118 119 /* get name of selected implementation */ 120 const char * 121 blake3_impl_getname(void) 122 { 123 uint32_t impl = IMPL_READ(blake3_impl_chosen); 124 125 blake3_impl_init(); 126 switch (impl) { 127 case IMPL_FASTEST: 128 return ("fastest"); 129 case IMPL_CYCLE: 130 return ("cycle"); 131 default: 132 return (blake3_supp_impls[impl]->name); 133 } 134 } 135 136 /* setup id as fastest implementation */ 137 void 138 blake3_impl_set_fastest(uint32_t id) 139 { 140 /* setup fastest impl */ 141 memcpy(&blake3_fastest_impl, blake3_supp_impls[id], 142 sizeof (blake3_fastest_impl)); 143 } 144 145 /* set implementation by id */ 146 void 147 blake3_impl_setid(uint32_t id) 148 { 149 blake3_impl_init(); 150 switch (id) { 151 case IMPL_FASTEST: 152 atomic_swap_32(&blake3_impl_chosen, IMPL_FASTEST); 153 break; 154 case IMPL_CYCLE: 155 atomic_swap_32(&blake3_impl_chosen, IMPL_CYCLE); 156 break; 157 default: 158 ASSERT3U(id, >=, 0); 159 ASSERT3U(id, <, blake3_supp_impls_cnt); 160 atomic_swap_32(&blake3_impl_chosen, id); 161 break; 162 } 163 } 164 165 /* set implementation by name */ 166 int 167 blake3_impl_setname(const char *val) 168 { 169 uint32_t impl = IMPL_READ(blake3_impl_chosen); 170 size_t val_len; 171 int i, err = -EINVAL; 172 173 blake3_impl_init(); 174 val_len = strlen(val); 175 while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */ 176 val_len--; 177 178 /* check mandatory implementations */ 179 for (i = 0; i < ARRAY_SIZE(blake3_impl_selectors); i++) { 180 const char *name = blake3_impl_selectors[i].name; 181 182 if (val_len == strlen(name) && 183 strncmp(val, name, val_len) == 0) { 184 impl = blake3_impl_selectors[i].sel; 185 err = 0; 186 break; 187 } 188 } 189 190 if (err != 0 && blake3_initialized) { 191 /* check all supported implementations */ 192 for (i = 0; i < blake3_supp_impls_cnt; i++) { 193 const char *name = blake3_supp_impls[i]->name; 194 195 if (val_len == strlen(name) && 196 strncmp(val, name, val_len) == 0) { 197 impl = i; 198 err = 0; 199 break; 200 } 201 } 202 } 203 204 if (err == 0) { 205 atomic_swap_32(&blake3_impl_chosen, impl); 206 } 207 208 return (err); 209 } 210 211 const blake3_ops_t * 212 blake3_impl_get_ops(void) 213 { 214 const blake3_ops_t *ops = NULL; 215 uint32_t impl = IMPL_READ(blake3_impl_chosen); 216 217 blake3_impl_init(); 218 switch (impl) { 219 case IMPL_FASTEST: 220 ASSERT(blake3_initialized); 221 ops = &blake3_fastest_impl; 222 break; 223 case IMPL_CYCLE: 224 /* Cycle through supported implementations */ 225 ASSERT(blake3_initialized); 226 ASSERT3U(blake3_supp_impls_cnt, >, 0); 227 static uint32_t cycle_count = 0; 228 uint32_t idx = (++cycle_count) % blake3_supp_impls_cnt; 229 ops = blake3_supp_impls[idx]; 230 break; 231 default: 232 ASSERT3U(blake3_supp_impls_cnt, >, 0); 233 ASSERT3U(impl, <, blake3_supp_impls_cnt); 234 ops = blake3_supp_impls[impl]; 235 break; 236 } 237 238 ASSERT3P(ops, !=, NULL); 239 return (ops); 240 } 241 242 #if defined(_KERNEL) 243 244 void **blake3_per_cpu_ctx; 245 246 void 247 blake3_per_cpu_ctx_init(void) 248 { 249 /* 250 * Create "The Godfather" ptr to hold all blake3 ctx 251 */ 252 blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); 253 for (int i = 0; i < max_ncpus; i++) { 254 blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX), 255 KM_SLEEP); 256 } 257 258 /* init once in kernel mode */ 259 blake3_impl_init(); 260 } 261 262 void 263 blake3_per_cpu_ctx_fini(void) 264 { 265 for (int i = 0; i < max_ncpus; i++) { 266 memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX)); 267 kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX)); 268 } 269 memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *)); 270 kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *)); 271 } 272 273 #define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ") 274 275 #if defined(__linux__) 276 277 static int 278 blake3_param_get(char *buffer, zfs_kernel_param_t *unused) 279 { 280 const uint32_t impl = IMPL_READ(blake3_impl_chosen); 281 char *fmt; 282 int cnt = 0; 283 284 /* cycling */ 285 fmt = IMPL_FMT(impl, IMPL_CYCLE); 286 cnt += sprintf(buffer + cnt, fmt, "cycle"); 287 288 /* list fastest */ 289 fmt = IMPL_FMT(impl, IMPL_FASTEST); 290 cnt += sprintf(buffer + cnt, fmt, "fastest"); 291 292 /* list all supported implementations */ 293 for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) { 294 fmt = IMPL_FMT(impl, i); 295 cnt += sprintf(buffer + cnt, fmt, 296 blake3_supp_impls[i]->name); 297 } 298 299 return (cnt); 300 } 301 302 static int 303 blake3_param_set(const char *val, zfs_kernel_param_t *unused) 304 { 305 (void) unused; 306 return (blake3_impl_setname(val)); 307 } 308 309 #elif defined(__FreeBSD__) 310 311 #include <sys/sbuf.h> 312 313 static int 314 blake3_param(ZFS_MODULE_PARAM_ARGS) 315 { 316 int err; 317 318 if (req->newptr == NULL) { 319 const uint32_t impl = IMPL_READ(blake3_impl_chosen); 320 const int init_buflen = 64; 321 const char *fmt; 322 struct sbuf *s; 323 324 s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); 325 326 /* cycling */ 327 fmt = IMPL_FMT(impl, IMPL_CYCLE); 328 (void) sbuf_printf(s, fmt, "cycle"); 329 330 /* list fastest */ 331 fmt = IMPL_FMT(impl, IMPL_FASTEST); 332 (void) sbuf_printf(s, fmt, "fastest"); 333 334 /* list all supported implementations */ 335 for (uint32_t i = 0; i < blake3_supp_impls_cnt; ++i) { 336 fmt = IMPL_FMT(impl, i); 337 (void) sbuf_printf(s, fmt, blake3_supp_impls[i]->name); 338 } 339 340 err = sbuf_finish(s); 341 sbuf_delete(s); 342 343 return (err); 344 } 345 346 char buf[16]; 347 348 err = sysctl_handle_string(oidp, buf, sizeof (buf), req); 349 if (err) { 350 return (err); 351 } 352 353 return (-blake3_impl_setname(buf)); 354 } 355 #endif 356 357 #undef IMPL_FMT 358 359 ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl, 360 blake3_param_set, blake3_param_get, ZMOD_RW, \ 361 "Select BLAKE3 implementation."); 362 #endif 363