1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> 24 */ 25 26 #include <sys/simd.h> 27 #include <sys/zfs_context.h> 28 #include <sys/zfs_impl.h> 29 #include <sys/blake3.h> 30 31 #include "blake3_impl.h" 32 33 #if !defined(OMIT_SIMD) && (defined(__aarch64__) || \ 34 (defined(__x86_64) && defined(HAVE_SSE2)) || \ 35 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))) 36 #define USE_SIMD 37 #endif 38 39 #ifdef USE_SIMD 40 extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8], 41 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 42 uint64_t counter, uint8_t flags); 43 44 extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8], 45 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 46 uint64_t counter, uint8_t flags, uint8_t out[64]); 47 48 extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs, 49 size_t num_inputs, size_t blocks, const uint32_t key[8], 50 uint64_t counter, boolean_t increment_counter, uint8_t flags, 51 uint8_t flags_start, uint8_t flags_end, uint8_t *out); 52 53 static void blake3_compress_in_place_sse2(uint32_t cv[8], 54 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 55 uint64_t counter, uint8_t flags) { 56 kfpu_begin(); 57 zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter, 58 flags); 59 kfpu_end(); 60 } 61 62 static void blake3_compress_xof_sse2(const uint32_t cv[8], 63 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 64 uint64_t counter, uint8_t flags, uint8_t out[64]) { 65 kfpu_begin(); 66 zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags, 67 out); 68 kfpu_end(); 69 } 70 71 static void blake3_hash_many_sse2(const uint8_t * const *inputs, 72 size_t num_inputs, size_t blocks, const uint32_t key[8], 73 uint64_t counter, boolean_t increment_counter, uint8_t flags, 74 uint8_t flags_start, uint8_t flags_end, uint8_t *out) { 75 kfpu_begin(); 76 zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter, 77 increment_counter, flags, flags_start, flags_end, out); 78 kfpu_end(); 79 } 80 81 static boolean_t blake3_is_sse2_supported(void) 82 { 83 #if defined(__x86_64) 84 return (kfpu_allowed() && zfs_sse2_available()); 85 #elif defined(__PPC64__) 86 return (kfpu_allowed() && zfs_vsx_available()); 87 #else 88 return (kfpu_allowed()); 89 #endif 90 } 91 92 const blake3_ops_t blake3_sse2_impl = { 93 .compress_in_place = blake3_compress_in_place_sse2, 94 .compress_xof = blake3_compress_xof_sse2, 95 .hash_many = blake3_hash_many_sse2, 96 .is_supported = blake3_is_sse2_supported, 97 .degree = 4, 98 .name = "sse2" 99 }; 100 #endif 101 102 #ifdef USE_SIMD 103 104 extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8], 105 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 106 uint64_t counter, uint8_t flags); 107 108 extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8], 109 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 110 uint64_t counter, uint8_t flags, uint8_t out[64]); 111 112 extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs, 113 size_t num_inputs, size_t blocks, const uint32_t key[8], 114 uint64_t counter, boolean_t increment_counter, uint8_t flags, 115 uint8_t flags_start, uint8_t flags_end, uint8_t *out); 116 117 static void blake3_compress_in_place_sse41(uint32_t cv[8], 118 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 119 uint64_t counter, uint8_t flags) { 120 kfpu_begin(); 121 zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter, 122 flags); 123 kfpu_end(); 124 } 125 126 static void blake3_compress_xof_sse41(const uint32_t cv[8], 127 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 128 uint64_t counter, uint8_t flags, uint8_t out[64]) { 129 kfpu_begin(); 130 zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags, 131 out); 132 kfpu_end(); 133 } 134 135 static void blake3_hash_many_sse41(const uint8_t * const *inputs, 136 size_t num_inputs, size_t blocks, const uint32_t key[8], 137 uint64_t counter, boolean_t increment_counter, uint8_t flags, 138 uint8_t flags_start, uint8_t flags_end, uint8_t *out) { 139 kfpu_begin(); 140 zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, 141 increment_counter, flags, flags_start, flags_end, out); 142 kfpu_end(); 143 } 144 145 static boolean_t blake3_is_sse41_supported(void) 146 { 147 #if defined(__x86_64) 148 return (kfpu_allowed() && zfs_sse4_1_available()); 149 #elif defined(__PPC64__) 150 return (kfpu_allowed() && zfs_vsx_available()); 151 #else 152 return (kfpu_allowed()); 153 #endif 154 } 155 156 const blake3_ops_t blake3_sse41_impl = { 157 .compress_in_place = blake3_compress_in_place_sse41, 158 .compress_xof = blake3_compress_xof_sse41, 159 .hash_many = blake3_hash_many_sse41, 160 .is_supported = blake3_is_sse41_supported, 161 .degree = 4, 162 .name = "sse41" 163 }; 164 #endif 165 166 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) 167 extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs, 168 size_t num_inputs, size_t blocks, const uint32_t key[8], 169 uint64_t counter, boolean_t increment_counter, uint8_t flags, 170 uint8_t flags_start, uint8_t flags_end, uint8_t *out); 171 172 static void blake3_hash_many_avx2(const uint8_t * const *inputs, 173 size_t num_inputs, size_t blocks, const uint32_t key[8], 174 uint64_t counter, boolean_t increment_counter, uint8_t flags, 175 uint8_t flags_start, uint8_t flags_end, uint8_t *out) { 176 kfpu_begin(); 177 zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, 178 increment_counter, flags, flags_start, flags_end, out); 179 kfpu_end(); 180 } 181 182 static boolean_t blake3_is_avx2_supported(void) 183 { 184 return (kfpu_allowed() && zfs_sse4_1_available() && 185 zfs_avx2_available()); 186 } 187 188 const blake3_ops_t 189 blake3_avx2_impl = { 190 .compress_in_place = blake3_compress_in_place_sse41, 191 .compress_xof = blake3_compress_xof_sse41, 192 .hash_many = blake3_hash_many_avx2, 193 .is_supported = blake3_is_avx2_supported, 194 .degree = 8, 195 .name = "avx2" 196 }; 197 #endif 198 199 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) 200 extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8], 201 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 202 uint64_t counter, uint8_t flags); 203 204 extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8], 205 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 206 uint64_t counter, uint8_t flags, uint8_t out[64]); 207 208 extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs, 209 size_t num_inputs, size_t blocks, const uint32_t key[8], 210 uint64_t counter, boolean_t increment_counter, uint8_t flags, 211 uint8_t flags_start, uint8_t flags_end, uint8_t *out); 212 213 static void blake3_compress_in_place_avx512(uint32_t cv[8], 214 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 215 uint64_t counter, uint8_t flags) { 216 kfpu_begin(); 217 zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter, 218 flags); 219 kfpu_end(); 220 } 221 222 static void blake3_compress_xof_avx512(const uint32_t cv[8], 223 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 224 uint64_t counter, uint8_t flags, uint8_t out[64]) { 225 kfpu_begin(); 226 zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags, 227 out); 228 kfpu_end(); 229 } 230 231 static void blake3_hash_many_avx512(const uint8_t * const *inputs, 232 size_t num_inputs, size_t blocks, const uint32_t key[8], 233 uint64_t counter, boolean_t increment_counter, uint8_t flags, 234 uint8_t flags_start, uint8_t flags_end, uint8_t *out) { 235 kfpu_begin(); 236 zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, 237 increment_counter, flags, flags_start, flags_end, out); 238 kfpu_end(); 239 } 240 241 static boolean_t blake3_is_avx512_supported(void) 242 { 243 return (kfpu_allowed() && zfs_avx512f_available() && 244 zfs_avx512vl_available()); 245 } 246 247 const blake3_ops_t blake3_avx512_impl = { 248 .compress_in_place = blake3_compress_in_place_avx512, 249 .compress_xof = blake3_compress_xof_avx512, 250 .hash_many = blake3_hash_many_avx512, 251 .is_supported = blake3_is_avx512_supported, 252 .degree = 16, 253 .name = "avx512" 254 }; 255 #endif 256 257 extern const blake3_ops_t blake3_generic_impl; 258 259 static const blake3_ops_t *const blake3_impls[] = { 260 &blake3_generic_impl, 261 #ifdef USE_SIMD 262 #if defined(__aarch64__) || \ 263 (defined(__x86_64) && defined(HAVE_SSE2)) || \ 264 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) 265 &blake3_sse2_impl, 266 #endif 267 #if defined(__aarch64__) || \ 268 (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ 269 (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) 270 &blake3_sse41_impl, 271 #endif 272 #if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) 273 &blake3_avx2_impl, 274 #endif 275 #if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) 276 &blake3_avx512_impl, 277 #endif 278 #endif 279 }; 280 281 /* use the generic implementation functions */ 282 #define IMPL_NAME "blake3" 283 #define IMPL_OPS_T blake3_ops_t 284 #define IMPL_ARRAY blake3_impls 285 #define IMPL_GET_OPS blake3_get_ops 286 #define ZFS_IMPL_OPS zfs_blake3_ops 287 #include <generic_impl.c> 288 289 #ifdef _KERNEL 290 void **blake3_per_cpu_ctx; 291 292 void 293 blake3_per_cpu_ctx_init(void) 294 { 295 /* 296 * Create "The Godfather" ptr to hold all blake3 ctx 297 */ 298 blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); 299 for (int i = 0; i < max_ncpus; i++) { 300 blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX), 301 KM_SLEEP); 302 } 303 } 304 305 void 306 blake3_per_cpu_ctx_fini(void) 307 { 308 for (int i = 0; i < max_ncpus; i++) { 309 memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX)); 310 kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX)); 311 } 312 memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *)); 313 kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *)); 314 } 315 316 #define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ") 317 318 #if defined(__linux__) 319 320 static int 321 blake3_param_get(char *buffer, zfs_kernel_param_t *unused) 322 { 323 const uint32_t impl = IMPL_READ(generic_impl_chosen); 324 char *fmt; 325 int cnt = 0; 326 327 /* cycling */ 328 fmt = IMPL_FMT(impl, IMPL_CYCLE); 329 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "cycle"); 330 331 /* list fastest */ 332 fmt = IMPL_FMT(impl, IMPL_FASTEST); 333 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest"); 334 335 /* list all supported implementations */ 336 generic_impl_init(); 337 for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { 338 fmt = IMPL_FMT(impl, i); 339 cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, 340 blake3_impls[i]->name); 341 } 342 343 return (cnt); 344 } 345 346 static int 347 blake3_param_set(const char *val, zfs_kernel_param_t *unused) 348 { 349 (void) unused; 350 return (generic_impl_setname(val)); 351 } 352 353 #elif defined(__FreeBSD__) 354 355 #include <sys/sbuf.h> 356 357 static int 358 blake3_param(ZFS_MODULE_PARAM_ARGS) 359 { 360 int err; 361 362 generic_impl_init(); 363 if (req->newptr == NULL) { 364 const uint32_t impl = IMPL_READ(generic_impl_chosen); 365 const int init_buflen = 64; 366 const char *fmt; 367 struct sbuf *s; 368 369 s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); 370 371 /* cycling */ 372 fmt = IMPL_FMT(impl, IMPL_CYCLE); 373 (void) sbuf_printf(s, fmt, "cycle"); 374 375 /* list fastest */ 376 fmt = IMPL_FMT(impl, IMPL_FASTEST); 377 (void) sbuf_printf(s, fmt, "fastest"); 378 379 /* list all supported implementations */ 380 for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { 381 fmt = IMPL_FMT(impl, i); 382 (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name); 383 } 384 385 err = sbuf_finish(s); 386 sbuf_delete(s); 387 388 return (err); 389 } 390 391 char buf[16]; 392 393 err = sysctl_handle_string(oidp, buf, sizeof (buf), req); 394 if (err) { 395 return (err); 396 } 397 398 return (-generic_impl_setname(buf)); 399 } 400 #endif 401 402 #undef IMPL_FMT 403 404 ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl, 405 blake3_param_set, blake3_param_get, ZMOD_RW, \ 406 "Select BLAKE3 implementation."); 407 #endif 408