/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SHA-256 optimized for x86_64
 *
 * Copyright 2025 Google LLC
 */
#include <asm/fpu/api.h>
#include <linux/static_call.h>

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha_ni);

/* Defaults to the generic C code until sha256_mod_init_arch() runs. */
DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic);

/*
 * Define a block function that runs the given assembly routine inside a
 * kernel FPU section, falling back to the generic C implementation when the
 * FPU is not usable in the current context.
 */
#define DEFINE_X86_SHA256_FN(c_fn, asm_fn)				\
	asmlinkage void asm_fn(struct sha256_block_state *state,	\
			       const u8 *data, size_t nblocks);		\
	static void c_fn(struct sha256_block_state *state, const u8 *data, \
			 size_t nblocks)				\
	{								\
		if (likely(irq_fpu_usable())) {				\
			kernel_fpu_begin();				\
			asm_fn(state, data, nblocks);			\
			kernel_fpu_end();				\
		} else {						\
			sha256_blocks_generic(state, data, nblocks);	\
		}							\
	}

DEFINE_X86_SHA256_FN(sha256_blocks_ssse3, sha256_transform_ssse3);
DEFINE_X86_SHA256_FN(sha256_blocks_avx, sha256_transform_avx);
DEFINE_X86_SHA256_FN(sha256_blocks_avx2, sha256_transform_rorx);
DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform);

static void sha256_blocks(struct sha256_block_state *state,
			  const u8 *data, size_t nblocks)
{
	static_call(sha256_blocks_x86)(state, data, nblocks);
}

/* The SHA-NI finup2x assembly hardcodes these field offsets. */
static_assert(offsetof(struct __sha256_ctx, state) == 0);
static_assert(offsetof(struct __sha256_ctx, bytecount) == 32);
static_assert(offsetof(struct __sha256_ctx, buf) == 40);
asmlinkage void sha256_ni_finup2x(const struct __sha256_ctx *ctx,
				  const u8 *data1, const u8 *data2, int len,
				  u8 out1[SHA256_DIGEST_SIZE],
				  u8 out2[SHA256_DIGEST_SIZE]);

#define sha256_finup_2x_arch sha256_finup_2x_arch
static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
				 const u8 *data1, const u8 *data2, size_t len,
				 u8 out1[SHA256_DIGEST_SIZE],
				 u8 out2[SHA256_DIGEST_SIZE])
{
	/*
	 * The assembly requires len >= SHA256_BLOCK_SIZE && len <= INT_MAX.
	 * Further limit len to 65536 to avoid spending too long with
	 * preemption disabled.  (Of course, in practice len is nearly always
	 * 4096 anyway.)
	 */
	if (static_branch_likely(&have_sha_ni) && len >= SHA256_BLOCK_SIZE &&
	    len <= 65536 && likely(irq_fpu_usable())) {
		kernel_fpu_begin();
		sha256_ni_finup2x(ctx, data1, data2, len, out1, out2);
		kernel_fpu_end();
		/*
		 * KMSAN doesn't see the stores done by the assembly, so
		 * unpoison the outputs manually.
		 */
		kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
		kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
		return true;
	}
	return false;
}

static bool sha256_finup_2x_is_optimized_arch(void)
{
	return static_key_enabled(&have_sha_ni);
}

#define sha256_mod_init_arch sha256_mod_init_arch
static void sha256_mod_init_arch(void)
{
	/* Select the best implementation the boot CPU supports. */
	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
		static_call_update(sha256_blocks_x86, sha256_blocks_ni);
		static_branch_enable(&have_sha_ni);
	} else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				     NULL) &&
		   boot_cpu_has(X86_FEATURE_AVX)) {
		if (boot_cpu_has(X86_FEATURE_AVX2) &&
		    boot_cpu_has(X86_FEATURE_BMI2))
			static_call_update(sha256_blocks_x86,
					   sha256_blocks_avx2);
		else
			static_call_update(sha256_blocks_x86,
					   sha256_blocks_avx);
	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
		static_call_update(sha256_blocks_x86, sha256_blocks_ssse3);
	}
}
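
/*
 * For illustration only (not part of the file proper): the
 * DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform) instantiation
 * above expands to roughly the following, showing the FPU-guarded dispatch
 * pattern with its generic fallback:
 *
 *	asmlinkage void sha256_ni_transform(struct sha256_block_state *state,
 *					    const u8 *data, size_t nblocks);
 *	static void sha256_blocks_ni(struct sha256_block_state *state,
 *				     const u8 *data, size_t nblocks)
 *	{
 *		if (likely(irq_fpu_usable())) {
 *			kernel_fpu_begin();
 *			sha256_ni_transform(state, data, nblocks);
 *			kernel_fpu_end();
 *		} else {
 *			sha256_blocks_generic(state, data, nblocks);
 *		}
 *	}
 */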
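
/*
 * Layout sketch implied by the static_asserts above (hedged: the real
 * definition of struct __sha256_ctx lives in the generic SHA-256 headers;
 * the field types shown are assumptions consistent with the asserted
 * offsets, not this file's declarations):
 *
 *	struct __sha256_ctx {
 *		struct sha256_block_state state;   // offset 0: 8 x u32 = 32 bytes
 *		u64 bytecount;                     // offset 32
 *		u8 buf[SHA256_BLOCK_SIZE];         // offset 40
 *	};
 */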
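
/*
 * Caller-side usage sketch (illustrative, not part of this file): a generic
 * caller is expected to try the interleaved fast path first and, when it
 * returns false (no SHA-NI, len out of range, or FPU unusable), finish each
 * message sequentially from a copy of the shared prefix context.  The
 * __sha256_update()/__sha256_final() helpers below are hypothetical
 * stand-ins for the generic single-message code.
 */
static void sha256_finup_2x_sketch(const struct __sha256_ctx *ctx,
				   const u8 *data1, const u8 *data2,
				   size_t len, u8 out1[SHA256_DIGEST_SIZE],
				   u8 out2[SHA256_DIGEST_SIZE])
{
	struct __sha256_ctx tmp;

	/* Fast path: one SHA-NI pass hashing both messages interleaved. */
	if (sha256_finup_2x_arch(ctx, data1, data2, len, out1, out2))
		return;

	/* Fallback: finish each message on its own from a copy of *ctx. */
	tmp = *ctx;
	__sha256_update(&tmp, data1, len);	/* hypothetical helper */
	__sha256_final(&tmp, out1);		/* hypothetical helper */

	tmp = *ctx;
	__sha256_update(&tmp, data2, len);
	__sha256_final(&tmp, out2);
}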