/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SHA-256 optimized for x86_64
 *
 * Copyright 2025 Google LLC
 */
#include <asm/fpu/api.h>
#include <linux/static_call.h>

/*
 * Static key that is off by default and is enabled by sha256_mod_init_arch()
 * when the boot CPU reports X86_FEATURE_SHA_NI.  It gates the assembly path
 * of sha256_finup_2x_arch().
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha_ni);

/*
 * Static call through which sha256_blocks() dispatches.  It initially targets
 * sha256_blocks_generic and is retargeted by sha256_mod_init_arch().
 */
DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic);

/*
 * Declare the assembly routine @asm_fn and define a C wrapper @c_fn with the
 * same (state, data, nblocks) signature.
 *
 * The wrapper calls @asm_fn between kernel_fpu_begin() and kernel_fpu_end()
 * when irq_fpu_usable() returns true; otherwise it falls back to
 * sha256_blocks_generic() with the same arguments.
 */
#define DEFINE_X86_SHA256_FN(c_fn, asm_fn) \
	asmlinkage void asm_fn(struct sha256_block_state *state, \
			       const u8 *data, size_t nblocks); \
	static void c_fn(struct sha256_block_state *state, const u8 *data, \
			 size_t nblocks) \
	{ \
		if (likely(irq_fpu_usable())) { \
			kernel_fpu_begin(); \
			asm_fn(state, data, nblocks); \
			kernel_fpu_end(); \
		} else { \
			sha256_blocks_generic(state, data, nblocks); \
		} \
	}

/* One FPU-guarded wrapper per assembly implementation. */
DEFINE_X86_SHA256_FN(sha256_blocks_ssse3, sha256_transform_ssse3);
DEFINE_X86_SHA256_FN(sha256_blocks_avx, sha256_transform_avx);
DEFINE_X86_SHA256_FN(sha256_blocks_avx2, sha256_transform_rorx);
DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform);

/* Required alignment, in bytes, of the buffer that %rdi points to for XSHA256. */
#define PHE_ALIGNMENT 16
/*
 * Process @nblocks blocks from @data using the REP XSHA256 instruction.
 *
 * The state is copied into an aligned bounce buffer on the stack, the
 * instruction is executed on that buffer, and the buffer is copied back to
 * @state.  Unlike the DEFINE_X86_SHA256_FN() wrappers, this function does not
 * call kernel_fpu_begin()/kernel_fpu_end() and has no generic fallback.
 */
static void sha256_blocks_phe(struct sha256_block_state *state,
			      const u8 *data, size_t nblocks)
{
	/*
	 * On Zhaoxin processors, XSHA256 requires the %rdi register
	 * in 64-bit mode (or %edi in 32-bit mode) to point to
	 * a 32-byte, 16-byte-aligned buffer.
	 */
	/*
	 * Over-allocate by PHE_ALIGNMENT - 1 bytes so that a 32-byte window
	 * starting at a PHE_ALIGNMENT boundary always fits inside buf.
	 */
	u8 buf[32 + PHE_ALIGNMENT - 1];
	u8 *dst = PTR_ALIGN(&buf[0], PHE_ALIGNMENT);
	/*
	 * Value loaded into %rax (all bits set).
	 * NOTE(review): presumably this tells the instruction not to apply
	 * final message padding -- confirm against the hardware documentation.
	 */
	size_t padding = -1;

	memcpy(dst, state, SHA256_DIGEST_SIZE);
	/*
	 * The instruction is emitted as raw opcode bytes.  Operands:
	 *   %rax = padding, %rcx = nblocks, %rsi = data  (read-write, "+")
	 *   %rdi = dst                                   (input only)
	 * The "memory" clobber keeps the compiler from caching or reordering
	 * memory accesses across the instruction, which accesses memory
	 * through pointers the compiler cannot see into.
	 *
	 * NOTE(review): %rdi is declared input-only, so the compiler assumes
	 * the instruction leaves it unchanged -- confirm this holds for
	 * REP XSHA256.
	 */
	asm volatile(".byte 0xf3,0x0f,0xa6,0xd0" /* REP XSHA256 */
		     : "+a"(padding), "+c"(nblocks), "+S"(data)
		     : "D"(dst)
		     : "memory");
	memcpy(state, dst, SHA256_DIGEST_SIZE);
}

/*
 * Forward to whichever implementation is currently installed in the
 * sha256_blocks_x86 static call.
 */
static void sha256_blocks(struct sha256_block_state *state,
			  const u8 *data, size_t nblocks)
{
	static_call(sha256_blocks_x86)(state, data, nblocks);
}

/*
 * Pin the layout of struct __sha256_ctx at build time.
 * NOTE(review): presumably sha256_ni_finup2x() hard-codes these field offsets
 * in assembly -- confirm against the assembly source.
 */
static_assert(offsetof(struct __sha256_ctx, state) == 0);
static_assert(offsetof(struct __sha256_ctx, bytecount) == 32);
static_assert(offsetof(struct __sha256_ctx, buf) == 40);
/* Note that the length parameter is an int here, not a size_t. */
asmlinkage void sha256_ni_finup2x(const struct __sha256_ctx *ctx,
				  const u8 *data1, const u8 *data2, int len,
				  u8 out1[SHA256_DIGEST_SIZE],
				  u8 out2[SHA256_DIGEST_SIZE]);

/*
 * NOTE(review): the self-referential define presumably lets the including
 * code detect this arch override with #ifdef -- confirm against the includer.
 */
#define sha256_finup_2x_arch sha256_finup_2x_arch
/*
 * Run sha256_ni_finup2x() on (@ctx, @data1, @data2, @len) if all of its
 * preconditions hold.
 *
 * Return: true if sha256_ni_finup2x() was called with @out1 and @out2;
 *	   false if any precondition failed, in which case this function has
 *	   not written to @out1 or @out2.
 */
static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
				 const u8 *data1, const u8 *data2, size_t len,
				 u8 out1[SHA256_DIGEST_SIZE],
				 u8 out2[SHA256_DIGEST_SIZE])
{
	/*
	 * The assembly requires len >= SHA256_BLOCK_SIZE && len <= INT_MAX.
	 * Further limit len to 65536 to avoid spending too long with preemption
	 * disabled.  (Of course, in practice len is nearly always 4096 anyway.)
	 */
	if (static_branch_likely(&have_sha_ni) && len >= SHA256_BLOCK_SIZE &&
	    len <= 65536 && likely(irq_fpu_usable())) {
		kernel_fpu_begin();
		sha256_ni_finup2x(ctx, data1, data2, len, out1, out2);
		kernel_fpu_end();
		/*
		 * Tell KMSAN that both outputs are initialized.
		 * NOTE(review): presumably needed because the assembly's
		 * stores are not instrumented by KMSAN -- confirm.
		 */
		kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
		kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
		return true;
	}
	return false;
}

/*
 * Report whether the have_sha_ni key is enabled, i.e. whether
 * sha256_finup_2x_arch() is able to take its assembly path.
 */
static bool sha256_finup_2x_is_optimized_arch(void)
{
	return static_key_enabled(&have_sha_ni);
}

/*
 * NOTE(review): as above, the self-referential define presumably lets the
 * including code detect this arch hook with #ifdef -- confirm.
 */
#define sha256_mod_init_arch sha256_mod_init_arch
/*
 * Pick the block function for the boot CPU and install it in the
 * sha256_blocks_x86 static call.  The first matching entry wins:
 *
 *   1. X86_FEATURE_SHA_NI: sha256_blocks_ni (and enable have_sha_ni)
 *   2. CONFIG_CPU_SUP_ZHAOXIN enabled, X86_FEATURE_PHE_EN set and
 *      boot_cpu_data.x86 >= 0x07: sha256_blocks_phe
 *   3. SSE and YMM xfeatures available and X86_FEATURE_AVX set:
 *      sha256_blocks_avx2 if AVX2 and BMI2 are both set, else
 *      sha256_blocks_avx
 *   4. X86_FEATURE_SSSE3: sha256_blocks_ssse3
 *
 * If nothing matches, the static call keeps its initial target,
 * sha256_blocks_generic.
 */
static void sha256_mod_init_arch(void)
{
	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
		static_call_update(sha256_blocks_x86, sha256_blocks_ni);
		static_branch_enable(&have_sha_ni);
	} else if (IS_ENABLED(CONFIG_CPU_SUP_ZHAOXIN) &&
		   boot_cpu_has(X86_FEATURE_PHE_EN) &&
		   boot_cpu_data.x86 >= 0x07) {
		static_call_update(sha256_blocks_x86, sha256_blocks_phe);
	} else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				     NULL) &&
		   boot_cpu_has(X86_FEATURE_AVX)) {
		if (boot_cpu_has(X86_FEATURE_AVX2) &&
		    boot_cpu_has(X86_FEATURE_BMI2))
			static_call_update(sha256_blocks_x86,
					   sha256_blocks_avx2);
		else
			static_call_update(sha256_blocks_x86,
					   sha256_blocks_avx);
	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
		static_call_update(sha256_blocks_x86, sha256_blocks_ssse3);
	}
}