xref: /linux/lib/crypto/x86/sha256.h (revision 6bc9effb4cbf9b6eba0f51aba1c8893dfd4c8100)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * SHA-256 optimized for x86_64
4  *
5  * Copyright 2025 Google LLC
6  */
7 #include <asm/fpu/api.h>
8 #include <linux/static_call.h>
9 
/*
 * Static key, initially false.  sha256_mod_init_arch() enables it when the
 * boot CPU reports X86_FEATURE_SHA_NI; sha256_finup_2x_arch() and
 * sha256_finup_2x_is_optimized_arch() test it.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha_ni);

/*
 * Static call through which sha256_blocks() dispatches.  It starts out
 * targeting sha256_blocks_generic() and may be retargeted by
 * sha256_mod_init_arch().
 */
DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic);
13 
/*
 * DEFINE_X86_SHA256_FN(c_fn, asm_fn) - wrap an assembly block function
 *
 * Declares the assembly routine @asm_fn and defines a static C function
 * @c_fn with the same signature.  @c_fn hands the blocks to
 * sha256_blocks_generic() when irq_fpu_usable() is false; otherwise it runs
 * @asm_fn between kernel_fpu_begin() and kernel_fpu_end().
 */
#define DEFINE_X86_SHA256_FN(c_fn, asm_fn)				\
	asmlinkage void asm_fn(struct sha256_block_state *state,	\
			       const u8 *data, size_t nblocks);		\
	static void c_fn(struct sha256_block_state *state,		\
			 const u8 *data, size_t nblocks)		\
	{								\
		if (unlikely(!irq_fpu_usable())) {			\
			sha256_blocks_generic(state, data, nblocks);	\
			return;						\
		}							\
		kernel_fpu_begin();					\
		asm_fn(state, data, nblocks);				\
		kernel_fpu_end();					\
	}
28 
/*
 * One C wrapper per assembly routine.  sha256_mod_init_arch() selects among
 * these wrappers based on the boot CPU's features.
 */
DEFINE_X86_SHA256_FN(sha256_blocks_ssse3, sha256_transform_ssse3);
DEFINE_X86_SHA256_FN(sha256_blocks_avx, sha256_transform_avx);
DEFINE_X86_SHA256_FN(sha256_blocks_avx2, sha256_transform_rorx);
DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform);
33 
34 #define PHE_ALIGNMENT 16
35 static void sha256_blocks_phe(struct sha256_block_state *state,
36 			      const u8 *data, size_t nblocks)
37 {
38 	/*
39 	 * On Zhaoxin processors, XSHA256 requires the %rdi register
40 	 * in 64-bit mode (or %edi in 32-bit mode) to point to
41 	 * a 32-byte, 16-byte-aligned buffer.
42 	 */
43 	u8 buf[32 + PHE_ALIGNMENT - 1];
44 	u8 *dst = PTR_ALIGN(&buf[0], PHE_ALIGNMENT);
45 	size_t padding = -1;
46 
47 	memcpy(dst, state, SHA256_DIGEST_SIZE);
48 	asm volatile(".byte 0xf3,0x0f,0xa6,0xd0" /* REP XSHA256 */
49 		     : "+a"(padding), "+c"(nblocks), "+S"(data)
50 		     : "D"(dst)
51 		     : "memory");
52 	memcpy(state, dst, SHA256_DIGEST_SIZE);
53 }
54 
/*
 * Block-processing entry point: forwards @state, @data and @nblocks to
 * whichever implementation the sha256_blocks_x86 static call currently
 * targets.
 */
static void sha256_blocks(struct sha256_block_state *state,
			  const u8 *data, size_t nblocks)
{
	static_call(sha256_blocks_x86)(state, data, nblocks);
}
60 
/*
 * Pin the layout of struct __sha256_ctx: state at offset 0, bytecount at
 * offset 32, buf at offset 40.
 * NOTE(review): presumably the assembly behind sha256_ni_finup2x() relies on
 * these offsets -- confirm against the assembly source.
 */
static_assert(offsetof(struct __sha256_ctx, state) == 0);
static_assert(offsetof(struct __sha256_ctx, bytecount) == 32);
static_assert(offsetof(struct __sha256_ctx, buf) == 40);
/* Declaration only; the definition is not part of this file. */
asmlinkage void sha256_ni_finup2x(const struct __sha256_ctx *ctx,
				  const u8 *data1, const u8 *data2, int len,
				  u8 out1[SHA256_DIGEST_SIZE],
				  u8 out2[SHA256_DIGEST_SIZE]);
68 
69 #define sha256_finup_2x_arch sha256_finup_2x_arch
70 static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
71 				 const u8 *data1, const u8 *data2, size_t len,
72 				 u8 out1[SHA256_DIGEST_SIZE],
73 				 u8 out2[SHA256_DIGEST_SIZE])
74 {
75 	/*
76 	 * The assembly requires len >= SHA256_BLOCK_SIZE && len <= INT_MAX.
77 	 * Further limit len to 65536 to avoid spending too long with preemption
78 	 * disabled.  (Of course, in practice len is nearly always 4096 anyway.)
79 	 */
80 	if (static_branch_likely(&have_sha_ni) && len >= SHA256_BLOCK_SIZE &&
81 	    len <= 65536 && likely(irq_fpu_usable())) {
82 		kernel_fpu_begin();
83 		sha256_ni_finup2x(ctx, data1, data2, len, out1, out2);
84 		kernel_fpu_end();
85 		kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
86 		kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
87 		return true;
88 	}
89 	return false;
90 }
91 
/*
 * Report whether the have_sha_ni static key is enabled.  That key is the
 * first condition sha256_finup_2x_arch() checks before using its assembly
 * path.
 */
static bool sha256_finup_2x_is_optimized_arch(void)
{
	return static_key_enabled(&have_sha_ni);
}
96 
97 #define sha256_mod_init_arch sha256_mod_init_arch
98 static void sha256_mod_init_arch(void)
99 {
100 	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
101 		static_call_update(sha256_blocks_x86, sha256_blocks_ni);
102 		static_branch_enable(&have_sha_ni);
103 	} else if (IS_ENABLED(CONFIG_CPU_SUP_ZHAOXIN) &&
104 		   boot_cpu_has(X86_FEATURE_PHE_EN) &&
105 		   boot_cpu_data.x86 >= 0x07) {
106 		static_call_update(sha256_blocks_x86, sha256_blocks_phe);
107 	} else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
108 				     NULL) &&
109 		   boot_cpu_has(X86_FEATURE_AVX)) {
110 		if (boot_cpu_has(X86_FEATURE_AVX2) &&
111 		    boot_cpu_has(X86_FEATURE_BMI2))
112 			static_call_update(sha256_blocks_x86,
113 					   sha256_blocks_avx2);
114 		else
115 			static_call_update(sha256_blocks_x86,
116 					   sha256_blocks_avx);
117 	} else if (boot_cpu_has(X86_FEATURE_SSSE3)) {
118 		static_call_update(sha256_blocks_x86, sha256_blocks_ssse3);
119 	}
120 }
121