// SPDX-License-Identifier: GPL-2.0
/*
 * ChaCha and HChaCha functions (ARM optimized)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/chacha.h>
#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

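/*
 * Low-level routines implemented in assembly: scalar ARM versions plus NEON
 * versions that process one or four ChaCha blocks per call.
 */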
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
				      u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const struct chacha_state *state,
				  u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
				   u32 out[HCHACHA_OUT_WORDS], int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const struct chacha_state *state, int nrounds);

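/*
 * Static key that gates the NEON code paths.  It is enabled once at boot by
 * chacha_arm_mod_init() when NEON is available and expected to beat the
 * scalar code on the current CPU.
 */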
static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}

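/*
 * NEON bulk path: process 'bytes' of data in chunks of up to four ChaCha
 * blocks, advancing the block counter (state->x[12]) as it goes.  A trailing
 * partial block is bounced through an on-stack buffer so the single-block
 * NEON routine always reads and writes a full block.
 */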
static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state->x[12]++;
	}
}

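/*
 * HChaCha block function, used for XChaCha key derivation.  Uses the NEON
 * implementation when it is usable in the current context, otherwise falls
 * back to the scalar ARM implementation.
 */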
void hchacha_block_arch(const struct chacha_state *state,
			u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, out, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, out, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

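/*
 * Main ChaCha entry point.  Requests of at most one block, and contexts in
 * which NEON cannot be used, are handled by the scalar code; everything else
 * goes through the NEON bulk path.
 */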
void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
		       unsigned int bytes, int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

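	/*
	 * Walk the data in chunks of at most SZ_4K so that each
	 * kernel_neon_begin()/kernel_neon_end() section stays short.
	 */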
	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

bool chacha_is_arch_optimized(void)
{
	/* We can always use at least the ARM scalar implementation. */
	return true;
}
EXPORT_SYMBOL(chacha_is_arch_optimized);

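/*
 * Decide once at boot whether the NEON code paths should be used.  NEON must
 * be both configured and present in the ELF hwcaps; cores known to run the
 * scalar code faster are kept on the scalar path.
 */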
static int __init chacha_arm_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well with
			 * the NEON implementation but do incredibly well with
			 * the scalar one and use less power.
			 */
			break;
		default:
			static_branch_enable(&use_neon);
		}
	}
	return 0;
}
subsys_initcall(chacha_arm_mod_init);

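/* No cleanup is needed; an empty exit handler keeps the module unloadable. */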
static void __exit chacha_arm_mod_exit(void)
{
}
module_exit(chacha_arm_mod_exit);

MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");