xref: /linux/arch/arm/crypto/chacha-glue.c (revision f8bade6c9a6213c2c5ba6e5bf32415ecab6e41e5)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
4  * including ChaCha20 (RFC7539)
5  *
6  * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
7  * Copyright (C) 2015 Martin Willi
8  */
9 
10 #include <crypto/algapi.h>
11 #include <crypto/internal/chacha.h>
12 #include <crypto/internal/simd.h>
13 #include <crypto/internal/skcipher.h>
14 #include <linux/jump_label.h>
15 #include <linux/kernel.h>
16 #include <linux/module.h>
17 
18 #include <asm/cputype.h>
19 #include <asm/hwcap.h>
20 #include <asm/neon.h>
21 #include <asm/simd.h>
22 
23 asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
24 				      int nrounds);
25 asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
26 				       int nrounds, unsigned int nbytes);
27 asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
28 asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
29 
30 asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
31 			     const u32 *state, int nrounds);
32 
33 static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
34 
neon_usable(void)35 static inline bool neon_usable(void)
36 {
37 	return static_branch_likely(&use_neon) && crypto_simd_usable();
38 }
39 
/*
 * XOR @bytes bytes from @src into @dst using the NEON primitives and
 * advance the block counter word state[12] accordingly.
 *
 * The caller is responsible for wrapping this call in
 * kernel_neon_begin()/kernel_neon_end().
 */
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 bounce[CHACHA_BLOCK_SIZE];

	/* While more than one block remains, hand over up to four at a time. */
	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int chunk = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, chunk);

		/* A trailing partial block still consumes one counter value. */
		state[12] += DIV_ROUND_UP(chunk, CHACHA_BLOCK_SIZE);
		dst += chunk;
		src += chunk;
		bytes -= chunk;
	}

	if (!bytes)
		return;

	if (bytes == CHACHA_BLOCK_SIZE) {
		/* Exactly one full block left: operate on it in place. */
		chacha_block_xor_neon(state, dst, src, nrounds);
	} else {
		/*
		 * Less than one block left: go through a block-sized bounce
		 * buffer and copy back only the bytes that were requested.
		 */
		memcpy(bounce, src, bytes);
		chacha_block_xor_neon(state, bounce, bounce, nrounds);
		memcpy(dst, bounce, bytes);
	}
	state[12]++;
}
66 
/*
 * Exported HChaCha entry point.  Uses the NEON implementation when kernel
 * mode NEON is configured and currently usable, and the scalar ARM
 * implementation otherwise.
 */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && neon_usable()) {
		kernel_neon_begin();
		hchacha_block_neon(state, stream, nrounds);
		kernel_neon_end();
		return;
	}

	hchacha_block_arm(state, stream, nrounds);
}
EXPORT_SYMBOL(hchacha_block_arch);
78 
/*
 * Exported state initialisation.  There is no ARM-specific variant; this
 * simply forwards to chacha_init_generic().
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
84 
/*
 * Exported ChaCha en/decryption entry point.
 *
 * Inputs of at most one block, and all inputs when NEON is not configured
 * or not currently usable, are handled by the scalar code.  Everything else
 * goes through NEON in pieces of at most SZ_4K bytes, with the NEON unit
 * released between pieces.
 */
void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
		       int nrounds)
{
	const bool scalar = !IS_ENABLED(CONFIG_KERNEL_MODE_NEON) ||
			    !neon_usable() ||
			    bytes <= CHACHA_BLOCK_SIZE;

	if (scalar) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		/* chacha_doarm() takes a const state; bump the counter here. */
		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	/* bytes is larger than one block here, so the loop runs at least once. */
	while (bytes) {
		unsigned int chunk = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, chunk, nrounds);
		kernel_neon_end();

		dst += chunk;
		src += chunk;
		bytes -= chunk;
	}
}
EXPORT_SYMBOL(chacha_crypt_arch);
108 
/*
 * Walk the request's scatterlists and XOR the ChaCha keystream derived from
 * @ctx->key and @iv into the data.
 *
 * @neon selects the NEON path (only honoured when kernel mode NEON is
 * configured); otherwise the scalar code is used.  Returns the status of
 * the last skcipher walk call.
 */
static int chacha_stream_xor(struct skcipher_request *req,
			     const struct chacha_ctx *ctx, const u8 *iv,
			     bool neon)
{
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	chacha_init_generic(state, ctx->key, iv);

	while (walk.nbytes > 0) {
		unsigned int len = walk.nbytes;

		/* Every step but the final one is trimmed to the walk stride. */
		if (len < walk.total)
			len = round_down(len, walk.stride);

		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && neon) {
			kernel_neon_begin();
			chacha_doneon(state, walk.dst.virt.addr,
				      walk.src.virt.addr, len, ctx->nrounds);
			kernel_neon_end();
		} else {
			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
				     len, state, ctx->nrounds);
			/* The scalar routine leaves the counter to us. */
			state[12] += DIV_ROUND_UP(len, CHACHA_BLOCK_SIZE);
		}

		/* Hand the unprocessed remainder of this step back to the walk. */
		err = skcipher_walk_done(&walk, walk.nbytes - len);
	}

	return err;
}
142 
/*
 * Common ChaCha request handler: fetch the transform context and run the
 * stream cipher over the request using the request's own IV.
 */
static int do_chacha(struct skcipher_request *req, bool neon)
{
	struct chacha_ctx *ctx =
		crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));

	return chacha_stream_xor(req, ctx, req->iv, neon);
}
150 
chacha_arm(struct skcipher_request * req)151 static int chacha_arm(struct skcipher_request *req)
152 {
153 	return do_chacha(req, false);
154 }
155 
chacha_neon(struct skcipher_request * req)156 static int chacha_neon(struct skcipher_request *req)
157 {
158 	return do_chacha(req, neon_usable());
159 }
160 
/*
 * Common XChaCha request handler.
 *
 * A sub-key is produced by running HChaCha over a state initialised from
 * the transform key and the start of the request IV.  The actual stream
 * cipher is then run with that sub-key and a 16-byte IV assembled from
 * bytes 24..31 followed by bytes 16..23 of the request IV.
 */
static int do_xchacha(struct skcipher_request *req, bool neon)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct chacha_ctx subctx;
	u8 stream_iv[16];
	u32 state[16];

	subctx.nrounds = ctx->nrounds;

	chacha_init_generic(state, ctx->key, req->iv);

	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && neon) {
		kernel_neon_begin();
		hchacha_block_neon(state, subctx.key, ctx->nrounds);
		kernel_neon_end();
	} else {
		hchacha_block_arm(state, subctx.key, ctx->nrounds);
	}

	memcpy(stream_iv, req->iv + 24, 8);
	memcpy(stream_iv + 8, req->iv + 16, 8);

	return chacha_stream_xor(req, &subctx, stream_iv, neon);
}
184 
xchacha_arm(struct skcipher_request * req)185 static int xchacha_arm(struct skcipher_request *req)
186 {
187 	return do_xchacha(req, false);
188 }
189 
xchacha_neon(struct skcipher_request * req)190 static int xchacha_neon(struct skcipher_request *req)
191 {
192 	return do_xchacha(req, neon_usable());
193 }
194 
195 static struct skcipher_alg arm_algs[] = {
196 	{
197 		.base.cra_name		= "chacha20",
198 		.base.cra_driver_name	= "chacha20-arm",
199 		.base.cra_priority	= 200,
200 		.base.cra_blocksize	= 1,
201 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
202 		.base.cra_module	= THIS_MODULE,
203 
204 		.min_keysize		= CHACHA_KEY_SIZE,
205 		.max_keysize		= CHACHA_KEY_SIZE,
206 		.ivsize			= CHACHA_IV_SIZE,
207 		.chunksize		= CHACHA_BLOCK_SIZE,
208 		.setkey			= chacha20_setkey,
209 		.encrypt		= chacha_arm,
210 		.decrypt		= chacha_arm,
211 	}, {
212 		.base.cra_name		= "xchacha20",
213 		.base.cra_driver_name	= "xchacha20-arm",
214 		.base.cra_priority	= 200,
215 		.base.cra_blocksize	= 1,
216 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
217 		.base.cra_module	= THIS_MODULE,
218 
219 		.min_keysize		= CHACHA_KEY_SIZE,
220 		.max_keysize		= CHACHA_KEY_SIZE,
221 		.ivsize			= XCHACHA_IV_SIZE,
222 		.chunksize		= CHACHA_BLOCK_SIZE,
223 		.setkey			= chacha20_setkey,
224 		.encrypt		= xchacha_arm,
225 		.decrypt		= xchacha_arm,
226 	}, {
227 		.base.cra_name		= "xchacha12",
228 		.base.cra_driver_name	= "xchacha12-arm",
229 		.base.cra_priority	= 200,
230 		.base.cra_blocksize	= 1,
231 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
232 		.base.cra_module	= THIS_MODULE,
233 
234 		.min_keysize		= CHACHA_KEY_SIZE,
235 		.max_keysize		= CHACHA_KEY_SIZE,
236 		.ivsize			= XCHACHA_IV_SIZE,
237 		.chunksize		= CHACHA_BLOCK_SIZE,
238 		.setkey			= chacha12_setkey,
239 		.encrypt		= xchacha_arm,
240 		.decrypt		= xchacha_arm,
241 	},
242 };
243 
244 static struct skcipher_alg neon_algs[] = {
245 	{
246 		.base.cra_name		= "chacha20",
247 		.base.cra_driver_name	= "chacha20-neon",
248 		.base.cra_priority	= 300,
249 		.base.cra_blocksize	= 1,
250 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
251 		.base.cra_module	= THIS_MODULE,
252 
253 		.min_keysize		= CHACHA_KEY_SIZE,
254 		.max_keysize		= CHACHA_KEY_SIZE,
255 		.ivsize			= CHACHA_IV_SIZE,
256 		.chunksize		= CHACHA_BLOCK_SIZE,
257 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
258 		.setkey			= chacha20_setkey,
259 		.encrypt		= chacha_neon,
260 		.decrypt		= chacha_neon,
261 	}, {
262 		.base.cra_name		= "xchacha20",
263 		.base.cra_driver_name	= "xchacha20-neon",
264 		.base.cra_priority	= 300,
265 		.base.cra_blocksize	= 1,
266 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
267 		.base.cra_module	= THIS_MODULE,
268 
269 		.min_keysize		= CHACHA_KEY_SIZE,
270 		.max_keysize		= CHACHA_KEY_SIZE,
271 		.ivsize			= XCHACHA_IV_SIZE,
272 		.chunksize		= CHACHA_BLOCK_SIZE,
273 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
274 		.setkey			= chacha20_setkey,
275 		.encrypt		= xchacha_neon,
276 		.decrypt		= xchacha_neon,
277 	}, {
278 		.base.cra_name		= "xchacha12",
279 		.base.cra_driver_name	= "xchacha12-neon",
280 		.base.cra_priority	= 300,
281 		.base.cra_blocksize	= 1,
282 		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
283 		.base.cra_module	= THIS_MODULE,
284 
285 		.min_keysize		= CHACHA_KEY_SIZE,
286 		.max_keysize		= CHACHA_KEY_SIZE,
287 		.ivsize			= XCHACHA_IV_SIZE,
288 		.chunksize		= CHACHA_BLOCK_SIZE,
289 		.walksize		= 4 * CHACHA_BLOCK_SIZE,
290 		.setkey			= chacha12_setkey,
291 		.encrypt		= xchacha_neon,
292 		.decrypt		= xchacha_neon,
293 	}
294 };
295 
chacha_simd_mod_init(void)296 static int __init chacha_simd_mod_init(void)
297 {
298 	int err = 0;
299 
300 	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
301 		err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
302 		if (err)
303 			return err;
304 	}
305 
306 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
307 		int i;
308 
309 		switch (read_cpuid_part()) {
310 		case ARM_CPU_PART_CORTEX_A7:
311 		case ARM_CPU_PART_CORTEX_A5:
312 			/*
313 			 * The Cortex-A7 and Cortex-A5 do not perform well with
314 			 * the NEON implementation but do incredibly with the
315 			 * scalar one and use less power.
316 			 */
317 			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
318 				neon_algs[i].base.cra_priority = 0;
319 			break;
320 		default:
321 			static_branch_enable(&use_neon);
322 		}
323 
324 		if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
325 			err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
326 			if (err)
327 				crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
328 		}
329 	}
330 	return err;
331 }
332 
chacha_simd_mod_fini(void)333 static void __exit chacha_simd_mod_fini(void)
334 {
335 	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
336 		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
337 		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
338 			crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
339 	}
340 }
341 
342 module_init(chacha_simd_mod_init);
343 module_exit(chacha_simd_mod_fini);
344 
345 MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
346 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
347 MODULE_LICENSE("GPL v2");
348 MODULE_ALIAS_CRYPTO("chacha20");
349 MODULE_ALIAS_CRYPTO("chacha20-arm");
350 MODULE_ALIAS_CRYPTO("xchacha20");
351 MODULE_ALIAS_CRYPTO("xchacha20-arm");
352 MODULE_ALIAS_CRYPTO("xchacha12");
353 MODULE_ALIAS_CRYPTO("xchacha12-arm");
354 #ifdef CONFIG_KERNEL_MODE_NEON
355 MODULE_ALIAS_CRYPTO("chacha20-neon");
356 MODULE_ALIAS_CRYPTO("xchacha20-neon");
357 MODULE_ALIAS_CRYPTO("xchacha12-neon");
358 #endif
359