/* SPDX-License-Identifier: GPL-2.0 */
/*
 * ChaCha and HChaCha functions (ARM optimized)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
                                      u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
                                       u8 *dst, const u8 *src,
                                       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const struct chacha_state *state,
                                  u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
                                   u32 out[HCHACHA_OUT_WORDS], int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
                             const struct chacha_state *state, int nrounds);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

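/*
 * True only when the boot-time check below enabled the NEON path and the
 * current context allows SIMD register use.
 */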
static inline bool neon_usable(void)
{
        return static_branch_likely(&use_neon) && crypto_simd_usable();
}

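/*
 * NEON bulk path: process the input in chunks of up to four ChaCha blocks.
 * The final block (full or partial) is handled by the single-block routine;
 * partial tails are bounced through a stack buffer so that a full block can
 * be read and written safely. The block counter in state->x[12] is advanced
 * for every block consumed.
 */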
static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
                          unsigned int bytes, int nrounds)
{
        u8 buf[CHACHA_BLOCK_SIZE];

        while (bytes > CHACHA_BLOCK_SIZE) {
                unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

                chacha_4block_xor_neon(state, dst, src, nrounds, l);
                bytes -= l;
                src += l;
                dst += l;
                state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
        }
        if (bytes) {
                const u8 *s = src;
                u8 *d = dst;

                if (bytes != CHACHA_BLOCK_SIZE)
                        s = d = memcpy(buf, src, bytes);
                chacha_block_xor_neon(state, d, s, nrounds);
                if (d != dst)
                        memcpy(dst, buf, bytes);
                state->x[12]++;
        }
}

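/*
 * HChaCha: use the NEON implementation when it is enabled and usable in the
 * current context, otherwise fall back to the scalar ARM version.
 */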
static void hchacha_block_arch(const struct chacha_state *state,
                               u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
        if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
                hchacha_block_arm(state, out, nrounds);
        } else {
                kernel_neon_begin();
                hchacha_block_neon(state, out, nrounds);
                kernel_neon_end();
        }
}

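/*
 * ChaCha encryption/decryption entry point: requests of at most one block,
 * and contexts where NEON cannot be used, take the scalar ARM code;
 * everything else goes to the NEON path below.
 */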
static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
                              const u8 *src, unsigned int bytes, int nrounds)
{
        if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
            bytes <= CHACHA_BLOCK_SIZE) {
                chacha_doarm(dst, src, bytes, state, nrounds);
                state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
                return;
        }

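        /*
         * Process at most 4 KiB per kernel_neon_begin()/kernel_neon_end()
         * section so that kernel-mode NEON is never held for long stretches
         * at a time.
         */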
        do {
                unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

                kernel_neon_begin();
                chacha_doneon(state, dst, src, todo, nrounds);
                kernel_neon_end();

                bytes -= todo;
                src += todo;
                dst += todo;
        } while (bytes);
}

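/*
 * Boot-time setup: enable the NEON code paths when the CPU advertises NEON,
 * except on cores where the scalar implementation is the better choice.
 */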
#define chacha_mod_init_arch chacha_mod_init_arch
static void chacha_mod_init_arch(void)
{
        if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
                switch (read_cpuid_part()) {
                case ARM_CPU_PART_CORTEX_A7:
                case ARM_CPU_PART_CORTEX_A5:
                        /*
                         * The Cortex-A7 and Cortex-A5 do not perform well
                         * with the NEON implementation, but do incredibly
                         * well with the scalar one and use less power.
                         */
                        break;
                default:
                        static_branch_enable(&use_neon);
                }
        }
}