1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
4 *
5 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 */
7
8 #include <asm/hwcap.h>
9 #include <asm/neon.h>
10 #include <asm/simd.h>
11 #include <linux/unaligned.h>
12 #include <crypto/algapi.h>
13 #include <crypto/internal/hash.h>
14 #include <crypto/internal/poly1305.h>
15 #include <crypto/internal/simd.h>
16 #include <linux/cpufeature.h>
17 #include <linux/crypto.h>
18 #include <linux/jump_label.h>
19 #include <linux/module.h>
20
21 void poly1305_init_arm(void *state, const u8 *key);
22 void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
23 void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
24 void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce);
25
poly1305_blocks_neon(void * state,const u8 * src,u32 len,u32 hibit)26 void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
27 {
28 }
29
/*
 * Static key selecting the NEON code paths. It starts out false and is
 * enabled by arm_poly1305_mod_init() when CONFIG_KERNEL_MODE_NEON is set and
 * elf_hwcap reports HWCAP_NEON; it is placed in __ro_after_init data.
 */
static DEFINE_STATIC_KEY_FALSE_RO(have_neon);
31
poly1305_init_arch(struct poly1305_desc_ctx * dctx,const u8 key[POLY1305_KEY_SIZE])32 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
33 {
34 poly1305_init_arm(&dctx->h, key);
35 dctx->s[0] = get_unaligned_le32(key + 16);
36 dctx->s[1] = get_unaligned_le32(key + 20);
37 dctx->s[2] = get_unaligned_le32(key + 24);
38 dctx->s[3] = get_unaligned_le32(key + 28);
39 dctx->buflen = 0;
40 }
41 EXPORT_SYMBOL(poly1305_init_arch);
42
arm_poly1305_init(struct shash_desc * desc)43 static int arm_poly1305_init(struct shash_desc *desc)
44 {
45 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
46
47 dctx->buflen = 0;
48 dctx->rset = 0;
49 dctx->sset = false;
50
51 return 0;
52 }
53
/*
 * Feed whole blocks from @src into the Poly1305 state of @dctx.
 *
 * While dctx->sset is false the leading blocks are consumed as key material
 * instead of message data: the first block is passed to poly1305_init_arm()
 * (and rset is set), the following block is loaded into dctx->s[] as four
 * little-endian words (and sset is set). Whatever remains is rounded down to
 * a multiple of POLY1305_BLOCK_SIZE and handed to the NEON routine when the
 * have_neon key is enabled and @do_neon is true, otherwise to the scalar one.
 *
 * @len is not checked before the first block is subtracted; both callers in
 * this file pass at least POLY1305_BLOCK_SIZE bytes.
 */
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset)) {
		if (!dctx->rset) {
			poly1305_init_arm(&dctx->h, src);
			dctx->rset = 1;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		if (len >= POLY1305_BLOCK_SIZE) {
			unsigned int i;

			for (i = 0; i < 4; i++)
				dctx->s[i] = get_unaligned_le32(src + 4 * i);
			dctx->sset = true;
			src += POLY1305_BLOCK_SIZE;
			len -= POLY1305_BLOCK_SIZE;
		}

		/* Nothing but (possibly partial) key material so far. */
		if (len < POLY1305_BLOCK_SIZE)
			return;
	}

	len = round_down(len, POLY1305_BLOCK_SIZE);

	if (static_branch_likely(&have_neon) && likely(do_neon)) {
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
		return;
	}

	poly1305_blocks_arm(&dctx->h, src, len, hibit);
}
84
/*
 * Common update path for the shash handlers.
 *
 * Works in three steps: top up and flush a partially filled dctx->buf (that
 * single block never uses NEON), process all whole blocks straight from
 * @src, then stash any trailing bytes in dctx->buf for a later call.
 */
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				   const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 room = POLY1305_BLOCK_SIZE - dctx->buflen;
		u32 take = min(len, room);

		memcpy(dctx->buf + dctx->buflen, src, take);
		dctx->buflen += take;
		src += take;
		len -= take;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			arm_poly1305_blocks(dctx, dctx->buf,
					    POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		u32 whole = round_down(len, POLY1305_BLOCK_SIZE);

		/* The callee rounds @len down itself; pass it unchanged. */
		arm_poly1305_blocks(dctx, src, len, 1, do_neon);
		src += whole;
		len -= whole;
	}

	if (unlikely(len)) {
		memcpy(dctx->buf, src, len);
		dctx->buflen = len;
	}
}
114
/*
 * shash .update handler for the scalar driver: forwards to
 * arm_poly1305_do_update() with NEON disabled. Always returns 0.
 */
static int arm_poly1305_update(struct shash_desc *desc,
			       const u8 *src, unsigned int srclen)
{
	arm_poly1305_do_update(shash_desc_ctx(desc), src, srclen, false);

	return 0;
}
123
arm_poly1305_update_neon(struct shash_desc * desc,const u8 * src,unsigned int srclen)124 static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
125 const u8 *src,
126 unsigned int srclen)
127 {
128 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
129 bool do_neon = crypto_simd_usable() && srclen > 128;
130
131 if (static_branch_likely(&have_neon) && do_neon)
132 kernel_neon_begin();
133 arm_poly1305_do_update(dctx, src, srclen, do_neon);
134 if (static_branch_likely(&have_neon) && do_neon)
135 kernel_neon_end();
136 return 0;
137 }
138
poly1305_update_arch(struct poly1305_desc_ctx * dctx,const u8 * src,unsigned int nbytes)139 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
140 unsigned int nbytes)
141 {
142 bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
143 crypto_simd_usable();
144
145 if (unlikely(dctx->buflen)) {
146 u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
147
148 memcpy(dctx->buf + dctx->buflen, src, bytes);
149 src += bytes;
150 nbytes -= bytes;
151 dctx->buflen += bytes;
152
153 if (dctx->buflen == POLY1305_BLOCK_SIZE) {
154 poly1305_blocks_arm(&dctx->h, dctx->buf,
155 POLY1305_BLOCK_SIZE, 1);
156 dctx->buflen = 0;
157 }
158 }
159
160 if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
161 unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
162
163 if (static_branch_likely(&have_neon) && do_neon) {
164 do {
165 unsigned int todo = min_t(unsigned int, len, SZ_4K);
166
167 kernel_neon_begin();
168 poly1305_blocks_neon(&dctx->h, src, todo, 1);
169 kernel_neon_end();
170
171 len -= todo;
172 src += todo;
173 } while (len);
174 } else {
175 poly1305_blocks_arm(&dctx->h, src, len, 1);
176 src += len;
177 }
178 nbytes %= POLY1305_BLOCK_SIZE;
179 }
180
181 if (unlikely(nbytes)) {
182 dctx->buflen = nbytes;
183 memcpy(dctx->buf, src, nbytes);
184 }
185 }
186 EXPORT_SYMBOL(poly1305_update_arch);
187
/*
 * poly1305_final_arch() - finish the computation and write the digest.
 * @dctx: context holding the accumulated state
 * @dst:  output buffer passed to poly1305_emit_arm() as the digest pointer
 *
 * Any buffered partial block gets a single 1 byte appended, is zero-padded
 * to POLY1305_BLOCK_SIZE and processed with hibit = 0. The digest is then
 * produced by poly1305_emit_arm() with dctx->s as its nonce argument, and
 * the whole context is overwritten with zeroes.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		unsigned int pad_from = dctx->buflen + 1;

		dctx->buf[dctx->buflen] = 1;
		dctx->buflen = pad_from;
		memset(dctx->buf + pad_from, 0, POLY1305_BLOCK_SIZE - pad_from);

		poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_arm(&dctx->h, dst, dctx->s);

	/* Clear the state, key words and buffered data. */
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
201
/*
 * shash .final handler: returns -ENOKEY if the second key half was never
 * seen (dctx->sset is false); otherwise finishes via poly1305_final_arch()
 * and returns 0.
 */
static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *ctx = shash_desc_ctx(desc);

	if (likely(ctx->sset)) {
		poly1305_final_arch(ctx, dst);
		return 0;
	}

	return -ENOKEY;
}
212
213 static struct shash_alg arm_poly1305_algs[] = {{
214 .init = arm_poly1305_init,
215 .update = arm_poly1305_update,
216 .final = arm_poly1305_final,
217 .digestsize = POLY1305_DIGEST_SIZE,
218 .descsize = sizeof(struct poly1305_desc_ctx),
219
220 .base.cra_name = "poly1305",
221 .base.cra_driver_name = "poly1305-arm",
222 .base.cra_priority = 150,
223 .base.cra_blocksize = POLY1305_BLOCK_SIZE,
224 .base.cra_module = THIS_MODULE,
225 #ifdef CONFIG_KERNEL_MODE_NEON
226 }, {
227 .init = arm_poly1305_init,
228 .update = arm_poly1305_update_neon,
229 .final = arm_poly1305_final,
230 .digestsize = POLY1305_DIGEST_SIZE,
231 .descsize = sizeof(struct poly1305_desc_ctx),
232
233 .base.cra_name = "poly1305",
234 .base.cra_driver_name = "poly1305-neon",
235 .base.cra_priority = 200,
236 .base.cra_blocksize = POLY1305_BLOCK_SIZE,
237 .base.cra_module = THIS_MODULE,
238 #endif
239 }};
240
arm_poly1305_mod_init(void)241 static int __init arm_poly1305_mod_init(void)
242 {
243 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
244 (elf_hwcap & HWCAP_NEON))
245 static_branch_enable(&have_neon);
246 else if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
247 /* register only the first entry */
248 return crypto_register_shash(&arm_poly1305_algs[0]);
249
250 return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
251 crypto_register_shashes(arm_poly1305_algs,
252 ARRAY_SIZE(arm_poly1305_algs)) : 0;
253 }
254
arm_poly1305_mod_exit(void)255 static void __exit arm_poly1305_mod_exit(void)
256 {
257 if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
258 return;
259 if (!static_branch_likely(&have_neon)) {
260 crypto_unregister_shash(&arm_poly1305_algs[0]);
261 return;
262 }
263 crypto_unregister_shashes(arm_poly1305_algs,
264 ARRAY_SIZE(arm_poly1305_algs));
265 }
266
267 module_init(arm_poly1305_mod_init);
268 module_exit(arm_poly1305_mod_exit);
269
270 MODULE_DESCRIPTION("Accelerated Poly1305 transform for ARM");
271 MODULE_LICENSE("GPL v2");
272 MODULE_ALIAS_CRYPTO("poly1305");
273 MODULE_ALIAS_CRYPTO("poly1305-arm");
274 MODULE_ALIAS_CRYPTO("poly1305-neon");
275