1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
4 *
5 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 */
7
8 #include <asm/hwcap.h>
9 #include <asm/neon.h>
10 #include <asm/simd.h>
11 #include <linux/unaligned.h>
12 #include <crypto/algapi.h>
13 #include <crypto/internal/hash.h>
14 #include <crypto/internal/poly1305.h>
15 #include <crypto/internal/simd.h>
16 #include <linux/cpufeature.h>
17 #include <linux/crypto.h>
18 #include <linux/jump_label.h>
19 #include <linux/module.h>
20
21 asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
22 asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
23 asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
24 asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
25
26 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
27
poly1305_init_arch(struct poly1305_desc_ctx * dctx,const u8 key[POLY1305_KEY_SIZE])28 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
29 {
30 poly1305_init_arm64(&dctx->h, key);
31 dctx->s[0] = get_unaligned_le32(key + 16);
32 dctx->s[1] = get_unaligned_le32(key + 20);
33 dctx->s[2] = get_unaligned_le32(key + 24);
34 dctx->s[3] = get_unaligned_le32(key + 28);
35 dctx->buflen = 0;
36 }
37 EXPORT_SYMBOL(poly1305_init_arch);
38
neon_poly1305_init(struct shash_desc * desc)39 static int neon_poly1305_init(struct shash_desc *desc)
40 {
41 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
42
43 dctx->buflen = 0;
44 dctx->rset = 0;
45 dctx->sset = false;
46
47 return 0;
48 }
49
/*
 * Strip not-yet-consumed key material from the front of the input.
 *
 * If dctx->rset is clear, the first POLY1305_BLOCK_SIZE bytes are passed to
 * poly1305_init_arm64() and skipped. If a further full block is then
 * available, it is loaded into dctx->s as four little-endian words, skipped,
 * and dctx->sset is set. @src and @len are updated in place.
 *
 * Returns true when at least one full block remains after key handling.
 * The length is not checked before the first block is taken; the callers in
 * this file only invoke the block routine with at least one full block.
 */
static bool neon_poly1305_take_key(struct poly1305_desc_ctx *dctx,
				   const u8 **src, u32 *len)
{
	const u8 *p = *src;
	u32 left = *len;

	if (!dctx->rset) {
		poly1305_init_arm64(&dctx->h, p);
		p += POLY1305_BLOCK_SIZE;
		left -= POLY1305_BLOCK_SIZE;
		dctx->rset = 1;
	}

	if (left >= POLY1305_BLOCK_SIZE) {
		unsigned int i;

		for (i = 0; i < 4; i++)
			dctx->s[i] = get_unaligned_le32(p + 4 * i);
		p += POLY1305_BLOCK_SIZE;
		left -= POLY1305_BLOCK_SIZE;
		dctx->sset = true;
	}

	*src = p;
	*len = left;

	return left >= POLY1305_BLOCK_SIZE;
}

/*
 * Feed whole blocks from @src into the accumulator.
 *
 * While dctx->sset is false the leading input is consumed as key material
 * first; if less than one full block is left afterwards nothing is hashed.
 * Any trailing partial block in @len is ignored here. The NEON routine is
 * used only when the have_neon key is enabled and @do_neon is true;
 * otherwise poly1305_blocks() is called. @hibit is forwarded unchanged.
 */
static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit, bool do_neon)
{
	if (unlikely(!dctx->sset) && !neon_poly1305_take_key(dctx, &src, &len))
		return;

	len = round_down(len, POLY1305_BLOCK_SIZE);

	if (static_branch_likely(&have_neon) && likely(do_neon)) {
		poly1305_blocks_neon(&dctx->h, src, len, hibit);
		return;
	}

	poly1305_blocks(&dctx->h, src, len, hibit);
}
80
/*
 * Absorb @len bytes from @src into @dctx for the shash interface.
 *
 * Three stages:
 *  1. If a partial block is buffered, top it up from @src; once full it is
 *     processed via neon_poly1305_blocks() with do_neon forced to false.
 *  2. All remaining whole blocks are handed to neon_poly1305_blocks() with
 *     the caller's @do_neon.
 *  3. Any leftover (< POLY1305_BLOCK_SIZE) bytes are stashed in dctx->buf.
 */
static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
				    const u8 *src, u32 len, bool do_neon)
{
	if (unlikely(dctx->buflen)) {
		u32 take = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);

		memcpy(&dctx->buf[dctx->buflen], src, take);
		dctx->buflen += take;
		src += take;
		len -= take;

		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			neon_poly1305_blocks(dctx, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1, false);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		u32 tail = len % POLY1305_BLOCK_SIZE;

		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
		/* Skip every whole block, whether it was key or data. */
		src += len - tail;
		len = tail;
	}

	if (unlikely(len)) {
		memcpy(dctx->buf, src, len);
		dctx->buflen = len;
	}
}
110
neon_poly1305_update(struct shash_desc * desc,const u8 * src,unsigned int srclen)111 static int neon_poly1305_update(struct shash_desc *desc,
112 const u8 *src, unsigned int srclen)
113 {
114 bool do_neon = crypto_simd_usable() && srclen > 128;
115 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
116
117 if (static_branch_likely(&have_neon) && do_neon)
118 kernel_neon_begin();
119 neon_poly1305_do_update(dctx, src, srclen, do_neon);
120 if (static_branch_likely(&have_neon) && do_neon)
121 kernel_neon_end();
122 return 0;
123 }
124
poly1305_update_arch(struct poly1305_desc_ctx * dctx,const u8 * src,unsigned int nbytes)125 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
126 unsigned int nbytes)
127 {
128 if (unlikely(dctx->buflen)) {
129 u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
130
131 memcpy(dctx->buf + dctx->buflen, src, bytes);
132 src += bytes;
133 nbytes -= bytes;
134 dctx->buflen += bytes;
135
136 if (dctx->buflen == POLY1305_BLOCK_SIZE) {
137 poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
138 dctx->buflen = 0;
139 }
140 }
141
142 if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
143 unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
144
145 if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
146 do {
147 unsigned int todo = min_t(unsigned int, len, SZ_4K);
148
149 kernel_neon_begin();
150 poly1305_blocks_neon(&dctx->h, src, todo, 1);
151 kernel_neon_end();
152
153 len -= todo;
154 src += todo;
155 } while (len);
156 } else {
157 poly1305_blocks(&dctx->h, src, len, 1);
158 src += len;
159 }
160 nbytes %= POLY1305_BLOCK_SIZE;
161 }
162
163 if (unlikely(nbytes)) {
164 dctx->buflen = nbytes;
165 memcpy(dctx->buf, src, nbytes);
166 }
167 }
168 EXPORT_SYMBOL(poly1305_update_arch);
169
/*
 * Library entry point: finish the computation and write the tag to @dst.
 *
 * If bytes are still buffered, a single 1 byte is appended, the rest of the
 * block is zero-filled, and the block is processed with hibit = 0. The tag
 * is then produced by poly1305_emit() using dctx->s, and the whole context
 * is wiped with memzero_explicit().
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	if (unlikely(dctx->buflen)) {
		u8 *pad = dctx->buf + dctx->buflen;

		*pad++ = 1;
		dctx->buflen++;
		memset(pad, 0, POLY1305_BLOCK_SIZE - dctx->buflen);

		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit(&dctx->h, dst, dctx->s);
	memzero_explicit(dctx, sizeof(*dctx));
}
EXPORT_SYMBOL(poly1305_final_arch);
183
/*
 * shash .final hook.
 *
 * Returns -ENOKEY if dctx->sset was never set, i.e. the input did not
 * supply the full key material; otherwise finalizes through
 * poly1305_final_arch() and returns 0.
 */
static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (likely(dctx->sset)) {
		poly1305_final_arch(dctx, dst);
		return 0;
	}

	return -ENOKEY;
}
194
195 static struct shash_alg neon_poly1305_alg = {
196 .init = neon_poly1305_init,
197 .update = neon_poly1305_update,
198 .final = neon_poly1305_final,
199 .digestsize = POLY1305_DIGEST_SIZE,
200 .descsize = sizeof(struct poly1305_desc_ctx),
201
202 .base.cra_name = "poly1305",
203 .base.cra_driver_name = "poly1305-neon",
204 .base.cra_priority = 200,
205 .base.cra_blocksize = POLY1305_BLOCK_SIZE,
206 .base.cra_module = THIS_MODULE,
207 };
208
neon_poly1305_mod_init(void)209 static int __init neon_poly1305_mod_init(void)
210 {
211 if (!cpu_have_named_feature(ASIMD))
212 return 0;
213
214 static_branch_enable(&have_neon);
215
216 return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
217 crypto_register_shash(&neon_poly1305_alg) : 0;
218 }
219
neon_poly1305_mod_exit(void)220 static void __exit neon_poly1305_mod_exit(void)
221 {
222 if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
223 crypto_unregister_shash(&neon_poly1305_alg);
224 }
225
226 module_init(neon_poly1305_mod_init);
227 module_exit(neon_poly1305_mod_exit);
228
229 MODULE_DESCRIPTION("Poly1305 transform using NEON instructions");
230 MODULE_LICENSE("GPL v2");
231 MODULE_ALIAS_CRYPTO("poly1305");
232 MODULE_ALIAS_CRYPTO("poly1305-neon");
233