1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM 4 * 5 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> 6 */ 7 8 #include <asm/hwcap.h> 9 #include <asm/neon.h> 10 #include <asm/simd.h> 11 #include <linux/unaligned.h> 12 #include <crypto/algapi.h> 13 #include <crypto/internal/hash.h> 14 #include <crypto/internal/poly1305.h> 15 #include <crypto/internal/simd.h> 16 #include <linux/cpufeature.h> 17 #include <linux/crypto.h> 18 #include <linux/jump_label.h> 19 #include <linux/module.h> 20 21 void poly1305_init_arm(void *state, const u8 *key); 22 void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); 23 void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); 24 void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); 25 26 void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) 27 { 28 } 29 30 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); 31 32 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) 33 { 34 poly1305_init_arm(&dctx->h, key); 35 dctx->s[0] = get_unaligned_le32(key + 16); 36 dctx->s[1] = get_unaligned_le32(key + 20); 37 dctx->s[2] = get_unaligned_le32(key + 24); 38 dctx->s[3] = get_unaligned_le32(key + 28); 39 dctx->buflen = 0; 40 } 41 EXPORT_SYMBOL(poly1305_init_arch); 42 43 static int arm_poly1305_init(struct shash_desc *desc) 44 { 45 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 46 47 dctx->buflen = 0; 48 dctx->rset = 0; 49 dctx->sset = false; 50 51 return 0; 52 } 53 54 static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, 55 u32 len, u32 hibit, bool do_neon) 56 { 57 if (unlikely(!dctx->sset)) { 58 if (!dctx->rset) { 59 poly1305_init_arm(&dctx->h, src); 60 src += POLY1305_BLOCK_SIZE; 61 len -= POLY1305_BLOCK_SIZE; 62 dctx->rset = 1; 63 } 64 if (len >= POLY1305_BLOCK_SIZE) { 65 dctx->s[0] = get_unaligned_le32(src + 0); 66 dctx->s[1] = get_unaligned_le32(src + 4); 67 dctx->s[2] = get_unaligned_le32(src + 8); 68 dctx->s[3] = get_unaligned_le32(src + 12); 69 src += POLY1305_BLOCK_SIZE; 70 len -= POLY1305_BLOCK_SIZE; 71 dctx->sset = true; 72 } 73 if (len < POLY1305_BLOCK_SIZE) 74 return; 75 } 76 77 len &= ~(POLY1305_BLOCK_SIZE - 1); 78 79 if (static_branch_likely(&have_neon) && likely(do_neon)) 80 poly1305_blocks_neon(&dctx->h, src, len, hibit); 81 else 82 poly1305_blocks_arm(&dctx->h, src, len, hibit); 83 } 84 85 static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, 86 const u8 *src, u32 len, bool do_neon) 87 { 88 if (unlikely(dctx->buflen)) { 89 u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); 90 91 memcpy(dctx->buf + dctx->buflen, src, bytes); 92 src += bytes; 93 len -= bytes; 94 dctx->buflen += bytes; 95 96 if (dctx->buflen == POLY1305_BLOCK_SIZE) { 97 arm_poly1305_blocks(dctx, dctx->buf, 98 POLY1305_BLOCK_SIZE, 1, false); 99 dctx->buflen = 0; 100 } 101 } 102 103 if (likely(len >= POLY1305_BLOCK_SIZE)) { 104 arm_poly1305_blocks(dctx, src, len, 1, do_neon); 105 src += round_down(len, POLY1305_BLOCK_SIZE); 106 len %= POLY1305_BLOCK_SIZE; 107 } 108 109 if (unlikely(len)) { 110 dctx->buflen = len; 111 memcpy(dctx->buf, src, len); 112 } 113 } 114 115 static int arm_poly1305_update(struct shash_desc *desc, 116 const u8 *src, unsigned int srclen) 117 { 118 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 119 120 arm_poly1305_do_update(dctx, src, srclen, false); 121 return 0; 122 } 123 124 static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, 125 const u8 *src, 126 unsigned int srclen) 127 { 128 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 129 bool do_neon = crypto_simd_usable() && srclen > 128; 130 131 if (static_branch_likely(&have_neon) && do_neon) 132 kernel_neon_begin(); 133 arm_poly1305_do_update(dctx, src, srclen, do_neon); 134 if (static_branch_likely(&have_neon) && do_neon) 135 kernel_neon_end(); 136 return 0; 137 } 138 139 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, 140 unsigned int nbytes) 141 { 142 bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && 143 crypto_simd_usable(); 144 145 if (unlikely(dctx->buflen)) { 146 u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); 147 148 memcpy(dctx->buf + dctx->buflen, src, bytes); 149 src += bytes; 150 nbytes -= bytes; 151 dctx->buflen += bytes; 152 153 if (dctx->buflen == POLY1305_BLOCK_SIZE) { 154 poly1305_blocks_arm(&dctx->h, dctx->buf, 155 POLY1305_BLOCK_SIZE, 1); 156 dctx->buflen = 0; 157 } 158 } 159 160 if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { 161 unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); 162 163 if (static_branch_likely(&have_neon) && do_neon) { 164 do { 165 unsigned int todo = min_t(unsigned int, len, SZ_4K); 166 167 kernel_neon_begin(); 168 poly1305_blocks_neon(&dctx->h, src, todo, 1); 169 kernel_neon_end(); 170 171 len -= todo; 172 src += todo; 173 } while (len); 174 } else { 175 poly1305_blocks_arm(&dctx->h, src, len, 1); 176 src += len; 177 } 178 nbytes %= POLY1305_BLOCK_SIZE; 179 } 180 181 if (unlikely(nbytes)) { 182 dctx->buflen = nbytes; 183 memcpy(dctx->buf, src, nbytes); 184 } 185 } 186 EXPORT_SYMBOL(poly1305_update_arch); 187 188 void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) 189 { 190 if (unlikely(dctx->buflen)) { 191 dctx->buf[dctx->buflen++] = 1; 192 memset(dctx->buf + dctx->buflen, 0, 193 POLY1305_BLOCK_SIZE - dctx->buflen); 194 poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); 195 } 196 197 poly1305_emit_arm(&dctx->h, dst, dctx->s); 198 *dctx = (struct poly1305_desc_ctx){}; 199 } 200 EXPORT_SYMBOL(poly1305_final_arch); 201 202 static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) 203 { 204 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 205 206 if (unlikely(!dctx->sset)) 207 return -ENOKEY; 208 209 poly1305_final_arch(dctx, dst); 210 return 0; 211 } 212 213 static struct shash_alg arm_poly1305_algs[] = {{ 214 .init = arm_poly1305_init, 215 .update = arm_poly1305_update, 216 .final = arm_poly1305_final, 217 .digestsize = POLY1305_DIGEST_SIZE, 218 .descsize = sizeof(struct poly1305_desc_ctx), 219 220 .base.cra_name = "poly1305", 221 .base.cra_driver_name = "poly1305-arm", 222 .base.cra_priority = 150, 223 .base.cra_blocksize = POLY1305_BLOCK_SIZE, 224 .base.cra_module = THIS_MODULE, 225 #ifdef CONFIG_KERNEL_MODE_NEON 226 }, { 227 .init = arm_poly1305_init, 228 .update = arm_poly1305_update_neon, 229 .final = arm_poly1305_final, 230 .digestsize = POLY1305_DIGEST_SIZE, 231 .descsize = sizeof(struct poly1305_desc_ctx), 232 233 .base.cra_name = "poly1305", 234 .base.cra_driver_name = "poly1305-neon", 235 .base.cra_priority = 200, 236 .base.cra_blocksize = POLY1305_BLOCK_SIZE, 237 .base.cra_module = THIS_MODULE, 238 #endif 239 }}; 240 241 static int __init arm_poly1305_mod_init(void) 242 { 243 if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && 244 (elf_hwcap & HWCAP_NEON)) 245 static_branch_enable(&have_neon); 246 else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) 247 /* register only the first entry */ 248 return crypto_register_shash(&arm_poly1305_algs[0]); 249 250 return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? 251 crypto_register_shashes(arm_poly1305_algs, 252 ARRAY_SIZE(arm_poly1305_algs)) : 0; 253 } 254 255 static void __exit arm_poly1305_mod_exit(void) 256 { 257 if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) 258 return; 259 if (!static_branch_likely(&have_neon)) { 260 crypto_unregister_shash(&arm_poly1305_algs[0]); 261 return; 262 } 263 crypto_unregister_shashes(arm_poly1305_algs, 264 ARRAY_SIZE(arm_poly1305_algs)); 265 } 266 267 module_init(arm_poly1305_mod_init); 268 module_exit(arm_poly1305_mod_exit); 269 270 MODULE_DESCRIPTION("Accelerated Poly1305 transform for ARM"); 271 MODULE_LICENSE("GPL v2"); 272 MODULE_ALIAS_CRYPTO("poly1305"); 273 MODULE_ALIAS_CRYPTO("poly1305-arm"); 274 MODULE_ALIAS_CRYPTO("poly1305-neon"); 275