1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 4 * 5 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> 6 */ 7 8 #include <asm/hwcap.h> 9 #include <asm/neon.h> 10 #include <asm/simd.h> 11 #include <linux/unaligned.h> 12 #include <crypto/algapi.h> 13 #include <crypto/internal/hash.h> 14 #include <crypto/internal/poly1305.h> 15 #include <crypto/internal/simd.h> 16 #include <linux/cpufeature.h> 17 #include <linux/crypto.h> 18 #include <linux/jump_label.h> 19 #include <linux/module.h> 20 21 asmlinkage void poly1305_init_arm64(void *state, const u8 *key); 22 asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); 23 asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); 24 asmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); 25 26 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); 27 28 void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) 29 { 30 poly1305_init_arm64(&dctx->h, key); 31 dctx->s[0] = get_unaligned_le32(key + 16); 32 dctx->s[1] = get_unaligned_le32(key + 20); 33 dctx->s[2] = get_unaligned_le32(key + 24); 34 dctx->s[3] = get_unaligned_le32(key + 28); 35 dctx->buflen = 0; 36 } 37 EXPORT_SYMBOL(poly1305_init_arch); 38 39 static int neon_poly1305_init(struct shash_desc *desc) 40 { 41 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 42 43 dctx->buflen = 0; 44 dctx->rset = 0; 45 dctx->sset = false; 46 47 return 0; 48 } 49 50 static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, 51 u32 len, u32 hibit, bool do_neon) 52 { 53 if (unlikely(!dctx->sset)) { 54 if (!dctx->rset) { 55 poly1305_init_arm64(&dctx->h, src); 56 src += POLY1305_BLOCK_SIZE; 57 len -= POLY1305_BLOCK_SIZE; 58 dctx->rset = 1; 59 } 60 if (len >= POLY1305_BLOCK_SIZE) { 61 dctx->s[0] = get_unaligned_le32(src + 0); 62 dctx->s[1] = get_unaligned_le32(src + 4); 63 dctx->s[2] = get_unaligned_le32(src + 8); 64 dctx->s[3] = get_unaligned_le32(src + 12); 65 src += POLY1305_BLOCK_SIZE; 66 len -= POLY1305_BLOCK_SIZE; 67 dctx->sset = true; 68 } 69 if (len < POLY1305_BLOCK_SIZE) 70 return; 71 } 72 73 len &= ~(POLY1305_BLOCK_SIZE - 1); 74 75 if (static_branch_likely(&have_neon) && likely(do_neon)) 76 poly1305_blocks_neon(&dctx->h, src, len, hibit); 77 else 78 poly1305_blocks(&dctx->h, src, len, hibit); 79 } 80 81 static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx, 82 const u8 *src, u32 len, bool do_neon) 83 { 84 if (unlikely(dctx->buflen)) { 85 u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); 86 87 memcpy(dctx->buf + dctx->buflen, src, bytes); 88 src += bytes; 89 len -= bytes; 90 dctx->buflen += bytes; 91 92 if (dctx->buflen == POLY1305_BLOCK_SIZE) { 93 neon_poly1305_blocks(dctx, dctx->buf, 94 POLY1305_BLOCK_SIZE, 1, false); 95 dctx->buflen = 0; 96 } 97 } 98 99 if (likely(len >= POLY1305_BLOCK_SIZE)) { 100 neon_poly1305_blocks(dctx, src, len, 1, do_neon); 101 src += round_down(len, POLY1305_BLOCK_SIZE); 102 len %= POLY1305_BLOCK_SIZE; 103 } 104 105 if (unlikely(len)) { 106 dctx->buflen = len; 107 memcpy(dctx->buf, src, len); 108 } 109 } 110 111 static int neon_poly1305_update(struct shash_desc *desc, 112 const u8 *src, unsigned int srclen) 113 { 114 bool do_neon = crypto_simd_usable() && srclen > 128; 115 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 116 117 if (static_branch_likely(&have_neon) && do_neon) 118 kernel_neon_begin(); 119 neon_poly1305_do_update(dctx, src, srclen, do_neon); 120 if (static_branch_likely(&have_neon) && do_neon) 121 kernel_neon_end(); 122 return 0; 123 } 124 125 void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, 126 unsigned int nbytes) 127 { 128 if (unlikely(dctx->buflen)) { 129 u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); 130 131 memcpy(dctx->buf + dctx->buflen, src, bytes); 132 src += bytes; 133 nbytes -= bytes; 134 dctx->buflen += bytes; 135 136 if (dctx->buflen == POLY1305_BLOCK_SIZE) { 137 poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); 138 dctx->buflen = 0; 139 } 140 } 141 142 if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { 143 unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); 144 145 if (static_branch_likely(&have_neon) && crypto_simd_usable()) { 146 do { 147 unsigned int todo = min_t(unsigned int, len, SZ_4K); 148 149 kernel_neon_begin(); 150 poly1305_blocks_neon(&dctx->h, src, todo, 1); 151 kernel_neon_end(); 152 153 len -= todo; 154 src += todo; 155 } while (len); 156 } else { 157 poly1305_blocks(&dctx->h, src, len, 1); 158 src += len; 159 } 160 nbytes %= POLY1305_BLOCK_SIZE; 161 } 162 163 if (unlikely(nbytes)) { 164 dctx->buflen = nbytes; 165 memcpy(dctx->buf, src, nbytes); 166 } 167 } 168 EXPORT_SYMBOL(poly1305_update_arch); 169 170 void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) 171 { 172 if (unlikely(dctx->buflen)) { 173 dctx->buf[dctx->buflen++] = 1; 174 memset(dctx->buf + dctx->buflen, 0, 175 POLY1305_BLOCK_SIZE - dctx->buflen); 176 poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); 177 } 178 179 poly1305_emit(&dctx->h, dst, dctx->s); 180 memzero_explicit(dctx, sizeof(*dctx)); 181 } 182 EXPORT_SYMBOL(poly1305_final_arch); 183 184 static int neon_poly1305_final(struct shash_desc *desc, u8 *dst) 185 { 186 struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 187 188 if (unlikely(!dctx->sset)) 189 return -ENOKEY; 190 191 poly1305_final_arch(dctx, dst); 192 return 0; 193 } 194 195 static struct shash_alg neon_poly1305_alg = { 196 .init = neon_poly1305_init, 197 .update = neon_poly1305_update, 198 .final = neon_poly1305_final, 199 .digestsize = POLY1305_DIGEST_SIZE, 200 .descsize = sizeof(struct poly1305_desc_ctx), 201 202 .base.cra_name = "poly1305", 203 .base.cra_driver_name = "poly1305-neon", 204 .base.cra_priority = 200, 205 .base.cra_blocksize = POLY1305_BLOCK_SIZE, 206 .base.cra_module = THIS_MODULE, 207 }; 208 209 static int __init neon_poly1305_mod_init(void) 210 { 211 if (!cpu_have_named_feature(ASIMD)) 212 return 0; 213 214 static_branch_enable(&have_neon); 215 216 return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? 217 crypto_register_shash(&neon_poly1305_alg) : 0; 218 } 219 220 static void __exit neon_poly1305_mod_exit(void) 221 { 222 if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD)) 223 crypto_unregister_shash(&neon_poly1305_alg); 224 } 225 226 module_init(neon_poly1305_mod_init); 227 module_exit(neon_poly1305_mod_exit); 228 229 MODULE_DESCRIPTION("Poly1305 transform using NEON instructions"); 230 MODULE_LICENSE("GPL v2"); 231 MODULE_ALIAS_CRYPTO("poly1305"); 232 MODULE_ALIAS_CRYPTO("poly1305-neon"); 233