1*61f86c70SEric Biggers /* 2*61f86c70SEric Biggers * ChaCha and HChaCha functions (ARM64 optimized) 3*61f86c70SEric Biggers * 4*61f86c70SEric Biggers * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> 5*61f86c70SEric Biggers * 6*61f86c70SEric Biggers * This program is free software; you can redistribute it and/or modify 7*61f86c70SEric Biggers * it under the terms of the GNU General Public License version 2 as 8*61f86c70SEric Biggers * published by the Free Software Foundation. 9*61f86c70SEric Biggers * 10*61f86c70SEric Biggers * Based on: 11*61f86c70SEric Biggers * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code 12*61f86c70SEric Biggers * 13*61f86c70SEric Biggers * Copyright (C) 2015 Martin Willi 14*61f86c70SEric Biggers * 15*61f86c70SEric Biggers * This program is free software; you can redistribute it and/or modify 16*61f86c70SEric Biggers * it under the terms of the GNU General Public License as published by 17*61f86c70SEric Biggers * the Free Software Foundation; either version 2 of the License, or 18*61f86c70SEric Biggers * (at your option) any later version. 19*61f86c70SEric Biggers */ 20*61f86c70SEric Biggers 21*61f86c70SEric Biggers #include <crypto/chacha.h> 22*61f86c70SEric Biggers #include <crypto/internal/simd.h> 23*61f86c70SEric Biggers #include <linux/jump_label.h> 24*61f86c70SEric Biggers #include <linux/kernel.h> 25*61f86c70SEric Biggers #include <linux/module.h> 26*61f86c70SEric Biggers 27*61f86c70SEric Biggers #include <asm/hwcap.h> 28*61f86c70SEric Biggers #include <asm/neon.h> 29*61f86c70SEric Biggers #include <asm/simd.h> 30*61f86c70SEric Biggers 31*61f86c70SEric Biggers asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, 32*61f86c70SEric Biggers u8 *dst, const u8 *src, int nrounds); 33*61f86c70SEric Biggers asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, 34*61f86c70SEric Biggers u8 *dst, const u8 *src, 35*61f86c70SEric Biggers int nrounds, int bytes); 36*61f86c70SEric Biggers asmlinkage void hchacha_block_neon(const struct chacha_state *state, 37*61f86c70SEric Biggers u32 out[HCHACHA_OUT_WORDS], int nrounds); 38*61f86c70SEric Biggers 39*61f86c70SEric Biggers static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); 40*61f86c70SEric Biggers 41*61f86c70SEric Biggers static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, 42*61f86c70SEric Biggers int bytes, int nrounds) 43*61f86c70SEric Biggers { 44*61f86c70SEric Biggers while (bytes > 0) { 45*61f86c70SEric Biggers int l = min(bytes, CHACHA_BLOCK_SIZE * 5); 46*61f86c70SEric Biggers 47*61f86c70SEric Biggers if (l <= CHACHA_BLOCK_SIZE) { 48*61f86c70SEric Biggers u8 buf[CHACHA_BLOCK_SIZE]; 49*61f86c70SEric Biggers 50*61f86c70SEric Biggers memcpy(buf, src, l); 51*61f86c70SEric Biggers chacha_block_xor_neon(state, buf, buf, nrounds); 52*61f86c70SEric Biggers memcpy(dst, buf, l); 53*61f86c70SEric Biggers state->x[12] += 1; 54*61f86c70SEric Biggers break; 55*61f86c70SEric Biggers } 56*61f86c70SEric Biggers chacha_4block_xor_neon(state, dst, src, nrounds, l); 57*61f86c70SEric Biggers bytes -= l; 58*61f86c70SEric Biggers src += l; 59*61f86c70SEric Biggers dst += l; 60*61f86c70SEric Biggers state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); 61*61f86c70SEric Biggers } 62*61f86c70SEric Biggers } 63*61f86c70SEric Biggers 64*61f86c70SEric Biggers void hchacha_block_arch(const struct chacha_state *state, 65*61f86c70SEric Biggers u32 out[HCHACHA_OUT_WORDS], int nrounds) 66*61f86c70SEric Biggers { 67*61f86c70SEric Biggers if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { 68*61f86c70SEric Biggers hchacha_block_generic(state, out, nrounds); 69*61f86c70SEric Biggers } else { 70*61f86c70SEric Biggers kernel_neon_begin(); 71*61f86c70SEric Biggers hchacha_block_neon(state, out, nrounds); 72*61f86c70SEric Biggers kernel_neon_end(); 73*61f86c70SEric Biggers } 74*61f86c70SEric Biggers } 75*61f86c70SEric Biggers EXPORT_SYMBOL(hchacha_block_arch); 76*61f86c70SEric Biggers 77*61f86c70SEric Biggers void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, 78*61f86c70SEric Biggers unsigned int bytes, int nrounds) 79*61f86c70SEric Biggers { 80*61f86c70SEric Biggers if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || 81*61f86c70SEric Biggers !crypto_simd_usable()) 82*61f86c70SEric Biggers return chacha_crypt_generic(state, dst, src, bytes, nrounds); 83*61f86c70SEric Biggers 84*61f86c70SEric Biggers do { 85*61f86c70SEric Biggers unsigned int todo = min_t(unsigned int, bytes, SZ_4K); 86*61f86c70SEric Biggers 87*61f86c70SEric Biggers kernel_neon_begin(); 88*61f86c70SEric Biggers chacha_doneon(state, dst, src, todo, nrounds); 89*61f86c70SEric Biggers kernel_neon_end(); 90*61f86c70SEric Biggers 91*61f86c70SEric Biggers bytes -= todo; 92*61f86c70SEric Biggers src += todo; 93*61f86c70SEric Biggers dst += todo; 94*61f86c70SEric Biggers } while (bytes); 95*61f86c70SEric Biggers } 96*61f86c70SEric Biggers EXPORT_SYMBOL(chacha_crypt_arch); 97*61f86c70SEric Biggers 98*61f86c70SEric Biggers bool chacha_is_arch_optimized(void) 99*61f86c70SEric Biggers { 100*61f86c70SEric Biggers return static_key_enabled(&have_neon); 101*61f86c70SEric Biggers } 102*61f86c70SEric Biggers EXPORT_SYMBOL(chacha_is_arch_optimized); 103*61f86c70SEric Biggers 104*61f86c70SEric Biggers static int __init chacha_simd_mod_init(void) 105*61f86c70SEric Biggers { 106*61f86c70SEric Biggers if (cpu_have_named_feature(ASIMD)) 107*61f86c70SEric Biggers static_branch_enable(&have_neon); 108*61f86c70SEric Biggers return 0; 109*61f86c70SEric Biggers } 110*61f86c70SEric Biggers subsys_initcall(chacha_simd_mod_init); 111*61f86c70SEric Biggers 112*61f86c70SEric Biggers static void __exit chacha_simd_mod_exit(void) 113*61f86c70SEric Biggers { 114*61f86c70SEric Biggers } 115*61f86c70SEric Biggers module_exit(chacha_simd_mod_exit); 116*61f86c70SEric Biggers 117*61f86c70SEric Biggers MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM64 optimized)"); 118*61f86c70SEric Biggers MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 119*61f86c70SEric Biggers MODULE_LICENSE("GPL v2"); 120