/* SPDX-License-Identifier: GPL-2.0 */
/*
 * ChaCha and HChaCha functions (ARM optimized)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
				      u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const struct chacha_state *state,
				  u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
				   u32 out[HCHACHA_OUT_WORDS], int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const struct chacha_state *state, int nrounds);

/* Enabled at init time when NEON is present and expected to be a win. */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}

static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	/* Process up to four blocks at a time with the NEON 4-block kernel. */
	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	/* Handle the final block, using a bounce buffer if it is partial. */
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state->x[12]++;
	}
}

static void hchacha_block_arch(const struct chacha_state *state,
			       u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, out, nrounds);
	} else {
		scoped_ksimd()
			hchacha_block_neon(state, out, nrounds);
	}
}

static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
			      const u8 *src, unsigned int bytes, int nrounds)
{
	/* Fall back to the scalar code when NEON is unusable or not worth it. */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	do {
		/* Process at most 4 KiB per ksimd section. */
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		scoped_ksimd()
			chacha_doneon(state, dst, src, todo, nrounds);

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}

#define chacha_mod_init_arch chacha_mod_init_arch
static void chacha_mod_init_arch(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well with
			 * the NEON implementation but do incredibly well with
			 * the scalar one and use less power.
			 */
			break;
		default:
			static_branch_enable(&use_neon);
		}
	}
}
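
/*
 * Illustrative usage sketch (not part of the original file): how a kernel
 * caller might reach the arch hooks above through the generic ChaCha library
 * interface in <crypto/chacha.h>, which dispatches to chacha_crypt_arch()
 * when this file is built in. The function chacha20_example() and its
 * parameters are hypothetical; chacha_init() and chacha_crypt() are assumed
 * to have the lib/crypto signatures shown here. Kept under #if 0 so it is
 * never compiled.
 */
#if 0
#include <crypto/chacha.h>

static void chacha20_example(const u32 *key, const u8 iv[CHACHA_IV_SIZE],
			     u8 *dst, const u8 *src, unsigned int len)
{
	struct chacha_state state;

	chacha_init(&state, key, iv);			/* load key, counter and nonce */
	chacha_crypt(&state, dst, src, len, 20);	/* 20-round ChaCha, XOR into dst */
}
#endif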