/* SPDX-License-Identifier: GPL-2.0 */
/*
 * ChaCha and HChaCha functions (ARM optimized)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
				      u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const struct chacha_state *state,
				  u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
				   u32 out[HCHACHA_OUT_WORDS], int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const struct chacha_state *state, int nrounds);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}

/*
 * Process @bytes of data with the NEON implementation.  Full blocks are
 * handled four at a time; a trailing partial block is bounced through a
 * stack buffer so the block function always sees a whole block.  Word 12
 * of the state is the block counter and is advanced accordingly.
 */
static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state->x[12]++;
	}
}

static void hchacha_block_arch(const struct chacha_state *state,
			       u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, out, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, out, nrounds);
		kernel_neon_end();
	}
}

static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
			      const u8 *src, unsigned int bytes, int nrounds)
{
	/*
	 * Use the scalar code for short inputs, or when the NEON unit may
	 * not be used from the current context.
	 */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	/*
	 * Process at most 4 KiB per kernel_neon_begin()/kernel_neon_end()
	 * section, so that preemption is not disabled for too long at a
	 * time.
	 */
	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}

#define chacha_mod_init_arch chacha_mod_init_arch
static void chacha_mod_init_arch(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 perform poorly with
			 * the NEON implementation but do very well with the
			 * scalar one, while also using less power.
			 */
			break;
		default:
			static_branch_enable(&use_neon);
		}
	}
}
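
/*
 * Illustrative sketch (not part of this file): how a caller might end up
 * driving the routines above.  This assumes the generic lib/crypto ChaCha
 * API declared in <crypto/chacha.h> (chacha_init() and chacha_crypt());
 * chacha_crypt() dispatches to chacha_crypt_arch() above when this
 * architecture glue is compiled in.  The wrapper function name below is
 * hypothetical, so the example is kept out of the build with #if 0.
 */
#if 0
#include <crypto/chacha.h>
#include <linux/string.h>

static void chacha20_encrypt_example(u8 *dst, const u8 *src,
				     unsigned int len,
				     const u32 key[CHACHA_KEY_SIZE / sizeof(u32)],
				     const u8 iv[CHACHA_IV_SIZE])
{
	struct chacha_state state;

	/* Load the constants, key, block counter, and nonce. */
	chacha_init(&state, key, iv);

	/* XOR the key stream into the data; decryption is identical. */
	chacha_crypt(&state, dst, src, len, 20);

	/* Wipe the cipher state from the stack. */
	memzero_explicit(&state, sizeof(state));
}
#endif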