// SPDX-License-Identifier: GPL-2.0
/*
 * ChaCha and HChaCha functions (ARM optimized)
 *
 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2015 Martin Willi
 */

#include <crypto/chacha.h>
#include <crypto/internal/simd.h>
#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
				      u8 *dst, const u8 *src, int nrounds);
asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
				       u8 *dst, const u8 *src,
				       int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const struct chacha_state *state,
				  u32 out[HCHACHA_OUT_WORDS], int nrounds);
asmlinkage void hchacha_block_neon(const struct chacha_state *state,
				   u32 out[HCHACHA_OUT_WORDS], int nrounds);

asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
			     const struct chacha_state *state, int nrounds);

static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);

static inline bool neon_usable(void)
{
	return static_branch_likely(&use_neon) && crypto_simd_usable();
}

static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	/* Bulk path: process up to four blocks per call. */
	while (bytes > CHACHA_BLOCK_SIZE) {
		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= l;
		src += l;
		dst += l;
		state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
	}
	/* Final full or partial block; partial data bounces through buf. */
	if (bytes) {
		const u8 *s = src;
		u8 *d = dst;

		if (bytes != CHACHA_BLOCK_SIZE)
			s = d = memcpy(buf, src, bytes);
		chacha_block_xor_neon(state, d, s, nrounds);
		if (d != dst)
			memcpy(dst, buf, bytes);
		state->x[12]++;
	}
}

void hchacha_block_arch(const struct chacha_state *state,
			u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
		hchacha_block_arm(state, out, nrounds);
	} else {
		kernel_neon_begin();
		hchacha_block_neon(state, out, nrounds);
		kernel_neon_end();
	}
}
EXPORT_SYMBOL(hchacha_block_arch);

void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
		       unsigned int bytes, int nrounds)
{
	/* Use the scalar code for short inputs or when NEON is unusable. */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
	    bytes <= CHACHA_BLOCK_SIZE) {
		chacha_doarm(dst, src, bytes, state, nrounds);
		state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
		return;
	}

	/* Split the input into page-sized chunks to keep each NEON section short. */
	do {
		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

		kernel_neon_begin();
		chacha_doneon(state, dst, src, todo, nrounds);
		kernel_neon_end();

		bytes -= todo;
		src += todo;
		dst += todo;
	} while (bytes);
}
EXPORT_SYMBOL(chacha_crypt_arch);

bool chacha_is_arch_optimized(void)
{
	/* We can always use at least the ARM scalar implementation. */
	return true;
}
EXPORT_SYMBOL(chacha_is_arch_optimized);

static int __init chacha_arm_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
		switch (read_cpuid_part()) {
		case ARM_CPU_PART_CORTEX_A7:
		case ARM_CPU_PART_CORTEX_A5:
			/*
			 * The Cortex-A7 and Cortex-A5 do not perform well with
			 * the NEON implementation but do incredibly well with
			 * the scalar one and use less power.
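			 *
			 * Leaving the static key disabled here means
			 * neon_usable() stays false, so these cores
			 * always take the scalar code paths.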
			 */
			break;
		default:
			static_branch_enable(&use_neon);
		}
	}
	return 0;
}
subsys_initcall(chacha_arm_mod_init);

static void __exit chacha_arm_mod_exit(void)
{
}
module_exit(chacha_arm_mod_exit);

MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");