// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ChaCha stream cipher (P10 accelerated)
 *
 * Copyright 2023- IBM Corp. All rights reserved.
 */
7*676d45abSEric Biggers
8*676d45abSEric Biggers #include <crypto/chacha.h>
9*676d45abSEric Biggers #include <crypto/internal/simd.h>
10*676d45abSEric Biggers #include <linux/kernel.h>
11*676d45abSEric Biggers #include <linux/module.h>
12*676d45abSEric Biggers #include <linux/cpufeature.h>
13*676d45abSEric Biggers #include <linux/sizes.h>
14*676d45abSEric Biggers #include <asm/simd.h>
15*676d45abSEric Biggers #include <asm/switch_to.h>
16*676d45abSEric Biggers
17*676d45abSEric Biggers asmlinkage void chacha_p10le_8x(const struct chacha_state *state, u8 *dst,
18*676d45abSEric Biggers const u8 *src, unsigned int len, int nrounds);
19*676d45abSEric Biggers
20*676d45abSEric Biggers static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
21*676d45abSEric Biggers
/*
 * Open a section in which kernel code may use VSX.
 *
 * Preemption is turned off first and kernel VSX is enabled second; every
 * call must be paired with vsx_end(), which undoes both in reverse order.
 */
static void vsx_begin(void)
{
	preempt_disable();
	enable_kernel_vsx();
}
27*676d45abSEric Biggers
/*
 * Close a section opened by vsx_begin(): disable kernel VSX, then allow
 * preemption again (the mirror image of the order used in vsx_begin()).
 */
static void vsx_end(void)
{
	disable_kernel_vsx();
	preempt_enable();
}
33*676d45abSEric Biggers
/*
 * Encrypt/decrypt @bytes bytes from @src into @dst using @state.
 *
 * The input is split in two:
 *   - the leading part, rounded down to a multiple of 256 bytes, goes to
 *     chacha_p10le_8x();
 *   - whatever is left (fewer than 256 bytes) goes to
 *     chacha_crypt_generic().
 *
 * chacha_p10le_8x() receives the state as const, so state->x[12] is advanced
 * here by the number of CHACHA_BLOCK_SIZE blocks it consumed before the
 * generic code continues from the updated state.
 *
 * The caller is expected to have enabled VSX (see vsx_begin()).
 */
static void chacha_p10_do_8x(struct chacha_state *state, u8 *dst, const u8 *src,
			     unsigned int bytes, int nrounds)
{
	/* Largest multiple of 256 that fits in @bytes, and the remainder. */
	const unsigned int bulk = bytes & ~0xFFu;
	const unsigned int tail = bytes - bulk;

	if (bulk) {
		chacha_p10le_8x(state, dst, src, bulk, nrounds);
		state->x[12] += bulk / CHACHA_BLOCK_SIZE;
		src += bulk;
		dst += bulk;
	}

	if (tail)
		chacha_crypt_generic(state, dst, src, tail, nrounds);
}
50*676d45abSEric Biggers
/*
 * Arch hook for HChaCha.
 *
 * This file provides no accelerated HChaCha; the call is forwarded unchanged
 * to hchacha_block_generic().
 */
void hchacha_block_arch(const struct chacha_state *state,
			u32 out[HCHACHA_OUT_WORDS], int nrounds)
{
	hchacha_block_generic(state, out, nrounds);
}
EXPORT_SYMBOL(hchacha_block_arch);
57*676d45abSEric Biggers
/*
 * Arch hook for ChaCha encryption/decryption of @bytes bytes.
 *
 * The accelerated path is taken only when all of the following hold:
 *   - the have_p10 static key is enabled,
 *   - the request is longer than one CHACHA_BLOCK_SIZE block,
 *   - crypto_simd_usable() says SIMD may be used in the current context.
 * Otherwise the whole request is handled by chacha_crypt_generic().
 *
 * On the accelerated path the data is processed in chunks of at most SZ_4K
 * bytes, each inside its own vsx_begin()/vsx_end() pair, so preemption is
 * re-enabled between chunks rather than being held off for the whole
 * request.
 */
void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
		       unsigned int bytes, int nrounds)
{
	if (!(static_branch_likely(&have_p10) &&
	      bytes > CHACHA_BLOCK_SIZE &&
	      crypto_simd_usable())) {
		chacha_crypt_generic(state, dst, src, bytes, nrounds);
		return;
	}

	/* bytes > CHACHA_BLOCK_SIZE here, so the loop runs at least once. */
	while (bytes) {
		const unsigned int chunk = min_t(unsigned int, bytes, SZ_4K);

		vsx_begin();
		chacha_p10_do_8x(state, dst, src, chunk, nrounds);
		vsx_end();

		dst += chunk;
		src += chunk;
		bytes -= chunk;
	}
}
EXPORT_SYMBOL(chacha_crypt_arch);
78*676d45abSEric Biggers
chacha_is_arch_optimized(void)79*676d45abSEric Biggers bool chacha_is_arch_optimized(void)
80*676d45abSEric Biggers {
81*676d45abSEric Biggers return static_key_enabled(&have_p10);
82*676d45abSEric Biggers }
83*676d45abSEric Biggers EXPORT_SYMBOL(chacha_is_arch_optimized);
84*676d45abSEric Biggers
chacha_p10_init(void)85*676d45abSEric Biggers static int __init chacha_p10_init(void)
86*676d45abSEric Biggers {
87*676d45abSEric Biggers if (cpu_has_feature(CPU_FTR_ARCH_31))
88*676d45abSEric Biggers static_branch_enable(&have_p10);
89*676d45abSEric Biggers return 0;
90*676d45abSEric Biggers }
91*676d45abSEric Biggers subsys_initcall(chacha_p10_init);
92*676d45abSEric Biggers
chacha_p10_exit(void)93*676d45abSEric Biggers static void __exit chacha_p10_exit(void)
94*676d45abSEric Biggers {
95*676d45abSEric Biggers }
96*676d45abSEric Biggers module_exit(chacha_p10_exit);
97*676d45abSEric Biggers
98*676d45abSEric Biggers MODULE_DESCRIPTION("ChaCha stream cipher (P10 accelerated)");
99*676d45abSEric Biggers MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
100*676d45abSEric Biggers MODULE_LICENSE("GPL v2");
101