/* SPDX-License-Identifier: GPL-2.0 */
/*
 * ChaCha stream cipher (s390 optimized)
 *
 * Copyright IBM Corp. 2021
 */
7*13cecc52SEric Biggers
8*13cecc52SEric Biggers #include <linux/cpufeature.h>
9*13cecc52SEric Biggers #include <linux/export.h>
10*13cecc52SEric Biggers #include <linux/kernel.h>
11*13cecc52SEric Biggers #include <linux/sizes.h>
12*13cecc52SEric Biggers #include <asm/fpu.h>
13*13cecc52SEric Biggers #include "chacha-s390.h"
14*13cecc52SEric Biggers
/*
 * No s390-specific HChaCha block function is provided here yet, so the
 * arch-level name is simply aliased to the generic implementation.
 */
15*13cecc52SEric Biggers #define hchacha_block_arch hchacha_block_generic /* not implemented yet */
16*13cecc52SEric Biggers
/*
 * chacha_crypt_arch() - s390 entry point for ChaCha encryption/decryption
 * @state:   ChaCha state; pointers to words 4 and 12 are handed to the
 *           vector routine, and word 12 is advanced afterwards
 * @dst:     destination buffer
 * @src:     source buffer
 * @bytes:   number of bytes to process
 * @nrounds: requested number of ChaCha rounds
 *
 * The s390 vector implementation has 20 rounds hard-coded and cannot handle
 * one block of data or less; above that it accepts data of arbitrary size.
 * Requests that do not fit those limits, and all requests on machines where
 * cpu_has_vx() reports false, are forwarded to chacha_crypt_generic().
 */
static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
			      const u8 *src, unsigned int bytes, int nrounds)
{
	if (bytes > CHACHA_BLOCK_SIZE && nrounds == 20 && cpu_has_vx()) {
		DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
		/* A partial trailing block counts as a whole block. */
		const u32 nblocks = round_up(bytes, CHACHA_BLOCK_SIZE) /
				    CHACHA_BLOCK_SIZE;

		/* Vector registers may only be used inside this section. */
		kernel_fpu_begin(&vxstate, KERNEL_VXR);
		chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]);
		kernel_fpu_end(&vxstate, KERNEL_VXR);

		/*
		 * Word 12 is advanced here by the number of blocks processed
		 * (presumably the block counter, which chacha20_vx() does not
		 * write back itself -- confirm against the assembly).
		 */
		state->x[12] += nblocks;
	} else {
		chacha_crypt_generic(state, dst, src, bytes, nrounds);
	}
}
37