// SPDX-License-Identifier: GPL-2.0-only
/*
 * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/cpufeature.h>

#include <crypto/internal/simd.h>

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pmull);

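/*
 * The PMULL code only pays off for larger inputs: buffers shorter than this
 * threshold (plus up to 15 bytes of alignment slack) are handled entirely by
 * the scalar routines, avoiding the overhead of a kernel NEON section.
 */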
#define PMULL_MIN_LEN		64	/* min size of buffer for pmull functions */

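/*
 * Assembly helpers: the *_armv8_le() routines use the ARMv8 CRC32/CRC32C
 * instructions, while the *_pmull_le() routines use 64x64-bit polynomial
 * multiplication (NEON/Crypto Extensions).  Note that the two families
 * take their arguments in a different order.
 */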
asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc);
asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len);

asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc);
asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len);

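/* Scalar CRC-32: use the CRC32 instructions when present, else the C code. */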
static inline u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len)
{
	if (static_branch_likely(&have_crc32))
		return crc32_armv8_le(crc, p, len);
	return crc32_le_base(crc, p, len);
}

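/*
 * CRC-32 update: for buffers of at least PMULL_MIN_LEN + 15 bytes, use the
 * scalar path for the unaligned head, the PMULL code (inside a
 * kernel_neon_begin()/kernel_neon_end() section) for the aligned bulk, and
 * the scalar path again for the remaining tail.
 */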
static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
	if (len >= PMULL_MIN_LEN + 15 &&
	    static_branch_likely(&have_pmull) && crypto_simd_usable()) {
		size_t n = -(uintptr_t)p & 15;

		/*
		 * Align p to a 16-byte boundary: -(uintptr_t)p & 15 is the
		 * distance to the next boundary, e.g. an address ending in
		 * 0x9 gives n == 7.
		 */
		if (n) {
			crc = crc32_le_scalar(crc, p, n);
			p += n;
			len -= n;
		}
		n = round_down(len, 16);
		kernel_neon_begin();
		crc = crc32_pmull_le(p, n, crc);
		kernel_neon_end();
		p += n;
		len -= n;
	}
	return crc32_le_scalar(crc, p, len);
}

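/* Scalar CRC-32C (Castagnoli): mirrors crc32_le_scalar() above. */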
static inline u32 crc32c_scalar(u32 crc, const u8 *p, size_t len)
{
	if (static_branch_likely(&have_crc32))
		return crc32c_armv8_le(crc, p, len);
	return crc32c_base(crc, p, len);
}

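/* CRC-32C update: same head/bulk/tail structure as crc32_le_arch() above. */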
static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
	if (len >= PMULL_MIN_LEN + 15 &&
	    static_branch_likely(&have_pmull) && crypto_simd_usable()) {
		size_t n = -(uintptr_t)p & 15;

		/* align p to 16-byte boundary */
		if (n) {
			crc = crc32c_scalar(crc, p, n);
			p += n;
			len -= n;
		}
		n = round_down(len, 16);
		kernel_neon_begin();
		crc = crc32c_pmull_le(p, n, crc);
		kernel_neon_end();
		p += n;
		len -= n;
	}
	return crc32c_scalar(crc, p, len);
}

#define crc32_be_arch crc32_be_base /* not implemented on this arch */

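/*
 * Probe the CPU feature bits once at init time and flip the static keys, so
 * the checks in the fast paths above are patched in rather than tested on
 * every call.
 */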
#define crc32_mod_init_arch crc32_mod_init_arch
static inline void crc32_mod_init_arch(void)
{
	if (elf_hwcap2 & HWCAP2_CRC32)
		static_branch_enable(&have_crc32);
	if (elf_hwcap2 & HWCAP2_PMULL)
		static_branch_enable(&have_pmull);
}

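/*
 * Report which CRC32 variants are accelerated on this CPU: either HWCAP2 bit
 * enables both the little-endian CRC-32 and the CRC-32C paths above.
 */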
static inline u32 crc32_optimizations_arch(void)
{
	if (elf_hwcap2 & (HWCAP2_CRC32 | HWCAP2_PMULL))
		return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION;
	return 0;
}
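
/*
 * Usage sketch (illustrative, not part of this file): callers are assumed to
 * go through the generic CRC32 API, which dispatches to the *_arch() hooks
 * above when the architecture-specific implementation is enabled, e.g.:
 *
 *	u32 crc  = crc32_le(~0, data, len);	// CRC-32, little-endian
 *	u32 csum = crc32c(~0, data, len);	// CRC-32C (Castagnoli)
 */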