// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86-optimized CRC32 functions
 *
 * Copyright (C) 2008 Intel Corporation
 * Copyright 2012 Xyratex Technology Limited
 * Copyright 2024 Google LLC
 */

#include <asm/cpufeatures.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <linux/crc32.h>
#include <linux/linkage.h>
#include <linux/module.h>

/* minimum size of buffer for crc32_pclmul_le_16 */
#define CRC32_PCLMUL_MIN_LEN	64

static DEFINE_STATIC_KEY_FALSE(have_crc32);
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);

u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);

u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
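	/*
	 * Require len >= CRC32_PCLMUL_MIN_LEN + 15 so that, after up to 15
	 * bytes are consumed below to align p, at least CRC32_PCLMUL_MIN_LEN
	 * bytes remain for crc32_pclmul_le_16().
	 */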
	if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
		size_t n = -(uintptr_t)p & 15;

		/* align p to 16-byte boundary */
		if (n) {
			crc = crc32_le_base(crc, p, n);
			p += n;
			len -= n;
		}
		n = round_down(len, 16);
		kernel_fpu_begin();
		crc = crc32_pclmul_le_16(crc, p, n);
		kernel_fpu_end();
		p += n;
		len -= n;
	}
	if (len)
		crc = crc32_le_base(crc, p, len);
	return crc;
}
EXPORT_SYMBOL(crc32_le_arch);

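/*
 * CRC32_INST checksums one unsigned long with the SSE4.2 crc32 instruction:
 * the 64-bit crc32q on x86_64, the 32-bit crc32l on 32-bit kernels.
 */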
#ifdef CONFIG_X86_64
#define CRC32_INST "crc32q %1, %q0"
#else
#define CRC32_INST "crc32l %1, %0"
#endif

/*
 * Use carryless multiply version of crc32c when buffer size is >= 512 to
 * account for FPU state save/restore overhead.
 */
#define CRC32C_PCLMUL_BREAKEVEN	512

asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);

u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
{
	size_t num_longs;

	if (!static_branch_likely(&have_crc32))
		return crc32c_le_base(crc, p, len);

	if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
		kernel_fpu_begin();
		crc = crc32c_x86_3way(crc, p, len);
		kernel_fpu_end();
		return crc;
	}

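	/* Process one unsigned long at a time with the crc32 instruction */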
	for (num_longs = len / sizeof(unsigned long);
	     num_longs != 0; num_longs--, p += sizeof(unsigned long))
		asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));

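	/* Handle any remaining tail bytes one at a time */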
	for (len %= sizeof(unsigned long); len; len--, p++)
		asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));

	return crc;
}
EXPORT_SYMBOL(crc32c_le_arch);

u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
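	/* No optimized big-endian CRC32 on x86; fall back to the generic code */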
	return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);

static int __init crc32_x86_init(void)
{
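	/* SSE4.2 provides the crc32 instruction, which implements CRC32C */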
	if (boot_cpu_has(X86_FEATURE_XMM4_2))
		static_branch_enable(&have_crc32);
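	/* PCLMULQDQ (carryless multiplication) enables the PCLMUL-based code */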
	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
		static_branch_enable(&have_pclmulqdq);
	return 0;
}
arch_initcall(crc32_x86_init);

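/*
 * This exit function is intentionally empty; providing one allows the module
 * to be unloaded.
 */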
static void __exit crc32_x86_exit(void)
{
}
module_exit(crc32_x86_exit);

u32 crc32_optimizations(void)
{
	u32 optimizations = 0;

	if (static_key_enabled(&have_crc32))
		optimizations |= CRC32C_OPTIMIZATION;
	if (static_key_enabled(&have_pclmulqdq))
		optimizations |= CRC32_LE_OPTIMIZATION;
	return optimizations;
}
EXPORT_SYMBOL(crc32_optimizations);

MODULE_DESCRIPTION("x86-optimized CRC32 functions");
MODULE_LICENSE("GPL");