xref: /linux/lib/crc/x86/crc32.h (revision 110628e55a577468ef21f01e042e87c4257b2fd5)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86-optimized CRC32 functions
 *
 * Copyright (C) 2008 Intel Corporation
 * Copyright 2012 Xyratex Technology Limited
 * Copyright 2024 Google LLC
 */
9b10749d8SEric Biggers 
10b10749d8SEric Biggers #include "crc-pclmul-template.h"
11b10749d8SEric Biggers 
12b10749d8SEric Biggers static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
13b10749d8SEric Biggers static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
14b10749d8SEric Biggers 
15b10749d8SEric Biggers DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
16b10749d8SEric Biggers 
17b10749d8SEric Biggers static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
18b10749d8SEric Biggers {
19b10749d8SEric Biggers 	CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
20b10749d8SEric Biggers 		   have_pclmulqdq);
21b10749d8SEric Biggers 	return crc32_le_base(crc, p, len);
22b10749d8SEric Biggers }
23b10749d8SEric Biggers 
/*
 * Instruction template for the word-at-a-time loop in crc32c_arch().
 * Operand 0 is the running CRC, operand 1 the data word.  64-bit builds use
 * crc32q, with the "q" operand modifier so that operand 0 is printed as a
 * 64-bit register name; all other builds use crc32l.
 */
#ifndef CONFIG_X86_64
#define CRC32_INST "crc32l %1, %0"
#else
#define CRC32_INST "crc32q %1, %q0"
#endif
29b10749d8SEric Biggers 
30b10749d8SEric Biggers /*
31b10749d8SEric Biggers  * Use carryless multiply version of crc32c when buffer size is >= 512 to
32b10749d8SEric Biggers  * account for FPU state save/restore overhead.
33b10749d8SEric Biggers  */
34b10749d8SEric Biggers #define CRC32C_PCLMUL_BREAKEVEN	512
35b10749d8SEric Biggers 
36b10749d8SEric Biggers asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
37b10749d8SEric Biggers 
38b10749d8SEric Biggers static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
39b10749d8SEric Biggers {
40b10749d8SEric Biggers 	size_t num_longs;
41b10749d8SEric Biggers 
42b10749d8SEric Biggers 	if (!static_branch_likely(&have_crc32))
43b10749d8SEric Biggers 		return crc32c_base(crc, p, len);
44b10749d8SEric Biggers 
45b10749d8SEric Biggers 	if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
46b10749d8SEric Biggers 	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
47b10749d8SEric Biggers 		kernel_fpu_begin();
48b10749d8SEric Biggers 		crc = crc32c_x86_3way(crc, p, len);
49b10749d8SEric Biggers 		kernel_fpu_end();
50b10749d8SEric Biggers 		return crc;
51b10749d8SEric Biggers 	}
52b10749d8SEric Biggers 
53b10749d8SEric Biggers 	for (num_longs = len / sizeof(unsigned long);
54b10749d8SEric Biggers 	     num_longs != 0; num_longs--, p += sizeof(unsigned long))
55b10749d8SEric Biggers 		asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
56b10749d8SEric Biggers 
57b10749d8SEric Biggers 	if (sizeof(unsigned long) > 4 && (len & 4)) {
58b10749d8SEric Biggers 		asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
59b10749d8SEric Biggers 		p += 4;
60b10749d8SEric Biggers 	}
61b10749d8SEric Biggers 	if (len & 2) {
62b10749d8SEric Biggers 		asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
63b10749d8SEric Biggers 		p += 2;
64b10749d8SEric Biggers 	}
65b10749d8SEric Biggers 	if (len & 1)
66b10749d8SEric Biggers 		asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
67b10749d8SEric Biggers 
68b10749d8SEric Biggers 	return crc;
69b10749d8SEric Biggers }
70b10749d8SEric Biggers 
/* Not implemented on this arch: crc32_be_arch is an alias for crc32_be_base. */
#define crc32_be_arch crc32_be_base
72b10749d8SEric Biggers 
73b10749d8SEric Biggers #define crc32_mod_init_arch crc32_mod_init_arch
74b10749d8SEric Biggers static inline void crc32_mod_init_arch(void)
75b10749d8SEric Biggers {
76b10749d8SEric Biggers 	if (boot_cpu_has(X86_FEATURE_XMM4_2))
77b10749d8SEric Biggers 		static_branch_enable(&have_crc32);
78b10749d8SEric Biggers 	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
79b10749d8SEric Biggers 		static_branch_enable(&have_pclmulqdq);
80*110628e5SEric Biggers 		if (have_vpclmul()) {
81*110628e5SEric Biggers 			if (have_avx512()) {
82*110628e5SEric Biggers 				static_call_update(crc32_lsb_pclmul,
83*110628e5SEric Biggers 						   crc32_lsb_vpclmul_avx512);
84*110628e5SEric Biggers 			} else {
85*110628e5SEric Biggers 				static_call_update(crc32_lsb_pclmul,
86*110628e5SEric Biggers 						   crc32_lsb_vpclmul_avx2);
87*110628e5SEric Biggers 			}
88*110628e5SEric Biggers 		}
89b10749d8SEric Biggers 	}
90b10749d8SEric Biggers }
91b10749d8SEric Biggers 
92b10749d8SEric Biggers static inline u32 crc32_optimizations_arch(void)
93b10749d8SEric Biggers {
94b10749d8SEric Biggers 	u32 optimizations = 0;
95b10749d8SEric Biggers 
96b10749d8SEric Biggers 	if (static_key_enabled(&have_crc32))
97b10749d8SEric Biggers 		optimizations |= CRC32C_OPTIMIZATION;
98b10749d8SEric Biggers 	if (static_key_enabled(&have_pclmulqdq))
99b10749d8SEric Biggers 		optimizations |= CRC32_LE_OPTIMIZATION;
100b10749d8SEric Biggers 	return optimizations;
101b10749d8SEric Biggers }
102