// SPDX-License-Identifier: GPL-2.0-only
/*
 * x86-optimized CRC32 functions
 *
 * Copyright (C) 2008 Intel Corporation
 * Copyright 2012 Xyratex Technology Limited
 * Copyright 2024 Google LLC
 */

#include <asm/cpufeatures.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <linux/crc32.h>
#include <linux/linkage.h>
#include <linux/module.h>

/* minimum size of buffer for crc32_pclmul_le_16 */
#define CRC32_PCLMUL_MIN_LEN 64

static DEFINE_STATIC_KEY_FALSE(have_crc32);
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);

u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);

u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
	/*
	 * The extra 15 bytes allow for the alignment prologue below while
	 * still leaving at least CRC32_PCLMUL_MIN_LEN bytes for the
	 * PCLMULQDQ main loop.
	 */
	if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
		size_t n = -(uintptr_t)p & 15;

		/* align p to 16-byte boundary */
		if (n) {
			crc = crc32_le_base(crc, p, n);
			p += n;
			len -= n;
		}
		n = round_down(len, 16);
		kernel_fpu_begin();
		crc = crc32_pclmul_le_16(crc, p, n);
		kernel_fpu_end();
		p += n;
		len -= n;
	}
	if (len)
		crc = crc32_le_base(crc, p, len);
	return crc;
}
EXPORT_SYMBOL(crc32_le_arch);

/*
 * The crc32 instruction consumes one unsigned long per iteration:
 * crc32q handles 8 bytes on x86_64, crc32l handles 4 bytes on 32-bit.
 */
#ifdef CONFIG_X86_64
#define CRC32_INST "crc32q %1, %q0"
#else
#define CRC32_INST "crc32l %1, %0"
#endif

/*
 * Use carryless multiply version of crc32c when buffer size is >= 512 to
 * account for FPU state save/restore overhead.
 */
#define CRC32C_PCLMUL_BREAKEVEN 512

asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);

u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
{
	size_t num_longs;

	if (!static_branch_likely(&have_crc32))
		return crc32c_le_base(crc, p, len);

	if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
		kernel_fpu_begin();
		crc = crc32c_x86_3way(crc, p, len);
		kernel_fpu_end();
		return crc;
	}

	for (num_longs = len / sizeof(unsigned long);
	     num_longs != 0; num_longs--, p += sizeof(unsigned long))
		asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));

	for (len %= sizeof(unsigned long); len; len--, p++)
		asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));

	return crc;
}
EXPORT_SYMBOL(crc32c_le_arch);

/* No accelerated implementation for big-endian CRC32; use the generic one. */
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
	return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);

static int __init crc32_x86_init(void)
{
	if (boot_cpu_has(X86_FEATURE_XMM4_2))
		static_branch_enable(&have_crc32);
	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
		static_branch_enable(&have_pclmulqdq);
	return 0;
}
arch_initcall(crc32_x86_init);

static void __exit crc32_x86_exit(void)
{
}
module_exit(crc32_x86_exit);

u32 crc32_optimizations(void)
{
	u32 optimizations = 0;

	if (static_key_enabled(&have_crc32))
		optimizations |= CRC32C_OPTIMIZATION;
	if (static_key_enabled(&have_pclmulqdq))
		optimizations |= CRC32_LE_OPTIMIZATION;
	return optimizations;
}
EXPORT_SYMBOL(crc32_optimizations);

MODULE_DESCRIPTION("x86-optimized CRC32 functions");
MODULE_LICENSE("GPL");