1*b10749d8SEric Biggers // SPDX-License-Identifier: GPL-2.0-only 2*b10749d8SEric Biggers /* 3*b10749d8SEric Biggers * x86-optimized CRC32 functions 4*b10749d8SEric Biggers * 5*b10749d8SEric Biggers * Copyright (C) 2008 Intel Corporation 6*b10749d8SEric Biggers * Copyright 2012 Xyratex Technology Limited 7*b10749d8SEric Biggers * Copyright 2024 Google LLC 8*b10749d8SEric Biggers */ 9*b10749d8SEric Biggers 10*b10749d8SEric Biggers #include "crc-pclmul-template.h" 11*b10749d8SEric Biggers 12*b10749d8SEric Biggers static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); 13*b10749d8SEric Biggers static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); 14*b10749d8SEric Biggers 15*b10749d8SEric Biggers DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32); 16*b10749d8SEric Biggers 17*b10749d8SEric Biggers static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) 18*b10749d8SEric Biggers { 19*b10749d8SEric Biggers CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts, 20*b10749d8SEric Biggers have_pclmulqdq); 21*b10749d8SEric Biggers return crc32_le_base(crc, p, len); 22*b10749d8SEric Biggers } 23*b10749d8SEric Biggers 24*b10749d8SEric Biggers #ifdef CONFIG_X86_64 25*b10749d8SEric Biggers #define CRC32_INST "crc32q %1, %q0" 26*b10749d8SEric Biggers #else 27*b10749d8SEric Biggers #define CRC32_INST "crc32l %1, %0" 28*b10749d8SEric Biggers #endif 29*b10749d8SEric Biggers 30*b10749d8SEric Biggers /* 31*b10749d8SEric Biggers * Use carryless multiply version of crc32c when buffer size is >= 512 to 32*b10749d8SEric Biggers * account for FPU state save/restore overhead. 33*b10749d8SEric Biggers */ 34*b10749d8SEric Biggers #define CRC32C_PCLMUL_BREAKEVEN 512 35*b10749d8SEric Biggers 36*b10749d8SEric Biggers asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); 37*b10749d8SEric Biggers 38*b10749d8SEric Biggers static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) 39*b10749d8SEric Biggers { 40*b10749d8SEric Biggers size_t num_longs; 41*b10749d8SEric Biggers 42*b10749d8SEric Biggers if (!static_branch_likely(&have_crc32)) 43*b10749d8SEric Biggers return crc32c_base(crc, p, len); 44*b10749d8SEric Biggers 45*b10749d8SEric Biggers if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN && 46*b10749d8SEric Biggers static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { 47*b10749d8SEric Biggers kernel_fpu_begin(); 48*b10749d8SEric Biggers crc = crc32c_x86_3way(crc, p, len); 49*b10749d8SEric Biggers kernel_fpu_end(); 50*b10749d8SEric Biggers return crc; 51*b10749d8SEric Biggers } 52*b10749d8SEric Biggers 53*b10749d8SEric Biggers for (num_longs = len / sizeof(unsigned long); 54*b10749d8SEric Biggers num_longs != 0; num_longs--, p += sizeof(unsigned long)) 55*b10749d8SEric Biggers asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p)); 56*b10749d8SEric Biggers 57*b10749d8SEric Biggers if (sizeof(unsigned long) > 4 && (len & 4)) { 58*b10749d8SEric Biggers asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p)); 59*b10749d8SEric Biggers p += 4; 60*b10749d8SEric Biggers } 61*b10749d8SEric Biggers if (len & 2) { 62*b10749d8SEric Biggers asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p)); 63*b10749d8SEric Biggers p += 2; 64*b10749d8SEric Biggers } 65*b10749d8SEric Biggers if (len & 1) 66*b10749d8SEric Biggers asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p)); 67*b10749d8SEric Biggers 68*b10749d8SEric Biggers return crc; 69*b10749d8SEric Biggers } 70*b10749d8SEric Biggers 71*b10749d8SEric Biggers #define crc32_be_arch crc32_be_base /* not implemented on this arch */ 72*b10749d8SEric Biggers 73*b10749d8SEric Biggers #define crc32_mod_init_arch crc32_mod_init_arch 74*b10749d8SEric Biggers static inline void crc32_mod_init_arch(void) 75*b10749d8SEric Biggers { 76*b10749d8SEric Biggers if (boot_cpu_has(X86_FEATURE_XMM4_2)) 77*b10749d8SEric Biggers static_branch_enable(&have_crc32); 78*b10749d8SEric Biggers if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { 79*b10749d8SEric Biggers static_branch_enable(&have_pclmulqdq); 80*b10749d8SEric Biggers INIT_CRC_PCLMUL(crc32_lsb); 81*b10749d8SEric Biggers } 82*b10749d8SEric Biggers } 83*b10749d8SEric Biggers 84*b10749d8SEric Biggers static inline u32 crc32_optimizations_arch(void) 85*b10749d8SEric Biggers { 86*b10749d8SEric Biggers u32 optimizations = 0; 87*b10749d8SEric Biggers 88*b10749d8SEric Biggers if (static_key_enabled(&have_crc32)) 89*b10749d8SEric Biggers optimizations |= CRC32C_OPTIMIZATION; 90*b10749d8SEric Biggers if (static_key_enabled(&have_pclmulqdq)) 91*b10749d8SEric Biggers optimizations |= CRC32_LE_OPTIMIZATION; 92*b10749d8SEric Biggers return optimizations; 93*b10749d8SEric Biggers } 94