// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       crc32.c
/// \brief      CRC32 calculation
//
//  Authors:    Lasse Collin
//              Ilya Kurdyukov
//
///////////////////////////////////////////////////////////////////////////////

#include "check.h"
#include "crc_common.h"

// At most one arch-specific implementation header is pulled in. The x86
// header is told via BUILDING_CRC_CLMUL that it is being included for the
// 32-bit CRC.
#if defined(CRC_X86_CLMUL)
#	define BUILDING_CRC_CLMUL 32
#	include "crc_x86_clmul.h"
#elif defined(CRC32_ARM64)
#	include "crc32_arm64.h"
#elif defined(CRC32_LOONGARCH)
#	include "crc32_loongarch.h"
#endif


#ifdef CRC32_GENERIC

///////////////////
// Generic CRC32 //
///////////////////

// The lookup table header is chosen to match the byte order of the target.
#ifdef WORDS_BIGENDIAN
#	include "crc32_table_be.h"
#else
#	include "crc32_table_le.h"
#endif


#ifdef HAVE_CRC_X86_ASM
// Only declared here; the definition has to come from another object
// (presumably an x86 assembly implementation, judging by the macro name).
extern uint32_t lzma_crc32_generic(
		const uint8_t *buf, size_t size, uint32_t crc);
#else
/// \brief      Table-driven CRC32 that works on any target
///
/// \param      buf     Bytes to process
/// \param      size    Number of bytes available in buf
/// \param      crc     Value to continue from. It is bit-inverted on entry
///                     and the result is bit-inverted again on return, so
///                     the return value of one call can be passed as crc
///                     to the next call to continue the same calculation.
///
/// \return     Updated CRC32 value
///
/// Inputs longer than eight bytes are processed in three phases: single
/// bytes until buf is 8-byte aligned, then eight bytes per iteration using
/// lzma_crc32_table[0] to lzma_crc32_table[7], and finally the remaining
/// zero to seven bytes one at a time. Shorter inputs use only the
/// byte-at-a-time loop.
///
/// A(), B(), C(), D(), S8(), byteswap32(), and aligned_read32ne() are not
/// defined in this file (presumably they come from crc_common.h).
static uint32_t
lzma_crc32_generic(const uint8_t *buf, size_t size, uint32_t crc)
{
	crc = ~crc;

#ifdef WORDS_BIGENDIAN
	// The working value is kept byte-swapped for the whole calculation
	// on big endian targets and swapped back just before returning.
	crc = byteswap32(crc);
#endif

	if (size > 8) {
		// Fix the alignment, if needed. The if statement above
		// ensures that this won't read past the end of buf[]:
		// at most seven bytes are consumed here and there are
		// at least nine.
		while ((uintptr_t)(buf) & 7) {
			crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc);
			--size;
		}

		// Calculate the position where to stop: the remaining
		// size rounded down to a multiple of eight bytes.
		const uint8_t *const limit = buf + (size & ~(size_t)(7));

		// Calculate how many bytes must be calculated separately
		// before returning the result. After this, size is only
		// the count of the leftover zero to seven tail bytes.
		size &= (size_t)(7);

		// Calculate the CRC32 using the slice-by-eight algorithm.
		// Each iteration consumes eight bytes as two 32-bit reads.
		// buf is 8-byte aligned at this point, so both reads are
		// at addresses that are multiples of four.
		while (buf < limit) {
			crc ^= aligned_read32ne(buf);
			buf += 4;

			crc = lzma_crc32_table[7][A(crc)]
			    ^ lzma_crc32_table[6][B(crc)]
			    ^ lzma_crc32_table[5][C(crc)]
			    ^ lzma_crc32_table[4][D(crc)];

			const uint32_t tmp = aligned_read32ne(buf);
			buf += 4;

			// At least with some compilers, it is critical for
			// performance, that the crc variable is XORed
			// between the two table-lookup pairs.
			crc = lzma_crc32_table[3][A(tmp)]
			    ^ lzma_crc32_table[2][B(tmp)]
			    ^ crc
			    ^ lzma_crc32_table[1][C(tmp)]
			    ^ lzma_crc32_table[0][D(tmp)];
		}
	}

	// Process the tail bytes, or the whole input if it was at most
	// eight bytes long.
	while (size-- != 0)
		crc = lzma_crc32_table[0][*buf++ ^ A(crc)] ^ S8(crc);

#ifdef WORDS_BIGENDIAN
	crc = byteswap32(crc);
#endif

	return ~crc;
}
#endif // HAVE_CRC_X86_ASM
#endif // CRC32_GENERIC


#if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)

//////////////////////////
// Function dispatching //
//////////////////////////

// If both the generic and arch-optimized implementations are built, then
// the function to use is selected at runtime because the system running
// the binary might not have the arch-specific instruction set extension(s)
// available. The dispatch methods in order of priority:
//
// 1. Constructor. This method uses __attribute__((__constructor__)) to
//    set crc32_func at load time. This avoids extra computation (and any
//    unlikely threading bugs) on the first call to lzma_crc32() to decide
//    which implementation should be used.
//
// 2. First Call Resolution. On the very first call to lzma_crc32(), the
//    call will be directed to crc32_dispatch() instead. This will set the
//    appropriate implementation function and will not be called again.
//    This method does not use any kind of locking but is safe because if
//    multiple threads run the dispatcher simultaneously then they will all
//    set crc32_func to the same value.

126 127 typedef uint32_t (*crc32_func_type)( 128 const uint8_t *buf, size_t size, uint32_t crc); 129 130 // This resolver is shared between all dispatch methods. 131 static crc32_func_type 132 crc32_resolve(void) 133 { 134 return is_arch_extension_supported() 135 ? &crc32_arch_optimized : &lzma_crc32_generic; 136 } 137 138 139 #ifdef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR 140 // Constructor method. 141 # define CRC32_SET_FUNC_ATTR __attribute__((__constructor__)) 142 static crc32_func_type crc32_func; 143 #else 144 // First Call Resolution method. 145 # define CRC32_SET_FUNC_ATTR 146 static uint32_t crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc); 147 static crc32_func_type crc32_func = &crc32_dispatch; 148 #endif 149 150 CRC32_SET_FUNC_ATTR 151 static void 152 crc32_set_func(void) 153 { 154 crc32_func = crc32_resolve(); 155 return; 156 } 157 158 #ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR 159 static uint32_t 160 crc32_dispatch(const uint8_t *buf, size_t size, uint32_t crc) 161 { 162 // When __attribute__((__constructor__)) isn't supported, set the 163 // function pointer without any locking. If multiple threads run 164 // the detection code in parallel, they will all end up setting 165 // the pointer to the same value. This avoids the use of 166 // mythread_once() on every call to lzma_crc32() but this likely 167 // isn't strictly standards compliant. Let's change it if it breaks. 168 crc32_set_func(); 169 return crc32_func(buf, size, crc); 170 } 171 172 #endif 173 #endif 174 175 176 extern LZMA_API(uint32_t) 177 lzma_crc32(const uint8_t *buf, size_t size, uint32_t crc) 178 { 179 #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) 180 /* 181 #ifndef HAVE_FUNC_ATTRIBUTE_CONSTRUCTOR 182 // See crc32_dispatch(). This would be the alternative which uses 183 // locking and doesn't use crc32_dispatch(). Note that on Windows 184 // this method needs Vista threads. 
185 mythread_once(crc64_set_func); 186 #endif 187 */ 188 return crc32_func(buf, size, crc); 189 190 #elif defined(CRC32_ARCH_OPTIMIZED) 191 return crc32_arch_optimized(buf, size, crc); 192 193 #else 194 return lzma_crc32_generic(buf, size, crc); 195 #endif 196 } 197