1 // SPDX-License-Identifier: 0BSD 2 3 /////////////////////////////////////////////////////////////////////////////// 4 // 5 /// \file crc32_arm64.h 6 /// \brief CRC32 calculation with ARM64 optimization 7 // 8 // Authors: Chenxi Mao 9 // Jia Tan 10 // Hans Jansen 11 // 12 /////////////////////////////////////////////////////////////////////////////// 13 14 #ifndef LZMA_CRC32_ARM64_H 15 #define LZMA_CRC32_ARM64_H 16 17 // MSVC always has the CRC intrinsics available when building for ARM64 18 // there is no need to include any header files. 19 #ifndef _MSC_VER 20 # include <arm_acle.h> 21 #endif 22 23 // If both versions are going to be built, we need runtime detection 24 // to check if the instructions are supported. 25 #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) 26 # if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO) 27 # include <sys/auxv.h> 28 # elif defined(_WIN32) 29 # include <processthreadsapi.h> 30 # elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME) 31 # include <sys/sysctl.h> 32 # endif 33 #endif 34 35 // Some EDG-based compilers support ARM64 and define __GNUC__ 36 // (such as Nvidia's nvcc), but do not support function attributes. 37 // 38 // NOTE: Build systems check for this too, keep them in sync with this. 39 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__) 40 # define crc_attr_target __attribute__((__target__("+crc"))) 41 #else 42 # define crc_attr_target 43 #endif 44 45 46 crc_attr_target 47 static uint32_t 48 crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc) 49 { 50 crc = ~crc; 51 52 // Align the input buffer because this was shown to be 53 // significantly faster than unaligned accesses. 54 const size_t align_amount = my_min(size, (0U - (uintptr_t)buf) & 7); 55 56 for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf) 57 crc = __crc32b(crc, *buf); 58 59 size -= align_amount; 60 61 // Process 8 bytes at a time. The end point is determined by 62 // ignoring the least significant three bits of size to ensure 63 // we do not process past the bounds of the buffer. This guarantees 64 // that limit is a multiple of 8 and is strictly less than size. 65 for (const uint8_t *limit = buf + (size & ~(size_t)7); 66 buf < limit; buf += 8) 67 crc = __crc32d(crc, aligned_read64le(buf)); 68 69 // Process the remaining bytes that are not 8 byte aligned. 70 for (const uint8_t *limit = buf + (size & 7); buf < limit; ++buf) 71 crc = __crc32b(crc, *buf); 72 73 return ~crc; 74 } 75 76 77 #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED) 78 static inline bool 79 is_arch_extension_supported(void) 80 { 81 #if defined(HAVE_GETAUXVAL) 82 return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0; 83 84 #elif defined(HAVE_ELF_AUX_INFO) 85 unsigned long feature_flags; 86 87 if (elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags)) != 0) 88 return false; 89 90 return (feature_flags & HWCAP_CRC32) != 0; 91 92 #elif defined(_WIN32) 93 return IsProcessorFeaturePresent( 94 PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE); 95 96 #elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME) 97 int has_crc32 = 0; 98 size_t size = sizeof(has_crc32); 99 100 // The sysctlbyname() function requires a string identifier for the 101 // CPU feature it tests. The Apple documentation lists the string 102 // "hw.optional.armv8_crc32", which can be found here: 103 // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619 104 if (sysctlbyname("hw.optional.armv8_crc32", &has_crc32, 105 &size, NULL, 0) != 0) 106 return false; 107 108 return has_crc32; 109 110 #else 111 // If a runtime detection method cannot be found, then this must 112 // be a compile time error. The checks in crc_common.h should ensure 113 // a runtime detection method is always found if this function is 114 // built. It would be possible to just return false here, but this 115 // is inefficient for binary size and runtime since only the generic 116 // method could ever be used. 117 # error Runtime detection method unavailable. 118 #endif 119 } 120 #endif 121 122 #endif // LZMA_CRC32_ARM64_H 123