xref: /freebsd/contrib/xz/src/liblzma/check/crc32_arm64.h (revision a91a246563dffa876a52f53a98de4af9fa364c52)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       crc32_arm64.h
6 /// \brief      CRC32 calculation with ARM64 optimization
7 //
8 //  Authors:    Chenxi Mao
9 //              Jia Tan
10 //              Hans Jansen
11 //
12 ///////////////////////////////////////////////////////////////////////////////
13 
14 
15 #ifndef LZMA_CRC32_ARM64_H
16 #define LZMA_CRC32_ARM64_H
17 
18 // MSVC always has the CRC intrinsics available when building for ARM64,
19 // so there is no need to include any header files.
20 #ifndef _MSC_VER
21 #	include <arm_acle.h>
22 #endif
23 
24 #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
25 #	if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
26 #		include <sys/auxv.h>
27 #	elif defined(_WIN32)
28 #		include <processthreadsapi.h>
29 #	elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)
30 #		include <sys/sysctl.h>
31 #	endif
32 #endif
33 
// crc_attr_target is placed in front of functions that use the CRC
// intrinsics. On GCC and Clang it expands to a function attribute that
// enables the "+crc" target feature for that function; everywhere else
// it expands to nothing.
//
// EDG-based front ends (Nvidia's nvcc, for example) can target ARM64 and
// define __GNUC__ without supporting function attributes, so they are
// excluded explicitly.
//
// NOTE: Build systems check for this too, keep them in sync with this.
// The condition below is the negation of
// (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__).
#if defined(__EDG__) || !(defined(__GNUC__) || defined(__clang__))
#	define crc_attr_target
#else
#	define crc_attr_target __attribute__((__target__("+crc")))
#endif
44 
45 
46 crc_attr_target
47 static uint32_t
48 crc32_arch_optimized(const uint8_t *buf, size_t size, uint32_t crc)
49 {
50 	crc = ~crc;
51 
52 	// Align the input buffer because this was shown to be
53 	// significantly faster than unaligned accesses.
54 	const size_t align_amount = my_min(size, (8 - (uintptr_t)buf) & 7);
55 
56 	for (const uint8_t *limit = buf + align_amount; buf < limit; ++buf)
57 		crc = __crc32b(crc, *buf);
58 
59 	size -= align_amount;
60 
61 	// Process 8 bytes at a time. The end point is determined by
62 	// ignoring the least significant three bits of size to ensure
63 	// we do not process past the bounds of the buffer. This guarantees
64 	// that limit is a multiple of 8 and is strictly less than size.
65 	for (const uint8_t *limit = buf + (size & ~((size_t)7));
66 			buf < limit; buf += 8)
67 		crc = __crc32d(crc, aligned_read64le(buf));
68 
69 	// Process the remaining bytes that are not 8 byte aligned.
70 	for (const uint8_t *limit = buf + (size & 7); buf < limit; ++buf)
71 		crc = __crc32b(crc, *buf);
72 
73 	return ~crc;
74 }
75 
76 
77 #if defined(CRC32_GENERIC) && defined(CRC32_ARCH_OPTIMIZED)
78 static inline bool
79 is_arch_extension_supported(void)
80 {
81 #if defined(HAVE_GETAUXVAL)
82 	return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0;
83 
84 #elif defined(HAVE_ELF_AUX_INFO)
85 	unsigned long feature_flags;
86 
87 	elf_aux_info(AT_HWCAP, &feature_flags, sizeof(feature_flags));
88 	return feature_flags & HWCAP_CRC32 != 0;
89 
90 #elif defined(_WIN32)
91 	return IsProcessorFeaturePresent(
92 			PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
93 
94 #elif defined(__APPLE__) && defined(HAVE_SYSCTLBYNAME)
95 	int has_crc32 = 0;
96 	size_t size = sizeof(has_crc32);
97 
98 	// The sysctlbyname() function requires a string identifier for the
99 	// CPU feature it tests. The Apple documentation lists the string
100 	// "hw.optional.armv8_crc32", which can be found here:
101 	// (https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics#3915619)
102 	int err = sysctlbyname("hw.optional.armv8_crc32", &has_crc32,
103 			&size, NULL, 0);
104 
105 	return !err && has_crc32;
106 
107 #else
108 	// If a runtime detection method cannot be found, then this must
109 	// be a compile time error. The checks in crc_common.h should ensure
110 	// a runtime detection method is always found if this function is
111 	// built. It would be possible to just return false here, but this
112 	// is inefficient for binary size and runtime since only the generic
113 	// method could ever be used.
114 #	error Runtime detection method unavailable.
115 #endif
116 }
117 #endif
118 
119 #endif // LZMA_CRC32_ARM64_H
120