xref: /freebsd/contrib/xz/src/common/tuklib_integer.h (revision b333cd44de6db4d3182add2f80870c7a96d570d9)
181ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
281ad8388SMartin Matuska //
381ad8388SMartin Matuska /// \file       tuklib_integer.h
481ad8388SMartin Matuska /// \brief      Various integer and bit operations
581ad8388SMartin Matuska ///
681ad8388SMartin Matuska /// This file provides macros or functions to do some basic integer and bit
781ad8388SMartin Matuska /// operations.
881ad8388SMartin Matuska ///
9a8675d92SXin LI /// Native endian inline functions (XX = 16, 32, or 64):
10a8675d92SXin LI ///   - Unaligned native endian reads: readXXne(ptr)
11a8675d92SXin LI ///   - Unaligned native endian writes: writeXXne(ptr, num)
12a8675d92SXin LI ///   - Aligned native endian reads: aligned_readXXne(ptr)
13a8675d92SXin LI ///   - Aligned native endian writes: aligned_writeXXne(ptr, num)
14a8675d92SXin LI ///
15a8675d92SXin LI /// Endianness-converting integer operations (these can be macros!)
16a8675d92SXin LI /// (XX = 16, 32, or 64; Y = b or l):
1781ad8388SMartin Matuska ///   - Byte swapping: bswapXX(num)
18a8675d92SXin LI ///   - Byte order conversions to/from native (byteswaps if Y isn't
19a8675d92SXin LI ///     the native endianness): convXXYe(num)
2073ed8e77SXin LI ///   - Unaligned reads: readXXYe(ptr)
2173ed8e77SXin LI ///   - Unaligned writes: writeXXYe(ptr, num)
22a8675d92SXin LI ///   - Aligned reads: aligned_readXXYe(ptr)
23a8675d92SXin LI ///   - Aligned writes: aligned_writeXXYe(ptr, num)
2481ad8388SMartin Matuska ///
/// Since the above can be macros, the arguments should have no side effects
26a8675d92SXin LI /// because they may be evaluated more than once.
2781ad8388SMartin Matuska ///
28a8675d92SXin LI /// Bit scan operations for non-zero 32-bit integers (inline functions):
2981ad8388SMartin Matuska ///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
3081ad8388SMartin Matuska ///   - Count leading zeros: clz32(num)
3181ad8388SMartin Matuska ///   - Count trailing zeros: ctz32(num)
3281ad8388SMartin Matuska ///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
3381ad8388SMartin Matuska ///
3481ad8388SMartin Matuska /// The above bit scan operations return 0-31. If num is zero,
3581ad8388SMartin Matuska /// the result is undefined.
3681ad8388SMartin Matuska //
3781ad8388SMartin Matuska //  Authors:    Lasse Collin
3881ad8388SMartin Matuska //              Joachim Henke
3981ad8388SMartin Matuska //
4081ad8388SMartin Matuska //  This file has been put into the public domain.
4181ad8388SMartin Matuska //  You can do whatever you want with this file.
4281ad8388SMartin Matuska //
4381ad8388SMartin Matuska ///////////////////////////////////////////////////////////////////////////////
4481ad8388SMartin Matuska 
4581ad8388SMartin Matuska #ifndef TUKLIB_INTEGER_H
4681ad8388SMartin Matuska #define TUKLIB_INTEGER_H
4781ad8388SMartin Matuska 
4881ad8388SMartin Matuska #include "tuklib_common.h"
49a8675d92SXin LI #include <string.h>
50a8675d92SXin LI 
51a8675d92SXin LI // Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
52a8675d92SXin LI // and such functions.
53a8675d92SXin LI #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
54a8675d92SXin LI #	include <immintrin.h>
55*b333cd44SXin LI // Only include <intrin.h> when it is needed. GCC and Clang can both
// use __builtin's, so we only need Windows intrinsics when using MSVC.
57*b333cd44SXin LI // GCC and Clang can set _MSC_VER on Windows, so we need to exclude these
58*b333cd44SXin LI // cases explicitly.
59*b333cd44SXin LI #elif defined(_MSC_VER) && !TUKLIB_GNUC_REQ(3, 4) && !defined(__clang__)
60*b333cd44SXin LI #	include <intrin.h>
61a8675d92SXin LI #endif
6281ad8388SMartin Matuska 
6381ad8388SMartin Matuska 
64a8675d92SXin LI ///////////////////
65a8675d92SXin LI // Byte swapping //
66a8675d92SXin LI ///////////////////
6781ad8388SMartin Matuska 
// Pick the platform's byte swapping primitives. A branch may define only
// some of bswapXX()/convXXYe(); anything still undefined after this group
// gets a portable fallback below.
#if defined(HAVE___BUILTIN_BSWAPXX)
	// GCC >= 4.8 and Clang
#	define bswap16(n) __builtin_bswap16(n)
#	define bswap32(n) __builtin_bswap32(n)
#	define bswap64(n) __builtin_bswap64(n)

#elif defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
	// Nothing is defined here; the header is presumably expected to
	// provide the bswapXX() names itself (otherwise the fallbacks apply).
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
	// Map whichever of the system's upper-case macros exist onto
	// the names used by this file.
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif
12281ad8388SMartin Matuska 
#ifndef bswap16
	// Portable fallback: exchange the two low bytes of n.
	// n is evaluated twice, so it must not have side effects.
#	define bswap16(n) ((uint16_t)( \
		  (((n) & 0x00FFU) << 8) \
		| (((n) & 0xFF00U) >> 8) \
	))
#endif
12981ad8388SMartin Matuska 
#ifndef bswap32
	// Portable fallback: reverse the order of the four low bytes of n.
	// n is evaluated several times, so it must not have side effects.
#	define bswap32(n) ((uint32_t)( \
		  (((n) & UINT32_C(0x000000FF)) << 24) \
		| (((n) & UINT32_C(0x0000FF00)) << 8) \
		| (((n) & UINT32_C(0x00FF0000)) >> 8) \
		| (((n) & UINT32_C(0xFF000000)) >> 24) \
	))
#endif
13881ad8388SMartin Matuska 
#ifndef bswap64
	// Portable fallback: reverse the order of the eight bytes of n.
	// n is evaluated several times, so it must not have side effects.
#	define bswap64(n) ((uint64_t)( \
		  (((n) & UINT64_C(0x00000000000000FF)) << 56) \
		| (((n) & UINT64_C(0x000000000000FF00)) << 40) \
		| (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
		| (((n) & UINT64_C(0x00000000FF000000)) << 8) \
		| (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
		| (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
		| (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
		| (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
	))
#endif
15181ad8388SMartin Matuska 
// Define conversion macros using the basic byte swapping macros.
// A conversion to/from the native byte order is just a cast to the
// fixed-width type; a conversion to/from the other byte order is a
// byte swap. Macros that were already defined earlier are kept.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif
19281ad8388SMartin Matuska 
19381ad8388SMartin Matuska 
194a8675d92SXin LI ////////////////////////////////
195a8675d92SXin LI // Unaligned reads and writes //
196a8675d92SXin LI ////////////////////////////////
197a8675d92SXin LI 
// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
// is bad even if the uint8_pointer is properly aligned because this kind
// of cast breaks strict aliasing rules and results in undefined behavior.
201a8675d92SXin LI // With unaligned pointers it's even worse: compilers may emit vector
202a8675d92SXin LI // instructions that require aligned pointers even if non-vector
203a8675d92SXin LI // instructions work with unaligned pointers.
204a8675d92SXin LI //
205a8675d92SXin LI // Using memcpy() is the standard compliant way to do unaligned access.
206a8675d92SXin LI // Many modern compilers inline it so there is no function call overhead.
207a8675d92SXin LI // For those compilers that don't handle the memcpy() method well, the
208a8675d92SXin LI // old casting method (that violates strict aliasing) can be requested at
209a8675d92SXin LI // build time. A third method, casting to a packed struct, would also be
210a8675d92SXin LI // an option but isn't provided to keep things simpler (it's already a mess).
211a8675d92SXin LI // Hopefully this is flexible enough in practice.
21281ad8388SMartin Matuska 
/// Reads a 16-bit integer in native byte order from buf.
/// buf does not need to be aligned.
static inline uint16_t
read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Requested at build time: load through a cast pointer even
	// though it violates strict aliasing.
	return *(const uint16_t *)buf;
#else
	// Standard-compliant way: copy the bytes into a properly typed object.
	uint16_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}
22581ad8388SMartin Matuska 
22681ad8388SMartin Matuska 
/// Reads a 32-bit integer in native byte order from buf.
/// buf does not need to be aligned.
static inline uint32_t
read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Requested at build time: load through a cast pointer even
	// though it violates strict aliasing.
	return *(const uint32_t *)buf;
#else
	// Standard-compliant way: copy the bytes into a properly typed object.
	uint32_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}
23981ad8388SMartin Matuska 
24081ad8388SMartin Matuska 
/// Reads a 64-bit integer in native byte order from buf.
/// buf does not need to be aligned.
static inline uint64_t
read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Requested at build time: load through a cast pointer even
	// though it violates strict aliasing.
	return *(const uint64_t *)buf;
#else
	// Standard-compliant way: copy the bytes into a properly typed object.
	uint64_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}
25381ad8388SMartin Matuska 
25481ad8388SMartin Matuska 
/// Writes num to buf as a 16-bit integer in native byte order.
/// buf does not need to be aligned.
static inline void
write16ne(uint8_t *buf, uint16_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Requested at build time: store through a cast pointer even
	// though it violates strict aliasing.
	*(uint16_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}
26681ad8388SMartin Matuska 
26781ad8388SMartin Matuska 
/// Writes num to buf as a 32-bit integer in native byte order.
/// buf does not need to be aligned.
static inline void
write32ne(uint8_t *buf, uint32_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Requested at build time: store through a cast pointer even
	// though it violates strict aliasing.
	*(uint32_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}
27981ad8388SMartin Matuska 
28081ad8388SMartin Matuska 
/// Writes num to buf as a 64-bit integer in native byte order.
/// buf does not need to be aligned.
static inline void
write64ne(uint8_t *buf, uint64_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Requested at build time: store through a cast pointer even
	// though it violates strict aliasing.
	*(uint64_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}
29281ad8388SMartin Matuska 
29381ad8388SMartin Matuska 
/// Reads a big endian 16-bit integer from buf.
/// buf does not need to be aligned.
static inline uint16_t
read16be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Native read followed by a conversion from big endian.
	uint16_t num = read16ne(buf);
	return conv16be(num);
#else
	// Assemble the value byte by byte, most significant byte first.
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
#endif
}
30581ad8388SMartin Matuska 
30681ad8388SMartin Matuska 
/// Reads a little endian 16-bit integer from buf.
/// buf does not need to be aligned.
static inline uint16_t
read16le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Native read followed by a conversion from little endian.
	uint16_t num = read16ne(buf);
	return conv16le(num);
#else
	// Assemble the value byte by byte, least significant byte first.
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
#endif
}
31881ad8388SMartin Matuska 
31981ad8388SMartin Matuska 
/// Reads a big endian 32-bit integer from buf.
/// buf does not need to be aligned.
static inline uint32_t
read32be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Native read followed by a conversion from big endian.
	uint32_t num = read32ne(buf);
	return conv32be(num);
#else
	// Assemble the value byte by byte, most significant byte first.
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
#endif
}
33481ad8388SMartin Matuska 
33581ad8388SMartin Matuska 
/// Reads a little endian 32-bit integer from buf.
/// buf does not need to be aligned.
static inline uint32_t
read32le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Native read followed by a conversion from little endian.
	uint32_t num = read32ne(buf);
	return conv32le(num);
#else
	// Assemble the value byte by byte, least significant byte first.
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
#endif
}
35081ad8388SMartin Matuska 
35181ad8388SMartin Matuska 
/// Reads a big endian 64-bit integer from buf.
/// buf does not need to be aligned.
static inline uint64_t
read64be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Native read followed by a conversion from big endian.
	uint64_t num = read64ne(buf);
	return conv64be(num);
#else
	// Assemble the value byte by byte, most significant byte first.
	uint64_t num = (uint64_t)buf[0] << 56;
	num |= (uint64_t)buf[1] << 48;
	num |= (uint64_t)buf[2] << 40;
	num |= (uint64_t)buf[3] << 32;
	num |= (uint64_t)buf[4] << 24;
	num |= (uint64_t)buf[5] << 16;
	num |= (uint64_t)buf[6] << 8;
	num |= (uint64_t)buf[7];
	return num;
#endif
}
37073ed8e77SXin LI 
37173ed8e77SXin LI 
/// Reads a little endian 64-bit integer from buf.
/// buf does not need to be aligned.
static inline uint64_t
read64le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Native read followed by a conversion from little endian.
	uint64_t num = read64ne(buf);
	return conv64le(num);
#else
	// Assemble the value byte by byte, least significant byte first.
	uint64_t num = (uint64_t)buf[0];
	num |= (uint64_t)buf[1] << 8;
	num |= (uint64_t)buf[2] << 16;
	num |= (uint64_t)buf[3] << 24;
	num |= (uint64_t)buf[4] << 32;
	num |= (uint64_t)buf[5] << 40;
	num |= (uint64_t)buf[6] << 48;
	num |= (uint64_t)buf[7] << 56;
	return num;
#endif
}
39073ed8e77SXin LI 
39173ed8e77SXin LI 
// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible.
//
// The big endian forms are defined on big endian builds or when unaligned
// access is fast; the little endian forms on little endian builds or when
// unaligned access is fast. Forms not defined here are expected to come
// from function fallbacks guarded by #ifndef.
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16be(buf, num) write16ne(buf, conv16be(num))
#	define write32be(buf, num) write32ne(buf, conv32be(num))
#	define write64be(buf, num) write64ne(buf, conv64be(num))
#endif

#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16le(buf, num) write16ne(buf, conv16le(num))
#	define write32le(buf, num) write32ne(buf, conv32le(num))
#	define write64le(buf, num) write64ne(buf, conv64le(num))
#endif
407a8675d92SXin LI 
408a8675d92SXin LI 
#ifndef write16be
/// Stores num at buf as two bytes, most significant byte first.
/// buf does not need to be aligned. Only compiled when write16be
/// isn't already a macro.
static inline void
write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
}
#endif
41881ad8388SMartin Matuska 
41981ad8388SMartin Matuska 
#ifndef write16le
/// Stores num at buf as two bytes, least significant byte first.
/// buf does not need to be aligned. Only compiled when write16le
/// isn't already a macro.
static inline void
write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
}
#endif
42981ad8388SMartin Matuska 
43081ad8388SMartin Matuska 
#ifndef write32be
/// Stores num at buf as four bytes, most significant byte first.
/// buf does not need to be aligned. Only compiled when write32be
/// isn't already a macro.
static inline void
write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
}
#endif
44281ad8388SMartin Matuska 
44381ad8388SMartin Matuska 
#ifndef write32le
/// Stores num at buf as four bytes, least significant byte first.
/// buf does not need to be aligned. Only compiled when write32le
/// isn't already a macro.
static inline void
write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
}
#endif


// The file header documents writeXXYe() for XX = 64 too, and
// read64be()/read64le() have byte-by-byte fallbacks. Provide the matching
// 64-bit write fallbacks so that write64be()/write64le() exist also in
// builds where their macro forms aren't defined.
#ifndef write64be
/// Stores num at buf as eight bytes, most significant byte first.
/// buf does not need to be aligned.
static inline void
write64be(uint8_t *buf, uint64_t num)
{
	buf[0] = (uint8_t)(num >> 56);
	buf[1] = (uint8_t)(num >> 48);
	buf[2] = (uint8_t)(num >> 40);
	buf[3] = (uint8_t)(num >> 32);
	buf[4] = (uint8_t)(num >> 24);
	buf[5] = (uint8_t)(num >> 16);
	buf[6] = (uint8_t)(num >> 8);
	buf[7] = (uint8_t)num;
}
#endif


#ifndef write64le
/// Stores num at buf as eight bytes, least significant byte first.
/// buf does not need to be aligned.
static inline void
write64le(uint8_t *buf, uint64_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	buf[4] = (uint8_t)(num >> 32);
	buf[5] = (uint8_t)(num >> 40);
	buf[6] = (uint8_t)(num >> 48);
	buf[7] = (uint8_t)(num >> 56);
}
#endif
45581ad8388SMartin Matuska 
45681ad8388SMartin Matuska 
457a8675d92SXin LI //////////////////////////////
458a8675d92SXin LI // Aligned reads and writes //
459a8675d92SXin LI //////////////////////////////
460a8675d92SXin LI 
461a8675d92SXin LI // Separate functions for aligned reads and writes are provided since on
462a8675d92SXin LI // strict-align archs aligned access is much faster than unaligned access.
463a8675d92SXin LI //
464a8675d92SXin LI // Just like in the unaligned case, memcpy() is needed to avoid
465a8675d92SXin LI // strict aliasing violations. However, on archs that don't support
466a8675d92SXin LI // unaligned access the compiler cannot know that the pointers given
467a8675d92SXin LI // to memcpy() are aligned which results in slow code. As of C11 there is
468a8675d92SXin LI // no standard way to tell the compiler that we know that the address is
469a8675d92SXin LI // aligned but some compilers have language extensions to do that. With
470a8675d92SXin LI // such language extensions the memcpy() method gives excellent results.
471a8675d92SXin LI //
// What to do on a strict-align system when no known language extensions
473a8675d92SXin LI // are available? Falling back to byte-by-byte access would be safe but ruin
474a8675d92SXin LI // optimizations that have been made specifically with aligned access in mind.
475a8675d92SXin LI // As a compromise, aligned reads will fall back to non-compliant type punning
476a8675d92SXin LI // but aligned writes will be byte-by-byte, that is, fast reads are preferred
477a8675d92SXin LI // over fast writes. This obviously isn't great but hopefully it's a working
478a8675d92SXin LI // compromise for now.
479a8675d92SXin LI //
// __builtin_assume_aligned is supported by GCC >= 4.7 and clang >= 3.6.
// tuklib_memcpy_aligned(dest, src, size) copies size bytes like memcpy().
// When the compiler supports it, src is additionally declared to be
// aligned to size bytes.
//
// NOTE(review): only src gets the alignment hint. The aligned_writeXXne()
// functions pass the address of their local value as src and the caller's
// buffer as dest, so the hint doesn't describe the buffer there. Confirm
// whether dest should be hinted as well.
#ifdef HAVE___BUILTIN_ASSUME_ALIGNED
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, __builtin_assume_aligned(src, size), size)
#else
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, src, size)
	// Without the alignment hint and without fast unaligned access,
	// make the aligned reads use type punning instead of memcpy().
#	ifndef TUKLIB_FAST_UNALIGNED_ACCESS
#		define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
#	endif
#endif
491a8675d92SXin LI 
492a8675d92SXin LI 
/// Reads a 16-bit integer in native byte order from buf.
/// The caller is expected to pass a buf that is aligned for uint16_t.
static inline uint16_t
aligned_read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Direct load through a cast pointer (violates strict aliasing).
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}
505a8675d92SXin LI 
506a8675d92SXin LI 
/// Reads a 32-bit integer in native byte order from buf.
/// The caller is expected to pass a buf that is aligned for uint32_t.
static inline uint32_t
aligned_read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Direct load through a cast pointer (violates strict aliasing).
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}
519a8675d92SXin LI 
520a8675d92SXin LI 
/// Reads a 64-bit integer in native byte order from buf.
/// The caller is expected to pass a buf that is aligned for uint64_t.
static inline uint64_t
aligned_read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Direct load through a cast pointer (violates strict aliasing).
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}
533a8675d92SXin LI 
534a8675d92SXin LI 
/// Writes num to buf as a 16-bit integer in native byte order.
/// The caller is expected to pass a buf that is aligned for uint16_t.
static inline void
aligned_write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	// Direct store through a cast pointer (violates strict aliasing).
	*(uint16_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
}
545a8675d92SXin LI 
546a8675d92SXin LI 
/// Writes num to buf as a 32-bit integer in native byte order.
/// The caller is expected to pass a buf that is aligned for uint32_t.
static inline void
aligned_write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	// Direct store through a cast pointer (violates strict aliasing).
	*(uint32_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
}
557a8675d92SXin LI 
558a8675d92SXin LI 
/// Writes num to buf as a 64-bit integer in native byte order.
/// The caller is expected to pass a buf that is aligned for uint64_t.
static inline void
aligned_write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	// Direct store through a cast pointer (violates strict aliasing).
	*(uint64_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
}
569a8675d92SXin LI 
570a8675d92SXin LI 
/// Reads a big endian 16-bit integer from an aligned buf:
/// a native aligned read followed by conv16be().
static inline uint16_t
aligned_read16be(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16be(num);
}
577a8675d92SXin LI 
578a8675d92SXin LI 
/// Reads a little endian 16-bit integer from an aligned buf:
/// a native aligned read followed by conv16le().
static inline uint16_t
aligned_read16le(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16le(num);
}
585a8675d92SXin LI 
586a8675d92SXin LI 
/// Reads a big endian 32-bit integer from an aligned buf:
/// a native aligned read followed by conv32be().
static inline uint32_t
aligned_read32be(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32be(num);
}
593a8675d92SXin LI 
594a8675d92SXin LI 
/// Reads a little endian 32-bit integer from an aligned buf:
/// a native aligned read followed by conv32le().
static inline uint32_t
aligned_read32le(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32le(num);
}
601a8675d92SXin LI 
602a8675d92SXin LI 
/// Reads a big endian 64-bit integer from an aligned buf:
/// a native aligned read followed by conv64be().
static inline uint64_t
aligned_read64be(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64be(num);
}
609a8675d92SXin LI 
610a8675d92SXin LI 
/// Reads a little endian 64-bit integer from an aligned buf:
/// a native aligned read followed by conv64le().
static inline uint64_t
aligned_read64le(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64le(num);
}
617a8675d92SXin LI 
618a8675d92SXin LI 
// These need to be macros like in the unaligned case: the byte order
// conversion happens in the macro and the store in the inline function.
#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))
626a8675d92SXin LI 
627a8675d92SXin LI 
628a8675d92SXin LI ////////////////////
629a8675d92SXin LI // Bit operations //
630a8675d92SXin LI ////////////////////
631a8675d92SXin LI 
63281ad8388SMartin Matuska static inline uint32_t
63381ad8388SMartin Matuska bsr32(uint32_t n)
63481ad8388SMartin Matuska {
63581ad8388SMartin Matuska 	// Check for ICC first, since it tends to define __GNUC__ too.
63681ad8388SMartin Matuska #if defined(__INTEL_COMPILER)
63781ad8388SMartin Matuska 	return _bit_scan_reverse(n);
63881ad8388SMartin Matuska 
639*b333cd44SXin LI #elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
64081ad8388SMartin Matuska 	// GCC >= 3.4 has __builtin_clz(), which gives good results on
64181ad8388SMartin Matuska 	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
64281ad8388SMartin Matuska 	// either plain BSR (so the XOR gets optimized away) or LZCNT and
64381ad8388SMartin Matuska 	// XOR (if -march indicates that SSE4a instructions are supported).
644a8675d92SXin LI 	return (uint32_t)__builtin_clz(n) ^ 31U;
64581ad8388SMartin Matuska 
64681ad8388SMartin Matuska #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
64781ad8388SMartin Matuska 	uint32_t i;
64881ad8388SMartin Matuska 	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
64981ad8388SMartin Matuska 	return i;
65081ad8388SMartin Matuska 
651a8675d92SXin LI #elif defined(_MSC_VER)
652a8675d92SXin LI 	unsigned long i;
653a8675d92SXin LI 	_BitScanReverse(&i, n);
65481ad8388SMartin Matuska 	return i;
65581ad8388SMartin Matuska 
65681ad8388SMartin Matuska #else
65781ad8388SMartin Matuska 	uint32_t i = 31;
65881ad8388SMartin Matuska 
659a8675d92SXin LI 	if ((n & 0xFFFF0000) == 0) {
66081ad8388SMartin Matuska 		n <<= 16;
66181ad8388SMartin Matuska 		i = 15;
66281ad8388SMartin Matuska 	}
66381ad8388SMartin Matuska 
664a8675d92SXin LI 	if ((n & 0xFF000000) == 0) {
66581ad8388SMartin Matuska 		n <<= 8;
66681ad8388SMartin Matuska 		i -= 8;
66781ad8388SMartin Matuska 	}
66881ad8388SMartin Matuska 
669a8675d92SXin LI 	if ((n & 0xF0000000) == 0) {
67081ad8388SMartin Matuska 		n <<= 4;
67181ad8388SMartin Matuska 		i -= 4;
67281ad8388SMartin Matuska 	}
67381ad8388SMartin Matuska 
674a8675d92SXin LI 	if ((n & 0xC0000000) == 0) {
67581ad8388SMartin Matuska 		n <<= 2;
67681ad8388SMartin Matuska 		i -= 2;
67781ad8388SMartin Matuska 	}
67881ad8388SMartin Matuska 
679a8675d92SXin LI 	if ((n & 0x80000000) == 0)
68081ad8388SMartin Matuska 		--i;
68181ad8388SMartin Matuska 
68281ad8388SMartin Matuska 	return i;
68381ad8388SMartin Matuska #endif
68481ad8388SMartin Matuska }
68581ad8388SMartin Matuska 
68681ad8388SMartin Matuska 
68781ad8388SMartin Matuska static inline uint32_t
68881ad8388SMartin Matuska clz32(uint32_t n)
68981ad8388SMartin Matuska {
69081ad8388SMartin Matuska #if defined(__INTEL_COMPILER)
69181ad8388SMartin Matuska 	return _bit_scan_reverse(n) ^ 31U;
69281ad8388SMartin Matuska 
693*b333cd44SXin LI #elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
694a8675d92SXin LI 	return (uint32_t)__builtin_clz(n);
69581ad8388SMartin Matuska 
69681ad8388SMartin Matuska #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
69781ad8388SMartin Matuska 	uint32_t i;
69881ad8388SMartin Matuska 	__asm__("bsrl %1, %0\n\t"
69981ad8388SMartin Matuska 		"xorl $31, %0"
70081ad8388SMartin Matuska 		: "=r" (i) : "rm" (n));
70181ad8388SMartin Matuska 	return i;
70281ad8388SMartin Matuska 
703a8675d92SXin LI #elif defined(_MSC_VER)
704a8675d92SXin LI 	unsigned long i;
705a8675d92SXin LI 	_BitScanReverse(&i, n);
70681ad8388SMartin Matuska 	return i ^ 31U;
70781ad8388SMartin Matuska 
70881ad8388SMartin Matuska #else
70981ad8388SMartin Matuska 	uint32_t i = 0;
71081ad8388SMartin Matuska 
711a8675d92SXin LI 	if ((n & 0xFFFF0000) == 0) {
71281ad8388SMartin Matuska 		n <<= 16;
71381ad8388SMartin Matuska 		i = 16;
71481ad8388SMartin Matuska 	}
71581ad8388SMartin Matuska 
716a8675d92SXin LI 	if ((n & 0xFF000000) == 0) {
71781ad8388SMartin Matuska 		n <<= 8;
71881ad8388SMartin Matuska 		i += 8;
71981ad8388SMartin Matuska 	}
72081ad8388SMartin Matuska 
721a8675d92SXin LI 	if ((n & 0xF0000000) == 0) {
72281ad8388SMartin Matuska 		n <<= 4;
72381ad8388SMartin Matuska 		i += 4;
72481ad8388SMartin Matuska 	}
72581ad8388SMartin Matuska 
726a8675d92SXin LI 	if ((n & 0xC0000000) == 0) {
72781ad8388SMartin Matuska 		n <<= 2;
72881ad8388SMartin Matuska 		i += 2;
72981ad8388SMartin Matuska 	}
73081ad8388SMartin Matuska 
731a8675d92SXin LI 	if ((n & 0x80000000) == 0)
73281ad8388SMartin Matuska 		++i;
73381ad8388SMartin Matuska 
73481ad8388SMartin Matuska 	return i;
73581ad8388SMartin Matuska #endif
73681ad8388SMartin Matuska }
73781ad8388SMartin Matuska 
73881ad8388SMartin Matuska 
73981ad8388SMartin Matuska static inline uint32_t
74081ad8388SMartin Matuska ctz32(uint32_t n)
74181ad8388SMartin Matuska {
74281ad8388SMartin Matuska #if defined(__INTEL_COMPILER)
74381ad8388SMartin Matuska 	return _bit_scan_forward(n);
74481ad8388SMartin Matuska 
745*b333cd44SXin LI #elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX >= UINT32_MAX
746a8675d92SXin LI 	return (uint32_t)__builtin_ctz(n);
74781ad8388SMartin Matuska 
74881ad8388SMartin Matuska #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
74981ad8388SMartin Matuska 	uint32_t i;
75081ad8388SMartin Matuska 	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
75181ad8388SMartin Matuska 	return i;
75281ad8388SMartin Matuska 
753a8675d92SXin LI #elif defined(_MSC_VER)
754a8675d92SXin LI 	unsigned long i;
755a8675d92SXin LI 	_BitScanForward(&i, n);
75681ad8388SMartin Matuska 	return i;
75781ad8388SMartin Matuska 
75881ad8388SMartin Matuska #else
75981ad8388SMartin Matuska 	uint32_t i = 0;
76081ad8388SMartin Matuska 
761a8675d92SXin LI 	if ((n & 0x0000FFFF) == 0) {
76281ad8388SMartin Matuska 		n >>= 16;
76381ad8388SMartin Matuska 		i = 16;
76481ad8388SMartin Matuska 	}
76581ad8388SMartin Matuska 
766a8675d92SXin LI 	if ((n & 0x000000FF) == 0) {
76781ad8388SMartin Matuska 		n >>= 8;
76881ad8388SMartin Matuska 		i += 8;
76981ad8388SMartin Matuska 	}
77081ad8388SMartin Matuska 
771a8675d92SXin LI 	if ((n & 0x0000000F) == 0) {
77281ad8388SMartin Matuska 		n >>= 4;
77381ad8388SMartin Matuska 		i += 4;
77481ad8388SMartin Matuska 	}
77581ad8388SMartin Matuska 
776a8675d92SXin LI 	if ((n & 0x00000003) == 0) {
77781ad8388SMartin Matuska 		n >>= 2;
77881ad8388SMartin Matuska 		i += 2;
77981ad8388SMartin Matuska 	}
78081ad8388SMartin Matuska 
781a8675d92SXin LI 	if ((n & 0x00000001) == 0)
78281ad8388SMartin Matuska 		++i;
78381ad8388SMartin Matuska 
78481ad8388SMartin Matuska 	return i;
78581ad8388SMartin Matuska #endif
78681ad8388SMartin Matuska }
78781ad8388SMartin Matuska 
78881ad8388SMartin Matuska #define bsf32 ctz32
78981ad8388SMartin Matuska 
79081ad8388SMartin Matuska #endif
791