///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
///
/// This file provides macros or functions to do some basic integer and bit
/// operations.
///
/// Native endian inline functions (XX = 16, 32, or 64):
///   - Unaligned native endian reads: readXXne(ptr)
///   - Unaligned native endian writes: writeXXne(ptr, num)
///   - Aligned native endian reads: aligned_readXXne(ptr)
///   - Aligned native endian writes: aligned_writeXXne(ptr, num)
///
/// Endianness-converting integer operations (these can be macros!)
/// (XX = 16, 32, or 64; Y = b or l):
///   - Byte swapping: bswapXX(num)
///   - Byte order conversions to/from native (byteswaps if Y isn't
///     the native endianness): convXXYe(num)
///   - Unaligned reads: readXXYe(ptr)
///   - Unaligned writes: writeXXYe(ptr, num)
///   - Aligned reads: aligned_readXXYe(ptr)
///   - Aligned writes: aligned_writeXXYe(ptr, num)
///
/// Since the above can be macros, the arguments should have no side effects
/// because they may be evaluated more than once.
///
/// Bit scan operations for non-zero 32-bit integers (inline functions):
///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
///   - Count leading zeros: clz32(num)
///   - Count trailing zeros: ctz32(num)
///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
///
/// The above bit scan operations return 0-31. If num is zero,
/// the result is undefined.
//
//  Authors:    Lasse Collin
//              Joachim Henke
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"
#include <string.h>

// Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
// and such functions.
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
#	include <immintrin.h>
#elif defined(_MSC_VER)
	// _BitScanReverse() and _BitScanForward() are declared in <intrin.h>.
#	include <intrin.h>
#endif


///////////////////
// Byte swapping //
///////////////////

// Pick the byte swapping primitives offered by the build environment.
// A branch may leave some or all of bswap16/bswap32/bswap64 undefined;
// the #ifndef-guarded generic macros after this block fill those in.
#if defined(HAVE___BUILTIN_BSWAPXX)
	// GCC >= 4.8 and Clang
#	define bswap16(n) __builtin_bswap16(n)
#	define bswap32(n) __builtin_bswap32(n)
#	define bswap64(n) __builtin_bswap64(n)

#elif defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris: besides plain byte swapping, <sys/byteorder.h> may also
	// provide ready-made conversions to/from big and little endian.
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif

// Generic 16-bit byte swap. The argument is evaluated twice, so it must
// not have side effects. The whole expansion is parenthesized so that it
// behaves as a single primary expression wherever it is used.
#ifndef bswap16
#	define bswap16(n) ((uint16_t)( \
		  (((n) & 0x00FFU) << 8) \
		| (((n) & 0xFF00U) >> 8) \
	))
#endif

// Generic 32-bit byte swap. The argument is evaluated four times, so it
// must not have side effects. The whole expansion is parenthesized so that
// it behaves as a single primary expression wherever it is used.
#ifndef bswap32
#	define bswap32(n) ((uint32_t)( \
		  (((n) & UINT32_C(0x000000FF)) << 24) \
		| (((n) & UINT32_C(0x0000FF00)) << 8) \
		| (((n) & UINT32_C(0x00FF0000)) >> 8) \
		| (((n) & UINT32_C(0xFF000000)) >> 24) \
	))
#endif

// Generic 64-bit byte swap. The argument is evaluated eight times, so it
// must not have side effects. The whole expansion is parenthesized so that
// it behaves as a single primary expression wherever it is used.
#ifndef bswap64
#	define bswap64(n) ((uint64_t)( \
		  (((n) & UINT64_C(0x00000000000000FF)) << 56) \
		| (((n) & UINT64_C(0x000000000000FF00)) << 40) \
		| (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
		| (((n) & UINT64_C(0x00000000FF000000)) << 8) \
		| (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
		| (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
		| (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
		| (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
	))
#endif

// Define conversion macros using the basic byte swapping macros.
//
// convXXbe()/convXXle() convert between the native byte order and big or
// little endian. When the requested order is the native one the value is
// only cast to the fixed-width type, otherwise it is byte swapped. The
// #ifndef guards keep any conversion macro that has been defined earlier.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
// is bad even if the uint8_pointer is properly aligned because this kind
// of cast breaks strict aliasing rules and results in undefined behavior.
// With unaligned pointers it's even worse: compilers may emit vector
// instructions that require aligned pointers even if non-vector
// instructions work with unaligned pointers.
//
// Using memcpy() is the standard-compliant way to do unaligned access.
// Many modern compilers inline it so there is no function call overhead.
// For those compilers that don't handle the memcpy() method well, the
// old casting method (that violates strict aliasing) can be requested at
// build time. A third method, casting to a packed struct, would also be
// an option but isn't provided to keep things simpler (it's already a mess).
// Hopefully this is flexible enough in practice.

/// \brief      Read a 16-bit integer in native byte order from a buffer
///             that does not need to be aligned
///
/// \param      buf     Pointer to at least two readable bytes
/// \return     The two bytes at buf taken as a native endian uint16_t
static inline uint16_t
read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Opt-in shortcut that violates strict aliasing rules.
	return *(const uint16_t *)buf;
#else
	// memcpy() is the standard-compliant way to do an unaligned read.
	uint16_t value;
	memcpy(&value, buf, sizeof(value));
	return value;
#endif
}


/// \brief      Read a 32-bit integer in native byte order from a buffer
///             that does not need to be aligned
///
/// \param      buf     Pointer to at least four readable bytes
/// \return     The four bytes at buf taken as a native endian uint32_t
static inline uint32_t
read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Opt-in shortcut that violates strict aliasing rules.
	return *(const uint32_t *)buf;
#else
	// memcpy() is the standard-compliant way to do an unaligned read.
	uint32_t value;
	memcpy(&value, buf, sizeof(value));
	return value;
#endif
}


/// \brief      Read a 64-bit integer in native byte order from a buffer
///             that does not need to be aligned
///
/// \param      buf     Pointer to at least eight readable bytes
/// \return     The eight bytes at buf taken as a native endian uint64_t
static inline uint64_t
read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Opt-in shortcut that violates strict aliasing rules.
	return *(const uint64_t *)buf;
#else
	// memcpy() is the standard-compliant way to do an unaligned read.
	uint64_t value;
	memcpy(&value, buf, sizeof(value));
	return value;
#endif
}


/// \brief      Write a 16-bit integer in native byte order to a buffer
///             that does not need to be aligned
///
/// \param      buf     Pointer to at least two writable bytes
/// \param      num     Value to store
static inline void
write16ne(uint8_t *buf, uint16_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Opt-in shortcut that violates strict aliasing rules.
	*(uint16_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


/// \brief      Write a 32-bit integer in native byte order to a buffer
///             that does not need to be aligned
///
/// \param      buf     Pointer to at least four writable bytes
/// \param      num     Value to store
static inline void
write32ne(uint8_t *buf, uint32_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Opt-in shortcut that violates strict aliasing rules.
	*(uint32_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


/// \brief      Write a 64-bit integer in native byte order to a buffer
///             that does not need to be aligned
///
/// \param      buf     Pointer to at least eight writable bytes
/// \param      num     Value to store
static inline void
write64ne(uint8_t *buf, uint64_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	// Opt-in shortcut that violates strict aliasing rules.
	*(uint64_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


/// \brief      Read a big endian 16-bit integer from a buffer that does
///             not need to be aligned
///
/// \param      buf     Pointer to at least two readable bytes
/// \return     The value in native byte order
static inline uint16_t
read16be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Keep the temporary: conv16be() can be a macro that evaluates its
	// argument more than once.
	uint16_t num = read16ne(buf);
	return conv16be(num);
#else
	// Assemble the value byte by byte. The operands are promoted to int,
	// so the narrowing back to 16 bits is made explicit.
	return (uint16_t)(((uint16_t)buf[0] << 8) | (uint16_t)buf[1]);
#endif
}


/// \brief      Read a little endian 16-bit integer from a buffer that does
///             not need to be aligned
///
/// \param      buf     Pointer to at least two readable bytes
/// \return     The value in native byte order
static inline uint16_t
read16le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Keep the temporary: conv16le() can be a macro that evaluates its
	// argument more than once.
	uint16_t num = read16ne(buf);
	return conv16le(num);
#else
	// Assemble the value byte by byte. The operands are promoted to int,
	// so the narrowing back to 16 bits is made explicit.
	return (uint16_t)(((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8));
#endif
}


/// \brief      Read a big endian 32-bit integer from a buffer that does
///             not need to be aligned
///
/// \param      buf     Pointer to at least four readable bytes
/// \return     The value in native byte order
static inline uint32_t
read32be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Keep the temporary: conv32be() can be a macro that evaluates its
	// argument more than once.
	uint32_t num = read32ne(buf);
	return conv32be(num);
#else
	// Most significant byte comes first in the buffer.
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
#endif
}


/// \brief      Read a little endian 32-bit integer from a buffer that does
///             not need to be aligned
///
/// \param      buf     Pointer to at least four readable bytes
/// \return     The value in native byte order
static inline uint32_t
read32le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Keep the temporary: conv32le() can be a macro that evaluates its
	// argument more than once.
	uint32_t num = read32ne(buf);
	return conv32le(num);
#else
	// Least significant byte comes first in the buffer.
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
#endif
}


/// \brief      Read a big endian 64-bit integer from a buffer that does
///             not need to be aligned
///
/// \param      buf     Pointer to at least eight readable bytes
/// \return     The value in native byte order
static inline uint64_t
read64be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Keep the temporary: conv64be() can be a macro that evaluates its
	// argument more than once.
	uint64_t num = read64ne(buf);
	return conv64be(num);
#else
	// Most significant byte comes first in the buffer.
	uint64_t num = (uint64_t)buf[0] << 56;
	num |= (uint64_t)buf[1] << 48;
	num |= (uint64_t)buf[2] << 40;
	num |= (uint64_t)buf[3] << 32;
	num |= (uint64_t)buf[4] << 24;
	num |= (uint64_t)buf[5] << 16;
	num |= (uint64_t)buf[6] << 8;
	num |= (uint64_t)buf[7];
	return num;
#endif
}


/// \brief      Read a little endian 64-bit integer from a buffer that does
///             not need to be aligned
///
/// \param      buf     Pointer to at least eight readable bytes
/// \return     The value in native byte order
static inline uint64_t
read64le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	// Keep the temporary: conv64le() can be a macro that evaluates its
	// argument more than once.
	uint64_t num = read64ne(buf);
	return conv64le(num);
#else
	// Least significant byte comes first in the buffer.
	uint64_t num = (uint64_t)buf[0];
	num |= (uint64_t)buf[1] << 8;
	num |= (uint64_t)buf[2] << 16;
	num |= (uint64_t)buf[3] << 24;
	num |= (uint64_t)buf[4] << 32;
	num |= (uint64_t)buf[5] << 40;
	num |= (uint64_t)buf[6] << 48;
	num |= (uint64_t)buf[7] << 56;
	return num;
#endif
}


// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible.
//
// A writer is defined here only when the target byte order is the native
// one or when fast unaligned access is available; in that case the write
// is a native endian store of the (possibly byte swapped) value.
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16be(buf, num) write16ne((buf), conv16be(num))
#	define write32be(buf, num) write32ne((buf), conv32be(num))
#	define write64be(buf, num) write64ne((buf), conv64be(num))
#endif

#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16le(buf, num) write16ne((buf), conv16le(num))
#	define write32le(buf, num) write32ne((buf), conv32le(num))
#	define write64le(buf, num) write64ne((buf), conv64le(num))
#endif


#ifndef write16be
/// \brief      Write a 16-bit integer as big endian, one byte at a time
///
/// Used only when write16be isn't already defined as a macro.
///
/// \param      buf     Pointer to at least two writable bytes
/// \param      num     Value to store
static inline void
write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
}
#endif


#ifndef write16le
/// \brief      Write a 16-bit integer as little endian, one byte at a time
///
/// Used only when write16le isn't already defined as a macro.
///
/// \param      buf     Pointer to at least two writable bytes
/// \param      num     Value to store
static inline void
write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
}
#endif


#ifndef write32be
/// \brief      Write a 32-bit integer as big endian, one byte at a time
///
/// Used only when write32be isn't already defined as a macro.
///
/// \param      buf     Pointer to at least four writable bytes
/// \param      num     Value to store
static inline void
write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
}
#endif


#ifndef write32le
/// \brief      Write a 32-bit integer as little endian, one byte at a time
///
/// Used only when write32le isn't already defined as a macro.
///
/// \param      buf     Pointer to at least four writable bytes
/// \param      num     Value to store
static inline void
write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
}
#endif


// The 64-bit writers get the same byte-by-byte fallbacks as the 16-bit and
// 32-bit ones so that write64be() and write64le() exist in every
// configuration, not only when they are defined as macros.
#ifndef write64be
/// \brief      Write a 64-bit integer as big endian, one byte at a time
///
/// \param      buf     Pointer to at least eight writable bytes
/// \param      num     Value to store
static inline void
write64be(uint8_t *buf, uint64_t num)
{
	buf[0] = (uint8_t)(num >> 56);
	buf[1] = (uint8_t)(num >> 48);
	buf[2] = (uint8_t)(num >> 40);
	buf[3] = (uint8_t)(num >> 32);
	buf[4] = (uint8_t)(num >> 24);
	buf[5] = (uint8_t)(num >> 16);
	buf[6] = (uint8_t)(num >> 8);
	buf[7] = (uint8_t)num;
}
#endif


#ifndef write64le
/// \brief      Write a 64-bit integer as little endian, one byte at a time
///
/// \param      buf     Pointer to at least eight writable bytes
/// \param      num     Value to store
static inline void
write64le(uint8_t *buf, uint64_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	buf[4] = (uint8_t)(num >> 32);
	buf[5] = (uint8_t)(num >> 40);
	buf[6] = (uint8_t)(num >> 48);
	buf[7] = (uint8_t)(num >> 56);
}
#endif


//////////////////////////////
// Aligned reads and writes //
//////////////////////////////

// Separate functions for aligned reads and writes are provided since on
// strict-align archs aligned access is much faster than unaligned access.
//
// Just like in the unaligned case, memcpy() is needed to avoid
// strict aliasing violations. However, on archs that don't support
// unaligned access the compiler cannot know that the pointers given
// to memcpy() are aligned which results in slow code. As of C11 there is
// no standard way to tell the compiler that we know that the address is
// aligned but some compilers have language extensions to do that. With
// such language extensions the memcpy() method gives excellent results.
//
// What to do on a strict-align system when no known language extensions
// are available? Falling back to byte-by-byte access would be safe but ruin
// optimizations that have been made specifically with aligned access in mind.
// As a compromise, aligned reads will fall back to non-compliant type punning
// but aligned writes will be byte-by-byte, that is, fast reads are preferred
// over fast writes. This obviously isn't great but hopefully it's a working
// compromise for now.
//
// __builtin_assume_aligned is supported by GCC >= 4.7 and clang >= 3.6.
// tuklib_memcpy_aligned(dest, src, size) copies size bytes like memcpy().
// With the compiler extension, src is additionally promised to be aligned
// to size bytes; dest gets no such promise. The arguments are expanded
// more than once, so they must not have side effects.
#ifdef HAVE___BUILTIN_ASSUME_ALIGNED
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy((dest), __builtin_assume_aligned((src), (size)), (size))
#else
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy((dest), (src), (size))
	// No way to pass the alignment to the compiler: unless unaligned
	// access is fast anyway, request type punning for aligned reads.
#	ifndef TUKLIB_FAST_UNALIGNED_ACCESS
#		define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
#	endif
#endif


/// \brief      Read a native endian 16-bit integer from an aligned buffer
///
/// \param      buf     Pointer to two readable bytes; the caller is
///                     expected to guarantee suitable alignment
/// \return     The two bytes at buf taken as a native endian uint16_t
static inline uint16_t
aligned_read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Non-compliant but fast: direct load through a casted pointer.
	return *(const uint16_t *)buf;
#else
	uint16_t value;
	tuklib_memcpy_aligned(&value, buf, sizeof(value));
	return value;
#endif
}


/// \brief      Read a native endian 32-bit integer from an aligned buffer
///
/// \param      buf     Pointer to four readable bytes; the caller is
///                     expected to guarantee suitable alignment
/// \return     The four bytes at buf taken as a native endian uint32_t
static inline uint32_t
aligned_read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Non-compliant but fast: direct load through a casted pointer.
	return *(const uint32_t *)buf;
#else
	uint32_t value;
	tuklib_memcpy_aligned(&value, buf, sizeof(value));
	return value;
#endif
}


/// \brief      Read a native endian 64-bit integer from an aligned buffer
///
/// \param      buf     Pointer to eight readable bytes; the caller is
///                     expected to guarantee suitable alignment
/// \return     The eight bytes at buf taken as a native endian uint64_t
static inline uint64_t
aligned_read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	// Non-compliant but fast: direct load through a casted pointer.
	return *(const uint64_t *)buf;
#else
	uint64_t value;
	tuklib_memcpy_aligned(&value, buf, sizeof(value));
	return value;
#endif
}


/// \brief      Write a native endian 16-bit integer to an aligned buffer
///
/// \param      buf     Pointer to two writable bytes, aligned to
///                     sizeof(uint16_t)
/// \param      num     Value to store
static inline void
aligned_write16ne(uint8_t *buf, uint16_t num)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint16_t *)buf = num;
#elif defined(HAVE___BUILTIN_ASSUME_ALIGNED)
	// The alignment promise is about the destination buffer. Going
	// through tuklib_memcpy_aligned() would attach it to &num instead,
	// which tells the compiler nothing about buf.
	memcpy(__builtin_assume_aligned(buf, sizeof(num)), &num, sizeof(num));
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


/// \brief      Write a native endian 32-bit integer to an aligned buffer
///
/// \param      buf     Pointer to four writable bytes, aligned to
///                     sizeof(uint32_t)
/// \param      num     Value to store
static inline void
aligned_write32ne(uint8_t *buf, uint32_t num)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint32_t *)buf = num;
#elif defined(HAVE___BUILTIN_ASSUME_ALIGNED)
	// The alignment promise is about the destination buffer. Going
	// through tuklib_memcpy_aligned() would attach it to &num instead,
	// which tells the compiler nothing about buf.
	memcpy(__builtin_assume_aligned(buf, sizeof(num)), &num, sizeof(num));
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


/// \brief      Write a native endian 64-bit integer to an aligned buffer
///
/// \param      buf     Pointer to eight writable bytes, aligned to
///                     sizeof(uint64_t)
/// \param      num     Value to store
static inline void
aligned_write64ne(uint8_t *buf, uint64_t num)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint64_t *)buf = num;
#elif defined(HAVE___BUILTIN_ASSUME_ALIGNED)
	// The alignment promise is about the destination buffer. Going
	// through tuklib_memcpy_aligned() would attach it to &num instead,
	// which tells the compiler nothing about buf.
	memcpy(__builtin_assume_aligned(buf, sizeof(num)), &num, sizeof(num));
#else
	memcpy(buf, &num, sizeof(num));
#endif
}


/// \brief      Read a big endian 16-bit integer from an aligned buffer
///
/// \param      buf     Pointer handed to aligned_read16ne()
/// \return     The value converted to native byte order
static inline uint16_t
aligned_read16be(const uint8_t *buf)
{
	// Keep the temporary: conv16be() can be a macro that evaluates its
	// argument more than once.
	uint16_t value = aligned_read16ne(buf);
	return conv16be(value);
}


/// \brief      Read a little endian 16-bit integer from an aligned buffer
///
/// \param      buf     Pointer handed to aligned_read16ne()
/// \return     The value converted to native byte order
static inline uint16_t
aligned_read16le(const uint8_t *buf)
{
	// Keep the temporary: conv16le() can be a macro that evaluates its
	// argument more than once.
	uint16_t value = aligned_read16ne(buf);
	return conv16le(value);
}


/// \brief      Read a big endian 32-bit integer from an aligned buffer
///
/// \param      buf     Pointer handed to aligned_read32ne()
/// \return     The value converted to native byte order
static inline uint32_t
aligned_read32be(const uint8_t *buf)
{
	// Keep the temporary: conv32be() can be a macro that evaluates its
	// argument more than once.
	uint32_t value = aligned_read32ne(buf);
	return conv32be(value);
}


/// \brief      Read a little endian 32-bit integer from an aligned buffer
///
/// \param      buf     Pointer handed to aligned_read32ne()
/// \return     The value converted to native byte order
static inline uint32_t
aligned_read32le(const uint8_t *buf)
{
	// Keep the temporary: conv32le() can be a macro that evaluates its
	// argument more than once.
	uint32_t value = aligned_read32ne(buf);
	return conv32le(value);
}


/// \brief      Read a big endian 64-bit integer from an aligned buffer
///
/// \param      buf     Pointer handed to aligned_read64ne()
/// \return     The value converted to native byte order
static inline uint64_t
aligned_read64be(const uint8_t *buf)
{
	// Keep the temporary: conv64be() can be a macro that evaluates its
	// argument more than once.
	uint64_t value = aligned_read64ne(buf);
	return conv64be(value);
}


/// \brief      Read a little endian 64-bit integer from an aligned buffer
///
/// \param      buf     Pointer handed to aligned_read64ne()
/// \return     The value converted to native byte order
static inline uint64_t
aligned_read64le(const uint8_t *buf)
{
	// Keep the temporary: conv64le() can be a macro that evaluates its
	// argument more than once.
	uint64_t value = aligned_read64ne(buf);
	return conv64le(value);
}


// These need to be macros like in the unaligned case: the byte order
// conversion happens in the macro and the store itself is done by the
// aligned_writeXXne() inline functions.
#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))


622a8675d92SXin LI ////////////////////
623a8675d92SXin LI // Bit operations //
624a8675d92SXin LI ////////////////////
625a8675d92SXin LI 
62681ad8388SMartin Matuska static inline uint32_t
62781ad8388SMartin Matuska bsr32(uint32_t n)
62881ad8388SMartin Matuska {
62981ad8388SMartin Matuska 	// Check for ICC first, since it tends to define __GNUC__ too.
63081ad8388SMartin Matuska #if defined(__INTEL_COMPILER)
63181ad8388SMartin Matuska 	return _bit_scan_reverse(n);
63281ad8388SMartin Matuska 
63381ad8388SMartin Matuska #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
63481ad8388SMartin Matuska 	// GCC >= 3.4 has __builtin_clz(), which gives good results on
63581ad8388SMartin Matuska 	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
63681ad8388SMartin Matuska 	// either plain BSR (so the XOR gets optimized away) or LZCNT and
63781ad8388SMartin Matuska 	// XOR (if -march indicates that SSE4a instructions are supported).
638a8675d92SXin LI 	return (uint32_t)__builtin_clz(n) ^ 31U;
63981ad8388SMartin Matuska 
64081ad8388SMartin Matuska #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
64181ad8388SMartin Matuska 	uint32_t i;
64281ad8388SMartin Matuska 	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
64381ad8388SMartin Matuska 	return i;
64481ad8388SMartin Matuska 
645a8675d92SXin LI #elif defined(_MSC_VER)
646a8675d92SXin LI 	unsigned long i;
647a8675d92SXin LI 	_BitScanReverse(&i, n);
64881ad8388SMartin Matuska 	return i;
64981ad8388SMartin Matuska 
65081ad8388SMartin Matuska #else
65181ad8388SMartin Matuska 	uint32_t i = 31;
65281ad8388SMartin Matuska 
653a8675d92SXin LI 	if ((n & 0xFFFF0000) == 0) {
65481ad8388SMartin Matuska 		n <<= 16;
65581ad8388SMartin Matuska 		i = 15;
65681ad8388SMartin Matuska 	}
65781ad8388SMartin Matuska 
658a8675d92SXin LI 	if ((n & 0xFF000000) == 0) {
65981ad8388SMartin Matuska 		n <<= 8;
66081ad8388SMartin Matuska 		i -= 8;
66181ad8388SMartin Matuska 	}
66281ad8388SMartin Matuska 
663a8675d92SXin LI 	if ((n & 0xF0000000) == 0) {
66481ad8388SMartin Matuska 		n <<= 4;
66581ad8388SMartin Matuska 		i -= 4;
66681ad8388SMartin Matuska 	}
66781ad8388SMartin Matuska 
668a8675d92SXin LI 	if ((n & 0xC0000000) == 0) {
66981ad8388SMartin Matuska 		n <<= 2;
67081ad8388SMartin Matuska 		i -= 2;
67181ad8388SMartin Matuska 	}
67281ad8388SMartin Matuska 
673a8675d92SXin LI 	if ((n & 0x80000000) == 0)
67481ad8388SMartin Matuska 		--i;
67581ad8388SMartin Matuska 
67681ad8388SMartin Matuska 	return i;
67781ad8388SMartin Matuska #endif
67881ad8388SMartin Matuska }
67981ad8388SMartin Matuska 
68081ad8388SMartin Matuska 
68181ad8388SMartin Matuska static inline uint32_t
68281ad8388SMartin Matuska clz32(uint32_t n)
68381ad8388SMartin Matuska {
68481ad8388SMartin Matuska #if defined(__INTEL_COMPILER)
68581ad8388SMartin Matuska 	return _bit_scan_reverse(n) ^ 31U;
68681ad8388SMartin Matuska 
68781ad8388SMartin Matuska #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
688a8675d92SXin LI 	return (uint32_t)__builtin_clz(n);
68981ad8388SMartin Matuska 
69081ad8388SMartin Matuska #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
69181ad8388SMartin Matuska 	uint32_t i;
69281ad8388SMartin Matuska 	__asm__("bsrl %1, %0\n\t"
69381ad8388SMartin Matuska 		"xorl $31, %0"
69481ad8388SMartin Matuska 		: "=r" (i) : "rm" (n));
69581ad8388SMartin Matuska 	return i;
69681ad8388SMartin Matuska 
697a8675d92SXin LI #elif defined(_MSC_VER)
698a8675d92SXin LI 	unsigned long i;
699a8675d92SXin LI 	_BitScanReverse(&i, n);
70081ad8388SMartin Matuska 	return i ^ 31U;
70181ad8388SMartin Matuska 
70281ad8388SMartin Matuska #else
70381ad8388SMartin Matuska 	uint32_t i = 0;
70481ad8388SMartin Matuska 
705a8675d92SXin LI 	if ((n & 0xFFFF0000) == 0) {
70681ad8388SMartin Matuska 		n <<= 16;
70781ad8388SMartin Matuska 		i = 16;
70881ad8388SMartin Matuska 	}
70981ad8388SMartin Matuska 
710a8675d92SXin LI 	if ((n & 0xFF000000) == 0) {
71181ad8388SMartin Matuska 		n <<= 8;
71281ad8388SMartin Matuska 		i += 8;
71381ad8388SMartin Matuska 	}
71481ad8388SMartin Matuska 
715a8675d92SXin LI 	if ((n & 0xF0000000) == 0) {
71681ad8388SMartin Matuska 		n <<= 4;
71781ad8388SMartin Matuska 		i += 4;
71881ad8388SMartin Matuska 	}
71981ad8388SMartin Matuska 
720a8675d92SXin LI 	if ((n & 0xC0000000) == 0) {
72181ad8388SMartin Matuska 		n <<= 2;
72281ad8388SMartin Matuska 		i += 2;
72381ad8388SMartin Matuska 	}
72481ad8388SMartin Matuska 
725a8675d92SXin LI 	if ((n & 0x80000000) == 0)
72681ad8388SMartin Matuska 		++i;
72781ad8388SMartin Matuska 
72881ad8388SMartin Matuska 	return i;
72981ad8388SMartin Matuska #endif
73081ad8388SMartin Matuska }
73181ad8388SMartin Matuska 
73281ad8388SMartin Matuska 
73381ad8388SMartin Matuska static inline uint32_t
73481ad8388SMartin Matuska ctz32(uint32_t n)
73581ad8388SMartin Matuska {
73681ad8388SMartin Matuska #if defined(__INTEL_COMPILER)
73781ad8388SMartin Matuska 	return _bit_scan_forward(n);
73881ad8388SMartin Matuska 
73981ad8388SMartin Matuska #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
740a8675d92SXin LI 	return (uint32_t)__builtin_ctz(n);
74181ad8388SMartin Matuska 
74281ad8388SMartin Matuska #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
74381ad8388SMartin Matuska 	uint32_t i;
74481ad8388SMartin Matuska 	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
74581ad8388SMartin Matuska 	return i;
74681ad8388SMartin Matuska 
747a8675d92SXin LI #elif defined(_MSC_VER)
748a8675d92SXin LI 	unsigned long i;
749a8675d92SXin LI 	_BitScanForward(&i, n);
75081ad8388SMartin Matuska 	return i;
75181ad8388SMartin Matuska 
75281ad8388SMartin Matuska #else
75381ad8388SMartin Matuska 	uint32_t i = 0;
75481ad8388SMartin Matuska 
755a8675d92SXin LI 	if ((n & 0x0000FFFF) == 0) {
75681ad8388SMartin Matuska 		n >>= 16;
75781ad8388SMartin Matuska 		i = 16;
75881ad8388SMartin Matuska 	}
75981ad8388SMartin Matuska 
760a8675d92SXin LI 	if ((n & 0x000000FF) == 0) {
76181ad8388SMartin Matuska 		n >>= 8;
76281ad8388SMartin Matuska 		i += 8;
76381ad8388SMartin Matuska 	}
76481ad8388SMartin Matuska 
765a8675d92SXin LI 	if ((n & 0x0000000F) == 0) {
76681ad8388SMartin Matuska 		n >>= 4;
76781ad8388SMartin Matuska 		i += 4;
76881ad8388SMartin Matuska 	}
76981ad8388SMartin Matuska 
770a8675d92SXin LI 	if ((n & 0x00000003) == 0) {
77181ad8388SMartin Matuska 		n >>= 2;
77281ad8388SMartin Matuska 		i += 2;
77381ad8388SMartin Matuska 	}
77481ad8388SMartin Matuska 
775a8675d92SXin LI 	if ((n & 0x00000001) == 0)
77681ad8388SMartin Matuska 		++i;
77781ad8388SMartin Matuska 
77881ad8388SMartin Matuska 	return i;
77981ad8388SMartin Matuska #endif
78081ad8388SMartin Matuska }
78181ad8388SMartin Matuska 
// Bit scan forward is simply another name for ctz32().
#define bsf32 ctz32

#endif // TUKLIB_INTEGER_H