///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
///
/// This file provides macros or functions to do some basic integer and bit
/// operations.
///
/// Native endian inline functions (XX = 16, 32, or 64):
///   - Unaligned native endian reads: readXXne(ptr)
///   - Unaligned native endian writes: writeXXne(ptr, num)
///   - Aligned native endian reads: aligned_readXXne(ptr)
///   - Aligned native endian writes: aligned_writeXXne(ptr, num)
///
/// Endianness-converting integer operations (these can be macros!)
/// (XX = 16, 32, or 64; Y = b or l):
///   - Byte swapping: bswapXX(num)
///   - Byte order conversions to/from native (byteswaps if Y isn't
///     the native endianness): convXXYe(num)
///   - Unaligned reads: readXXYe(ptr)
///   - Unaligned writes: writeXXYe(ptr, num)
///   - Aligned reads: aligned_readXXYe(ptr)
///   - Aligned writes: aligned_writeXXYe(ptr, num)
///
/// Since the above can be macros, the arguments should have no side effects
/// because they may be evaluated more than once.
///
/// Bit scan operations for non-zero 32-bit integers (inline functions):
///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
///   - Count leading zeros: clz32(num)
///   - Count trailing zeros: ctz32(num)
///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
///
/// The above bit scan operations return 0-31. If num is zero,
/// the result is undefined.
//
//  Authors:    Lasse Collin
//              Joachim Henke
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"
#include <string.h>

// Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
// and such functions.
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
#	include <immintrin.h>
// Only include <intrin.h> when it is needed. GCC and Clang can both
// use __builtin functions, so the Windows intrinsics are needed only
// when using MSVC. GCC and Clang can set _MSC_VER on Windows, so we
// need to exclude these cases explicitly.
#elif defined(_MSC_VER) && !TUKLIB_GNUC_REQ(3, 4) && !defined(__clang__)
#	include <intrin.h>
#endif
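
// Illustrative usage sketch (not part of this header; the buffer layout
// and offsets below are made up for the example). Reading a 32-bit
// little endian field and storing a 64-bit big endian value could look
// like this:
//
//     uint8_t buf[16];
//     // ... fill buf ...
//     uint32_t flags = read32le(buf + 4);    // Bytes 4-7, little endian
//     write64be(buf + 8, UINT64_C(123));     // Bytes 8-15, big endian
//
// The read/write functions accept any byte offset; the aligned_* variants
// may only be used when the pointer is known to be suitably aligned.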


///////////////////
// Byte swapping //
///////////////////

#if defined(HAVE___BUILTIN_BSWAPXX)
	// GCC >= 4.8 and Clang
#	define bswap16(n) __builtin_bswap16(n)
#	define bswap32(n) __builtin_bswap32(n)
#	define bswap64(n) __builtin_bswap64(n)

#elif defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif

#ifndef bswap16
#	define bswap16(n) (uint16_t)( \
		  (((n) & 0x00FFU) << 8) \
		| (((n) & 0xFF00U) >> 8) \
	)
#endif

#ifndef bswap32
#	define bswap32(n) (uint32_t)( \
		  (((n) & UINT32_C(0x000000FF)) << 24) \
		| (((n) & UINT32_C(0x0000FF00)) << 8) \
		| (((n) & UINT32_C(0x00FF0000)) >> 8) \
		| (((n) & UINT32_C(0xFF000000)) >> 24) \
	)
#endif

#ifndef bswap64
#	define bswap64(n) (uint64_t)( \
		  (((n) & UINT64_C(0x00000000000000FF)) << 56) \
		| (((n) & UINT64_C(0x000000000000FF00)) << 40) \
		| (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
		| (((n) & UINT64_C(0x00000000FF000000)) << 8) \
		| (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
		| (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
		| (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
		| (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
	)
#endif

// Define conversion macros using the basic byte swapping macros.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif
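
// Illustrative note (not part of the API): since the conversions above may
// be macros, an argument may be evaluated more than once and thus must not
// have side effects. With a hypothetical const uint32_t *p:
//
//     uint32_t a = conv32be(*p++);   // Wrong: *p++ may be evaluated twice
//
//     uint32_t b = conv32be(*p);     // OK: no side effects in the argument
//     ++p;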


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
// is bad even if the uint8_pointer is properly aligned because such
// casts break strict aliasing rules and result in undefined behavior.
// With unaligned pointers it's even worse: compilers may emit vector
// instructions that require aligned pointers even if non-vector
// instructions work with unaligned pointers.
//
// Using memcpy() is the standards-compliant way to do unaligned access.
// Many modern compilers inline it so there is no function call overhead.
// For those compilers that don't handle the memcpy() method well, the
// old casting method (that violates strict aliasing) can be requested at
// build time. A third method, casting to a packed struct, would also be
// an option but isn't provided to keep things simpler (it's already a mess).
// Hopefully this is flexible enough in practice.

static inline uint16_t
read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
write16ne(uint8_t *buf, uint16_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint16_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write32ne(uint8_t *buf, uint32_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint32_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write64ne(uint8_t *buf, uint64_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint64_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
read16be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint16_t num = read16ne(buf);
	return conv16be(num);
#else
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
#endif
}


static inline uint16_t
read16le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint16_t num = read16ne(buf);
	return conv16le(num);
#else
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
#endif
}


static inline uint32_t
read32be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint32_t num = read32ne(buf);
	return conv32be(num);
#else
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
#endif
}


static inline uint32_t
read32le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint32_t num = read32ne(buf);
	return conv32le(num);
#else
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
#endif
}


static inline uint64_t
read64be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint64_t num = read64ne(buf);
	return conv64be(num);
#else
	uint64_t num = (uint64_t)buf[0] << 56;
	num |= (uint64_t)buf[1] << 48;
	num |= (uint64_t)buf[2] << 40;
	num |= (uint64_t)buf[3] << 32;
	num |= (uint64_t)buf[4] << 24;
	num |= (uint64_t)buf[5] << 16;
	num |= (uint64_t)buf[6] << 8;
	num |= (uint64_t)buf[7];
	return num;
#endif
}


static inline uint64_t
read64le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint64_t num = read64ne(buf);
	return conv64le(num);
#else
	uint64_t num = (uint64_t)buf[0];
	num |= (uint64_t)buf[1] << 8;
	num |= (uint64_t)buf[2] << 16;
	num |= (uint64_t)buf[3] << 24;
	num |= (uint64_t)buf[4] << 32;
	num |= (uint64_t)buf[5] << 40;
	num |= (uint64_t)buf[6] << 48;
	num |= (uint64_t)buf[7] << 56;
	return num;
#endif
}


// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible.
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16be(buf, num) write16ne(buf, conv16be(num))
#	define write32be(buf, num) write32ne(buf, conv32be(num))
#	define write64be(buf, num) write64ne(buf, conv64be(num))
#endif

#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16le(buf, num) write16ne(buf, conv16le(num))
#	define write32le(buf, num) write32ne(buf, conv32le(num))
#	define write64le(buf, num) write64ne(buf, conv64le(num))
#endif


#ifndef write16be
static inline void
write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
	return;
}
#endif


#ifndef write16le
static inline void
write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	return;
}
#endif


#ifndef write32be
static inline void
write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
	return;
}
#endif


#ifndef write32le
static inline void
write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	return;
}
#endif
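
// Illustrative example (values made up): regardless of the native byte
// order, the endianness-specific writers store the most significant byte
// first (be) or last (le):
//
//     uint8_t out[4];
//     write32be(out, 0x11223344);   // out = { 0x11, 0x22, 0x33, 0x44 }
//     write32le(out, 0x11223344);   // out = { 0x44, 0x33, 0x22, 0x11 }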


//////////////////////////////
// Aligned reads and writes //
//////////////////////////////

// Separate functions for aligned reads and writes are provided since on
// strict-align archs aligned access is much faster than unaligned access.
//
// Just like in the unaligned case, memcpy() is needed to avoid
// strict aliasing violations. However, on archs that don't support
// unaligned access the compiler cannot know that the pointers given
// to memcpy() are aligned which results in slow code. As of C11 there is
// no standard way to tell the compiler that we know that the address is
// aligned but some compilers have language extensions to do that. With
// such language extensions the memcpy() method gives excellent results.
//
// What to do on a strict-align system when no known language extensions
// are available? Falling back to byte-by-byte access would be safe but ruin
// optimizations that have been made specifically with aligned access in mind.
// As a compromise, aligned reads will fall back to non-compliant type punning
// but aligned writes will be byte-by-byte, that is, fast reads are preferred
// over fast writes. This obviously isn't great but hopefully it's a working
// compromise for now.
//
// __builtin_assume_aligned is supported by GCC >= 4.7 and clang >= 3.6.
#ifdef HAVE___BUILTIN_ASSUME_ALIGNED
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, __builtin_assume_aligned(src, size), size)
#else
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, src, size)
#	ifndef TUKLIB_FAST_UNALIGNED_ACCESS
#		define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
#	endif
#endif


static inline uint16_t
aligned_read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
aligned_read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
aligned_read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
aligned_write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint16_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint32_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint64_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
aligned_read16be(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16be(num);
}


static inline uint16_t
aligned_read16le(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16le(num);
}


static inline uint32_t
aligned_read32be(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32be(num);
}


static inline uint32_t
aligned_read32le(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32le(num);
}


static inline uint64_t
aligned_read64be(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64be(num);
}


static inline uint64_t
aligned_read64le(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64le(num);
}


// These need to be macros like in the unaligned case.
#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))
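
// Illustrative sketch (hypothetical names, not part of the API): the
// aligned_* functions may only be used when the pointer really is
// suitably aligned, for example when it points into storage that is
// naturally aligned for the access size:
//
//     uint64_t table[4];                     // 8-byte aligned storage
//     uint8_t *p = (uint8_t *)table;
//     uint64_t v = aligned_read64ne(p);      // OK: p is 8-byte aligned
//     // aligned_read64ne(p + 1) would violate the alignment requirement.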


////////////////////
// Bit operations //
////////////////////
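
// Illustrative example (not part of the API): for n = 0x00010000, where
// only bit 16 is set, the functions below give
//
//     bsr32(n) == 16   // Highest set bit is bit 16
//     clz32(n) == 15   // 15 leading zero bits
//     ctz32(n) == 16   // 16 trailing zero bits
//     bsf32(n) == 16   // Alias for ctz32()
//
// All of them have undefined results when n == 0.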

static inline uint32_t
bsr32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n);

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
	// GCC >= 3.4 has __builtin_clz(), which gives good results on
	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
	// either plain BSR (so the XOR gets optimized away) or LZCNT and
	// XOR (if -march indicates that SSE4a instructions are supported).
	return (uint32_t)__builtin_clz(n) ^ 31U;

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i;

#else
	uint32_t i = 31;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 15;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i -= 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i -= 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i -= 2;
	}

	if ((n & 0x80000000) == 0)
		--i;

	return i;
#endif
}


static inline uint32_t
clz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n) ^ 31U;

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
	return (uint32_t)__builtin_clz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0\n\t"
		"xorl $31, %0"
		: "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i ^ 31U;

#else
	uint32_t i = 0;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 16;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i += 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i += 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i += 2;
	}

	if ((n & 0x80000000) == 0)
		++i;

	return i;
#endif
}


static inline uint32_t
ctz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_forward(n);

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX >= UINT32_MAX
	return (uint32_t)__builtin_ctz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanForward(&i, n);
	return i;

#else
	uint32_t i = 0;

	if ((n & 0x0000FFFF) == 0) {
		n >>= 16;
		i = 16;
	}

	if ((n & 0x000000FF) == 0) {
		n >>= 8;
		i += 8;
	}

	if ((n & 0x0000000F) == 0) {
		n >>= 4;
		i += 4;
	}

	if ((n & 0x00000003) == 0) {
		n >>= 2;
		i += 2;
	}

	if ((n & 0x00000001) == 0)
		++i;

	return i;
#endif
}

#define bsf32 ctz32

#endif