///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
///
/// This file provides macros or functions to do some basic integer and bit
/// operations.
///
/// Native endian inline functions (XX = 16, 32, or 64):
///   - Unaligned native endian reads: readXXne(ptr)
///   - Unaligned native endian writes: writeXXne(ptr, num)
///   - Aligned native endian reads: aligned_readXXne(ptr)
///   - Aligned native endian writes: aligned_writeXXne(ptr, num)
///
/// Endianness-converting integer operations (these can be macros!)
/// (XX = 16, 32, or 64; Y = b or l):
///   - Byte swapping: bswapXX(num)
///   - Byte order conversions to/from native (byteswaps if Y isn't
///     the native endianness): convXXYe(num)
///   - Unaligned reads: readXXYe(ptr)
///   - Unaligned writes: writeXXYe(ptr, num)
///   - Aligned reads: aligned_readXXYe(ptr)
///   - Aligned writes: aligned_writeXXYe(ptr, num)
///
/// Since the above can be macros, the arguments should have no side effects
/// because they may be evaluated more than once.
///
/// Bit scan operations for non-zero 32-bit integers (inline functions):
///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
///   - Count leading zeros: clz32(num)
///   - Count trailing zeros: ctz32(num)
///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
///
/// The above bit scan operations return 0-31. If num is zero,
/// the result is undefined.
//
//  Authors:    Lasse Collin
//              Joachim Henke
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"
#include <string.h>

// Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
// and such functions.
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
#	include <immintrin.h>
#endif


///////////////////
// Byte swapping //
///////////////////

#if defined(HAVE___BUILTIN_BSWAPXX)
	// GCC >= 4.8 and Clang
#	define bswap16(n) __builtin_bswap16(n)
#	define bswap32(n) __builtin_bswap32(n)
#	define bswap64(n) __builtin_bswap64(n)

#elif defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif

#ifndef bswap16
#	define bswap16(n) (uint16_t)( \
		  (((n) & 0x00FFU) << 8) \
		| (((n) & 0xFF00U) >> 8) \
	)
#endif

#ifndef bswap32
#	define bswap32(n) (uint32_t)( \
		  (((n) & UINT32_C(0x000000FF)) << 24) \
		| (((n) & UINT32_C(0x0000FF00)) << 8) \
		| (((n) & UINT32_C(0x00FF0000)) >> 8) \
		| (((n) & UINT32_C(0xFF000000)) >> 24) \
	)
#endif

#ifndef bswap64
#	define bswap64(n) (uint64_t)( \
		  (((n) & UINT64_C(0x00000000000000FF)) << 56) \
		| (((n) & UINT64_C(0x000000000000FF00)) << 40) \
		| (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
		| (((n) & UINT64_C(0x00000000FF000000)) << 8) \
		| (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
		| (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
		| (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
		| (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
	)
#endif

// Define conversion macros using the basic byte swapping macros.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif
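
// Illustrative example, not part of the API: the helper below exists only to
// demonstrate the macros above and its name is made up for this sketch.
// bswapXX() reverses the byte order (e.g. bswap32(0x11223344) evaluates to
// 0x44332211) while convXXYe() byte swaps only when Y differs from the
// native endianness, so applying the same conversion twice is always a
// round trip.
static inline uint32_t
tuklib_example_conv32be_roundtrip(uint32_t num)
{
	// On big endian systems both conversions are plain casts; on little
	// endian systems they are two byte swaps that cancel each other out.
	return conv32be(conv32be(num));
}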


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
// is bad even if the uint8_pointer is properly aligned because this kind
// of cast breaks strict aliasing rules and results in undefined behavior.
// With unaligned pointers it's even worse: compilers may emit vector
// instructions that require aligned pointers even if non-vector
// instructions work with unaligned pointers.
//
// Using memcpy() is the standard-compliant way to do unaligned access.
// Many modern compilers inline it so there is no function call overhead.
// For those compilers that don't handle the memcpy() method well, the
// old casting method (that violates strict aliasing) can be requested at
// build time. A third method, casting to a packed struct, would also be
// an option but isn't provided to keep things simpler (it's already a mess).
// Hopefully this is flexible enough in practice.

static inline uint16_t
read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
write16ne(uint8_t *buf, uint16_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint16_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write32ne(uint8_t *buf, uint32_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint32_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write64ne(uint8_t *buf, uint64_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint64_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
read16be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint16_t num = read16ne(buf);
	return conv16be(num);
#else
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
#endif
}


static inline uint16_t
read16le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint16_t num = read16ne(buf);
	return conv16le(num);
#else
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
#endif
}


static inline uint32_t
read32be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint32_t num = read32ne(buf);
	return conv32be(num);
#else
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
#endif
}


static inline uint32_t
read32le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint32_t num = read32ne(buf);
	return conv32le(num);
#else
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
#endif
}


static inline uint64_t
read64be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint64_t num = read64ne(buf);
	return conv64be(num);
#else
	uint64_t num = (uint64_t)buf[0] << 56;
	num |= (uint64_t)buf[1] << 48;
	num |= (uint64_t)buf[2] << 40;
	num |= (uint64_t)buf[3] << 32;
	num |= (uint64_t)buf[4] << 24;
	num |= (uint64_t)buf[5] << 16;
	num |= (uint64_t)buf[6] << 8;
	num |= (uint64_t)buf[7];
	return num;
#endif
}


static inline uint64_t
read64le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint64_t num = read64ne(buf);
	return conv64le(num);
#else
	uint64_t num = (uint64_t)buf[0];
	num |= (uint64_t)buf[1] << 8;
	num |= (uint64_t)buf[2] << 16;
	num |= (uint64_t)buf[3] << 24;
	num |= (uint64_t)buf[4] << 32;
	num |= (uint64_t)buf[5] << 40;
	num |= (uint64_t)buf[6] << 48;
	num |= (uint64_t)buf[7] << 56;
	return num;
#endif
}


// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible.
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16be(buf, num) write16ne(buf, conv16be(num))
#	define write32be(buf, num) write32ne(buf, conv32be(num))
#	define write64be(buf, num) write64ne(buf, conv64be(num))
#endif

#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16le(buf, num) write16ne(buf, conv16le(num))
#	define write32le(buf, num) write32ne(buf, conv32le(num))
#	define write64le(buf, num) write64ne(buf, conv64le(num))
#endif


#ifndef write16be
static inline void
write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
	return;
}
#endif


#ifndef write16le
static inline void
write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	return;
}
#endif


#ifndef write32be
static inline void
write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
	return;
}
#endif


#ifndef write32le
static inline void
write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	return;
}
#endif
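

// Illustrative example, not part of the API: the function name below is made
// up for this sketch. It shows typical use of the unaligned endian-specific
// reads and writes; the input and output pointers may be unaligned. Because
// the writeXXYe() names may be macros that expand to
// writeXXne(buf, convXXYe(num)), a constant argument lets the compiler do
// the byte swapping at compile time.
static inline void
tuklib_example_convert_header(const uint8_t *in, uint8_t *out)
{
	// Read two little endian fields and store them in big endian order.
	uint16_t version = read16le(in);
	uint32_t flags = read32le(in + 2);

	write16be(out, version);
	write32be(out + 2, flags);
	return;
}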


//////////////////////////////
// Aligned reads and writes //
//////////////////////////////

// Separate functions for aligned reads and writes are provided since on
// strict-align archs aligned access is much faster than unaligned access.
//
// Just like in the unaligned case, memcpy() is needed to avoid
// strict aliasing violations. However, on archs that don't support
// unaligned access the compiler cannot know that the pointers given
// to memcpy() are aligned, which results in slow code. As of C11 there is
// no standard way to tell the compiler that we know that the address is
// aligned but some compilers have language extensions to do that. With
// such language extensions the memcpy() method gives excellent results.
//
// What to do on a strict-align system when no known language extensions
// are available? Falling back to byte-by-byte access would be safe but ruin
// optimizations that have been made specifically with aligned access in mind.
// As a compromise, aligned reads will fall back to non-compliant type punning
// but aligned writes will be byte-by-byte, that is, fast reads are preferred
// over fast writes. This obviously isn't great but hopefully it's a working
// compromise for now.
//
// __builtin_assume_aligned is supported by GCC >= 4.7 and clang >= 3.6.
#ifdef HAVE___BUILTIN_ASSUME_ALIGNED
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, __builtin_assume_aligned(src, size), size)
#else
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, src, size)
#	ifndef TUKLIB_FAST_UNALIGNED_ACCESS
#		define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
#	endif
#endif
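
// Illustrative note (an assumption about typical compiler behavior, not a
// guarantee): when the builtin is available, a call such as
//
//     uint32_t num;
//     tuklib_memcpy_aligned(&num, buf, sizeof(num));
//
// can compile down to a single aligned 32-bit load even on strict-align
// targets, because __builtin_assume_aligned() tells the compiler that buf
// is aligned to sizeof(num) bytes.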


static inline uint16_t
aligned_read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
aligned_read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
aligned_read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
aligned_write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint16_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint32_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint64_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
aligned_read16be(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16be(num);
}


static inline uint16_t
aligned_read16le(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16le(num);
}


static inline uint32_t
aligned_read32be(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32be(num);
}


static inline uint32_t
aligned_read32le(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32le(num);
}


static inline uint64_t
aligned_read64be(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64be(num);
}


static inline uint64_t
aligned_read64le(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64le(num);
}


// These need to be macros like in the unaligned case.
#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))
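

// Illustrative example, not part of the API: the function name below is made
// up for this sketch. The aligned variants require buf to be aligned to the
// size of the integer; passing a misaligned pointer is undefined behavior
// and may crash on strict-align archs.
static inline void
tuklib_example_aligned_bswap32(uint8_t *buf)
{
	// buf must be 4-byte aligned. Read big endian, store little endian.
	uint32_t num = aligned_read32be(buf);
	aligned_write32le(buf, num);
	return;
}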


////////////////////
// Bit operations //
////////////////////

static inline uint32_t
bsr32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n);

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
	// GCC >= 3.4 has __builtin_clz(), which gives good results on
	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
	// either plain BSR (so the XOR gets optimized away) or LZCNT and
	// XOR (if -march indicates that SSE4a instructions are supported).
	return (uint32_t)__builtin_clz(n) ^ 31U;

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i;

#else
	uint32_t i = 31;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 15;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i -= 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i -= 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i -= 2;
	}

	if ((n & 0x80000000) == 0)
		--i;

	return i;
#endif
}


static inline uint32_t
clz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n) ^ 31U;

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
	return (uint32_t)__builtin_clz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0\n\t"
		"xorl $31, %0"
		: "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i ^ 31U;

#else
	uint32_t i = 0;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 16;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i += 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i += 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i += 2;
	}

	if ((n & 0x80000000) == 0)
		++i;

	return i;
#endif
}


static inline uint32_t
ctz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_forward(n);

#elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
	return (uint32_t)__builtin_ctz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanForward(&i, n);
	return i;

#else
	uint32_t i = 0;

	if ((n & 0x0000FFFF) == 0) {
		n >>= 16;
		i = 16;
	}

	if ((n & 0x000000FF) == 0) {
		n >>= 8;
		i += 8;
	}

	if ((n & 0x0000000F) == 0) {
		n >>= 4;
		i += 4;
	}

	if ((n & 0x00000003) == 0) {
		n >>= 2;
		i += 2;
	}

	if ((n & 0x00000001) == 0)
		++i;

	return i;
#endif
}

#define bsf32 ctz32
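
// Illustrative example, not part of the API: the function name below is made
// up for this sketch. For non-zero n, bsr32(n) == 31 - clz32(n) and
// bsf32(n) == ctz32(n); for example, bsr32(0x00010000) == 16,
// clz32(0x00010000) == 15, and ctz32(0x00010000) == 16.
static inline uint32_t
tuklib_example_bit_width(uint32_t n)
{
	// Number of bits needed to represent a non-zero value n.
	return bsr32(n) + 1;
}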

#endif