1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file tuklib_integer.h 4 /// \brief Various integer and bit operations 5 /// 6 /// This file provides macros or functions to do some basic integer and bit 7 /// operations. 8 /// 9 /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l): 10 /// - Byte swapping: bswapXX(num) 11 /// - Byte order conversions to/from native: convXXYe(num) 12 /// - Aligned reads: readXXYe(ptr) 13 /// - Aligned writes: writeXXYe(ptr, num) 14 /// - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr) 15 /// - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num) 16 /// 17 /// Since they can macros, the arguments should have no side effects since 18 /// they may be evaluated more than once. 19 /// 20 /// \todo PowerPC and possibly some other architectures support 21 /// byte swapping load and store instructions. This file 22 /// doesn't take advantage of those instructions. 23 /// 24 /// Bit scan operations for non-zero 32-bit integers: 25 /// - Bit scan reverse (find highest non-zero bit): bsr32(num) 26 /// - Count leading zeros: clz32(num) 27 /// - Count trailing zeros: ctz32(num) 28 /// - Bit scan forward (simply an alias for ctz32()): bsf32(num) 29 /// 30 /// The above bit scan operations return 0-31. If num is zero, 31 /// the result is undefined. 32 // 33 // Authors: Lasse Collin 34 // Joachim Henke 35 // 36 // This file has been put into the public domain. 37 // You can do whatever you want with this file. 38 // 39 /////////////////////////////////////////////////////////////////////////////// 40 41 #ifndef TUKLIB_INTEGER_H 42 #define TUKLIB_INTEGER_H 43 44 #include "tuklib_common.h" 45 46 47 //////////////////////////////////////// 48 // Operating system specific features // 49 //////////////////////////////////////// 50 51 #if defined(HAVE_BYTESWAP_H) 52 // glibc, uClibc, dietlibc 53 # include <byteswap.h> 54 # ifdef HAVE_BSWAP_16 55 # define bswap16(num) bswap_16(num) 56 # endif 57 # ifdef HAVE_BSWAP_32 58 # define bswap32(num) bswap_32(num) 59 # endif 60 # ifdef HAVE_BSWAP_64 61 # define bswap64(num) bswap_64(num) 62 # endif 63 64 #elif defined(HAVE_SYS_ENDIAN_H) 65 // *BSDs and Darwin 66 # include <sys/endian.h> 67 68 #elif defined(HAVE_SYS_BYTEORDER_H) 69 // Solaris 70 # include <sys/byteorder.h> 71 # ifdef BSWAP_16 72 # define bswap16(num) BSWAP_16(num) 73 # endif 74 # ifdef BSWAP_32 75 # define bswap32(num) BSWAP_32(num) 76 # endif 77 # ifdef BSWAP_64 78 # define bswap64(num) BSWAP_64(num) 79 # endif 80 # ifdef BE_16 81 # define conv16be(num) BE_16(num) 82 # endif 83 # ifdef BE_32 84 # define conv32be(num) BE_32(num) 85 # endif 86 # ifdef BE_64 87 # define conv64be(num) BE_64(num) 88 # endif 89 # ifdef LE_16 90 # define conv16le(num) LE_16(num) 91 # endif 92 # ifdef LE_32 93 # define conv32le(num) LE_32(num) 94 # endif 95 # ifdef LE_64 96 # define conv64le(num) LE_64(num) 97 # endif 98 #endif 99 100 101 //////////////////////////////// 102 // Compiler-specific features // 103 //////////////////////////////// 104 105 // Newer Intel C compilers require immintrin.h for _bit_scan_reverse() 106 // and such functions. 107 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500) 108 # include <immintrin.h> 109 #endif 110 111 112 /////////////////// 113 // Byte swapping // 114 /////////////////// 115 116 #ifndef bswap16 117 # define bswap16(num) \ 118 (((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8)) 119 #endif 120 121 #ifndef bswap32 122 # define bswap32(num) \ 123 ( (((uint32_t)(num) << 24) ) \ 124 | (((uint32_t)(num) << 8) & UINT32_C(0x00FF0000)) \ 125 | (((uint32_t)(num) >> 8) & UINT32_C(0x0000FF00)) \ 126 | (((uint32_t)(num) >> 24) ) ) 127 #endif 128 129 #ifndef bswap64 130 # define bswap64(num) \ 131 ( (((uint64_t)(num) << 56) ) \ 132 | (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \ 133 | (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \ 134 | (((uint64_t)(num) << 8) & UINT64_C(0x000000FF00000000)) \ 135 | (((uint64_t)(num) >> 8) & UINT64_C(0x00000000FF000000)) \ 136 | (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \ 137 | (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \ 138 | (((uint64_t)(num) >> 56) ) ) 139 #endif 140 141 // Define conversion macros using the basic byte swapping macros. 142 #ifdef WORDS_BIGENDIAN 143 # ifndef conv16be 144 # define conv16be(num) ((uint16_t)(num)) 145 # endif 146 # ifndef conv32be 147 # define conv32be(num) ((uint32_t)(num)) 148 # endif 149 # ifndef conv64be 150 # define conv64be(num) ((uint64_t)(num)) 151 # endif 152 # ifndef conv16le 153 # define conv16le(num) bswap16(num) 154 # endif 155 # ifndef conv32le 156 # define conv32le(num) bswap32(num) 157 # endif 158 # ifndef conv64le 159 # define conv64le(num) bswap64(num) 160 # endif 161 #else 162 # ifndef conv16be 163 # define conv16be(num) bswap16(num) 164 # endif 165 # ifndef conv32be 166 # define conv32be(num) bswap32(num) 167 # endif 168 # ifndef conv64be 169 # define conv64be(num) bswap64(num) 170 # endif 171 # ifndef conv16le 172 # define conv16le(num) ((uint16_t)(num)) 173 # endif 174 # ifndef conv32le 175 # define conv32le(num) ((uint32_t)(num)) 176 # endif 177 # ifndef conv64le 178 # define conv64le(num) ((uint64_t)(num)) 179 # endif 180 #endif 181 182 183 ////////////////////////////// 184 // Aligned reads and writes // 185 ////////////////////////////// 186 187 static inline uint16_t 188 read16be(const uint8_t *buf) 189 { 190 uint16_t num = *(const uint16_t *)buf; 191 return conv16be(num); 192 } 193 194 195 static inline uint16_t 196 read16le(const uint8_t *buf) 197 { 198 uint16_t num = *(const uint16_t *)buf; 199 return conv16le(num); 200 } 201 202 203 static inline uint32_t 204 read32be(const uint8_t *buf) 205 { 206 uint32_t num = *(const uint32_t *)buf; 207 return conv32be(num); 208 } 209 210 211 static inline uint32_t 212 read32le(const uint8_t *buf) 213 { 214 uint32_t num = *(const uint32_t *)buf; 215 return conv32le(num); 216 } 217 218 219 static inline uint64_t 220 read64be(const uint8_t *buf) 221 { 222 uint64_t num = *(const uint64_t *)buf; 223 return conv64be(num); 224 } 225 226 227 static inline uint64_t 228 read64le(const uint8_t *buf) 229 { 230 uint64_t num = *(const uint64_t *)buf; 231 return conv64le(num); 232 } 233 234 235 // NOTE: Possible byte swapping must be done in a macro to allow GCC 236 // to optimize byte swapping of constants when using glibc's or *BSD's 237 // byte swapping macros. The actual write is done in an inline function 238 // to make type checking of the buf pointer possible similarly to readXXYe() 239 // functions. 240 241 #define write16be(buf, num) write16ne((buf), conv16be(num)) 242 #define write16le(buf, num) write16ne((buf), conv16le(num)) 243 #define write32be(buf, num) write32ne((buf), conv32be(num)) 244 #define write32le(buf, num) write32ne((buf), conv32le(num)) 245 #define write64be(buf, num) write64ne((buf), conv64be(num)) 246 #define write64le(buf, num) write64ne((buf), conv64le(num)) 247 248 249 static inline void 250 write16ne(uint8_t *buf, uint16_t num) 251 { 252 *(uint16_t *)buf = num; 253 return; 254 } 255 256 257 static inline void 258 write32ne(uint8_t *buf, uint32_t num) 259 { 260 *(uint32_t *)buf = num; 261 return; 262 } 263 264 265 static inline void 266 write64ne(uint8_t *buf, uint64_t num) 267 { 268 *(uint64_t *)buf = num; 269 return; 270 } 271 272 273 //////////////////////////////// 274 // Unaligned reads and writes // 275 //////////////////////////////// 276 277 // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and 278 // 32-bit unaligned integer loads and stores. It's possible that 64-bit 279 // unaligned access doesn't work or is slower than byte-by-byte access. 280 // Since unaligned 64-bit is probably not needed as often as 16-bit or 281 // 32-bit, we simply don't support 64-bit unaligned access for now. 282 #ifdef TUKLIB_FAST_UNALIGNED_ACCESS 283 # define unaligned_read16be read16be 284 # define unaligned_read16le read16le 285 # define unaligned_read32be read32be 286 # define unaligned_read32le read32le 287 # define unaligned_write16be write16be 288 # define unaligned_write16le write16le 289 # define unaligned_write32be write32be 290 # define unaligned_write32le write32le 291 292 #else 293 294 static inline uint16_t 295 unaligned_read16be(const uint8_t *buf) 296 { 297 uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1]; 298 return num; 299 } 300 301 302 static inline uint16_t 303 unaligned_read16le(const uint8_t *buf) 304 { 305 uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8); 306 return num; 307 } 308 309 310 static inline uint32_t 311 unaligned_read32be(const uint8_t *buf) 312 { 313 uint32_t num = (uint32_t)buf[0] << 24; 314 num |= (uint32_t)buf[1] << 16; 315 num |= (uint32_t)buf[2] << 8; 316 num |= (uint32_t)buf[3]; 317 return num; 318 } 319 320 321 static inline uint32_t 322 unaligned_read32le(const uint8_t *buf) 323 { 324 uint32_t num = (uint32_t)buf[0]; 325 num |= (uint32_t)buf[1] << 8; 326 num |= (uint32_t)buf[2] << 16; 327 num |= (uint32_t)buf[3] << 24; 328 return num; 329 } 330 331 332 static inline void 333 unaligned_write16be(uint8_t *buf, uint16_t num) 334 { 335 buf[0] = (uint8_t)(num >> 8); 336 buf[1] = (uint8_t)num; 337 return; 338 } 339 340 341 static inline void 342 unaligned_write16le(uint8_t *buf, uint16_t num) 343 { 344 buf[0] = (uint8_t)num; 345 buf[1] = (uint8_t)(num >> 8); 346 return; 347 } 348 349 350 static inline void 351 unaligned_write32be(uint8_t *buf, uint32_t num) 352 { 353 buf[0] = (uint8_t)(num >> 24); 354 buf[1] = (uint8_t)(num >> 16); 355 buf[2] = (uint8_t)(num >> 8); 356 buf[3] = (uint8_t)num; 357 return; 358 } 359 360 361 static inline void 362 unaligned_write32le(uint8_t *buf, uint32_t num) 363 { 364 buf[0] = (uint8_t)num; 365 buf[1] = (uint8_t)(num >> 8); 366 buf[2] = (uint8_t)(num >> 16); 367 buf[3] = (uint8_t)(num >> 24); 368 return; 369 } 370 371 #endif 372 373 374 static inline uint32_t 375 bsr32(uint32_t n) 376 { 377 // Check for ICC first, since it tends to define __GNUC__ too. 378 #if defined(__INTEL_COMPILER) 379 return _bit_scan_reverse(n); 380 381 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX 382 // GCC >= 3.4 has __builtin_clz(), which gives good results on 383 // multiple architectures. On x86, __builtin_clz() ^ 31U becomes 384 // either plain BSR (so the XOR gets optimized away) or LZCNT and 385 // XOR (if -march indicates that SSE4a instructions are supported). 386 return __builtin_clz(n) ^ 31U; 387 388 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 389 uint32_t i; 390 __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n)); 391 return i; 392 393 #elif defined(_MSC_VER) && _MSC_VER >= 1400 394 // MSVC isn't supported by tuklib, but since this code exists, 395 // it doesn't hurt to have it here anyway. 396 uint32_t i; 397 _BitScanReverse((DWORD *)&i, n); 398 return i; 399 400 #else 401 uint32_t i = 31; 402 403 if ((n & UINT32_C(0xFFFF0000)) == 0) { 404 n <<= 16; 405 i = 15; 406 } 407 408 if ((n & UINT32_C(0xFF000000)) == 0) { 409 n <<= 8; 410 i -= 8; 411 } 412 413 if ((n & UINT32_C(0xF0000000)) == 0) { 414 n <<= 4; 415 i -= 4; 416 } 417 418 if ((n & UINT32_C(0xC0000000)) == 0) { 419 n <<= 2; 420 i -= 2; 421 } 422 423 if ((n & UINT32_C(0x80000000)) == 0) 424 --i; 425 426 return i; 427 #endif 428 } 429 430 431 static inline uint32_t 432 clz32(uint32_t n) 433 { 434 #if defined(__INTEL_COMPILER) 435 return _bit_scan_reverse(n) ^ 31U; 436 437 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX 438 return __builtin_clz(n); 439 440 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 441 uint32_t i; 442 __asm__("bsrl %1, %0\n\t" 443 "xorl $31, %0" 444 : "=r" (i) : "rm" (n)); 445 return i; 446 447 #elif defined(_MSC_VER) && _MSC_VER >= 1400 448 uint32_t i; 449 _BitScanReverse((DWORD *)&i, n); 450 return i ^ 31U; 451 452 #else 453 uint32_t i = 0; 454 455 if ((n & UINT32_C(0xFFFF0000)) == 0) { 456 n <<= 16; 457 i = 16; 458 } 459 460 if ((n & UINT32_C(0xFF000000)) == 0) { 461 n <<= 8; 462 i += 8; 463 } 464 465 if ((n & UINT32_C(0xF0000000)) == 0) { 466 n <<= 4; 467 i += 4; 468 } 469 470 if ((n & UINT32_C(0xC0000000)) == 0) { 471 n <<= 2; 472 i += 2; 473 } 474 475 if ((n & UINT32_C(0x80000000)) == 0) 476 ++i; 477 478 return i; 479 #endif 480 } 481 482 483 static inline uint32_t 484 ctz32(uint32_t n) 485 { 486 #if defined(__INTEL_COMPILER) 487 return _bit_scan_forward(n); 488 489 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX 490 return __builtin_ctz(n); 491 492 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) 493 uint32_t i; 494 __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n)); 495 return i; 496 497 #elif defined(_MSC_VER) && _MSC_VER >= 1400 498 uint32_t i; 499 _BitScanForward((DWORD *)&i, n); 500 return i; 501 502 #else 503 uint32_t i = 0; 504 505 if ((n & UINT32_C(0x0000FFFF)) == 0) { 506 n >>= 16; 507 i = 16; 508 } 509 510 if ((n & UINT32_C(0x000000FF)) == 0) { 511 n >>= 8; 512 i += 8; 513 } 514 515 if ((n & UINT32_C(0x0000000F)) == 0) { 516 n >>= 4; 517 i += 4; 518 } 519 520 if ((n & UINT32_C(0x00000003)) == 0) { 521 n >>= 2; 522 i += 2; 523 } 524 525 if ((n & UINT32_C(0x00000001)) == 0) 526 ++i; 527 528 return i; 529 #endif 530 } 531 532 #define bsf32 ctz32 533 534 #endif 535