///////////////////////////////////////////////////////////////////////////////
//
/// \file       tuklib_integer.h
/// \brief      Various integer and bit operations
///
/// This file provides macros or functions to do some basic integer and bit
/// operations.
///
/// Native endian inline functions (XX = 16, 32, or 64):
///   - Unaligned native endian reads: readXXne(ptr)
///   - Unaligned native endian writes: writeXXne(ptr, num)
///   - Aligned native endian reads: aligned_readXXne(ptr)
///   - Aligned native endian writes: aligned_writeXXne(ptr, num)
///
/// Endianness-converting integer operations (these can be macros!)
/// (XX = 16, 32, or 64; Y = b or l):
///   - Byte swapping: bswapXX(num)
///   - Byte order conversions to/from native (byteswaps if Y isn't
///     the native endianness): convXXYe(num)
///   - Unaligned reads: readXXYe(ptr)
///   - Unaligned writes: writeXXYe(ptr, num)
///   - Aligned reads: aligned_readXXYe(ptr)
///   - Aligned writes: aligned_writeXXYe(ptr, num)
///
/// Since the above can be macros, the arguments should have no side effects
/// because they may be evaluated more than once.
///
/// Bit scan operations for non-zero 32-bit integers (inline functions):
///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
///   - Count leading zeros: clz32(num)
///   - Count trailing zeros: ctz32(num)
///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
///
/// The above bit scan operations return 0-31. If num is zero,
/// the result is undefined.
//
//  Authors:    Lasse Collin
//              Joachim Henke
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#ifndef TUKLIB_INTEGER_H
#define TUKLIB_INTEGER_H

#include "tuklib_common.h"
#include <string.h>

// Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
// and such functions.
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
#	include <immintrin.h>
// Only include <intrin.h> when it is needed. GCC and Clang can both
// use __builtin's, so we only need Windows intrinsics when using MSVC.
// GCC and Clang can set _MSC_VER on Windows, so we need to exclude these
// cases explicitly.
#elif defined(_MSC_VER) && !TUKLIB_GNUC_REQ(3, 4) && !defined(__clang__)
#	include <intrin.h>
#endif
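

// A minimal usage sketch of the endianness-converting operations documented
// above (hypothetical buffer and values, not part of this API):
//
//     uint8_t buf[8];
//     write32be(buf, UINT32_C(0x12345678));     // bytes 12 34 56 78
//     write32le(buf + 4, UINT32_C(0x12345678)); // bytes 78 56 34 12
//     uint32_t a = read32be(buf);               // 0x12345678 on any host
//     uint32_t b = read32le(buf + 4);           // 0x12345678 on any host
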

///////////////////
// Byte swapping //
///////////////////

#if defined(HAVE___BUILTIN_BSWAPXX)
	// GCC >= 4.8 and Clang
#	define bswap16(n) __builtin_bswap16(n)
#	define bswap32(n) __builtin_bswap32(n)
#	define bswap64(n) __builtin_bswap64(n)

#elif defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif

#ifndef bswap16
#	define bswap16(n) (uint16_t)( \
		  (((n) & 0x00FFU) << 8) \
		| (((n) & 0xFF00U) >> 8) \
	)
#endif

#ifndef bswap32
#	define bswap32(n) (uint32_t)( \
		  (((n) & UINT32_C(0x000000FF)) << 24) \
		| (((n) & UINT32_C(0x0000FF00)) << 8) \
		| (((n) & UINT32_C(0x00FF0000)) >> 8) \
		| (((n) & UINT32_C(0xFF000000)) >> 24) \
	)
#endif

#ifndef bswap64
#	define bswap64(n) (uint64_t)( \
		  (((n) & UINT64_C(0x00000000000000FF)) << 56) \
		| (((n) & UINT64_C(0x000000000000FF00)) << 40) \
		| (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
		| (((n) & UINT64_C(0x00000000FF000000)) << 8) \
		| (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
		| (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
		| (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
		| (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
	)
#endif
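
// For example, bswap32(UINT32_C(0x11223344)) evaluates to
// UINT32_C(0x44332211) with any of the implementations above. Note that
// the generic fallback macros evaluate their argument more than once, so
// arguments with side effects (e.g. bswap32(*p++)) must be avoided.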

// Define conversion macros using the basic byte swapping macros.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif


////////////////////////////////
// Unaligned reads and writes //
////////////////////////////////

// The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
// is bad even if the uint8_pointer is properly aligned because this kind
// of cast breaks strict aliasing rules and results in undefined behavior.
// With unaligned pointers it's even worse: compilers may emit vector
// instructions that require aligned pointers even if non-vector
// instructions work with unaligned pointers.
//
// Using memcpy() is the standard-compliant way to do unaligned access.
// Many modern compilers inline it so there is no function call overhead.
// For those compilers that don't handle the memcpy() method well, the
// old casting method (that violates strict aliasing) can be requested at
// build time. A third method, casting to a packed struct, would also be
// an option but isn't provided to keep things simpler (it's already a mess).
// Hopefully this is flexible enough in practice.
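
// For example, with the memcpy() method, read32ne() below typically
// compiles to a single 32-bit load on x86-64, just like the cast method,
// but without the undefined behavior. (This is an expectation about
// modern compilers, not a guarantee.)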

static inline uint16_t
read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	memcpy(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
write16ne(uint8_t *buf, uint16_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint16_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write32ne(uint8_t *buf, uint32_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint32_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
write64ne(uint8_t *buf, uint64_t num)
{
#if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
		&& defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
	*(uint64_t *)buf = num;
#else
	memcpy(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
read16be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint16_t num = read16ne(buf);
	return conv16be(num);
#else
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
#endif
}


static inline uint16_t
read16le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint16_t num = read16ne(buf);
	return conv16le(num);
#else
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
#endif
}


static inline uint32_t
read32be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint32_t num = read32ne(buf);
	return conv32be(num);
#else
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
#endif
}


static inline uint32_t
read32le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint32_t num = read32ne(buf);
	return conv32le(num);
#else
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
#endif
}


static inline uint64_t
read64be(const uint8_t *buf)
{
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint64_t num = read64ne(buf);
	return conv64be(num);
#else
	uint64_t num = (uint64_t)buf[0] << 56;
	num |= (uint64_t)buf[1] << 48;
	num |= (uint64_t)buf[2] << 40;
	num |= (uint64_t)buf[3] << 32;
	num |= (uint64_t)buf[4] << 24;
	num |= (uint64_t)buf[5] << 16;
	num |= (uint64_t)buf[6] << 8;
	num |= (uint64_t)buf[7];
	return num;
#endif
}
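
// For example, given the bytes { 0x12, 0x34, 0x56, 0x78 }, read32be()
// returns 0x12345678 and read32le() returns 0x78563412 regardless of
// the endianness of the host doing the reading.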

static inline uint64_t
read64le(const uint8_t *buf)
{
#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
	uint64_t num = read64ne(buf);
	return conv64le(num);
#else
	uint64_t num = (uint64_t)buf[0];
	num |= (uint64_t)buf[1] << 8;
	num |= (uint64_t)buf[2] << 16;
	num |= (uint64_t)buf[3] << 24;
	num |= (uint64_t)buf[4] << 32;
	num |= (uint64_t)buf[5] << 40;
	num |= (uint64_t)buf[6] << 48;
	num |= (uint64_t)buf[7] << 56;
	return num;
#endif
}


// NOTE: Possible byte swapping must be done in a macro to allow the compiler
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible.
#if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16be(buf, num) write16ne(buf, conv16be(num))
#	define write32be(buf, num) write32ne(buf, conv32be(num))
#	define write64be(buf, num) write64ne(buf, conv64be(num))
#endif

#if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
#	define write16le(buf, num) write16ne(buf, conv16le(num))
#	define write32le(buf, num) write32ne(buf, conv32le(num))
#	define write64le(buf, num) write64ne(buf, conv64le(num))
#endif


#ifndef write16be
static inline void
write16be(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
	return;
}
#endif


#ifndef write16le
static inline void
write16le(uint8_t *buf, uint16_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	return;
}
#endif


#ifndef write32be
static inline void
write32be(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
	return;
}
#endif


#ifndef write32le
static inline void
write32le(uint8_t *buf, uint32_t num)
{
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
	return;
}
#endif


//////////////////////////////
// Aligned reads and writes //
//////////////////////////////

// Separate functions for aligned reads and writes are provided since on
// strict-align archs aligned access is much faster than unaligned access.
//
// Just like in the unaligned case, memcpy() is needed to avoid
// strict aliasing violations. However, on archs that don't support
// unaligned access the compiler cannot know that the pointers given
// to memcpy() are aligned which results in slow code. As of C11 there is
// no standard way to tell the compiler that we know that the address is
// aligned but some compilers have language extensions to do that. With
// such language extensions the memcpy() method gives excellent results.
//
// What to do on a strict-align system when no known language extensions
// are available? Falling back to byte-by-byte access would be safe but ruin
// optimizations that have been made specifically with aligned access in mind.
// As a compromise, aligned reads will fall back to non-compliant type punning
// but aligned writes will be byte-by-byte, that is, fast reads are preferred
// over fast writes. This obviously isn't great but hopefully it's a working
// compromise for now.
//
// __builtin_assume_aligned is supported by GCC >= 4.7 and clang >= 3.6.
#ifdef HAVE___BUILTIN_ASSUME_ALIGNED
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, __builtin_assume_aligned(src, size), size)
#else
#	define tuklib_memcpy_aligned(dest, src, size) \
		memcpy(dest, src, size)
#	ifndef TUKLIB_FAST_UNALIGNED_ACCESS
#		define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
#	endif
#endif
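
// For example, when the extension is available,
//
//     uint32_t num;
//     tuklib_memcpy_aligned(&num, buf, sizeof(num));
//
// lets the compiler emit a single aligned 32-bit load even on strict-align
// targets. This is only valid when buf really is suitably aligned; passing
// a misaligned pointer is undefined behavior once the compiler assumes
// alignment.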

static inline uint16_t
aligned_read16ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint16_t *)buf;
#else
	uint16_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint32_t
aligned_read32ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint32_t *)buf;
#else
	uint32_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline uint64_t
aligned_read64ne(const uint8_t *buf)
{
#if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
		|| defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
	return *(const uint64_t *)buf;
#else
	uint64_t num;
	tuklib_memcpy_aligned(&num, buf, sizeof(num));
	return num;
#endif
}


static inline void
aligned_write16ne(uint8_t *buf, uint16_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint16_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write32ne(uint8_t *buf, uint32_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint32_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline void
aligned_write64ne(uint8_t *buf, uint64_t num)
{
#ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
	*(uint64_t *)buf = num;
#else
	tuklib_memcpy_aligned(buf, &num, sizeof(num));
#endif
	return;
}


static inline uint16_t
aligned_read16be(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16be(num);
}


static inline uint16_t
aligned_read16le(const uint8_t *buf)
{
	uint16_t num = aligned_read16ne(buf);
	return conv16le(num);
}


static inline uint32_t
aligned_read32be(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32be(num);
}


static inline uint32_t
aligned_read32le(const uint8_t *buf)
{
	uint32_t num = aligned_read32ne(buf);
	return conv32le(num);
}


static inline uint64_t
aligned_read64be(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64be(num);
}


static inline uint64_t
aligned_read64le(const uint8_t *buf)
{
	uint64_t num = aligned_read64ne(buf);
	return conv64le(num);
}
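
// A usage sketch for the aligned variants (hypothetical code; the caller
// must guarantee the alignment, e.g. with C11 alignas or by using an
// offset that is a multiple of the access size):
//
//     // buf is known to be 8-byte aligned here:
//     aligned_write64ne(buf, num);
//     uint64_t copy = aligned_read64ne(buf);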

// These need to be macros like in the unaligned case.
#define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
#define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
#define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
#define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
#define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
#define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))


////////////////////
// Bit operations //
////////////////////

static inline uint32_t
bsr32(uint32_t n)
{
	// Check for ICC first, since it tends to define __GNUC__ too.
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n);

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
	// GCC >= 3.4 has __builtin_clz(), which gives good results on
	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
	// either plain BSR (so the XOR gets optimized away) or LZCNT and
	// XOR (if -march indicates that SSE4a instructions are supported).
	return (uint32_t)__builtin_clz(n) ^ 31U;

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i;

#else
	uint32_t i = 31;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 15;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i -= 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i -= 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i -= 2;
	}

	if ((n & 0x80000000) == 0)
		--i;

	return i;
#endif
}


static inline uint32_t
clz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_reverse(n) ^ 31U;

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX == UINT32_MAX
	return (uint32_t)__builtin_clz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsrl %1, %0\n\t"
		"xorl $31, %0"
		: "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanReverse(&i, n);
	return i ^ 31U;

#else
	uint32_t i = 0;

	if ((n & 0xFFFF0000) == 0) {
		n <<= 16;
		i = 16;
	}

	if ((n & 0xFF000000) == 0) {
		n <<= 8;
		i += 8;
	}

	if ((n & 0xF0000000) == 0) {
		n <<= 4;
		i += 4;
	}

	if ((n & 0xC0000000) == 0) {
		n <<= 2;
		i += 2;
	}

	if ((n & 0x80000000) == 0)
		++i;

	return i;
#endif
}
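
// For example, for n == UINT32_C(0x00010000) (only bit 16 set),
// bsr32(n) == 16 and clz32(n) == 15; for any non-zero n the two
// results sum to 31.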

static inline uint32_t
ctz32(uint32_t n)
{
#if defined(__INTEL_COMPILER)
	return _bit_scan_forward(n);

#elif (TUKLIB_GNUC_REQ(3, 4) || defined(__clang__)) && UINT_MAX >= UINT32_MAX
	return (uint32_t)__builtin_ctz(n);

#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
	uint32_t i;
	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
	return i;

#elif defined(_MSC_VER)
	unsigned long i;
	_BitScanForward(&i, n);
	return i;

#else
	uint32_t i = 0;

	if ((n & 0x0000FFFF) == 0) {
		n >>= 16;
		i = 16;
	}

	if ((n & 0x000000FF) == 0) {
		n >>= 8;
		i += 8;
	}

	if ((n & 0x0000000F) == 0) {
		n >>= 4;
		i += 4;
	}

	if ((n & 0x00000003) == 0) {
		n >>= 2;
		i += 2;
	}

	if ((n & 0x00000001) == 0)
		++i;

	return i;
#endif
}

#define bsf32 ctz32

#endif