Lines Matching +full:1 +full:x64 +full:- +full:bit
2 * xxHash - Extremely Fast Hash algorithm
3 * Copyright (C) 2012-2023, Yann Collet
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
31 * - xxHash homepage: http://www.xxhash.com
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
53 #define LLVM_XXH_USE_NEON 1
67 return (X << R) | (X >> (64 - R)); in rotl64()
111 const unsigned char *const Limit = BEnd - 32; in xxHash64()
115 uint64_t V4 = Seed - PRIME64_1; in xxHash64()
128 H64 = rotl64(V1, 1) + rotl64(V2, 7) + rotl64(V3, 12) + rotl64(V4, 18); in xxHash64()
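For context, each accumulator V1..V4 merged above is advanced one lane at a time by XXH64's round function; a minimal sketch of the round and merge steps, assuming the PRIME64_* constants defined earlier in the file (the LLVM port names these round and mergeRound):

#include <cstddef>
#include <cstdint>

static const uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
static const uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;
static const uint64_t PRIME64_4 = 0x85EBCA77C2B2AE63ULL;

static uint64_t rotl64(uint64_t X, size_t R) {
  return (X << R) | (X >> (64 - R));
}

// One XXH64 round: fold a 64-bit input lane into an accumulator.
static uint64_t round(uint64_t Acc, uint64_t Input) {
  Acc += Input * PRIME64_2;
  Acc = rotl64(Acc, 31);
  Acc *= PRIME64_1;
  return Acc;
}

// Merge one accumulator into the combined H64 produced above.
static uint64_t mergeRound(uint64_t Acc, uint64_t Val) {
  Val = round(0, Val);
  Acc ^= Val;
  Acc = Acc * PRIME64_1 + PRIME64_4;
  return Acc;
}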
171 // clang-format off
178 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
180 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
186 // clang-format on
191 // Calculates a 64-bit to 128-bit multiply, then XOR folds it.
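A minimal sketch of that operation, assuming a compiler with __uint128_t support (the real file also carries MSVC and portable scalar fallbacks, excerpted further below):

#include <cstdint>

static inline uint64_t mul128_fold64_sketch(uint64_t lhs, uint64_t rhs) {
  __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs;
  // XOR-fold the high 64 bits into the low 64 bits.
  return (uint64_t)product ^ (uint64_t)(product >> 64);
}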
228 const uint8_t c2 = input[len >> 1]; in XXH3_len_1to3_64b()
229 const uint8_t c3 = input[len - 1]; in XXH3_len_1to3_64b()
242 const uint32_t input2 = endian::read32le(input + len - 4); in XXH3_len_4to8_64b()
244 (endian::read64le(secret + 8) ^ endian::read64le(secret + 16)) - seed; in XXH3_len_4to8_64b()
260 (endian::read64le(secret + 40) ^ endian::read64le(secret + 48)) - seed; in XXH3_len_9to16_64b()
262 input_hi ^= endian::read64le(input + len - 8); in XXH3_len_9to16_64b()
284 uint64_t rhs = 0U - seed; in XXH3_mix16B()
292 /* For mid range keys, XXH3 uses a Mum-hash variant. */
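A sketch of that mixer, reconstructed around the rhs line quoted above (it mirrors the shape of the file's XXH3_mix16B; endian::read64le is from llvm/Support/Endian.h, and the fold is the 64-bit to 128-bit multiply/XOR-fold described earlier):

static uint64_t mix16B_sketch(const uint8_t *input, const uint8_t *secret,
                              uint64_t seed) {
  uint64_t lhs = seed;
  uint64_t rhs = 0U - seed;
  lhs += endian::read64le(secret);
  rhs += endian::read64le(secret + 8);
  lhs ^= endian::read64le(input);
  rhs ^= endian::read64le(input + 8);
  return XXH3_mul128_fold64(lhs, rhs);
}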
299 acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed); in XXH3_len_17to128_64b()
302 acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed); in XXH3_len_17to128_64b()
305 acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed); in XXH3_len_17to128_64b()
308 acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed); in XXH3_len_17to128_64b()
330 secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed); in XXH3_len_129to240_64b()
334 XXH3_mix16B(input + len - 16, in XXH3_len_129to240_64b()
335 secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); in XXH3_len_129to240_64b()
346 // - workaround for suboptimal codegen on older GCC
347 // - compiler barriers against instruction reordering
348 // - WebAssembly SIMD support
349 // - configurable split between NEON and scalar lanes (benchmarking shows no
350 //   penalty when fully doing NEON on the Apple M1)
375 uint64x2_t data_vec_2 = XXH_vld1q_u64(input + ((i + 1) * 16)); in XXH3_accumulate_512_neon()
379 uint64x2_t key_vec_2 = XXH_vld1q_u64(secret + ((i + 1) * 16)); in XXH3_accumulate_512_neon()
382 uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1); in XXH3_accumulate_512_neon()
383 uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1); in XXH3_accumulate_512_neon()
391 * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to in XXH3_accumulate_512_neon()
392 * get one vector with the low 32 bits of each lane and one vector with in XXH3_accumulate_512_neon()
393 * the high 32 bits of each lane. in XXH3_accumulate_512_neon()
395 * The intrinsic returns a double vector because the original ARMv7-a in XXH3_accumulate_512_neon()
396 * instruction modified both arguments in place. AArch64 and SIMD128 emit in XXH3_accumulate_512_neon()
397 * two instructions from this intrinsic. in XXH3_accumulate_512_neon()
399 * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] in XXH3_accumulate_512_neon()
400 * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] in XXH3_accumulate_512_neon()
408 uint32x4_t data_key_hi = unzipped.val[1]; in XXH3_accumulate_512_neon()
425 xacc[i + 1] = vaddq_u64(xacc[i + 1], sum_2); in XXH3_accumulate_512_neon()
483 acc[i ^ 1] += data_val; in XXH3_accumulate_512_scalar()
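The scalar lane above is the reference behavior the NEON path vectorizes; a sketch of one full scalar stripe, reconstructed around that line (it mirrors XXH3_accumulate_512_scalar; endian::read64le is assumed from llvm/Support/Endian.h):

static void accumulate_512_scalar_sketch(uint64_t *acc, const uint8_t *input,
                                         const uint8_t *secret) {
  for (size_t i = 0; i < 8; ++i) { // 8 lanes per 64-byte stripe
    uint64_t data_val = endian::read64le(input + 8 * i);
    uint64_t data_key = data_val ^ endian::read64le(secret + 8 * i);
    acc[i ^ 1] += data_val; // swap which accumulator takes the raw data
    acc[i] += (uint64_t)(uint32_t)data_key * (data_key >> 32); // 32x32->64
  }
}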
509 acc[1] ^ endian::read64le(secret + 8)); in XXH3_mix2Accs()
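The line above is the second operand of the pairwise merge; reconstructed, the whole helper folds one accumulator pair against 16 secret bytes (a sketch of XXH3_mix2Accs, using the fold described earlier):

static uint64_t mix2Accs_sketch(const uint64_t *acc, const uint8_t *secret) {
  return XXH3_mul128_fold64(acc[0] ^ endian::read64le(secret),
                            acc[1] ^ endian::read64le(secret + 8));
}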
524 (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; in XXH3_hashLong_64b()
526 const size_t nb_blocks = (len - 1) / block_len; in XXH3_hashLong_64b()
533 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); in XXH3_hashLong_64b()
537 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN; in XXH3_hashLong_64b()
543 XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN, in XXH3_hashLong_64b()
544 secret + secretSize - XXH_STRIPE_LEN - in XXH3_hashLong_64b()
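A worked example of the block geometry computed above, assuming the default 192-byte kSecret (the byte table excerpted near the top of this file):

#include <cstddef>

constexpr size_t XXH_STRIPE_LEN = 64;
constexpr size_t XXH_SECRET_CONSUME_RATE = 8; // secret bytes consumed per stripe
constexpr size_t secretSize = 192;            // sizeof(kSecret)

constexpr size_t nbStripesPerBlock =
    (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; // 16 stripes
constexpr size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock; // 1024 bytes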
568 * XXH3's 128-bit variant has better mixing and strength than the 64-bit
569 * variant, even without counting the significantly larger output size.
571 * For example, extra steps are taken to avoid the seed-dependent collisions
572 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
574 * This strength naturally comes at the cost of some speed, especially on short
575 * lengths. Note that longer hashes are about as fast as the 64-bit version
576 * due to it using only a slight modification of the 64-bit loop.
578 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
579 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
585 * @brief 32-bit rotate left.
587 * @param x The 32-bit integer to be rotated.
605 #define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
606 #define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
612 * @brief Calculates a 64->128-bit long multiply.
617 * @param lhs , rhs The 64-bit integers to be multiplied
618 * @return The 128-bit result represented in an @ref XXH128_hash_t.
624 * On most 64-bit targets, GCC and Clang define a __uint128_t type. in XXH_mult64to128()
625 * This is usually the best way as it usually uses a native long 64-bit in XXH_mult64to128()
626 * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64. in XXH_mult64to128()
630 * Despite being a 32-bit platform, Clang (and emscripten) define this type in XXH_mult64to128()
631 * despite not having the arithmetic for it. This results in a laggy in XXH_mult64to128()
632 * compiler builtin call which calculates a full 128-bit multiply. in XXH_mult64to128()
633 * In that case it is best to use the portable one. in XXH_mult64to128()
634 * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 in XXH_mult64to128()
647 * MSVC for x64's _umul128 method. in XXH_mult64to128()
649 * uint64_t _umul128(uint64_t Multiplier, uint64_t Multiplicand, uint64_t *HighProduct); in XXH_mult64to128()
652 * This compiles to single operand MUL on x64. in XXH_mult64to128()
683 * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. in XXH_mult64to128()
688 * 9 3 // D2 lhs = 93 in XXH_mult64to128()
689 * 7 5 // D2 rhs = 75 in XXH_mult64to128()
690 * ---------- in XXH_mult64to128()
691 * 1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15 in XXH_mult64to128()
692 * 4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45 in XXH_mult64to128()
693 * 2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21 in XXH_mult64to128()
694 * 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63 in XXH_mult64to128()
695 * --------- in XXH_mult64to128()
696 * 2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27 in XXH_mult64to128()
697 * 6 9 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 69 in XXH_mult64to128()
698 * --------- in XXH_mult64to128()
699 * 6 9 7 5 // D4 res = (69 * 100) + (27 % 10) * 10 + (15 % 10) = 6975 in XXH_mult64to128()
702 * 1. It avoids manual carry tracking. Just like how in XXH_mult64to128()
708 * in 32-bit ARMv6 and later, which is shown below: in XXH_mult64to128()
719 * comparable to some 64-bit ALUs. in XXH_mult64to128()
722 * of 32-bit ADD/ADCs. in XXH_mult64to128()
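Putting the decimal diagram back into code: a portable sketch of the scalar path, splitting each 64-bit operand into 32-bit halves (names are illustrative; the carry handling matches the cross/upper rows above, and cross cannot overflow a uint64_t):

#include <cstdint>

struct U128 { uint64_t low64, high64; };

static inline uint64_t mult32to64(uint32_t a, uint32_t b) {
  return (uint64_t)a * b;
}

static U128 mult64to128_scalar(uint64_t lhs, uint64_t rhs) {
  uint64_t lo_lo = mult32to64((uint32_t)lhs, (uint32_t)rhs);
  uint64_t hi_lo = mult32to64((uint32_t)(lhs >> 32), (uint32_t)rhs);
  uint64_t lo_hi = mult32to64((uint32_t)lhs, (uint32_t)(rhs >> 32));
  uint64_t hi_hi = mult32to64((uint32_t)(lhs >> 32), (uint32_t)(rhs >> 32));
  // Same trick as the decimal example: fold the partial carries into cross.
  uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
  uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
  uint64_t lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
  return {lower, upper};
}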
752 /* A doubled version of 1to3_64b with different constants. */ in XXH3_len_1to3_128b()
754 * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } in XXH3_len_1to3_128b()
755 * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } in XXH3_len_1to3_128b()
756 * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } in XXH3_len_1to3_128b()
759 uint8_t const c2 = input[len >> 1]; in XXH3_len_1to3_128b()
760 uint8_t const c3 = input[len - 1]; in XXH3_len_1to3_128b()
767 (endian::read32le(secret + 8) ^ endian::read32le(secret + 12)) - seed; in XXH3_len_1to3_128b()
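A sketch of the byte packing the table above describes; rotl32 and byteswap32 stand in for the rotate and byte-swap helpers (upstream derives the high half combinedh from combinedl this way):

uint8_t const c1 = input[0];
uint8_t const c2 = input[len >> 1];
uint8_t const c3 = input[len - 1];
// Little-endian byte order of combinedl: { c3, (uint8_t)len, c1, c2 }.
uint32_t const combinedl = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) |
                           ((uint32_t)c3 << 0) | ((uint32_t)len << 8);
uint32_t const combinedh = rotl32(byteswap32(combinedl), 13);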
781 uint32_t const input_hi = endian::read32le(input + len - 4); in XXH3_len_4to8_128b()
791 m128.high64 += (m128.low64 << 1); in XXH3_len_4to8_128b()
805 (endian::read64le(secret + 32) ^ endian::read64le(secret + 40)) - seed; in XXH3_len_9to16_128b()
809 uint64_t input_hi = endian::read64le(input + len - 8); in XXH3_len_9to16_128b()
813 * Put len in the middle of m128 to ensure that the length gets mixed to in XXH3_len_9to16_128b()
814 * both the low and high bits in the 128x64 multiply below. in XXH3_len_9to16_128b()
816 m128.low64 += (uint64_t)(len - 1) << 54; in XXH3_len_9to16_128b()
823 * The best approach to this operation is different on 32-bit and 64-bit. in XXH3_len_9to16_128b()
825 if (sizeof(void *) < sizeof(uint64_t)) { /* 32-bit */ in XXH3_len_9to16_128b()
827 * 32-bit optimized version, which is more readable. in XXH3_len_9to16_128b()
829 * On 32-bit, it removes an ADC and delays a dependency between the two in XXH3_len_9to16_128b()
830 * halves of m128.high64, but it generates an extra mask on 64-bit. in XXH3_len_9to16_128b()
836 * 64-bit optimized (albeit more confusing) version. in XXH3_len_9to16_128b()
846 * Inverse Property: x + y - x == y in XXH3_len_9to16_128b()
847 * a + (b * (1 + c - 1)) in XXH3_len_9to16_128b()
848 * Distributive Property: x * (y + z) == (x * y) + (x * z) in XXH3_len_9to16_128b()
849 * a + (b * 1) + (b * (c - 1)) in XXH3_len_9to16_128b()
850 * Identity Property: x * 1 == x in XXH3_len_9to16_128b()
851 * a + b + (b * (c - 1)) in XXH3_len_9to16_128b()
853 * Substitute a, b, and c back in: in XXH3_len_9to16_128b()
854 * input_hi.hi + input_hi.lo + ((uint64_t)input_hi.lo * (PRIME32_2 in XXH3_len_9to16_128b()
855 * - 1)) in XXH3_len_9to16_128b()
857 * Since input_hi.hi + input_hi.lo == input_hi, this simplifies to: in XXH3_len_9to16_128b()
858 * input_hi + ((uint64_t)input_hi.lo * (PRIME32_2 - 1)) in XXH3_len_9to16_128b()
860 m128.high64 += input_hi + XXH_mult32to64((uint32_t)input_hi, PRIME32_2 - 1); in XXH3_len_9to16_128b()
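The two formulations can be sanity-checked directly; a tiny self-contained check (a hypothetical harness, not part of the file), relying on unsigned wraparound behaving identically on both sides:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t PRIME32_2 = 0x85EBCA77U;
  uint64_t input_hi = 0x0123456789ABCDEFULL;
  uint64_t lo = (uint32_t)input_hi;
  // 32-bit friendly form: keep the high half in place, multiply the low half.
  uint64_t a = (input_hi & 0xFFFFFFFF00000000ULL) + lo * PRIME32_2;
  // 64-bit friendly form derived above.
  uint64_t b = input_hi + lo * (PRIME32_2 - 1);
  assert(a == b);
  return 0;
}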
865 /* 128x64 multiply: h128 = m128 * PRIME64_2; */ in XXH3_len_9to16_128b()
897 * A bit slower than XXH3_mix16B, but handles multiply by zero better.
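Reconstructed shape of that 32-byte mixer (a sketch after upstream XXH128_mix32B, reusing the mix16B sketch from earlier; the cross-wiring of input_1 and input_2 into opposite halves is what keeps a zeroed 16-byte half from nulling the whole product):

static XXH128_hash_t mix32B_sketch(XXH128_hash_t acc, const uint8_t *input_1,
                                   const uint8_t *input_2,
                                   const uint8_t *secret, uint64_t seed) {
  acc.low64 += mix16B_sketch(input_1, secret + 0, seed);
  acc.low64 ^= endian::read64le(input_2) + endian::read64le(input_2 + 8);
  acc.high64 += mix16B_sketch(input_2, secret + 16, seed);
  acc.high64 ^= endian::read64le(input_1) + endian::read64le(input_1 + 8);
  return acc;
}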
922 XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed); in XXH3_len_17to128_128b()
924 acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed); in XXH3_len_17to128_128b()
926 acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed); in XXH3_len_17to128_128b()
928 acc = XXH128_mix32B(acc, input, input + len - 16, secret, seed); in XXH3_len_17to128_128b()
932 ((len - seed) * PRIME64_2); in XXH3_len_17to128_128b()
934 h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64); in XXH3_len_17to128_128b()
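Reconstructed finalization around the two fragments above (a sketch after the upstream shape; XXH3_avalanche and the PRIME64_* constants are quoted elsewhere in this file):

static XXH128_hash_t len_17to128_finish_sketch(XXH128_hash_t acc, size_t len,
                                               uint64_t seed) {
  XXH128_hash_t h128;
  h128.low64 = acc.low64 + acc.high64;
  h128.high64 = (acc.low64 * PRIME64_1) + (acc.high64 * PRIME64_4) +
                (((uint64_t)len - seed) * PRIME64_2);
  h128.low64 = XXH3_avalanche(h128.low64);
  h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64);
  return h128;
}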
954 acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + i - 32, in XXH3_len_129to240_128b()
960 * NB: `i <= len` will duplicate the last 32-bytes if in XXH3_len_129to240_128b()
961 * len % 32 == 0. in XXH3_len_129to240_128b()
965 acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, in XXH3_len_129to240_128b()
966 secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, seed); in XXH3_len_129to240_128b()
970 XXH128_mix32B(acc, input + len - 16, input + len - 32, in XXH3_len_129to240_128b()
971 secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, in XXH3_len_129to240_128b()
972 (uint64_t)0 - seed); in XXH3_len_129to240_128b()
977 ((len - seed) * PRIME64_2); in XXH3_len_129to240_128b()
979 h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64); in XXH3_len_129to240_128b()
987 (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; in XXH3_hashLong_128b()
989 const size_t nb_blocks = (len - 1) / block_len; in XXH3_hashLong_128b()
997 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); in XXH3_hashLong_128b()
1001 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN; in XXH3_hashLong_128b()
1007 XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN, in XXH3_hashLong_128b()
1008 secret + secretSize - XXH_STRIPE_LEN - in XXH3_hashLong_128b()
1018 acc, secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START, in XXH3_hashLong_128b()
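The fragment above is the high-half merge; reconstructed, the finalization derives both halves from the same accumulators (a sketch mirroring upstream XXH3_hashLong_128b, assuming XXH3_mergeAccs and the upstream constant XXH_SECRET_MERGEACCS_START = 11):

static XXH128_hash_t hashLong_128b_finish_sketch(const uint64_t *acc,
                                                 const uint8_t *secret,
                                                 size_t secretSize,
                                                 size_t len) {
  XXH128_hash_t h128;
  h128.low64 = XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
                              (uint64_t)len * PRIME64_1);
  h128.high64 = XXH3_mergeAccs(acc,
                               secret + secretSize - 64 /* sizeof(acc) */ -
                                   XXH_SECRET_MERGEACCS_START,
                               ~((uint64_t)len * PRIME64_2));
  return h128;
}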
1030 * For now, it's a contract pre-condition. in xxh3_128bits()