Lines Matching +full:pre +full:- +full:configurable

2  *  xxHash - Extremely Fast Hash algorithm
3 * Copyright (C) 2012-2023, Yann Collet
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
31 * - xxHash homepage: http://www.xxhash.com
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
67 return (X << R) | (X >> (64 - R)); in rotl64()
111 const unsigned char *const Limit = BEnd - 32; in xxHash64()
115 uint64_t V4 = Seed - PRIME64_1; in xxHash64()
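The lines above are from the classic 64-bit xxHash core loop. For orientation, a minimal sketch of the four-lane accumulator setup they imply; the constants are the published xxHash primes, and the function name is illustrative rather than taken from this file:

    #include <cstdint>

    static constexpr uint64_t PRIME64_1 = 0x9E3779B185EBCA87ULL;
    static constexpr uint64_t PRIME64_2 = 0xC2B2AE3D27D4EB4FULL;

    // Four independent 8-byte lanes; the main loop consumes 32 bytes per
    // round, which is why the limit above sits at BEnd - 32.
    static void initLanes(uint64_t Seed, uint64_t &V1, uint64_t &V2,
                          uint64_t &V3, uint64_t &V4) {
      V1 = Seed + PRIME64_1 + PRIME64_2;
      V2 = Seed + PRIME64_2;
      V3 = Seed;
      V4 = Seed - PRIME64_1; // the initialization matched at line 115
    }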
171 // clang-format off
186 // clang-format on
191 // Calculates a 64-bit to 128-bit multiply, then XOR folds it.
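A minimal sketch of the operation this comment describes, assuming a compiler that provides __uint128_t (GCC and Clang on 64-bit targets); the name is illustrative, not this file's:

    #include <cstdint>

    static inline uint64_t mul128_fold64(uint64_t lhs, uint64_t rhs) {
      __uint128_t const product = static_cast<__uint128_t>(lhs) * rhs;
      // XOR-fold the 128-bit product back down to 64 bits.
      return static_cast<uint64_t>(product) ^
             static_cast<uint64_t>(product >> 64);
    }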
229 const uint8_t c3 = input[len - 1]; in XXH3_len_1to3_64b()
242 const uint32_t input2 = endian::read32le(input + len - 4); in XXH3_len_4to8_64b()
244 (endian::read64le(secret + 8) ^ endian::read64le(secret + 16)) - seed; in XXH3_len_4to8_64b()
260 (endian::read64le(secret + 40) ^ endian::read64le(secret + 48)) - seed; in XXH3_len_9to16_64b()
262 input_hi ^= endian::read64le(input + len - 8); in XXH3_len_9to16_64b()
284 uint64_t rhs = 0U - seed; in XXH3_mix16B()
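The `0U - seed` line belongs to XXH3_mix16B, the 16-byte mixer that the following paths lean on. A hedged reconstruction consistent with these fragments; read64le stands in for endian::read64le, and mul128_fold64 is the fold sketched earlier:

    static inline uint64_t mix16B(const uint8_t *input, const uint8_t *secret,
                                  uint64_t seed) {
      uint64_t lhs = seed;
      uint64_t rhs = 0U - seed; // unsigned wraparound, well-defined
      lhs += read64le(secret);
      rhs += read64le(secret + 8);
      lhs ^= read64le(input);
      rhs ^= read64le(input + 8);
      return mul128_fold64(lhs, rhs);
    }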
292 /* For mid range keys, XXH3 uses a Mum-hash variant. */
299 acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed); in XXH3_len_17to128_64b()
302 acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed); in XXH3_len_17to128_64b()
305 acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed); in XXH3_len_17to128_64b()
308 acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed); in XXH3_len_17to128_64b()
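The staggered offsets above give away the shape of the 17-128 byte path: two accumulators walk inward from both ends of the input, one mix16B pair per 32 bytes. A sketch under that reading, reusing mix16B and PRIME64_1 from the sketches above, with XXH3_avalanche assumed from context:

    static uint64_t len_17to128(const uint8_t *input, size_t len,
                                const uint8_t *secret, uint64_t seed) {
      uint64_t acc = len * PRIME64_1;
      uint64_t acc_end = mix16B(input + len - 16, secret + 16, seed);
      acc += mix16B(input, secret, seed);
      if (len > 32) {
        acc += mix16B(input + 16, secret + 32, seed);
        acc_end += mix16B(input + len - 32, secret + 48, seed);
        if (len > 64) {
          acc += mix16B(input + 32, secret + 64, seed);
          acc_end += mix16B(input + len - 48, secret + 80, seed);
          if (len > 96) {
            acc += mix16B(input + 48, secret + 96, seed);
            acc_end += mix16B(input + len - 64, secret + 112, seed);
          }
        }
      }
      return XXH3_avalanche(acc + acc_end);
    }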
330 secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed); in XXH3_len_129to240_64b()
334 XXH3_mix16B(input + len - 16, in XXH3_len_129to240_64b()
335 secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); in XXH3_len_129to240_64b()
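The two secret offsets here outline the 129-240 byte path: eight mix16B rounds over the start of the secret, further rounds shifted by XXH3_MIDSIZE_STARTOFFSET, and a final round pinned XXH3_MIDSIZE_LASTOFFSET before the end of a minimum-size secret. A sketch assuming upstream xxHash's constant values (STARTOFFSET = 3, LASTOFFSET = 17, XXH3_SECRETSIZE_MIN = 136):

    static uint64_t len_129to240(const uint8_t *input, size_t len,
                                 const uint8_t *secret, uint64_t seed) {
      uint64_t acc = len * PRIME64_1;
      size_t const nbRounds = len / 16;
      for (size_t i = 0; i < 8; ++i)
        acc += mix16B(input + 16 * i, secret + 16 * i, seed);
      acc = XXH3_avalanche(acc);
      for (size_t i = 8; i < nbRounds; ++i)
        acc += mix16B(input + 16 * i,
                      secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed);
      // Last 16 bytes, keyed off the tail of the minimum-size secret.
      acc += mix16B(input + len - 16,
                    secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET,
                    seed);
      return XXH3_avalanche(acc);
    }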
346 // - workaround for suboptimal codegen on older GCC
347 // - compiler barriers against instruction reordering
348 // - WebAssembly SIMD support
349 // - configurable split between NEON and scalar lanes (benchmarking shows no penalty when fully doing SIMD on the Apple M1)
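For the compiler-barrier item in this list, upstream xxHash uses an empty inline-asm statement that claims to modify a variable; a representative form (an assumption about the upstream technique, and the macro name is illustrative):

    // Forces the compiler to treat `var` as modified, blocking reordering
    // and unwanted fusion of adjacent SIMD operations. "+w" pins the operand
    // to a NEON register; scalar variants use "+r" instead.
    #define COMPILER_GUARD_W(var) __asm__ __volatile__("" : "+w"(var))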
391 * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to in XXH3_accumulate_512_neon()
395 * The intrinsic returns a double vector because the original ARMv7-a in XXH3_accumulate_512_neon()
399 * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] in XXH3_accumulate_512_neon()
400 * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] in XXH3_accumulate_512_neon()
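A compilable illustration of that de-interleave: vuzpq_u32 gathers the even lanes of its two arguments (the low halves) into one vector and the odd lanes (the high halves) into another, exactly the L/H shuffle in the diagram:

    #include <arm_neon.h>

    // .val[0] = even lanes: dk11L dk12L dk21L dk22L
    // .val[1] = odd lanes:  dk11H dk12H dk21H dk22H
    uint32x4x2_t deinterleaveLowsHighs(uint32x4_t a, uint32x4_t b) {
      return vuzpq_u32(a, b);
    }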
524 (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; in XXH3_hashLong_64b()
526 const size_t nb_blocks = (len - 1) / block_len; in XXH3_hashLong_64b()
533 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); in XXH3_hashLong_64b()
537 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN; in XXH3_hashLong_64b()
543 XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN, in XXH3_hashLong_64b()
544 secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); in XXH3_hashLong_64b()
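Taken together, these lines give the skeleton of the long-input routine: whole blocks with a scramble after each, then the stripes of the final partial block, then one last stripe flush against the end of the input (which may overlap the previous one). A sketch of that control flow, using the XXH3_accumulate/XXH3_scrambleAcc/XXH3_accumulate_512 helpers named in the matches:

    size_t const nbStripesPerBlock =
        (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
    size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
    size_t const nb_blocks = (len - 1) / block_len;
    for (size_t n = 0; n < nb_blocks; ++n) {
      XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock);
      XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN);
    }
    // Stripes of the final, partial block.
    size_t const nbStripes = (len - 1 - block_len * nb_blocks) / XXH_STRIPE_LEN;
    XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes);
    // One final stripe aligned to the end of the input.
    XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN,
                        secret + secretSize - XXH_STRIPE_LEN -
                            XXH_SECRET_LASTACC_START);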
568 * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, even without counting the significantly larger output size.
571 * For example, extra steps are taken to avoid the seed-dependent collisions
572 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
575 * lengths. Note that longer hashes are about as fast as the 64-bit version
576 * due to it using only a slight modification of the 64-bit loop.
578 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
579 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
585 * @brief 32-bit rotate left.
587 * @param x The 32-bit integer to be rotated.
589 * @pre @p r > 0 && @p r < 32
605 #define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
606 #define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
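An equivalent inline-function form of these macros, for comparison; unlike the macros it evaluates its arguments exactly once, and the @pre above still applies (0 < r < width):

    #include <cstdint>

    static inline uint32_t rotl32(uint32_t x, unsigned r) {
      return (x << r) | (x >> (32 - r));
    }
    static inline uint64_t rotl64(uint64_t x, unsigned r) {
      return (x << r) | (x >> (64 - r));
    }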
612 * @brief Calculates a 64->128-bit long multiply.
617 * @param lhs, rhs The 64-bit integers to be multiplied
618 * @return The 128-bit result represented in an @ref XXH128_hash_t.
624 * On most 64-bit targets, GCC and Clang define a __uint128_t type. in XXH_mult64to128()
625 * This is usually the best way as it usually uses a native long 64-bit in XXH_mult64to128()
630 * Despite being a 32-bit platform, Clang (and emscripten) define this type in XXH_mult64to128()
632 * compiler builtin call which calculates a full 128-bit multiply. in XXH_mult64to128()
634 * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 in XXH_mult64to128()
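The fast path this comment argues for, sketched with __uint128_t; XXH128_hash_t is the {low64, high64} pair declared in this file's header:

    static inline XXH128_hash_t mult64to128(uint64_t lhs, uint64_t rhs) {
      __uint128_t const product = static_cast<__uint128_t>(lhs) * rhs;
      XXH128_hash_t r128;
      r128.low64 = static_cast<uint64_t>(product);
      r128.high64 = static_cast<uint64_t>(product >> 64);
      return r128;
    }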
683 * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. in XXH_mult64to128()
690-698 * [schoolbook long-multiplication diagram] in XXH_mult64to128()
708 * in 32-bit ARMv6 and later, which is shown below: in XXH_mult64to128()
719 * comparable to some 64-bit ALUs. in XXH_mult64to128()
722 * of 32-bit ADD/ADCs. in XXH_mult64to128()
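A portable rendering of the scalar method described above: four 32x32->64 partial products, with the cross terms summed so the carry out of the low half propagates exactly once. This follows upstream xxHash's published scalar path; the helper names are illustrative:

    // Widen the low 32 bits of each operand and multiply without overflow.
    static inline uint64_t mult32to64(uint64_t a, uint64_t b) {
      return (a & 0xFFFFFFFF) * (b & 0xFFFFFFFF);
    }

    static inline XXH128_hash_t mult64to128_scalar(uint64_t lhs, uint64_t rhs) {
      uint64_t const lo_lo = mult32to64(lhs, rhs);
      uint64_t const hi_lo = mult32to64(lhs >> 32, rhs);
      uint64_t const lo_hi = mult32to64(lhs, rhs >> 32);
      uint64_t const hi_hi = mult32to64(lhs >> 32, rhs >> 32);
      // (2^32-1)^2 + 2*(2^32-1) < 2^64, so this sum cannot overflow.
      uint64_t const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
      uint64_t const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
      uint64_t const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
      XXH128_hash_t r128;
      r128.low64 = lower;
      r128.high64 = upper;
      return r128;
    }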
760 uint8_t const c3 = input[len - 1]; in XXH3_len_1to3_128b()
767 (endian::read32le(secret + 8) ^ endian::read32le(secret + 12)) - seed; in XXH3_len_1to3_128b()
781 uint32_t const input_hi = endian::read32le(input + len - 4); in XXH3_len_4to8_128b()
805 (endian::read64le(secret + 32) ^ endian::read64le(secret + 40)) - seed; in XXH3_len_9to16_128b()
809 uint64_t input_hi = endian::read64le(input + len - 8); in XXH3_len_9to16_128b()
816 m128.low64 += (uint64_t)(len - 1) << 54; in XXH3_len_9to16_128b()
823 * The best approach to this operation is different on 32-bit and 64-bit. in XXH3_len_9to16_128b()
825 if (sizeof(void *) < sizeof(uint64_t)) { /* 32-bit */ in XXH3_len_9to16_128b()
827 * 32-bit optimized version, which is more readable. in XXH3_len_9to16_128b()
829 * On 32-bit, it removes an ADC and delays a dependency between the two in XXH3_len_9to16_128b()
830 * halves of m128.high64, but it generates an extra mask on 64-bit. in XXH3_len_9to16_128b()
836 * 64-bit optimized (albeit more confusing) version. in XXH3_len_9to16_128b()
846 * Inverse Property: x + y - x == y in XXH3_len_9to16_128b()
847 * a + (b * (1 + c - 1)) in XXH3_len_9to16_128b()
849 * a + (b * 1) + (b * (c - 1)) in XXH3_len_9to16_128b()
851 * a + b + (b * (c - 1)) in XXH3_len_9to16_128b()
855 * input_hi.hi + input_hi.lo + ((uint64_t)input_hi.lo * (PRIME32_2 - 1)) in XXH3_len_9to16_128b()
858 * input_hi + ((uint64_t)input_hi.lo * (PRIME32_2 - 1)) in XXH3_len_9to16_128b()
860 m128.high64 += input_hi + XXH_mult32to64((uint32_t)input_hi, PRIME32_2 - 1); in XXH3_len_9to16_128b()
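A standalone check that makes the algebra concrete; all arithmetic is mod 2^64, so overflow wraps identically on both sides of the identity:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      const uint64_t c = 2246822519U; // PRIME32_2
      for (uint64_t input_hi : {0x0123456789ABCDEFULL, ~0ULL, 42ULL}) {
        uint64_t lo = input_hi & 0x00000000FFFFFFFFULL; // input_hi.lo
        uint64_t hi = input_hi & 0xFFFFFFFF00000000ULL; // input_hi.hi
        // Masked form vs. the mask-free form derived above.
        assert(hi + lo * c == input_hi + lo * (c - 1));
      }
      return 0;
    }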
922 acc = XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed); in XXH3_len_17to128_128b()
924 acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed); in XXH3_len_17to128_128b()
926 acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed); in XXH3_len_17to128_128b()
928 acc = XXH128_mix32B(acc, input, input + len - 16, secret, seed); in XXH3_len_17to128_128b()
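These four calls walk inward in pairs, like the 64-bit ladder, but through a 128-bit accumulator. A hedged sketch of a mix32B-style step consistent with how it is invoked here: each half absorbs one 16-byte mix and is cross-XORed with the other input's raw words (read64le and mix16B as sketched earlier):

    static XXH128_hash_t mix32B(XXH128_hash_t acc, const uint8_t *input_1,
                                const uint8_t *input_2, const uint8_t *secret,
                                uint64_t seed) {
      acc.low64 += mix16B(input_1, secret + 0, seed);
      acc.low64 ^= read64le(input_2) + read64le(input_2 + 8);
      acc.high64 += mix16B(input_2, secret + 16, seed);
      acc.high64 ^= read64le(input_1) + read64le(input_1 + 8);
      return acc;
    }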
932 ((len - seed) * PRIME64_2); in XXH3_len_17to128_128b()
934 h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64); in XXH3_len_17to128_128b()
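The finalization these two lines come from: fold the accumulator halves together for the low word, weight them by primes plus a length/seed term for the high word, then avalanche both and negate the high word. A sketch under that reading, with PRIME64_4 assumed to be the fourth published xxHash prime:

    static XXH128_hash_t finalize128(XXH128_hash_t acc, size_t len,
                                     uint64_t seed) {
      XXH128_hash_t h128;
      h128.low64 = XXH3_avalanche(acc.low64 + acc.high64);
      h128.high64 = (acc.low64 * PRIME64_1) + (acc.high64 * PRIME64_4) +
                    ((len - seed) * PRIME64_2);
      h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64);
      return h128;
    }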
954 acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + i - 32, in XXH3_len_129to240_128b()
960 * NB: `i <= len` will duplicate the last 32 bytes if in XXH3_len_129to240_128b()
965 acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, in XXH3_len_129to240_128b()
966 secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, seed); in XXH3_len_129to240_128b()
970 XXH128_mix32B(acc, input + len - 16, input + len - 32, in XXH3_len_129to240_128b()
971 secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, in XXH3_len_129to240_128b()
972 (uint64_t)0 - seed); in XXH3_len_129to240_128b()
977 ((len - seed) * PRIME64_2); in XXH3_len_129to240_128b()
979 h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64); in XXH3_len_129to240_128b()
987 (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; in XXH3_hashLong_128b()
989 const size_t nb_blocks = (len - 1) / block_len; in XXH3_hashLong_128b()
997 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); in XXH3_hashLong_128b()
1001 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN; in XXH3_hashLong_128b()
1007 XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN, in XXH3_hashLong_128b()
1008 secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); in XXH3_hashLong_128b()
1018 acc, secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START, in XXH3_hashLong_128b()
1030 * For now, it's a contract pre-condition. in xxh3_128bits()
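One conventional way such a contract pre-condition is surfaced in LLVM-style code, shown purely as an illustration (this file instead relies on its callers to respect the secret's requirements):

    #include <cassert>
    #include <cstddef>

    // Hypothetical guard; XXH3_SECRETSIZE_MIN is 136 in upstream xxHash.
    static void checkSecretSize(size_t secretSize) {
      assert(secretSize >= XXH3_SECRETSIZE_MIN &&
             "xxh3 requires a secret of at least XXH3_SECRETSIZE_MIN bytes");
    }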