Lines Matching +full:16 +full:- +full:input

2  *  xxHash - Extremely Fast Hash algorithm
3 * Copyright (C) 2012-2023, Yann Collet
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
31 * - xxHash homepage: http://www.xxhash.com
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
67 return (X << R) | (X >> (64 - R)); in rotl64()
80 static uint64_t round(uint64_t Acc, uint64_t Input) { in round() argument
81 Acc += Input * PRIME64_2; in round()
111 const unsigned char *const Limit = BEnd - 32; in xxHash64()
115 uint64_t V4 = Seed - PRIME64_1; in xxHash64()
171 // clang-format off
186 // clang-format on
191 // Calculates a 64-bit to 128-bit multiply, then XOR folds it.
225 static uint64_t XXH3_len_1to3_64b(const uint8_t *input, size_t len, in XXH3_len_1to3_64b() argument
227 const uint8_t c1 = input[0]; in XXH3_len_1to3_64b()
228 const uint8_t c2 = input[len >> 1]; in XXH3_len_1to3_64b()
229 const uint8_t c3 = input[len - 1]; in XXH3_len_1to3_64b()
230 uint32_t combined = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) | in XXH3_len_1to3_64b()
238 static uint64_t XXH3_len_4to8_64b(const uint8_t *input, size_t len, in XXH3_len_4to8_64b() argument
241 const uint32_t input1 = endian::read32le(input); in XXH3_len_4to8_64b()
242 const uint32_t input2 = endian::read32le(input + len - 4); in XXH3_len_4to8_64b()
244 (endian::read64le(secret + 8) ^ endian::read64le(secret + 16)) - seed; in XXH3_len_4to8_64b()
255 static uint64_t XXH3_len_9to16_64b(const uint8_t *input, size_t len, in XXH3_len_9to16_64b() argument
260 (endian::read64le(secret + 40) ^ endian::read64le(secret + 48)) - seed; in XXH3_len_9to16_64b()
261 input_lo ^= endian::read64le(input); in XXH3_len_9to16_64b()
262 input_hi ^= endian::read64le(input + len - 8); in XXH3_len_9to16_64b()
269 static uint64_t XXH3_len_0to16_64b(const uint8_t *input, size_t len, in XXH3_len_0to16_64b() argument
272 return XXH3_len_9to16_64b(input, len, secret, seed); in XXH3_len_0to16_64b()
274 return XXH3_len_4to8_64b(input, len, secret, seed); in XXH3_len_0to16_64b()
276 return XXH3_len_1to3_64b(input, len, secret, seed); in XXH3_len_0to16_64b()
281 static uint64_t XXH3_mix16B(const uint8_t *input, uint8_t const *secret, in XXH3_mix16B() argument
284 uint64_t rhs = 0U - seed; in XXH3_mix16B()
287 lhs ^= endian::read64le(input); in XXH3_mix16B()
288 rhs ^= endian::read64le(input + 8); in XXH3_mix16B()
292 /* For mid range keys, XXH3 uses a Mum-hash variant. */
294 static uint64_t XXH3_len_17to128_64b(const uint8_t *input, size_t len, in XXH3_len_17to128_64b() argument
298 acc += XXH3_mix16B(input + 0, secret + 0, seed); in XXH3_len_17to128_64b()
299 acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed); in XXH3_len_17to128_64b()
301 acc += XXH3_mix16B(input + 16, secret + 32, seed); in XXH3_len_17to128_64b()
302 acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed); in XXH3_len_17to128_64b()
304 acc += XXH3_mix16B(input + 32, secret + 64, seed); in XXH3_len_17to128_64b()
305 acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed); in XXH3_len_17to128_64b()
307 acc += XXH3_mix16B(input + 48, secret + 96, seed); in XXH3_len_17to128_64b()
308 acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed); in XXH3_len_17to128_64b()
320 static uint64_t XXH3_len_129to240_64b(const uint8_t *input, size_t len, in XXH3_len_129to240_64b() argument
323 const unsigned nbRounds = len / 16; in XXH3_len_129to240_64b()
325 acc += XXH3_mix16B(input + 16 * i, secret + 16 * i, seed); in XXH3_len_129to240_64b()
329 acc += XXH3_mix16B(input + 16 * i, in XXH3_len_129to240_64b()
330 secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed); in XXH3_len_129to240_64b()
334 XXH3_mix16B(input + len - 16, in XXH3_len_129to240_64b()
335 secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed); in XXH3_len_129to240_64b()
346 // - workaround for suboptimal codegen on older GCC
347 // - compiler barriers against instruction reordering
348 // - WebAssembly SIMD support
349 // - configurable split between NEON and scalar lanes (benchmarking shows no
365 static void XXH3_accumulate_512_neon(uint64_t *acc, const uint8_t *input, in XXH3_accumulate_512_neon() argument
373 /* data_vec = input[i]; */ in XXH3_accumulate_512_neon()
374 uint64x2_t data_vec_1 = XXH_vld1q_u64(input + (i * 16)); in XXH3_accumulate_512_neon()
375 uint64x2_t data_vec_2 = XXH_vld1q_u64(input + ((i + 1) * 16)); in XXH3_accumulate_512_neon()
378 uint64x2_t key_vec_1 = XXH_vld1q_u64(secret + (i * 16)); in XXH3_accumulate_512_neon()
379 uint64x2_t key_vec_2 = XXH_vld1q_u64(secret + ((i + 1) * 16)); in XXH3_accumulate_512_neon()
391 * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to in XXH3_accumulate_512_neon()
395 * The intrinsic returns a double vector because the original ARMv7-a in XXH3_accumulate_512_neon()
399 * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] in XXH3_accumulate_512_neon()
400 * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] in XXH3_accumulate_512_neon()
446 uint64x2_t key_vec = XXH_vld1q_u64(secret + (i * 16)); in XXH3_scrambleAcc_neon()
478 static void XXH3_accumulate_512_scalar(uint64_t *acc, const uint8_t *input, in XXH3_accumulate_512_scalar() argument
481 uint64_t data_val = endian::read64le(input + 8 * i); in XXH3_accumulate_512_scalar()
499 static void XXH3_accumulate(uint64_t *acc, const uint8_t *input, in XXH3_accumulate() argument
502 XXH3_accumulate_512(acc, input + n * XXH_STRIPE_LEN, in XXH3_accumulate()
516 result64 += XXH3_mix2Accs(acc + 2 * i, key + 16 * i); in XXH3_mergeAccs()
521 static uint64_t XXH3_hashLong_64b(const uint8_t *input, size_t len, in XXH3_hashLong_64b() argument
524 (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; in XXH3_hashLong_64b()
526 const size_t nb_blocks = (len - 1) / block_len; in XXH3_hashLong_64b()
527 alignas(16) uint64_t acc[XXH_ACC_NB] = { in XXH3_hashLong_64b()
532 XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock); in XXH3_hashLong_64b()
533 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); in XXH3_hashLong_64b()
537 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN; in XXH3_hashLong_64b()
539 XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes); in XXH3_hashLong_64b()
543 XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN, in XXH3_hashLong_64b()
544 secret + secretSize - XXH_STRIPE_LEN - in XXH3_hashLong_64b()
556 if (len <= 16) in xxh3_64bits()
568 * XXH3's 128-bit variant has better mixing and strength than the 64-bit
571 * For example, extra steps are taken to avoid the seed-dependent collisions
572 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
575 * lengths. Note that longer hashes are about as fast as the 64-bit version
576 * due to it using only a slight modification of the 64-bit loop.
578 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
579 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
585 * @brief 32-bit rotate left.
587 * @param x The 32-bit integer to be rotated.
605 #define XXH_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
606 #define XXH_rotl64(x, r) (((x) << (r)) | ((x) >> (64 - (r))))
612 * @brief Calculates a 64->128-bit long multiply.
617 * @param lhs , rhs The 64-bit integers to be multiplied
618 * @return The 128-bit result represented in an @ref XXH128_hash_t.
624 * On most 64-bit targets, GCC and Clang define a __uint128_t type. in XXH_mult64to128()
625 * This is usually the best way as it usually uses a native long 64-bit in XXH_mult64to128()
630 * Despite being a 32-bit platform, Clang (and emscripten) define this type in XXH_mult64to128()
632 * compiler builtin call which calculates a full 128-bit multiply. in XXH_mult64to128()
634 * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 in XXH_mult64to128()
683 * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. in XXH_mult64to128()
690 * ---------- in XXH_mult64to128()
695 * --------- in XXH_mult64to128()
698 * --------- in XXH_mult64to128()
708 * in 32-bit ARMv6 and later, which is shown below: in XXH_mult64to128()
719 * comparable to some 64-bit ALUs. in XXH_mult64to128()
722 * of 32-bit ADD/ADCs. in XXH_mult64to128()
750 XXH3_len_1to3_128b(const uint8_t *input, size_t len, const uint8_t *secret, in XXH3_len_1to3_128b() argument
754 * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } in XXH3_len_1to3_128b()
755 * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } in XXH3_len_1to3_128b()
756 * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } in XXH3_len_1to3_128b()
758 uint8_t const c1 = input[0]; in XXH3_len_1to3_128b()
759 uint8_t const c2 = input[len >> 1]; in XXH3_len_1to3_128b()
760 uint8_t const c3 = input[len - 1]; in XXH3_len_1to3_128b()
761 uint32_t const combinedl = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) | in XXH3_len_1to3_128b()
767 (endian::read32le(secret + 8) ^ endian::read32le(secret + 12)) - seed; in XXH3_len_1to3_128b()
777 XXH3_len_4to8_128b(const uint8_t *input, size_t len, const uint8_t *secret, in XXH3_len_4to8_128b() argument
780 uint32_t const input_lo = endian::read32le(input); in XXH3_len_4to8_128b()
781 uint32_t const input_hi = endian::read32le(input + len - 4); in XXH3_len_4to8_128b()
784 (endian::read64le(secret + 16) ^ endian::read64le(secret + 24)) + seed; in XXH3_len_4to8_128b()
802 XXH3_len_9to16_128b(const uint8_t *input, size_t len, const uint8_t *secret, in XXH3_len_9to16_128b() argument
805 (endian::read64le(secret + 32) ^ endian::read64le(secret + 40)) - seed; in XXH3_len_9to16_128b()
808 uint64_t const input_lo = endian::read64le(input); in XXH3_len_9to16_128b()
809 uint64_t input_hi = endian::read64le(input + len - 8); in XXH3_len_9to16_128b()
816 m128.low64 += (uint64_t)(len - 1) << 54; in XXH3_len_9to16_128b()
823 * The best approach to this operation is different on 32-bit and 64-bit. in XXH3_len_9to16_128b()
825 if (sizeof(void *) < sizeof(uint64_t)) { /* 32-bit */ in XXH3_len_9to16_128b()
827 * 32-bit optimized version, which is more readable. in XXH3_len_9to16_128b()
829 * On 32-bit, it removes an ADC and delays a dependency between the two in XXH3_len_9to16_128b()
830 * halves of m128.high64, but it generates an extra mask on 64-bit. in XXH3_len_9to16_128b()
836 * 64-bit optimized (albeit more confusing) version. in XXH3_len_9to16_128b()
846 * Inverse Property: x + y - x == y in XXH3_len_9to16_128b()
847 * a + (b * (1 + c - 1)) in XXH3_len_9to16_128b()
849 * a + (b * 1) + (b * (c - 1)) in XXH3_len_9to16_128b()
851 * a + b + (b * (c - 1)) in XXH3_len_9to16_128b()
855 * - 1)) in XXH3_len_9to16_128b()
858 * input_hi + ((uint64_t)input_hi.lo * (PRIME32_2 - 1)) in XXH3_len_9to16_128b()
860 m128.high64 += input_hi + XXH_mult32to64((uint32_t)input_hi, PRIME32_2 - 1); in XXH3_len_9to16_128b()
878 XXH3_len_0to16_128b(const uint8_t *input, size_t len, const uint8_t *secret, in XXH3_len_0to16_128b() argument
881 return XXH3_len_9to16_128b(input, len, secret, seed); in XXH3_len_0to16_128b()
883 return XXH3_len_4to8_128b(input, len, secret, seed); in XXH3_len_0to16_128b()
885 return XXH3_len_1to3_128b(input, len, secret, seed); in XXH3_len_0to16_128b()
904 acc.high64 += XXH3_mix16B(input_2, secret + 16, seed); in XXH128_mix32B()
910 XXH3_len_17to128_128b(const uint8_t *input, size_t len, const uint8_t *secret, in XXH3_len_17to128_128b() argument
922 XXH128_mix32B(acc, input + 48, input + len - 64, secret + 96, seed); in XXH3_len_17to128_128b()
924 acc = XXH128_mix32B(acc, input + 32, input + len - 48, secret + 64, seed); in XXH3_len_17to128_128b()
926 acc = XXH128_mix32B(acc, input + 16, input + len - 32, secret + 32, seed); in XXH3_len_17to128_128b()
928 acc = XXH128_mix32B(acc, input, input + len - 16, secret, seed); in XXH3_len_17to128_128b()
932 ((len - seed) * PRIME64_2); in XXH3_len_17to128_128b()
934 h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64); in XXH3_len_17to128_128b()
939 XXH3_len_129to240_128b(const uint8_t *input, size_t len, const uint8_t *secret, in XXH3_len_129to240_128b() argument
954 acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, secret + i - 32, in XXH3_len_129to240_128b()
960 * NB: `i <= len` will duplicate the last 32-bytes if in XXH3_len_129to240_128b()
965 acc = XXH128_mix32B(acc, input + i - 32, input + i - 16, in XXH3_len_129to240_128b()
966 secret + XXH3_MIDSIZE_STARTOFFSET + i - 160, seed); in XXH3_len_129to240_128b()
970 XXH128_mix32B(acc, input + len - 16, input + len - 32, in XXH3_len_129to240_128b()
971 secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, in XXH3_len_129to240_128b()
972 (uint64_t)0 - seed); in XXH3_len_129to240_128b()
977 ((len - seed) * PRIME64_2); in XXH3_len_129to240_128b()
979 h128.high64 = (uint64_t)0 - XXH3_avalanche(h128.high64); in XXH3_len_129to240_128b()
984 XXH3_hashLong_128b(const uint8_t *input, size_t len, const uint8_t *secret, in XXH3_hashLong_128b() argument
987 (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; in XXH3_hashLong_128b()
989 const size_t nb_blocks = (len - 1) / block_len; in XXH3_hashLong_128b()
990 alignas(16) uint64_t acc[XXH_ACC_NB] = { in XXH3_hashLong_128b()
996 XXH3_accumulate(acc, input + n * block_len, secret, nbStripesPerBlock); in XXH3_hashLong_128b()
997 XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN); in XXH3_hashLong_128b()
1001 const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN; in XXH3_hashLong_128b()
1003 XXH3_accumulate(acc, input + nb_blocks * block_len, secret, nbStripes); in XXH3_hashLong_128b()
1007 XXH3_accumulate_512(acc, input + len - XXH_STRIPE_LEN, in XXH3_hashLong_128b()
1008 secret + secretSize - XXH_STRIPE_LEN - in XXH3_hashLong_128b()
1018 acc, secret + secretSize - sizeof(acc) - XXH_SECRET_MERGEACCS_START, in XXH3_hashLong_128b()
1025 const uint8_t *input = data.data(); in xxh3_128bits() local
1030 * For now, it's a contract pre-condition. in xxh3_128bits()
1033 if (len <= 16) in xxh3_128bits()
1034 return XXH3_len_0to16_128b(input, len, kSecret, /*seed64=*/0); in xxh3_128bits()
1036 return XXH3_len_17to128_128b(input, len, kSecret, sizeof(kSecret), in xxh3_128bits()
1039 return XXH3_len_129to240_128b(input, len, kSecret, sizeof(kSecret), in xxh3_128bits()
1041 return XXH3_hashLong_128b(input, len, kSecret, sizeof(kSecret)); in xxh3_128bits()