Lines Matching +full:16 +full:- +full:input
2 * xxHash - Fast Hash algorithm
6 * - xxHash homepage: http://www.xxhash.com
7 * - xxHash source repository : https://github.com/Cyan4973/xxHash
9 * This source code is licensed under both the BSD-style license (found in the
12 * You may select, at your option, one of the above-listed licenses.
49 MD5-32 0.33 GB/s 10 Ronald L. Rivest
50 SHA1-32 0.28 GB/s 10
57 Other speed-oriented implementations can be faster,
59 https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c349009…
61 A 64-bit version, named XXH64, is available since r35.
62 It offers much better speed, but for 64-bit applications only.
79 * expressed as a compile-time constant:
81 * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
335 /*-**********************************************************************
336 * 32-bit hash
340 * @brief An unsigned 32-bit integer.
360 # error "unsupported platform: need a 32-bit type"
370 * Contains functions used in the classic 32-bit xxHash algorithm.
373 * XXH32 is useful for older platforms, with no or poor 64-bit performance.
375 * for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
383 * @brief Calculates the 32-bit hash of @p input using xxHash32.
387 * @param input The block of data to be hashed, at least @p length bytes in size.
388 * @param length The length of @p input, in bytes.
389 * @param seed The 32-bit seed to alter the hash's output predictably.
392 * The memory between @p input and @p input + @p length must be valid,
393 * readable, contiguous memory. However, if @p length is `0`, @p input may be
396 * @return The calculated 32-bit hash value.
404 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
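A minimal sketch of calling the one-shot function declared above (buffer, length, and seed values here are purely illustrative):

    #include <stdio.h>
    #include "xxhash.h"

    int main(void)
    {
        static const char data[] = "hello world";           /* example input */
        XXH32_hash_t const seed = 0;                         /* 0 == default seed */
        XXH32_hash_t const h = XXH32(data, sizeof(data)-1, seed);
        printf("XXH32 = 0x%08X\n", (unsigned)h);
        return 0;
    }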
407 * Streaming functions generate the xxHash value from an incremental input.
408 * This method is slower than single-call functions, due to state management.
421  * This function returns the nn-bit hash as an int or long long.
423 * It's still possible to continue inserting input into the hash state after a
495 * @param seed The 32-bit seed to alter the hash result predictably.
505 * @brief Consumes a block of @p input to an @ref XXH32_state_t.
510 * @param input The block of data to be hashed, at least @p length bytes in size.
511 * @param length The length of @p input, in bytes.
516 * The memory between @p input and @p input + @p length must be valid,
517 * readable, contiguous memory. However, if @p length is `0`, @p input may be
522 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t lengt…
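A sketch of the streaming flow documented above, using the public create/reset/update/digest/free calls; the chunk size is arbitrary and chosen only for illustration:

    #include "xxhash.h"

    XXH32_hash_t hash_in_chunks(const void* data, size_t len, size_t chunkSize)
    {
        XXH32_state_t* const state = XXH32_createState();
        const unsigned char* p = (const unsigned char*)data;
        XXH32_hash_t hash;
        if (state == NULL) return 0;               /* allocation failure */
        (void)XXH32_reset(state, 0 /* seed */);
        while (len > 0) {
            size_t const n = (len < chunkSize) ? len : chunkSize;
            (void)XXH32_update(state, p, n);       /* feed one chunk at a time */
            p += n; len -= n;
        }
        hash = XXH32_digest(state);                /* same result as one-shot XXH32() */
        XXH32_freeState(state);
        return hash;
    }

As noted above, the digest can be read at any point and more input can still be added afterwards.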
545  * This is the simplest and fastest format for further post-processing.
550 * The canonical representation settles this issue by mandating big-endian
551 * convention, the same convention as human-readable numbers (large digits first).
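A sketch of producing and reading back that canonical (big-endian) form with the corresponding public helpers:

    #include "xxhash.h"

    void store_hash_portably(XXH32_hash_t h, unsigned char out[4])
    {
        XXH32_canonical_t canon;
        XXH32_canonicalFromHash(&canon, h);   /* serializes big-endian, regardless of host */
        /* canon.digest holds 4 bytes suitable for storage or transmission */
        out[0] = canon.digest[0]; out[1] = canon.digest[1];
        out[2] = canon.digest[2]; out[3] = canon.digest[3];
        /* XXH32_hashFromCanonical(&canon) recovers the same XXH32_hash_t on any platform */
    }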
598 /* C-language Attributes are added in C23. */
634 /*-**********************************************************************
635 * 64-bit hash
639 * @brief An unsigned 64-bit integer.
655 /* the following type must have a width of 64-bit */
666 * Contains functions used in the classic 64-bit xxHash algorithm.
669 * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
676 * @brief Calculates the 64-bit hash of @p input using xxHash64.
678 * This function usually runs faster on 64-bit systems, but slower on 32-bit
681 * @param input The block of data to be hashed, at least @p length bytes in size.
682 * @param length The length of @p input, in bytes.
683 * @param seed The 64-bit seed to alter the hash's output predictably.
686 * The memory between @p input and @p input + @p length must be valid,
687 * readable, contiguous memory. However, if @p length is `0`, @p input may be
690 * @return The calculated 64-bit hash.
698 /* Begin FreeBSD - This symbol is needed by dll-linked CLI zstd(1). */
701 XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
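The 64-bit one-shot call mirrors XXH32(); a small illustrative sketch:

    #include "xxhash.h"

    XXH64_hash_t hash_message(const void* msg, size_t msgSize)
    {
        XXH64_hash_t const seed = 0;        /* 0 == default seed */
        return XXH64(msg, msgSize, seed);   /* typically fastest on 64-bit targets */
    }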
715 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t lengt…
732 * - Improved speed for both small and large inputs
733 * - True 64-bit and 128-bit outputs
734 * - SIMD acceleration
735 * - Improved 32-bit viability
739 * https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
745 * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
747 * Any 32-bit and 64-bit targets that can run XXH32 smoothly
766 * The API supports one-shot hashing, streaming mode, and custom secrets.
769 /*-**********************************************************************
770 * XXH3 64-bit variant
774 * default 64-bit variant, using default secret and default seed of 0.
820 * As a consequence, streaming is slower than one-shot hashing.
821 * For better performance, prefer one-shot functions whenever applicable.
849  * Similar to the one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
857 XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t …
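A sketch of the XXH3 64-bit variant in both forms described above, one-shot and streaming (error codes are ignored here for brevity):

    #include "xxhash.h"

    XXH64_hash_t xxh3_oneshot(const void* data, size_t len)
    {
        return XXH3_64bits(data, len);          /* default secret, default seed of 0 */
    }

    XXH64_hash_t xxh3_streamed(const void* data, size_t len)
    {
        XXH3_state_t* const state = XXH3_createState();
        XXH64_hash_t hash;
        if (state == NULL) return 0;
        (void)XXH3_64bits_reset(state);         /* same parameters => same result as one-shot */
        (void)XXH3_64bits_update(state, data, len);
        hash = XXH3_64bits_digest(state);
        XXH3_freeState(state);
        return hash;
    }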
864 /*-**********************************************************************
865 * XXH3 128-bit variant
869 * @brief The return value from 128-bit hashes.
887 * As a consequence, streaming is slower than one-shot hashing.
888 * For better performance, prefer one-shot functions whenever applicable.
893  * All reset and streaming functions have the same meaning as their 64-bit counterparts.
900 XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t…
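A corresponding sketch for the 128-bit variant; the result arrives as an XXH128_hash_t with low64/high64 fields and can be compared with XXH128_isEqual():

    #include "xxhash.h"

    int same_digest(const void* a, size_t lenA, const void* b, size_t lenB)
    {
        XXH128_hash_t const hA = XXH3_128bits(a, lenA);
        XXH128_hash_t const hB = XXH3_128bits(b, lenB);
        return XXH128_isEqual(hA, hB);   /* compares both 64-bit halves */
    }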
971    XXH32_hash_t large_len;    /*!< Whether the input length is >= 16 (handles @ref total_len_32 overflow) */
973 …XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
979 #ifndef XXH_NO_LONG_LONG /* defined when there is no 64-bit support */
994 XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
1078 /*!< Reserved field. Needed for padding on 64-bit. */
1082 /*!< Total length hashed. 64-bit even on 32-bit targets. */
1100 * @brief Initializes a stack-allocated `XXH3_state_s`.
1110 #define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; }
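A sketch of the stack-allocation pattern this macro exists for; the state definition is only visible with XXH_STATIC_LINKING_ONLY, and the reset call afterwards performs the actual initialization:

    #define XXH_STATIC_LINKING_ONLY   /* exposes XXH3_state_t's definition */
    #include "xxhash.h"

    XXH64_hash_t hash_on_stack(const void* data, size_t len)
    {
        XXH3_state_t state;           /* no heap allocation */
        XXH3_INITSTATE(&state);       /* makes the uninitialized state safe to reset */
        (void)XXH3_64bits_reset(&state);
        (void)XXH3_64bits_update(&state, data, len);
        return XXH3_64bits_digest(&state);
    }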
1114 * simple alias to pre-selected XXH3_128bits variant
1125 * Derive a high-entropy secret from any user-defined content, named customSeed.
1127  * The `_withSecret()` variants are useful to provide a higher level of protection than a 64-bit seed,
1130 * The function accepts as input a custom seed of any length and any content,
1131  * and derives from it a high-entropy secret of length @p secretSize
1140 * _and_ feature very high entropy (consist of random-looking bytes).
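A hedged sketch of that flow: derive a secret from arbitrary seed material with XXH3_generateSecret(), then hash with it (the buffer size is chosen as the public minimum, XXH3_SECRET_SIZE_MIN, for illustration):

    #include "xxhash.h"

    XXH64_hash_t hash_with_derived_secret(const void* data, size_t len,
                                          const void* seedMaterial, size_t seedSize)
    {
        unsigned char secret[XXH3_SECRET_SIZE_MIN];   /* any size >= the minimum is legal */
        /* derive a high-entropy secret from user-defined content of any length */
        if (XXH3_generateSecret(secret, sizeof(secret), seedMaterial, seedSize) != XXH_OK)
            return 0;
        return XXH3_64bits_withSecret(data, len, secret, sizeof(secret));
    }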
1183 * hence offering only a pure speed benefit on "large" input,
1184 * by skipping the need to regenerate the secret for every large input.
1186 * Another usage scenario is to hash the secret to a 64-bit hash value,
1230 /*-**********************************************************************
1232 *-**********************************************************************
1268 * @brief Define this to disable 64-bit code.
1281 * is sub-optimal.
1288 * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
1293 * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
1299 * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
1307 * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
1310 * inline small `memcpy()` calls, and it might also be faster on big-endian
1316 * Methods 1 and 2 rely on implementation-defined behavior. Use these with
1320 * See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
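These macros are compile-time switches; a sketch of selecting the byteshift load (method 3) for a target where unaligned or type-punned reads are unsafe:

    /* define before including xxhash.h, or pass -DXXH_FORCE_MEMORY_ACCESS=3 */
    #define XXH_FORCE_MEMORY_ACCESS 3   /* 0: memcpy (default), 1: packed attribute,
                                           2: direct cast, 3: byteshift */
    #include "xxhash.h"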
1328 * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
1334 * It checks for input alignment, and when conditions are met, uses a "fast
1335 * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
1356 * @brief When non-zero, sets all functions to `static`.
1371 * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
1372 * -fno-inline with GCC or Clang, this will automatically be defined.
1432 # if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
1433 || defined(__NO_INLINE__) /* -O0, -fno-inline */
1529 # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } whi…
1580 * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
1585 * @return The 32-bit native endian integer from the bytes at @p ptr.
1591 * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
1596 * @return The 32-bit little endian integer from the bytes at @p ptr.
1602 * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
1607 * @return The 32-bit big endian integer from the bytes at @p ptr.
1624 * @return The 32-bit little endian integer from the bytes at @p ptr.
1654 return ((const xxh_unalign*)ptr)->u32; in XXH_read32()
1661 * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
1713 * Portable and well-defined behavior. in XXH_isLittleEndian()
1727 * Compiler-specific Functions and Macros
1740 * @brief 32-bit rotate left.
1742 * @param x The 32-bit integer to be rotated.
1759 # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
1760 # define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
1766 * @brief A 32-bit byteswap.
1768 * @param x The 32-bit integer to byteswap.
1800 * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
1811 | ((xxh_u32)bytePtr[2] << 16) in XXH_readLE32()
1820 | ((xxh_u32)bytePtr[1] << 16) in XXH_readBE32()
1855 * 32-bit hash functions
1882 * This shuffles the bits so that any bit from @p input impacts several bits in
1886 * @param input The stripe of input to mix.
1889 static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) in XXH32_round() argument
1891 acc += input * XXH_PRIME32_2; in XXH32_round()
1904 * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on in XXH32_round()
1910 * - Four instructions are required to rotate, in XXH32_round()
1919 * - Instruction level parallelism is actually more beneficial here because in XXH32_round()
1937 * The final mix ensures that all input bits have a chance to impact any bit in
1949 h32 ^= h32 >> 16; in XXH32_avalanche()
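For context, the line above is the last step of XXH32's finalization mix; the full sequence, as published for XXH32, interleaves xor-shifts with multiplications by the prime constants:

    static xxh_u32 XXH32_avalanche(xxh_u32 h32)
    {
        h32 ^= h32 >> 15;
        h32 *= XXH_PRIME32_2;    /* 2246822519U */
        h32 ^= h32 >> 13;
        h32 *= XXH_PRIME32_3;    /* 3266489917U */
        h32 ^= h32 >> 16;        /* the step shown above */
        return h32;
    }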
1957 * @brief Processes the last 0-15 bytes of @p ptr.
1959 * There may be up to 15 bytes remaining to consume from the input.
1960 * This final stage will digest them to ensure that all input bytes are present
1964 * @param ptr The pointer to the remaining input.
1965 * @param len The remaining length, modulo 16.
1990 len -= 4; in XXH32_finalize()
1994 --len; in XXH32_finalize()
1998 switch(len&15) /* or switch(bEnd - p) */ { in XXH32_finalize()
2054 * @param input , len , seed Directly passed from @ref XXH32().
2055 * @param align Whether @p input is aligned.
2059 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) in XXH32_endian_align() argument
2063 if (input==NULL) XXH_ASSERT(len == 0); in XXH32_endian_align()
2065 if (len>=16) { in XXH32_endian_align()
2066 const xxh_u8* const bEnd = input + len; in XXH32_endian_align()
2067 const xxh_u8* const limit = bEnd - 15; in XXH32_endian_align()
2071 xxh_u32 v4 = seed - XXH_PRIME32_1; in XXH32_endian_align()
2074 v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; in XXH32_endian_align()
2075 v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; in XXH32_endian_align()
2076 v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; in XXH32_endian_align()
2077 v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; in XXH32_endian_align()
2078 } while (input < limit); in XXH32_endian_align()
2088 return XXH32_finalize(h32, input, len&15, align); in XXH32_endian_align()
2092 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) in XXH32() argument
2098 XXH32_update(&state, (const xxh_u8*)input, len); in XXH32()
2102 … if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ in XXH32()
2103 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); in XXH32()
2106 return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); in XXH32()
2138 statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; in XXH32_reset()
2139 statePtr->v[1] = seed + XXH_PRIME32_2; in XXH32_reset()
2140 statePtr->v[2] = seed + 0; in XXH32_reset()
2141 statePtr->v[3] = seed - XXH_PRIME32_1; in XXH32_reset()
2148 XXH32_update(XXH32_state_t* state, const void* input, size_t len) in XXH32_update() argument
2150 if (input==NULL) { in XXH32_update()
2155 { const xxh_u8* p = (const xxh_u8*)input; in XXH32_update()
2158 state->total_len_32 += (XXH32_hash_t)len; in XXH32_update()
2159 state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); in XXH32_update()
2161 if (state->memsize + len < 16) { /* fill in tmp buffer */ in XXH32_update()
2162 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); in XXH32_update()
2163 state->memsize += (XXH32_hash_t)len; in XXH32_update()
2167 if (state->memsize) { /* some data left from previous update */ in XXH32_update()
2168 XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); in XXH32_update()
2169 { const xxh_u32* p32 = state->mem32; in XXH32_update()
2170 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++; in XXH32_update()
2171 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++; in XXH32_update()
2172 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++; in XXH32_update()
2173 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32)); in XXH32_update()
2175 p += 16-state->memsize; in XXH32_update()
2176 state->memsize = 0; in XXH32_update()
2179 if (p <= bEnd-16) { in XXH32_update()
2180 const xxh_u8* const limit = bEnd - 16; in XXH32_update()
2183 state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4; in XXH32_update()
2184 state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4; in XXH32_update()
2185 state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4; in XXH32_update()
2186 state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4; in XXH32_update()
2192 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); in XXH32_update()
2193 state->memsize = (unsigned)(bEnd-p); in XXH32_update()
2206 if (state->large_len) { in XXH32_digest()
2207 h32 = XXH_rotl32(state->v[0], 1) in XXH32_digest()
2208 + XXH_rotl32(state->v[1], 7) in XXH32_digest()
2209 + XXH_rotl32(state->v[2], 12) in XXH32_digest()
2210 + XXH_rotl32(state->v[3], 18); in XXH32_digest()
2212 h32 = state->v[2] /* == seed */ + XXH_PRIME32_5; in XXH32_digest()
2215 h32 += state->total_len_32; in XXH32_digest()
2217 return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); in XXH32_digest()
2229 * as human-readable numbers (large digits first).
2253 * 64-bit hash functions
2295 return ((const xxh_unalign64*)ptr)->u64; in XXH_read64()
2302 * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2332 /* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
2340 | ((xxh_u64)bytePtr[2] << 16) in XXH_readLE64()
2353 | ((xxh_u64)bytePtr[5] << 16) in XXH_readBE64()
2405 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) in XXH64_round() argument
2407 acc += input * XXH_PRIME64_2; in XXH64_round()
2444 len -= 8; in XXH64_finalize()
2450 len -= 4; in XXH64_finalize()
2455 --len; in XXH64_finalize()
2471 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align) in XXH64_endian_align() argument
2474 if (input==NULL) XXH_ASSERT(len == 0); in XXH64_endian_align()
2477 const xxh_u8* const bEnd = input + len; in XXH64_endian_align()
2478 const xxh_u8* const limit = bEnd - 31; in XXH64_endian_align()
2482 xxh_u64 v4 = seed - XXH_PRIME64_1; in XXH64_endian_align()
2485 v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8; in XXH64_endian_align()
2486 v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8; in XXH64_endian_align()
2487 v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8; in XXH64_endian_align()
2488 v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8; in XXH64_endian_align()
2489 } while (input<limit); in XXH64_endian_align()
2503 return XXH64_finalize(h64, input, len, align); in XXH64_endian_align()
2508 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed) in XXH64() argument
2514 XXH64_update(&state, (const xxh_u8*)input, len); in XXH64()
2518 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ in XXH64()
2519 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); in XXH64()
2522 return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); in XXH64()
2552 statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2; in XXH64_reset()
2553 statePtr->v[1] = seed + XXH_PRIME64_2; in XXH64_reset()
2554 statePtr->v[2] = seed + 0; in XXH64_reset()
2555 statePtr->v[3] = seed - XXH_PRIME64_1; in XXH64_reset()
2561 XXH64_update (XXH64_state_t* state, const void* input, size_t len) in XXH64_update() argument
2563 if (input==NULL) { in XXH64_update()
2568 { const xxh_u8* p = (const xxh_u8*)input; in XXH64_update()
2571 state->total_len += len; in XXH64_update()
2573 if (state->memsize + len < 32) { /* fill in tmp buffer */ in XXH64_update()
2574 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); in XXH64_update()
2575 state->memsize += (xxh_u32)len; in XXH64_update()
2579 if (state->memsize) { /* tmp buffer is full */ in XXH64_update()
2580 XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); in XXH64_update()
2581 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0)); in XXH64_update()
2582 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1)); in XXH64_update()
2583 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2)); in XXH64_update()
2584 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3)); in XXH64_update()
2585 p += 32 - state->memsize; in XXH64_update()
2586 state->memsize = 0; in XXH64_update()
2590 const xxh_u8* const limit = bEnd - 32; in XXH64_update()
2593 state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8; in XXH64_update()
2594 state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8; in XXH64_update()
2595 state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8; in XXH64_update()
2596 state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8; in XXH64_update()
2602 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); in XXH64_update()
2603 state->memsize = (unsigned)(bEnd-p); in XXH64_update()
2616 if (state->total_len >= 32) { in XXH64_digest()
2617 …h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_… in XXH64_digest()
2618 h64 = XXH64_mergeRound(h64, state->v[0]); in XXH64_digest()
2619 h64 = XXH64_mergeRound(h64, state->v[1]); in XXH64_digest()
2620 h64 = XXH64_mergeRound(h64, state->v[2]); in XXH64_digest()
2621 h64 = XXH64_mergeRound(h64, state->v[3]); in XXH64_digest()
2623 h64 = state->v[2] /*seed*/ + XXH_PRIME64_5; in XXH64_digest()
2626 h64 += (xxh_u64) state->total_len; in XXH64_digest()
2628 return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); in XXH64_digest()
2701 * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
2702 * remaining a true 64-bit/128-bit hash function.
2704 * This is done by prioritizing a subset of 64-bit operations that can be
2705 * emulated without too many steps on the average 32-bit machine.
2707 * For example, these two lines seem similar, and run equally fast on 64-bit:
2713 * However, to a 32-bit machine, there is a major difference.
2717 * x.lo ^= (x.hi >> (47 - 32));
2722 * x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
2727 * - All the bits we need are in the upper 32 bits, so we can ignore the lower
2729 * - The shift result will always fit in the lower 32 bits, and therefore,
2734 * - Usable unaligned access
2735 * - A 32-bit or 64-bit ALU
2736 * - If 32-bit, a decent ADC instruction
2737 * - A 32 or 64-bit multiply with a 64-bit result
2738 * - For the 128-bit variant, a decent byteswap helps short inputs.
2740 * The first two are already required by XXH32, and almost all 32-bit and 64-bit
2743 * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
2746 * First of all, Thumb-1 lacks support for the UMULL instruction which
2755 * do a 32->64 multiply with UMULL, and the flexible operand allowing free
2760 * If compiling Thumb-1 for a target which supports ARM instructions, we will
2764 * to specify -march, as you likely meant to compile for a newer architecture.
2770 # warning "XXH3 is highly inefficient without ARM or Thumb-2."
2808 XXH_NEON = 4, /*!< NEON for most ARMv7-A and all AArch64 */
2809 XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
2867 # define XXH_ACC_ALIGN 16
2871 # define XXH_ACC_ALIGN 16
2873 # define XXH_ACC_ALIGN 16
2888 * GCC usually generates the best code with -O3 for xxHash.
2898 * -O2 -mavx2 -march=haswell
2900 * -O2 -mavx2 -mno-avx256-split-unaligned-load
2904 * -O2, but the other one we can't control without "failed to inline always
2909 && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
2911 # pragma GCC optimize("-O2")
2922 * To do the same operation, the 128-bit 'Q' register needs to be split into
2923 * two 64-bit 'D' registers, performing this operation::
2926 * | '---------. .--------' |
2928 * | .---------' '--------. |
2932 * completely different than the fastest method for ARMv7-A.
2934 * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
2936 * will only affect bits 8-15 of AX on x86.
2941 * On ARMv7-A, this strangely modifies both parameters in place instead of
2942 * taking the usual 3-operand form.
2944 * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
2946 * halves where we want - all in one instruction.
2961 * aarch64 cannot access the high bits of a Q-form register, and writes to a
2962 * D-form register zero the high bits, similar to how writes to W-form scalar
2984 * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
2988 * Function-like macro:
3002 /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */ \
3003 … /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
3024 * emulated 64-bit arithmetic is too slow.
3028 * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
3029 * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
3033 * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
3040 * This change benefits CPUs with large micro-op buffers without negatively affecting
3044 * |:----------------------|:--------------------|----------:|-----------:|------:|
3045 * | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
3046 * | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
3047 * | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
3098 # warning "-maltivec=be is not recommended. Please use native endianness."
3173 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
3216 * @brief Calculates a 32-bit to 64-bit long multiply.
3225 * If you are compiling for platforms like Thumb-1 and don't have a better option,
3229 * @return 64-bit product of the low 32 bits of @p x and @p y.
3241 * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
3244 * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
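The downcast-then-upcast trick mentioned above amounts to truncating both operands to 32 bits before widening them for the multiply; a sketch:

    /* portable 32x32->64 multiply: truncate to 32 bits, then widen */
    #define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))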
3250 * @brief Calculates a 64->128-bit long multiply.
3255 * @param lhs , rhs The 64-bit integers to be multiplied
3256 * @return The 128-bit result represented in an @ref XXH128_hash_t.
3264 * On most 64-bit targets, GCC and Clang define a __uint128_t type. in XXH_mult64to128()
3265 * This is usually the best way as it usually uses a native long 64-bit in XXH_mult64to128()
3270 * Despite being a 32-bit platform, Clang (and emscripten) define this type in XXH_mult64to128()
3272 * compiler builtin call which calculates a full 128-bit multiply. in XXH_mult64to128()
3274 * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677 in XXH_mult64to128()
3322 * Portable scalar method. Optimized for 32-bit and 64-bit ALUs. in XXH_mult64to128()
3329 * ---------- in XXH_mult64to128()
3334 * --------- in XXH_mult64to128()
3337 * --------- in XXH_mult64to128()
3347 * in 32-bit ARMv6 and later, which is shown below: in XXH_mult64to128()
3358 * comparable to some 64-bit ALUs. in XXH_mult64to128()
3361 * of 32-bit ADD/ADCs. in XXH_mult64to128()
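When the compiler offers a 128-bit integer type, the whole routine collapses into one widening multiply; a sketch of that path (the function name here is illustrative; the scalar fallback discussed above assembles the same product from four 32x32->64 partial products):

    #if defined(__SIZEOF_INT128__)
    static XXH128_hash_t XXH_mult64to128_via_u128(xxh_u64 lhs, xxh_u64 rhs)
    {
        __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
        XXH128_hash_t r128;
        r128.low64  = (xxh_u64)(product);         /* lower 64 bits of the product */
        r128.high64 = (xxh_u64)(product >> 64);   /* upper 64 bits of the product */
        return r128;
    }
    #endif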
3383 * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
3388 * @param lhs , rhs The 64-bit integers to multiply
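The fold step described above simply XORs the two halves of that 128-bit product back down to 64 bits; a minimal sketch:

    static xxh_u64 XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
    {
        XXH128_hash_t const product = XXH_mult64to128(lhs, rhs);
        return product.low64 ^ product.high64;   /* XOR-fold 128 -> 64 bits */
    }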
3408 * suitable when input bits are already partially mixed
3421 * preferable when input has not been previously mixed
3438 * sub-optimal on short lengths. It used an iterative algorithm which strongly
3445 * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
3452 * At very short lengths, there isn't enough input to fully hide secrets, or use
3468 XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) in XXH3_len_1to3_64b() argument
3470 XXH_ASSERT(input != NULL); in XXH3_len_1to3_64b()
3474 * len = 1: combined = { input[0], 0x01, input[0], input[0] } in XXH3_len_1to3_64b()
3475 * len = 2: combined = { input[1], 0x02, input[0], input[1] } in XXH3_len_1to3_64b()
3476 * len = 3: combined = { input[2], 0x03, input[0], input[1] } in XXH3_len_1to3_64b()
3478 { xxh_u8 const c1 = input[0]; in XXH3_len_1to3_64b()
3479 xxh_u8 const c2 = input[len >> 1]; in XXH3_len_1to3_64b()
3480 xxh_u8 const c3 = input[len - 1]; in XXH3_len_1to3_64b()
3481 xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) in XXH3_len_1to3_64b()
3490 XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) in XXH3_len_4to8_64b() argument
3492 XXH_ASSERT(input != NULL); in XXH3_len_4to8_64b()
3496 { xxh_u32 const input1 = XXH_readLE32(input); in XXH3_len_4to8_64b()
3497 xxh_u32 const input2 = XXH_readLE32(input + len - 4); in XXH3_len_4to8_64b()
3498 xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; in XXH3_len_4to8_64b()
3506 XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) in XXH3_len_9to16_64b() argument
3508 XXH_ASSERT(input != NULL); in XXH3_len_9to16_64b()
3510 XXH_ASSERT(9 <= len && len <= 16); in XXH3_len_9to16_64b()
3512 xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; in XXH3_len_9to16_64b()
3513 xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; in XXH3_len_9to16_64b()
3514 xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; in XXH3_len_9to16_64b()
3523 XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) in XXH3_len_0to16_64b() argument
3525 XXH_ASSERT(len <= 16); in XXH3_len_0to16_64b()
3526 { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); in XXH3_len_0to16_64b()
3527 if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); in XXH3_len_0to16_64b()
3528 if (len) return XXH3_len_1to3_64b(input, len, secret, seed); in XXH3_len_0to16_64b()
3534 * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
3540 * unseeded non-cryptographic hashes, it does not attempt to defend itself
3549 * function that is only called up to 16 times per hash with up to 240 bytes of
3550 * input.
3552 * This is not too bad for a non-cryptographic hash function, especially with
3555 * The 128-bit variant (which trades some speed for strength) is NOT affected
3559 XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input, in XXH3_mix16B() argument
3567 * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in in XXH3_mix16B()
3573 * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600, in XXH3_mix16B()
3582 { xxh_u64 const input_lo = XXH_readLE64(input); in XXH3_mix16B()
3583 xxh_u64 const input_hi = XXH_readLE64(input+8); in XXH3_mix16B()
3586 input_hi ^ (XXH_readLE64(secret+8) - seed64) in XXH3_mix16B()
3591 /* For mid-range keys, XXH3 uses a Mum-hash variant. */
3593 XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len, in XXH3_len_17to128_64b() argument
3598 XXH_ASSERT(16 < len && len <= 128); in XXH3_len_17to128_64b()
3604 acc += XXH3_mix16B(input+48, secret+96, seed); in XXH3_len_17to128_64b()
3605 acc += XXH3_mix16B(input+len-64, secret+112, seed); in XXH3_len_17to128_64b()
3607 acc += XXH3_mix16B(input+32, secret+64, seed); in XXH3_len_17to128_64b()
3608 acc += XXH3_mix16B(input+len-48, secret+80, seed); in XXH3_len_17to128_64b()
3610 acc += XXH3_mix16B(input+16, secret+32, seed); in XXH3_len_17to128_64b()
3611 acc += XXH3_mix16B(input+len-32, secret+48, seed); in XXH3_len_17to128_64b()
3613 acc += XXH3_mix16B(input+0, secret+0, seed); in XXH3_len_17to128_64b()
3614 acc += XXH3_mix16B(input+len-16, secret+16, seed); in XXH3_len_17to128_64b()
3623 XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len, in XXH3_len_129to240_64b() argument
3634 int const nbRounds = (int)len / 16; in XXH3_len_129to240_64b()
3637 acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed); in XXH3_len_129to240_64b()
3646 * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86. in XXH3_len_129to240_64b()
3649 * For 64->128-bit multiplies, even if the NEON was 100% optimal, it in XXH3_len_129to240_64b()
3667 acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed); in XXH3_len_129to240_64b()
3670 …acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed… in XXH3_len_129to240_64b()
3703 /* the following type must have a width of 64-bit */
3713 * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
3716  * We harden it by mixing the original input into the accumulators as well as the product.
3719 * original input is preserved.
3721 * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
3722 * cross-pollination, as otherwise the upper and lower halves would be
3725 * This doesn't matter on 64-bit hashes since they all get merged together in
3740 const void* XXH_RESTRICT input, in XXH3_accumulate_512_avx512() argument
3748 /* data_vec = input[0]; */ in XXH3_accumulate_512_avx512()
3749 __m512i const data_vec = _mm512_loadu_si512 (input); in XXH3_accumulate_512_avx512()
3771 * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
3819 …st seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64)); in XXH3_initCustomSecret_avx512()
3849 const void* XXH_RESTRICT input, in XXH3_accumulate_512_avx2() argument
3856 const __m256i* const xinput = (const __m256i *) input; in XXH3_accumulate_512_avx2()
3917 … __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - … in XXH3_initCustomSecret_avx2()
3925 * - do not extract the secret from sse registers in the internal loop in XXH3_initCustomSecret_avx2()
3926 * - use less common registers, and avoid pushing these reg into stack in XXH3_initCustomSecret_avx2()
3933 /* GCC -O2 need unroll loop manually */ in XXH3_initCustomSecret_avx2()
3954 const void* XXH_RESTRICT input, in XXH3_accumulate_512_sse2() argument
3957 /* SSE2 is just a half-scale version of the AVX2 version. */ in XXH3_accumulate_512_sse2()
3962 const __m128i* const xinput = (const __m128i *) input; in XXH3_accumulate_512_sse2()
4024 XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) }; in XXH3_initCustomSecret_sse2()
4027 __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64); in XXH3_initCustomSecret_sse2()
4036 * - do not extract the secret from sse registers in the internal loop in XXH3_initCustomSecret_sse2()
4037 * - use less common registers, and avoid pushing these reg into stack in XXH3_initCustomSecret_sse2()
4055 XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
4074 const void* XXH_RESTRICT input, in XXH3_accumulate_512_neon() argument
4082 uint8_t const* const xinput = (const uint8_t *) input; in XXH3_accumulate_512_neon()
4089 uint8x16_t data_vec = vld1q_u8(xinput + (i * 16)); in XXH3_accumulate_512_neon()
4091 uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16)); in XXH3_accumulate_512_neon()
4110 XXH3_scalarRound(acc, input, secret, i); in XXH3_accumulate_512_neon()
4133 uint8x16_t key_vec = vld1q_u8 (xsecret + (i * 16)); in XXH3_scrambleAcc_neon()
4153 * However, unlike SSE, Clang lacks a 64-bit multiply routine in XXH3_scrambleAcc_neon()
4154 * for NEON, and it scalarizes two 64-bit multiplies instead. in XXH3_scrambleAcc_neon()
4180 const void* XXH_RESTRICT input, in XXH3_accumulate_512_vsx() argument
4185 xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */ in XXH3_accumulate_512_vsx()
4246 /* scalar variants - universal */
4257 void const* XXH_RESTRICT input, in XXH3_scalarRound() argument
4262 xxh_u8 const* xinput = (xxh_u8 const*) input; in XXH3_scalarRound()
4265 XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); in XXH3_scalarRound()
4280 const void* XXH_RESTRICT input, in XXH3_accumulate_512_scalar() argument
4285 XXH3_scalarRound(acc, input, secret, i); in XXH3_accumulate_512_scalar()
4303 XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0); in XXH3_scalarScrambleRound()
4333 * which requires a non-const pointer. in XXH3_initCustomSecret_scalar()
4345 * While MOVK is great for generating constants (2 cycles for a 64-bit in XXH3_initCustomSecret_scalar()
4380 { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16; in XXH3_initCustomSecret_scalar()
4389 xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; in XXH3_initCustomSecret_scalar()
4390 xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; in XXH3_initCustomSecret_scalar()
4391 XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); in XXH3_initCustomSecret_scalar()
4392 XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); in XXH3_initCustomSecret_scalar()
4461 const xxh_u8* XXH_RESTRICT input, in XXH3_accumulate() argument
4468 const xxh_u8* const in = input + n*XXH_STRIPE_LEN; in XXH3_accumulate()
4478 const xxh_u8* XXH_RESTRICT input, size_t len, in XXH3_hashLong_internal_loop() argument
4483 size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; in XXH3_hashLong_internal_loop()
4485 size_t const nb_blocks = (len - 1) / block_len; in XXH3_hashLong_internal_loop()
4492 XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512); in XXH3_hashLong_internal_loop()
4493 f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); in XXH3_hashLong_internal_loop()
4498 { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; in XXH3_hashLong_internal_loop()
4500 XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512); in XXH3_hashLong_internal_loop()
4503 { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; in XXH3_hashLong_internal_loop()
4505 f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START); in XXH3_hashLong_internal_loop()
4524 result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i); in XXH3_mergeAccs()
4531 * Prevent autovectorization on Clang ARMv7-a. Exact same problem as in XXH3_mergeAccs()
4548 XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len, in XXH3_hashLong_64b_internal() argument
4555 …XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_a… in XXH3_hashLong_64b_internal()
4571 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len, in XXH3_hashLong_64b_withSecret() argument
4575 …return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambl… in XXH3_hashLong_64b_withSecret()
4585 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, in XXH3_hashLong_64b_default() argument
4589 …return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_… in XXH3_hashLong_64b_default()
4604 XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, in XXH3_hashLong_64b_withSeed_internal() argument
4611 return XXH3_hashLong_64b_internal(input, len, in XXH3_hashLong_64b_withSeed_internal()
4616 return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), in XXH3_hashLong_64b_withSeed_internal()
4625 XXH3_hashLong_64b_withSeed(const void* input, size_t len, in XXH3_hashLong_64b_withSeed() argument
4629 return XXH3_hashLong_64b_withSeed_internal(input, len, seed, in XXH3_hashLong_64b_withSeed()
4638 XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, in XXH3_64bits_internal() argument
4646 * For now, it's a contract pre-condition. in XXH3_64bits_internal()
4650 if (len <= 16) in XXH3_64bits_internal()
4651 return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); in XXH3_64bits_internal()
4653 … return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); in XXH3_64bits_internal()
4655 … return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); in XXH3_64bits_internal()
4656 return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); in XXH3_64bits_internal()
4663 XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len) in XXH3_64bits() argument
4665 …return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_d… in XXH3_64bits()
4670 XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) in XXH3_64bits_withSecret() argument
4672 return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); in XXH3_64bits_withSecret()
4677 XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) in XXH3_64bits_withSeed() argument
4679 …return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64… in XXH3_64bits_withSeed()
4683 XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize,… in XXH3_64bits_withSecretandSeed() argument
4686 return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); in XXH3_64bits_withSecretandSeed()
4687 return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize); in XXH3_64bits_withSecretandSeed()
4698  * malloc typically guarantees 16-byte alignment on 64-bit systems and 8-byte
4699  * alignment on 32-bit. This isn't enough for the 32-byte aligned loads in AVX2
4700  * or, on 32-bit, the 16-byte aligned loads in SSE2 and NEON.
4719 XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */ in XXH_alignedMalloc()
4730 size_t offset = align - ((size_t)base & (align - 1)); /* base % align */ in XXH_alignedMalloc()
4731 /* Add the offset for the now-aligned pointer */ in XXH_alignedMalloc()
4737 ptr[-1] = (xxh_u8)offset; in XXH_alignedMalloc()
4752 xxh_u8 offset = ptr[-1]; in XXH_alignedFree()
4754 xxh_u8* base = ptr - offset; in XXH_alignedFree()
4787 size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; in XXH3_reset_internal()
4792 statePtr->acc[0] = XXH_PRIME32_3; in XXH3_reset_internal()
4793 statePtr->acc[1] = XXH_PRIME64_1; in XXH3_reset_internal()
4794 statePtr->acc[2] = XXH_PRIME64_2; in XXH3_reset_internal()
4795 statePtr->acc[3] = XXH_PRIME64_3; in XXH3_reset_internal()
4796 statePtr->acc[4] = XXH_PRIME64_4; in XXH3_reset_internal()
4797 statePtr->acc[5] = XXH_PRIME32_2; in XXH3_reset_internal()
4798 statePtr->acc[6] = XXH_PRIME64_5; in XXH3_reset_internal()
4799 statePtr->acc[7] = XXH_PRIME32_1; in XXH3_reset_internal()
4800 statePtr->seed = seed; in XXH3_reset_internal()
4801 statePtr->useSeed = (seed != 0); in XXH3_reset_internal()
4802 statePtr->extSecret = (const unsigned char*)secret; in XXH3_reset_internal()
4804 statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; in XXH3_reset_internal()
4805 statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; in XXH3_reset_internal()
4834 if ((seed != statePtr->seed) || (statePtr->extSecret != NULL)) in XXH3_64bits_reset_withSeed()
4835 XXH3_initCustomSecret(statePtr->customSecret, seed); in XXH3_64bits_reset_withSeed()
4848 statePtr->useSeed = 1; /* always, even if seed64==0 */ in XXH3_64bits_reset_withSecretandSeed()
4853  * there must be a guarantee that at least one more byte will be consumed from input
4858 const xxh_u8* XXH_RESTRICT input, size_t nbStripes, in XXH3_consumeStripes() argument
4865 if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) { in XXH3_consumeStripes()
4867 size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr; in XXH3_consumeStripes()
4868 size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock; in XXH3_consumeStripes()
4869 …XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEn… in XXH3_consumeStripes()
4871 …XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, … in XXH3_consumeStripes()
4874 …XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_… in XXH3_consumeStripes()
4889 const xxh_u8* XXH_RESTRICT input, size_t len, in XXH3_update() argument
4893 if (input==NULL) { in XXH3_update()
4899 { const xxh_u8* const bEnd = input + len; in XXH3_update()
4900 …const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extS… in XXH3_update()
4906 XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc)); in XXH3_update()
4908 xxh_u64* XXH_RESTRICT const acc = state->acc; in XXH3_update()
4910 state->totalLen += len; in XXH3_update()
4911 XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); in XXH3_update()
4913 /* small input : just fill in tmp buffer */ in XXH3_update()
4914 if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { in XXH3_update()
4915 XXH_memcpy(state->buffer + state->bufferedSize, input, len); in XXH3_update()
4916 state->bufferedSize += (XXH32_hash_t)len; in XXH3_update()
4920 /* total input is now > XXH3_INTERNALBUFFER_SIZE */ in XXH3_update()
4928 if (state->bufferedSize) { in XXH3_update()
4929 size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; in XXH3_update()
4930 XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); in XXH3_update()
4931 input += loadSize; in XXH3_update()
4933 &state->nbStripesSoFar, state->nbStripesPerBlock, in XXH3_update()
4934 state->buffer, XXH3_INTERNALBUFFER_STRIPES, in XXH3_update()
4935 secret, state->secretLimit, in XXH3_update()
4937 state->bufferedSize = 0; in XXH3_update()
4939 XXH_ASSERT(input < bEnd); in XXH3_update()
4941 /* large input to consume : ingest per full block */ in XXH3_update()
4942 if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) { in XXH3_update()
4943 size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN; in XXH3_update()
4944 XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar); in XXH3_update()
4946 { size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar; in XXH3_update()
4948 …XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToE… in XXH3_update()
4949 f_scramble(acc, secret + state->secretLimit); in XXH3_update()
4950 state->nbStripesSoFar = 0; in XXH3_update()
4951 input += nbStripesToEnd * XXH_STRIPE_LEN; in XXH3_update()
4952 nbStripes -= nbStripesToEnd; in XXH3_update()
4955 while(nbStripes >= state->nbStripesPerBlock) { in XXH3_update()
4956 XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512); in XXH3_update()
4957 f_scramble(acc, secret + state->secretLimit); in XXH3_update()
4958 input += state->nbStripesPerBlock * XXH_STRIPE_LEN; in XXH3_update()
4959 nbStripes -= state->nbStripesPerBlock; in XXH3_update()
4962 XXH3_accumulate(acc, input, secret, nbStripes, f_acc512); in XXH3_update()
4963 input += nbStripes * XXH_STRIPE_LEN; in XXH3_update()
4964 XXH_ASSERT(input < bEnd); /* at least some bytes left */ in XXH3_update()
4965 state->nbStripesSoFar = nbStripes; in XXH3_update()
4967 …XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STR… in XXH3_update()
4968 XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN); in XXH3_update()
4971 /* Consume input by a multiple of internal buffer size */ in XXH3_update()
4972 if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { in XXH3_update()
4973 const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE; in XXH3_update()
4976 &state->nbStripesSoFar, state->nbStripesPerBlock, in XXH3_update()
4977 input, XXH3_INTERNALBUFFER_STRIPES, in XXH3_update()
4978 secret, state->secretLimit, in XXH3_update()
4980 input += XXH3_INTERNALBUFFER_SIZE; in XXH3_update()
4981 } while (input<limit); in XXH3_update()
4983 …XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STR… in XXH3_update()
4987 /* Some remaining input (always) : buffer it */ in XXH3_update()
4988 XXH_ASSERT(input < bEnd); in XXH3_update()
4989 XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE); in XXH3_update()
4990 XXH_ASSERT(state->bufferedSize == 0); in XXH3_update()
4991 XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); in XXH3_update()
4992 state->bufferedSize = (XXH32_hash_t)(bEnd-input); in XXH3_update()
4995 memcpy(state->acc, acc, sizeof(acc)); in XXH3_update()
5004 XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len) in XXH3_64bits_update() argument
5006 return XXH3_update(state, (const xxh_u8*)input, len, in XXH3_64bits_update()
5018 * continue ingesting more input afterwards. in XXH3_digest_long()
5020 XXH_memcpy(acc, state->acc, sizeof(state->acc)); in XXH3_digest_long()
5021 if (state->bufferedSize >= XXH_STRIPE_LEN) { in XXH3_digest_long()
5022 size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; in XXH3_digest_long()
5023 size_t nbStripesSoFar = state->nbStripesSoFar; in XXH3_digest_long()
5025 &nbStripesSoFar, state->nbStripesPerBlock, in XXH3_digest_long()
5026 state->buffer, nbStripes, in XXH3_digest_long()
5027 secret, state->secretLimit, in XXH3_digest_long()
5031 state->buffer + state->bufferedSize - XXH_STRIPE_LEN, in XXH3_digest_long()
5032 secret + state->secretLimit - XXH_SECRET_LASTACC_START); in XXH3_digest_long()
5035 size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; in XXH3_digest_long()
5036 XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ in XXH3_digest_long()
5037 XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); in XXH3_digest_long()
5038 XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); in XXH3_digest_long()
5041 secret + state->secretLimit - XXH_SECRET_LASTACC_START); in XXH3_digest_long()
5048 …const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extS… in XXH3_64bits_digest()
5049 if (state->totalLen > XXH3_MIDSIZE_MAX) { in XXH3_64bits_digest()
5054 (xxh_u64)state->totalLen * XXH_PRIME64_1); in XXH3_64bits_digest()
5056 /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ in XXH3_64bits_digest()
5057 if (state->useSeed) in XXH3_64bits_digest()
5058 return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); in XXH3_64bits_digest()
5059 return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), in XXH3_64bits_digest()
5060 secret, state->secretLimit + XXH_STRIPE_LEN); in XXH3_64bits_digest()
5068 * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
5071 * For example, extra steps are taken to avoid the seed-dependent collisions
5072 * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
5075 * lengths. Note that longer hashes are about as fast as the 64-bit version
5076 * due to it using only a slight modification of the 64-bit loop.
5078 * XXH128 is also more oriented towards 64-bit machines. It is still extremely
5079 * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
5083 XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) in XXH3_len_1to3_128b() argument
5086 XXH_ASSERT(input != NULL); in XXH3_len_1to3_128b()
5090 * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } in XXH3_len_1to3_128b()
5091 * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } in XXH3_len_1to3_128b()
5092 * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } in XXH3_len_1to3_128b()
5094 { xxh_u8 const c1 = input[0]; in XXH3_len_1to3_128b()
5095 xxh_u8 const c2 = input[len >> 1]; in XXH3_len_1to3_128b()
5096 xxh_u8 const c3 = input[len - 1]; in XXH3_len_1to3_128b()
5097 xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) in XXH3_len_1to3_128b()
5101 xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; in XXH3_len_1to3_128b()
5112 XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) in XXH3_len_4to8_128b() argument
5114 XXH_ASSERT(input != NULL); in XXH3_len_4to8_128b()
5118 { xxh_u32 const input_lo = XXH_readLE32(input); in XXH3_len_4to8_128b()
5119 xxh_u32 const input_hi = XXH_readLE32(input + len - 4); in XXH3_len_4to8_128b()
5121 xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed; in XXH3_len_4to8_128b()
5139 XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) in XXH3_len_9to16_128b() argument
5141 XXH_ASSERT(input != NULL); in XXH3_len_9to16_128b()
5143 XXH_ASSERT(9 <= len && len <= 16); in XXH3_len_9to16_128b()
5144 { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; in XXH3_len_9to16_128b()
5146 xxh_u64 const input_lo = XXH_readLE64(input); in XXH3_len_9to16_128b()
5147 xxh_u64 input_hi = XXH_readLE64(input + len - 8); in XXH3_len_9to16_128b()
5153 m128.low64 += (xxh_u64)(len - 1) << 54; in XXH3_len_9to16_128b()
5160 * The best approach to this operation is different on 32-bit and 64-bit. in XXH3_len_9to16_128b()
5162 if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ in XXH3_len_9to16_128b()
5164 * 32-bit optimized version, which is more readable. in XXH3_len_9to16_128b()
5166 * On 32-bit, it removes an ADC and delays a dependency between the two in XXH3_len_9to16_128b()
5167 * halves of m128.high64, but it generates an extra mask on 64-bit. in XXH3_len_9to16_128b()
5172 * 64-bit optimized (albeit more confusing) version. in XXH3_len_9to16_128b()
5182 * Inverse Property: x + y - x == y in XXH3_len_9to16_128b()
5183 * a + (b * (1 + c - 1)) in XXH3_len_9to16_128b()
5185 * a + (b * 1) + (b * (c - 1)) in XXH3_len_9to16_128b()
5187 * a + b + (b * (c - 1)) in XXH3_len_9to16_128b()
5190 * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) in XXH3_len_9to16_128b()
5193 * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) in XXH3_len_9to16_128b()
5195 m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); in XXH3_len_9to16_128b()
5214 XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) in XXH3_len_0to16_128b() argument
5216 XXH_ASSERT(len <= 16); in XXH3_len_0to16_128b()
5217 { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); in XXH3_len_0to16_128b()
5218 if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); in XXH3_len_0to16_128b()
5219 if (len) return XXH3_len_1to3_128b(input, len, secret, seed); in XXH3_len_0to16_128b()
5238 acc.high64 += XXH3_mix16B (input_2, secret+16, seed); in XXH128_mix32B()
5245 XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, in XXH3_len_17to128_128b() argument
5250 XXH_ASSERT(16 < len && len <= 128); in XXH3_len_17to128_128b()
5258 acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed); in XXH3_len_17to128_128b()
5260 acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed); in XXH3_len_17to128_128b()
5262 acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed); in XXH3_len_17to128_128b()
5264 acc = XXH128_mix32B(acc, input, input+len-16, secret, seed); in XXH3_len_17to128_128b()
5269 + ((len - seed) * XXH_PRIME64_2); in XXH3_len_17to128_128b()
5271 h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); in XXH3_len_17to128_128b()
5278 XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len, in XXH3_len_129to240_128b() argument
5292 input + (32 * i), in XXH3_len_129to240_128b()
5293 input + (32 * i) + 16, in XXH3_len_129to240_128b()
5302 input + (32 * i), in XXH3_len_129to240_128b()
5303 input + (32 * i) + 16, in XXH3_len_129to240_128b()
5304 secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)), in XXH3_len_129to240_128b()
5309 input + len - 16, in XXH3_len_129to240_128b()
5310 input + len - 32, in XXH3_len_129to240_128b()
5311 secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16, in XXH3_len_129to240_128b()
5312 0ULL - seed); in XXH3_len_129to240_128b()
5318 + ((len - seed) * XXH_PRIME64_2); in XXH3_len_129to240_128b()
5320 h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64); in XXH3_len_129to240_128b()
5327 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len, in XXH3_hashLong_128b_internal() argument
5334 …XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramb… in XXH3_hashLong_128b_internal()
5345 - sizeof(acc) - XXH_SECRET_MERGEACCS_START, in XXH3_hashLong_128b_internal()
5355 XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, in XXH3_hashLong_128b_default() argument
5360 return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), in XXH3_hashLong_128b_default()
5369 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, in XXH3_hashLong_128b_withSecret() argument
5374 return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, in XXH3_hashLong_128b_withSecret()
5379 XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, in XXH3_hashLong_128b_withSeed_internal() argument
5386 return XXH3_hashLong_128b_internal(input, len, in XXH3_hashLong_128b_withSeed_internal()
5391 return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), in XXH3_hashLong_128b_withSeed_internal()
5400 XXH3_hashLong_128b_withSeed(const void* input, size_t len, in XXH3_hashLong_128b_withSeed() argument
5404 return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, in XXH3_hashLong_128b_withSeed()
5412 XXH3_128bits_internal(const void* input, size_t len, in XXH3_128bits_internal() argument
5420 * For now, it's a contract pre-condition. in XXH3_128bits_internal()
5423 if (len <= 16) in XXH3_128bits_internal()
5424 return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); in XXH3_128bits_internal()
5426 … return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); in XXH3_128bits_internal()
5428 …return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); in XXH3_128bits_internal()
5429 return f_hl128(input, len, seed64, secret, secretLen); in XXH3_128bits_internal()
5436 XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len) in XXH3_128bits() argument
5438 return XXH3_128bits_internal(input, len, 0, in XXH3_128bits()
5445 XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize) in XXH3_128bits_withSecret() argument
5447 return XXH3_128bits_internal(input, len, 0, in XXH3_128bits_withSecret()
5454 XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed) in XXH3_128bits_withSeed() argument
5456 return XXH3_128bits_internal(input, len, seed, in XXH3_128bits_withSeed()
5463 XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize… in XXH3_128bits_withSecretandSeed() argument
5466 return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL); in XXH3_128bits_withSecretandSeed()
5467 return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize); in XXH3_128bits_withSecretandSeed()
5472 XXH128(const void* input, size_t len, XXH64_hash_t seed) in XXH128() argument
5474 return XXH3_128bits_withSeed(input, len, seed); in XXH128()
5478 /* === XXH3 128-bit streaming === */
5481 * All initialization and update functions are identical to 64-bit streaming variant.
5515 XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len) in XXH3_128bits_update() argument
5517 return XXH3_update(state, (const xxh_u8*)input, len, in XXH3_128bits_update()
5524 …const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extS… in XXH3_128bits_digest()
5525 if (state->totalLen > XXH3_MIDSIZE_MAX) { in XXH3_128bits_digest()
5528 XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START); in XXH3_128bits_digest()
5532 (xxh_u64)state->totalLen * XXH_PRIME64_1); in XXH3_128bits_digest()
5534 secret + state->secretLimit + XXH_STRIPE_LEN in XXH3_128bits_digest()
5535 - sizeof(acc) - XXH_SECRET_MERGEACCS_START, in XXH3_128bits_digest()
5536 ~((xxh_u64)state->totalLen * XXH_PRIME64_2)); in XXH3_128bits_digest()
5541 if (state->seed) in XXH3_128bits_digest()
5542 return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); in XXH3_128bits_digest()
5543 return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen), in XXH3_128bits_digest()
5544 secret, state->secretLimit + XXH_STRIPE_LEN); in XXH3_128bits_digest()
5547 /* 128-bit utility functions */
5568 int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64); in XXH128_cmp()
5571 return (h1.low64 > h2.low64) - (h2.low64 > h1.low64); in XXH128_cmp()
5595 h.low64 = XXH_readBE64(src->digest + 8); in XXH128_hashFromCanonical()
5636 /* Fill secretBuffer with a copy of customSeed - repeat as needed */ in XXH3_generateSecret()
5639 size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize); in XXH3_generateSecret()
5644 { size_t const nbSeg16 = secretSize / 16; in XXH3_generateSecret()
5650 XXH3_combine16((char*)secretBuffer + n*16, h128); in XXH3_generateSecret()
5653 XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler)); in XXH3_generateSecret()
5673 && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */