xref: /freebsd/sys/contrib/openzfs/module/zstd/lib/common/xxhash.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2 /*
3  *  xxHash - Fast Hash algorithm
4  *  Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
5  *
6  *  You can contact the author at :
7  *  - xxHash homepage: http://www.xxhash.com
8  *  - xxHash source repository : https://github.com/Cyan4973/xxHash
9  *
10  * This source code is licensed under both the BSD-style license (found in the
11  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12  * in the COPYING file in the root directory of this source tree).
13  * You may select, at your option, one of the above-listed licenses.
14 */
15 
16 
17 /* *************************************
18 *  Tuning parameters
19 ***************************************/
20 /*!XXH_FORCE_MEMORY_ACCESS :
21  * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
22  * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
23  * The below switch allow to select different access method for improved performance.
24  * Method 0 (default) : use `memcpy()`. Safe and portable.
25  * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
26  *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
27  * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
28  *            It can generate buggy code on targets which do not support unaligned memory accesses.
29  *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
30  * See http://stackoverflow.com/a/32095106/646947 for details.
31  * Prefer these methods in priority order (0 > 1 > 2)
32  */
33 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
34 #  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
35 #    define XXH_FORCE_MEMORY_ACCESS 2
36 #  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
37   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
38   defined(__ICCARM__)
39 #    define XXH_FORCE_MEMORY_ACCESS 1
40 #  endif
41 #endif
42 
43 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
44  * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
45  * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
46  * By default, this option is disabled. To enable it, uncomment below define :
47  */
48 /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
49 
50 /*!XXH_FORCE_NATIVE_FORMAT :
51  * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
52  * Results are therefore identical for little-endian and big-endian CPU.
53  * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
54  * Should endian-independence be of no importance for your application, you may set the #define below to 1,
55  * to improve speed for Big-endian CPU.
56  * This option has no impact on Little_Endian CPU.
57  */
58 #ifndef XXH_FORCE_NATIVE_FORMAT   /* can be defined externally */
59 #  define XXH_FORCE_NATIVE_FORMAT 0
60 #endif
61 
62 /*!XXH_FORCE_ALIGN_CHECK :
63  * This is a minor performance trick, only useful with lots of very small keys.
64  * It means : check for aligned/unaligned input.
65  * The check costs one initial branch per hash; set to 0 when the input data
66  * is guaranteed to be aligned.
67  */
68 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
69 #  if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
70 #    define XXH_FORCE_ALIGN_CHECK 0
71 #  else
72 #    define XXH_FORCE_ALIGN_CHECK 1
73 #  endif
74 #endif
75 
76 
77 /* *************************************
78 *  Includes & Memory related functions
79 ***************************************/
80 /* Modify the local functions below should you wish to use some other memory routines */
81 /* for malloc(), free() */
82 #include <stdlib.h>
83 #include <stddef.h>     /* size_t */
XXH_malloc(size_t s)84 static void* XXH_malloc(size_t s) { return malloc(s); }
XXH_free(void * p)85 static void  XXH_free  (void* p)  { free(p); }
86 /* for memcpy() */
87 #include <string.h>
XXH_memcpy(void * dest,const void * src,size_t size)88 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
89 
90 #ifndef XXH_STATIC_LINKING_ONLY
91 #  define XXH_STATIC_LINKING_ONLY
92 #endif
93 #include "xxhash.h"
94 
95 
96 /* *************************************
97 *  Compiler Specific Options
98 ***************************************/
99 #if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
100 #  define INLINE_KEYWORD inline
101 #else
102 #  define INLINE_KEYWORD
103 #endif
104 
105 #if defined(__GNUC__) || defined(__ICCARM__)
106 #  define FORCE_INLINE_ATTR __attribute__((always_inline))
107 #elif defined(_MSC_VER)
108 #  define FORCE_INLINE_ATTR __forceinline
109 #else
110 #  define FORCE_INLINE_ATTR
111 #endif
112 
113 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
114 
115 
116 #ifdef _MSC_VER
117 #  pragma warning(disable : 4127)      /* disable: C4127: conditional expression is constant */
118 #endif
119 
120 
121 /* *************************************
122 *  Basic Types
123 ***************************************/
124 #ifndef MEM_MODULE
125 # define MEM_MODULE
126 # if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
127 #   include <stdint.h>
128     typedef uint8_t  BYTE;
129     typedef uint16_t U16;
130     typedef uint32_t U32;
131     typedef  int32_t S32;
132     typedef uint64_t U64;
133 #  else
134     typedef unsigned char      BYTE;
135     typedef unsigned short     U16;
136     typedef unsigned int       U32;
137     typedef   signed int       S32;
138     typedef unsigned long long U64;   /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
139 #  endif
140 #endif
141 
142 
143 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
144 
145 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
XXH_read32(const void * memPtr)146 static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
XXH_read64(const void * memPtr)147 static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
148 
149 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
150 
151 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
152 /* currently only defined for gcc and icc */
153 typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
154 
XXH_read32(const void * ptr)155 static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
XXH_read64(const void * ptr)156 static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
157 
158 #else
159 
160 /* portable and safe solution. Generally efficient.
161  * see : http://stackoverflow.com/a/32095106/646947
162  */
163 
XXH_read32(const void * memPtr)164 static U32 XXH_read32(const void* memPtr)
165 {
166     U32 val;
167     memcpy(&val, memPtr, sizeof(val));
168     return val;
169 }
170 
XXH_read64(const void * memPtr)171 static U64 XXH_read64(const void* memPtr)
172 {
173     U64 val;
174     memcpy(&val, memPtr, sizeof(val));
175     return val;
176 }
177 
178 #endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
179 
180 
181 /* ****************************************
182 *  Compiler-specific Functions and Macros
183 ******************************************/
184 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
185 
186 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
187 #if defined(_MSC_VER)
188 #  define XXH_rotl32(x,r) _rotl(x,r)
189 #  define XXH_rotl64(x,r) _rotl64(x,r)
190 #else
191 #if defined(__ICCARM__)
192 #  include <intrinsics.h>
193 #  define XXH_rotl32(x,r) __ROR(x,(32 - r))
194 #else
195 #  define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
196 #endif
197 #  define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
198 #endif
199 
200 #if defined(_MSC_VER)     /* Visual Studio */
201 #  define XXH_swap32 _byteswap_ulong
202 #  define XXH_swap64 _byteswap_uint64
203 #elif GCC_VERSION >= 403
204 #  define XXH_swap32 __builtin_bswap32
205 #  define XXH_swap64 __builtin_bswap64
206 #else
XXH_swap32(U32 x)207 static U32 XXH_swap32 (U32 x)
208 {
209     return  ((x << 24) & 0xff000000 ) |
210             ((x <<  8) & 0x00ff0000 ) |
211             ((x >>  8) & 0x0000ff00 ) |
212             ((x >> 24) & 0x000000ff );
213 }
XXH_swap64(U64 x)214 static U64 XXH_swap64 (U64 x)
215 {
216     return  ((x << 56) & 0xff00000000000000ULL) |
217             ((x << 40) & 0x00ff000000000000ULL) |
218             ((x << 24) & 0x0000ff0000000000ULL) |
219             ((x << 8)  & 0x000000ff00000000ULL) |
220             ((x >> 8)  & 0x00000000ff000000ULL) |
221             ((x >> 24) & 0x0000000000ff0000ULL) |
222             ((x >> 40) & 0x000000000000ff00ULL) |
223             ((x >> 56) & 0x00000000000000ffULL);
224 }
225 #endif
226 
227 
228 /* *************************************
229 *  Architecture Macros
230 ***************************************/
231 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
232 
233 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
234 #ifndef XXH_CPU_LITTLE_ENDIAN
235     static const int g_one = 1;
236 #   define XXH_CPU_LITTLE_ENDIAN   (*(const char*)(&g_one))
237 #endif
238 
239 
240 /* ***************************
241 *  Memory reads
242 *****************************/
243 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
244 
XXH_readLE32_align(const void * ptr,XXH_endianess endian,XXH_alignment align)245 FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
246 {
247     if (align==XXH_unaligned)
248         return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
249     else
250         return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
251 }
252 
XXH_readLE32(const void * ptr,XXH_endianess endian)253 FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
254 {
255     return XXH_readLE32_align(ptr, endian, XXH_unaligned);
256 }
257 
XXH_readBE32(const void * ptr)258 static U32 XXH_readBE32(const void* ptr)
259 {
260     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
261 }
262 
XXH_readLE64_align(const void * ptr,XXH_endianess endian,XXH_alignment align)263 FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
264 {
265     if (align==XXH_unaligned)
266         return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
267     else
268         return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
269 }
270 
XXH_readLE64(const void * ptr,XXH_endianess endian)271 FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
272 {
273     return XXH_readLE64_align(ptr, endian, XXH_unaligned);
274 }
275 
XXH_readBE64(const void * ptr)276 static U64 XXH_readBE64(const void* ptr)
277 {
278     return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
279 }
280 
281 
282 /* *************************************
283 *  Macros
284 ***************************************/
285 #define XXH_STATIC_ASSERT(c)   { enum { XXH_static_assert = 1/(int)(!!(c)) }; }    /* use only *after* variable declarations */
286 
287 
288 /* *************************************
289 *  Constants
290 ***************************************/
291 static const U32 PRIME32_1 = 2654435761U;
292 static const U32 PRIME32_2 = 2246822519U;
293 static const U32 PRIME32_3 = 3266489917U;
294 static const U32 PRIME32_4 =  668265263U;
295 static const U32 PRIME32_5 =  374761393U;
296 
297 static const U64 PRIME64_1 = 11400714785074694791ULL;
298 static const U64 PRIME64_2 = 14029467366897019727ULL;
299 static const U64 PRIME64_3 =  1609587929392839161ULL;
300 static const U64 PRIME64_4 =  9650029242287828579ULL;
301 static const U64 PRIME64_5 =  2870177450012600261ULL;
302 
XXH_versionNumber(void)303 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
304 
305 
306 /* **************************
307 *  Utils
308 ****************************/
XXH32_copyState(XXH32_state_t * restrict dstState,const XXH32_state_t * restrict srcState)309 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
310 {
311     memcpy(dstState, srcState, sizeof(*dstState));
312 }
313 
XXH64_copyState(XXH64_state_t * restrict dstState,const XXH64_state_t * restrict srcState)314 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
315 {
316     memcpy(dstState, srcState, sizeof(*dstState));
317 }
318 
319 
320 /* ***************************
321 *  Simple Hash Functions
322 *****************************/
323 
XXH32_round(U32 seed,U32 input)324 static U32 XXH32_round(U32 seed, U32 input)
325 {
326     seed += input * PRIME32_2;
327     seed  = XXH_rotl32(seed, 13);
328     seed *= PRIME32_1;
329     return seed;
330 }
331 
XXH32_endian_align(const void * input,size_t len,U32 seed,XXH_endianess endian,XXH_alignment align)332 FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
333 {
334     const BYTE* p = (const BYTE*)input;
335     const BYTE* bEnd = p + len;
336     U32 h32;
337 #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
338 
339 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
340     if (p==NULL) {
341         len=0;
342         bEnd=p=(const BYTE*)(size_t)16;
343     }
344 #endif
345 
346     if (len>=16) {
347         const BYTE* const limit = bEnd - 16;
348         U32 v1 = seed + PRIME32_1 + PRIME32_2;
349         U32 v2 = seed + PRIME32_2;
350         U32 v3 = seed + 0;
351         U32 v4 = seed - PRIME32_1;
352 
353         do {
354             v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
355             v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
356             v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
357             v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
358         } while (p<=limit);
359 
360         h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
361     } else {
362         h32  = seed + PRIME32_5;
363     }
364 
365     h32 += (U32) len;
366 
367     while (p+4<=bEnd) {
368         h32 += XXH_get32bits(p) * PRIME32_3;
369         h32  = XXH_rotl32(h32, 17) * PRIME32_4 ;
370         p+=4;
371     }
372 
373     while (p<bEnd) {
374         h32 += (*p) * PRIME32_5;
375         h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
376         p++;
377     }
378 
379     h32 ^= h32 >> 15;
380     h32 *= PRIME32_2;
381     h32 ^= h32 >> 13;
382     h32 *= PRIME32_3;
383     h32 ^= h32 >> 16;
384 
385     return h32;
386 }
387 
388 
XXH32(const void * input,size_t len,unsigned int seed)389 XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
390 {
391 #if 0
392     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
393     XXH32_CREATESTATE_STATIC(state);
394     XXH32_reset(state, seed);
395     XXH32_update(state, input, len);
396     return XXH32_digest(state);
397 #else
398     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
399 
400     if (XXH_FORCE_ALIGN_CHECK) {
401         if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
402             if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
403                 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
404             else
405                 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
406     }   }
407 
408     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
409         return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
410     else
411         return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
412 #endif
413 }
414 
415 
XXH64_round(U64 acc,U64 input)416 static U64 XXH64_round(U64 acc, U64 input)
417 {
418     acc += input * PRIME64_2;
419     acc  = XXH_rotl64(acc, 31);
420     acc *= PRIME64_1;
421     return acc;
422 }
423 
XXH64_mergeRound(U64 acc,U64 val)424 static U64 XXH64_mergeRound(U64 acc, U64 val)
425 {
426     val  = XXH64_round(0, val);
427     acc ^= val;
428     acc  = acc * PRIME64_1 + PRIME64_4;
429     return acc;
430 }
431 
XXH64_endian_align(const void * input,size_t len,U64 seed,XXH_endianess endian,XXH_alignment align)432 FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
433 {
434     const BYTE* p = (const BYTE*)input;
435     const BYTE* const bEnd = p + len;
436     U64 h64;
437 #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
438 
439 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
440     if (p==NULL) {
441         len=0;
442         bEnd=p=(const BYTE*)(size_t)32;
443     }
444 #endif
445 
446     if (len>=32) {
447         const BYTE* const limit = bEnd - 32;
448         U64 v1 = seed + PRIME64_1 + PRIME64_2;
449         U64 v2 = seed + PRIME64_2;
450         U64 v3 = seed + 0;
451         U64 v4 = seed - PRIME64_1;
452 
453         do {
454             v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
455             v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
456             v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
457             v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
458         } while (p<=limit);
459 
460         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
461         h64 = XXH64_mergeRound(h64, v1);
462         h64 = XXH64_mergeRound(h64, v2);
463         h64 = XXH64_mergeRound(h64, v3);
464         h64 = XXH64_mergeRound(h64, v4);
465 
466     } else {
467         h64  = seed + PRIME64_5;
468     }
469 
470     h64 += (U64) len;
471 
472     while (p+8<=bEnd) {
473         U64 const k1 = XXH64_round(0, XXH_get64bits(p));
474         h64 ^= k1;
475         h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
476         p+=8;
477     }
478 
479     if (p+4<=bEnd) {
480         h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
481         h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
482         p+=4;
483     }
484 
485     while (p<bEnd) {
486         h64 ^= (*p) * PRIME64_5;
487         h64 = XXH_rotl64(h64, 11) * PRIME64_1;
488         p++;
489     }
490 
491     h64 ^= h64 >> 33;
492     h64 *= PRIME64_2;
493     h64 ^= h64 >> 29;
494     h64 *= PRIME64_3;
495     h64 ^= h64 >> 32;
496 
497     return h64;
498 }
499 
500 
XXH64(const void * input,size_t len,unsigned long long seed)501 XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
502 {
503 #if 0
504     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
505     XXH64_CREATESTATE_STATIC(state);
506     XXH64_reset(state, seed);
507     XXH64_update(state, input, len);
508     return XXH64_digest(state);
509 #else
510     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
511 
512     if (XXH_FORCE_ALIGN_CHECK) {
513         if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
514             if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
515                 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
516             else
517                 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
518     }   }
519 
520     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
521         return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
522     else
523         return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
524 #endif
525 }
526 
527 
528 /* **************************************************
529 *  Advanced Hash Functions
530 ****************************************************/
531 
XXH32_createState(void)532 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
533 {
534     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
535 }
XXH32_freeState(XXH32_state_t * statePtr)536 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
537 {
538     XXH_free(statePtr);
539     return XXH_OK;
540 }
541 
XXH64_createState(void)542 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
543 {
544     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
545 }
XXH64_freeState(XXH64_state_t * statePtr)546 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
547 {
548     XXH_free(statePtr);
549     return XXH_OK;
550 }
551 
552 
553 /*** Hash feed ***/
554 
XXH32_reset(XXH32_state_t * statePtr,unsigned int seed)555 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
556 {
557     XXH32_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
558     memset(&state, 0, sizeof(state)-4);   /* do not write into reserved, for future removal */
559     state.v1 = seed + PRIME32_1 + PRIME32_2;
560     state.v2 = seed + PRIME32_2;
561     state.v3 = seed + 0;
562     state.v4 = seed - PRIME32_1;
563     memcpy(statePtr, &state, sizeof(state));
564     return XXH_OK;
565 }
566 
567 
XXH64_reset(XXH64_state_t * statePtr,unsigned long long seed)568 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
569 {
570     XXH64_state_t state;   /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
571     memset(&state, 0, sizeof(state)-8);   /* do not write into reserved, for future removal */
572     state.v1 = seed + PRIME64_1 + PRIME64_2;
573     state.v2 = seed + PRIME64_2;
574     state.v3 = seed + 0;
575     state.v4 = seed - PRIME64_1;
576     memcpy(statePtr, &state, sizeof(state));
577     return XXH_OK;
578 }
579 
580 
XXH32_update_endian(XXH32_state_t * state,const void * input,size_t len,XXH_endianess endian)581 FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
582 {
583     const BYTE* p = (const BYTE*)input;
584     const BYTE* const bEnd = p + len;
585 
586 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
587     if (input==NULL) return XXH_ERROR;
588 #endif
589 
590     state->total_len_32 += (unsigned)len;
591     state->large_len |= (len>=16) | (state->total_len_32>=16);
592 
593     if (state->memsize + len < 16)  {   /* fill in tmp buffer */
594         XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
595         state->memsize += (unsigned)len;
596         return XXH_OK;
597     }
598 
599     if (state->memsize) {   /* some data left from previous update */
600         XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
601         {   const U32* p32 = state->mem32;
602             state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
603             state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
604             state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
605             state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
606         }
607         p += 16-state->memsize;
608         state->memsize = 0;
609     }
610 
611     if (p <= bEnd-16) {
612         const BYTE* const limit = bEnd - 16;
613         U32 v1 = state->v1;
614         U32 v2 = state->v2;
615         U32 v3 = state->v3;
616         U32 v4 = state->v4;
617 
618         do {
619             v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
620             v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
621             v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
622             v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
623         } while (p<=limit);
624 
625         state->v1 = v1;
626         state->v2 = v2;
627         state->v3 = v3;
628         state->v4 = v4;
629     }
630 
631     if (p < bEnd) {
632         XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
633         state->memsize = (unsigned)(bEnd-p);
634     }
635 
636     return XXH_OK;
637 }
638 
XXH32_update(XXH32_state_t * state_in,const void * input,size_t len)639 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
640 {
641     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
642 
643     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
644         return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
645     else
646         return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
647 }
648 
649 
650 
XXH32_digest_endian(const XXH32_state_t * state,XXH_endianess endian)651 FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
652 {
653     const BYTE * p = (const BYTE*)state->mem32;
654     const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
655     U32 h32;
656 
657     if (state->large_len) {
658         h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
659     } else {
660         h32 = state->v3 /* == seed */ + PRIME32_5;
661     }
662 
663     h32 += state->total_len_32;
664 
665     while (p+4<=bEnd) {
666         h32 += XXH_readLE32(p, endian) * PRIME32_3;
667         h32  = XXH_rotl32(h32, 17) * PRIME32_4;
668         p+=4;
669     }
670 
671     while (p<bEnd) {
672         h32 += (*p) * PRIME32_5;
673         h32  = XXH_rotl32(h32, 11) * PRIME32_1;
674         p++;
675     }
676 
677     h32 ^= h32 >> 15;
678     h32 *= PRIME32_2;
679     h32 ^= h32 >> 13;
680     h32 *= PRIME32_3;
681     h32 ^= h32 >> 16;
682 
683     return h32;
684 }
685 
686 
XXH32_digest(const XXH32_state_t * state_in)687 XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
688 {
689     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
690 
691     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
692         return XXH32_digest_endian(state_in, XXH_littleEndian);
693     else
694         return XXH32_digest_endian(state_in, XXH_bigEndian);
695 }
696 
697 
698 
699 /* **** XXH64 **** */
700 
XXH64_update_endian(XXH64_state_t * state,const void * input,size_t len,XXH_endianess endian)701 FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
702 {
703     const BYTE* p = (const BYTE*)input;
704     const BYTE* const bEnd = p + len;
705 
706 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
707     if (input==NULL) return XXH_ERROR;
708 #endif
709 
710     state->total_len += len;
711 
712     if (state->memsize + len < 32) {  /* fill in tmp buffer */
713         if (input != NULL) {
714             XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
715         }
716         state->memsize += (U32)len;
717         return XXH_OK;
718     }
719 
720     if (state->memsize) {   /* tmp buffer is full */
721         XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
722         state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
723         state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
724         state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
725         state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
726         p += 32-state->memsize;
727         state->memsize = 0;
728     }
729 
730     if (p+32 <= bEnd) {
731         const BYTE* const limit = bEnd - 32;
732         U64 v1 = state->v1;
733         U64 v2 = state->v2;
734         U64 v3 = state->v3;
735         U64 v4 = state->v4;
736 
737         do {
738             v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
739             v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
740             v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
741             v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
742         } while (p<=limit);
743 
744         state->v1 = v1;
745         state->v2 = v2;
746         state->v3 = v3;
747         state->v4 = v4;
748     }
749 
750     if (p < bEnd) {
751         XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
752         state->memsize = (unsigned)(bEnd-p);
753     }
754 
755     return XXH_OK;
756 }
757 
XXH64_update(XXH64_state_t * state_in,const void * input,size_t len)758 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
759 {
760     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
761 
762     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
763         return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
764     else
765         return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
766 }
767 
768 
769 
XXH64_digest_endian(const XXH64_state_t * state,XXH_endianess endian)770 FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
771 {
772     const BYTE * p = (const BYTE*)state->mem64;
773     const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
774     U64 h64;
775 
776     if (state->total_len >= 32) {
777         U64 const v1 = state->v1;
778         U64 const v2 = state->v2;
779         U64 const v3 = state->v3;
780         U64 const v4 = state->v4;
781 
782         h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
783         h64 = XXH64_mergeRound(h64, v1);
784         h64 = XXH64_mergeRound(h64, v2);
785         h64 = XXH64_mergeRound(h64, v3);
786         h64 = XXH64_mergeRound(h64, v4);
787     } else {
788         h64  = state->v3 + PRIME64_5;
789     }
790 
791     h64 += (U64) state->total_len;
792 
793     while (p+8<=bEnd) {
794         U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
795         h64 ^= k1;
796         h64  = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
797         p+=8;
798     }
799 
800     if (p+4<=bEnd) {
801         h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
802         h64  = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
803         p+=4;
804     }
805 
806     while (p<bEnd) {
807         h64 ^= (*p) * PRIME64_5;
808         h64  = XXH_rotl64(h64, 11) * PRIME64_1;
809         p++;
810     }
811 
812     h64 ^= h64 >> 33;
813     h64 *= PRIME64_2;
814     h64 ^= h64 >> 29;
815     h64 *= PRIME64_3;
816     h64 ^= h64 >> 32;
817 
818     return h64;
819 }
820 
821 
XXH64_digest(const XXH64_state_t * state_in)822 XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
823 {
824     XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
825 
826     if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
827         return XXH64_digest_endian(state_in, XXH_littleEndian);
828     else
829         return XXH64_digest_endian(state_in, XXH_bigEndian);
830 }
831 
832 
833 /* **************************
834 *  Canonical representation
835 ****************************/
836 
837 /*! Default XXH result types are basic unsigned 32 and 64 bits.
838 *   The canonical representation follows human-readable write convention, aka big-endian (large digits first).
839 *   These functions allow transformation of hash result into and from its canonical format.
840 *   This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
841 */
842 
XXH32_canonicalFromHash(XXH32_canonical_t * dst,XXH32_hash_t hash)843 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
844 {
845     XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
846     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
847     memcpy(dst, &hash, sizeof(*dst));
848 }
849 
XXH64_canonicalFromHash(XXH64_canonical_t * dst,XXH64_hash_t hash)850 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
851 {
852     XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
853     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
854     memcpy(dst, &hash, sizeof(*dst));
855 }
856 
XXH32_hashFromCanonical(const XXH32_canonical_t * src)857 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
858 {
859     return XXH_readBE32(src);
860 }
861 
XXH64_hashFromCanonical(const XXH64_canonical_t * src)862 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
863 {
864     return XXH_readBE64(src);
865 }
866