xref: /freebsd/sys/contrib/openzfs/module/zstd/lib/common/xxhash.h (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1*61145dc2SMartin Matuska // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
2c03c5b1cSMartin Matuska /*
3c03c5b1cSMartin Matuska  * xxHash - Extremely Fast Hash algorithm
4c03c5b1cSMartin Matuska  * Header File
5c03c5b1cSMartin Matuska  * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
6c03c5b1cSMartin Matuska  *
7c03c5b1cSMartin Matuska  * You can contact the author at :
8c03c5b1cSMartin Matuska  * - xxHash source repository : https://github.com/Cyan4973/xxHash
9c03c5b1cSMartin Matuska  *
10c03c5b1cSMartin Matuska  * This source code is licensed under both the BSD-style license (found in the
11c03c5b1cSMartin Matuska  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12c03c5b1cSMartin Matuska  * in the COPYING file in the root directory of this source tree).
13c03c5b1cSMartin Matuska  * You may select, at your option, one of the above-listed licenses.
14c03c5b1cSMartin Matuska */
15c03c5b1cSMartin Matuska 
16c03c5b1cSMartin Matuska /* Notice extracted from xxHash homepage :
17c03c5b1cSMartin Matuska 
18c03c5b1cSMartin Matuska xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
19c03c5b1cSMartin Matuska It also successfully passes all tests from the SMHasher suite.
20c03c5b1cSMartin Matuska 
21c03c5b1cSMartin Matuska Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
22c03c5b1cSMartin Matuska 
23c03c5b1cSMartin Matuska Name            Speed       Q.Score   Author
24c03c5b1cSMartin Matuska xxHash          5.4 GB/s     10
25c03c5b1cSMartin Matuska CrapWow         3.2 GB/s      2       Andrew
26c03c5b1cSMartin Matuska MumurHash 3a    2.7 GB/s     10       Austin Appleby
27c03c5b1cSMartin Matuska SpookyHash      2.0 GB/s     10       Bob Jenkins
28c03c5b1cSMartin Matuska SBox            1.4 GB/s      9       Bret Mulvey
29c03c5b1cSMartin Matuska Lookup3         1.2 GB/s      9       Bob Jenkins
30c03c5b1cSMartin Matuska SuperFastHash   1.2 GB/s      1       Paul Hsieh
31c03c5b1cSMartin Matuska CityHash64      1.05 GB/s    10       Pike & Alakuijala
32c03c5b1cSMartin Matuska FNV             0.55 GB/s     5       Fowler, Noll, Vo
33c03c5b1cSMartin Matuska CRC32           0.43 GB/s     9
34c03c5b1cSMartin Matuska MD5-32          0.33 GB/s    10       Ronald L. Rivest
35c03c5b1cSMartin Matuska SHA1-32         0.28 GB/s    10
36c03c5b1cSMartin Matuska 
37c03c5b1cSMartin Matuska Q.Score is a measure of quality of the hash function.
38c03c5b1cSMartin Matuska It depends on successfully passing SMHasher test set.
39c03c5b1cSMartin Matuska 10 is a perfect score.
40c03c5b1cSMartin Matuska 
41c03c5b1cSMartin Matuska A 64-bits version, named XXH64, is available since r35.
42c03c5b1cSMartin Matuska It offers much better speed, but for 64-bits applications only.
43c03c5b1cSMartin Matuska Name     Speed on 64 bits    Speed on 32 bits
44c03c5b1cSMartin Matuska XXH64       13.8 GB/s            1.9 GB/s
45c03c5b1cSMartin Matuska XXH32        6.8 GB/s            6.0 GB/s
46c03c5b1cSMartin Matuska */
47c03c5b1cSMartin Matuska 
48c03c5b1cSMartin Matuska #if defined (__cplusplus)
49c03c5b1cSMartin Matuska extern "C" {
50c03c5b1cSMartin Matuska #endif
51c03c5b1cSMartin Matuska 
52c03c5b1cSMartin Matuska #ifndef XXHASH_H_5627135585666179
53c03c5b1cSMartin Matuska #define XXHASH_H_5627135585666179 1
54c03c5b1cSMartin Matuska 
55c03c5b1cSMartin Matuska 
56c03c5b1cSMartin Matuska /* ****************************
57c03c5b1cSMartin Matuska *  Definitions
58c03c5b1cSMartin Matuska ******************************/
59c03c5b1cSMartin Matuska #include <stddef.h>   /* size_t */
60c03c5b1cSMartin Matuska typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
61c03c5b1cSMartin Matuska 
62c03c5b1cSMartin Matuska 
63c03c5b1cSMartin Matuska /* ****************************
64c03c5b1cSMartin Matuska *  API modifier
65c03c5b1cSMartin Matuska ******************************/
66c03c5b1cSMartin Matuska /** XXH_PRIVATE_API
67c03c5b1cSMartin Matuska *   This is useful if you want to include xxhash functions in `static` mode
68c03c5b1cSMartin Matuska *   in order to inline them, and remove their symbol from the public list.
69c03c5b1cSMartin Matuska *   Methodology :
70c03c5b1cSMartin Matuska *     #define XXH_PRIVATE_API
71c03c5b1cSMartin Matuska *     #include "xxhash.h"
72c03c5b1cSMartin Matuska *   `xxhash.c` is automatically included.
73c03c5b1cSMartin Matuska *   It's not useful to compile and link it as a separate module anymore.
74c03c5b1cSMartin Matuska */
75c03c5b1cSMartin Matuska #ifdef XXH_PRIVATE_API
76c03c5b1cSMartin Matuska #  ifndef XXH_STATIC_LINKING_ONLY
77c03c5b1cSMartin Matuska #    define XXH_STATIC_LINKING_ONLY
78c03c5b1cSMartin Matuska #  endif
79c03c5b1cSMartin Matuska #  if defined(__GNUC__)
80c03c5b1cSMartin Matuska #    define XXH_PUBLIC_API static __inline __attribute__((unused))
81c03c5b1cSMartin Matuska #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
82c03c5b1cSMartin Matuska #    define XXH_PUBLIC_API static inline
83c03c5b1cSMartin Matuska #  elif defined(_MSC_VER)
84c03c5b1cSMartin Matuska #    define XXH_PUBLIC_API static __inline
85c03c5b1cSMartin Matuska #  else
86c03c5b1cSMartin Matuska #    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
87c03c5b1cSMartin Matuska #  endif
88c03c5b1cSMartin Matuska #else
89c03c5b1cSMartin Matuska #  define XXH_PUBLIC_API   /* do nothing */
90c03c5b1cSMartin Matuska #endif /* XXH_PRIVATE_API */
91c03c5b1cSMartin Matuska 
92c03c5b1cSMartin Matuska /*!XXH_NAMESPACE, aka Namespace Emulation :
93c03c5b1cSMartin Matuska 
94c03c5b1cSMartin Matuska If you want to include _and expose_ xxHash functions from within your own library,
95c03c5b1cSMartin Matuska but also want to avoid symbol collisions with another library which also includes xxHash,
96c03c5b1cSMartin Matuska 
97c03c5b1cSMartin Matuska you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
98c03c5b1cSMartin Matuska with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values).
99c03c5b1cSMartin Matuska 
100c03c5b1cSMartin Matuska Note that no change is required within the calling program as long as it includes `xxhash.h` :
101c03c5b1cSMartin Matuska regular symbol name will be automatically translated by this header.
102c03c5b1cSMartin Matuska */
103c03c5b1cSMartin Matuska #ifdef XXH_NAMESPACE
104c03c5b1cSMartin Matuska #  define XXH_CAT(A,B) A##B
105c03c5b1cSMartin Matuska #  define XXH_NAME2(A,B) XXH_CAT(A,B)
106c03c5b1cSMartin Matuska #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
107c03c5b1cSMartin Matuska #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
108c03c5b1cSMartin Matuska #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
109c03c5b1cSMartin Matuska #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
110c03c5b1cSMartin Matuska #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
111c03c5b1cSMartin Matuska #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
112c03c5b1cSMartin Matuska #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
113c03c5b1cSMartin Matuska #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
114c03c5b1cSMartin Matuska #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
115c03c5b1cSMartin Matuska #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
116c03c5b1cSMartin Matuska #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
117c03c5b1cSMartin Matuska #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
118c03c5b1cSMartin Matuska #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
119c03c5b1cSMartin Matuska #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
120c03c5b1cSMartin Matuska #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
121c03c5b1cSMartin Matuska #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
122c03c5b1cSMartin Matuska #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
123c03c5b1cSMartin Matuska #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
124c03c5b1cSMartin Matuska #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
125c03c5b1cSMartin Matuska #endif
126c03c5b1cSMartin Matuska 
127c03c5b1cSMartin Matuska 
128c03c5b1cSMartin Matuska /* *************************************
129c03c5b1cSMartin Matuska *  Version
130c03c5b1cSMartin Matuska ***************************************/
131c03c5b1cSMartin Matuska #define XXH_VERSION_MAJOR    0
132c03c5b1cSMartin Matuska #define XXH_VERSION_MINOR    6
133c03c5b1cSMartin Matuska #define XXH_VERSION_RELEASE  2
134c03c5b1cSMartin Matuska #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
135c03c5b1cSMartin Matuska XXH_PUBLIC_API unsigned XXH_versionNumber (void);
136c03c5b1cSMartin Matuska 
137c03c5b1cSMartin Matuska 
138c03c5b1cSMartin Matuska /* ****************************
139c03c5b1cSMartin Matuska *  Simple Hash Functions
140c03c5b1cSMartin Matuska ******************************/
141c03c5b1cSMartin Matuska typedef unsigned int       XXH32_hash_t;
142c03c5b1cSMartin Matuska typedef unsigned long long XXH64_hash_t;
143c03c5b1cSMartin Matuska 
144c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
145c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
146c03c5b1cSMartin Matuska 
147c03c5b1cSMartin Matuska /*!
148c03c5b1cSMartin Matuska XXH32() :
149c03c5b1cSMartin Matuska     Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
150c03c5b1cSMartin Matuska     The memory between input & input+length must be valid (allocated and read-accessible).
151c03c5b1cSMartin Matuska     "seed" can be used to alter the result predictably.
152c03c5b1cSMartin Matuska     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
153c03c5b1cSMartin Matuska XXH64() :
154c03c5b1cSMartin Matuska     Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
155c03c5b1cSMartin Matuska     "seed" can be used to alter the result predictably.
156c03c5b1cSMartin Matuska     This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
157c03c5b1cSMartin Matuska */
158c03c5b1cSMartin Matuska 
159c03c5b1cSMartin Matuska 
160c03c5b1cSMartin Matuska /* ****************************
161c03c5b1cSMartin Matuska *  Streaming Hash Functions
162c03c5b1cSMartin Matuska ******************************/
163c03c5b1cSMartin Matuska typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
164c03c5b1cSMartin Matuska typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
165c03c5b1cSMartin Matuska 
166c03c5b1cSMartin Matuska /*! State allocation, compatible with dynamic libraries */
167c03c5b1cSMartin Matuska 
168c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
169c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
170c03c5b1cSMartin Matuska 
171c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
172c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
173c03c5b1cSMartin Matuska 
174c03c5b1cSMartin Matuska 
175c03c5b1cSMartin Matuska /* hash streaming */
176c03c5b1cSMartin Matuska 
177c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
178c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
179c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
180c03c5b1cSMartin Matuska 
181c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
182c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
183c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
184c03c5b1cSMartin Matuska 
185c03c5b1cSMartin Matuska /*
186c03c5b1cSMartin Matuska These functions generate the xxHash of an input provided in multiple segments.
187c03c5b1cSMartin Matuska Note that, for small input, they are slower than single-call functions, due to state management.
188c03c5b1cSMartin Matuska For small input, prefer `XXH32()` and `XXH64()` .
189c03c5b1cSMartin Matuska 
190c03c5b1cSMartin Matuska XXH state must first be allocated, using XXH*_createState() .
191c03c5b1cSMartin Matuska 
192c03c5b1cSMartin Matuska Start a new hash by initializing state with a seed, using XXH*_reset().
193c03c5b1cSMartin Matuska 
194c03c5b1cSMartin Matuska Then, feed the hash state by calling XXH*_update() as many times as necessary.
195c03c5b1cSMartin Matuska Obviously, input must be allocated and read accessible.
196c03c5b1cSMartin Matuska The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
197c03c5b1cSMartin Matuska 
198c03c5b1cSMartin Matuska Finally, a hash value can be produced anytime, by using XXH*_digest().
199c03c5b1cSMartin Matuska This function returns the nn-bits hash as an int or long long.
200c03c5b1cSMartin Matuska 
201c03c5b1cSMartin Matuska It's still possible to continue inserting input into the hash state after a digest,
202c03c5b1cSMartin Matuska and generate some new hashes later on, by calling again XXH*_digest().
203c03c5b1cSMartin Matuska 
204c03c5b1cSMartin Matuska When done, free XXH state space if it was allocated dynamically.
205c03c5b1cSMartin Matuska */
206c03c5b1cSMartin Matuska 
207c03c5b1cSMartin Matuska 
208c03c5b1cSMartin Matuska /* **************************
209c03c5b1cSMartin Matuska *  Utils
210c03c5b1cSMartin Matuska ****************************/
211c03c5b1cSMartin Matuska #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* ! C99 */
212c03c5b1cSMartin Matuska #  define restrict   /* disable restrict */
213c03c5b1cSMartin Matuska #endif
214c03c5b1cSMartin Matuska 
215c03c5b1cSMartin Matuska XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);
216c03c5b1cSMartin Matuska XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);
217c03c5b1cSMartin Matuska 
218c03c5b1cSMartin Matuska 
219c03c5b1cSMartin Matuska /* **************************
220c03c5b1cSMartin Matuska *  Canonical representation
221c03c5b1cSMartin Matuska ****************************/
222c03c5b1cSMartin Matuska /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
223c03c5b1cSMartin Matuska *  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
224c03c5b1cSMartin Matuska *  These functions allow transformation of hash result into and from its canonical format.
225c03c5b1cSMartin Matuska *  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
226c03c5b1cSMartin Matuska */
227c03c5b1cSMartin Matuska typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
228c03c5b1cSMartin Matuska typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
229c03c5b1cSMartin Matuska 
230c03c5b1cSMartin Matuska XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
231c03c5b1cSMartin Matuska XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
232c03c5b1cSMartin Matuska 
233c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
234c03c5b1cSMartin Matuska XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
235c03c5b1cSMartin Matuska 
236c03c5b1cSMartin Matuska #endif /* XXHASH_H_5627135585666179 */
237c03c5b1cSMartin Matuska 
238c03c5b1cSMartin Matuska 
239c03c5b1cSMartin Matuska 
240c03c5b1cSMartin Matuska /* ================================================================================================
241c03c5b1cSMartin Matuska    This section contains definitions which are not guaranteed to remain stable.
242c03c5b1cSMartin Matuska    They may change in future versions, becoming incompatible with a different version of the library.
243c03c5b1cSMartin Matuska    They shall only be used with static linking.
244c03c5b1cSMartin Matuska    Never use these definitions in association with dynamic linking !
245c03c5b1cSMartin Matuska =================================================================================================== */
246c03c5b1cSMartin Matuska #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
247c03c5b1cSMartin Matuska #define XXH_STATIC_H_3543687687345
248c03c5b1cSMartin Matuska 
249c03c5b1cSMartin Matuska /* These definitions are only meant to allow allocation of XXH state
250c03c5b1cSMartin Matuska    statically, on stack, or in a struct for example.
251c03c5b1cSMartin Matuska    Do not use members directly. */
252c03c5b1cSMartin Matuska 
253c03c5b1cSMartin Matuska    struct XXH32_state_s {
254c03c5b1cSMartin Matuska        unsigned total_len_32;
255c03c5b1cSMartin Matuska        unsigned large_len;
256c03c5b1cSMartin Matuska        unsigned v1;
257c03c5b1cSMartin Matuska        unsigned v2;
258c03c5b1cSMartin Matuska        unsigned v3;
259c03c5b1cSMartin Matuska        unsigned v4;
260c03c5b1cSMartin Matuska        unsigned mem32[4];   /* buffer defined as U32 for alignment */
261c03c5b1cSMartin Matuska        unsigned memsize;
262c03c5b1cSMartin Matuska        unsigned reserved;   /* never read nor write, will be removed in a future version */
263c03c5b1cSMartin Matuska    };   /* typedef'd to XXH32_state_t */
264c03c5b1cSMartin Matuska 
265c03c5b1cSMartin Matuska    struct XXH64_state_s {
266c03c5b1cSMartin Matuska        unsigned long long total_len;
267c03c5b1cSMartin Matuska        unsigned long long v1;
268c03c5b1cSMartin Matuska        unsigned long long v2;
269c03c5b1cSMartin Matuska        unsigned long long v3;
270c03c5b1cSMartin Matuska        unsigned long long v4;
271c03c5b1cSMartin Matuska        unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
272c03c5b1cSMartin Matuska        unsigned memsize;
273c03c5b1cSMartin Matuska        unsigned reserved[2];          /* never read nor write, will be removed in a future version */
274c03c5b1cSMartin Matuska    };   /* typedef'd to XXH64_state_t */
275c03c5b1cSMartin Matuska 
276c03c5b1cSMartin Matuska 
277c03c5b1cSMartin Matuska #  ifdef XXH_PRIVATE_API
278c03c5b1cSMartin Matuska #    include "xxhash.c"   /* include xxhash functions as `static`, for inlining */
279c03c5b1cSMartin Matuska #  endif
280c03c5b1cSMartin Matuska 
281c03c5b1cSMartin Matuska #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
282c03c5b1cSMartin Matuska 
283c03c5b1cSMartin Matuska 
284c03c5b1cSMartin Matuska #if defined (__cplusplus)
285c03c5b1cSMartin Matuska }
286c03c5b1cSMartin Matuska #endif
287