xref: /freebsd/sys/contrib/zstd/lib/common/xxhash.h (revision 52f72944b8f5abb2386eae924357dee8aea17d5b)
1 /*
2    xxHash - Extremely Fast Hash algorithm
3    Header File
4    Copyright (C) 2012-2016, Yann Collet.
5 
6    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 
8    Redistribution and use in source and binary forms, with or without
9    modification, are permitted provided that the following conditions are
10    met:
11 
12        * Redistributions of source code must retain the above copyright
13    notice, this list of conditions and the following disclaimer.
14        * Redistributions in binary form must reproduce the above
15    copyright notice, this list of conditions and the following disclaimer
16    in the documentation and/or other materials provided with the
17    distribution.
18 
19    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31    You can contact the author at :
32    - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34 
35 /* Notice extracted from xxHash homepage :
36 
37 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38 It also successfully passes all tests from the SMHasher suite.
39 
40 Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41 
42 Name            Speed       Q.Score   Author
43 xxHash          5.4 GB/s     10
44 CrapWow         3.2 GB/s      2       Andrew
45 MurmurHash 3a   2.7 GB/s     10       Austin Appleby
46 SpookyHash      2.0 GB/s     10       Bob Jenkins
47 SBox            1.4 GB/s      9       Bret Mulvey
48 Lookup3         1.2 GB/s      9       Bob Jenkins
49 SuperFastHash   1.2 GB/s      1       Paul Hsieh
50 CityHash64      1.05 GB/s    10       Pike & Alakuijala
51 FNV             0.55 GB/s     5       Fowler, Noll, Vo
52 CRC32           0.43 GB/s     9
53 MD5-32          0.33 GB/s    10       Ronald L. Rivest
54 SHA1-32         0.28 GB/s    10
55 
56 Q.Score is a measure of quality of the hash function.
57 It depends on successfully passing SMHasher test set.
58 10 is a perfect score.
59 
60 A 64-bits version, named XXH64, is available since r35.
61 It offers much better speed, but for 64-bits applications only.
62 Name     Speed on 64 bits    Speed on 32 bits
63 XXH64       13.8 GB/s            1.9 GB/s
64 XXH32        6.8 GB/s            6.0 GB/s
65 */
66 
67 #if defined (__cplusplus)
68 extern "C" {
69 #endif
70 
71 #ifndef XXHASH_H_5627135585666179
72 #define XXHASH_H_5627135585666179 1
73 
74 
75 /* ****************************
76 *  Definitions
77 ******************************/
78 #include <stddef.h>   /* size_t */
79 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;   /* status code returned by XXH*_freeState(), XXH*_reset() and XXH*_update(); XXH_OK is 0, XXH_ERROR is 1 */
80 
81 
82 /* ****************************
83 *  API modifier
84 ******************************/
85 /** XXH_PRIVATE_API
86 *   This is useful if you want to include xxhash functions in `static` mode
87 *   in order to inline them, and remove their symbol from the public list.
88 *   Methodology :
89 *     #define XXH_PRIVATE_API
90 *     #include "xxhash.h"
91 *   `xxhash.c` is automatically included.
92 *   It's not useful to compile and link it as a separate module anymore.
93 */
94 #ifdef XXH_PRIVATE_API
95 #  ifndef XXH_STATIC_LINKING_ONLY
96 #    define XXH_STATIC_LINKING_ONLY
97 #  endif
98 #  if defined(__GNUC__)
99 #    define XXH_PUBLIC_API static __inline __attribute__((unused))
100 #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
101 #    define XXH_PUBLIC_API static inline
102 #  elif defined(_MSC_VER)
103 #    define XXH_PUBLIC_API static __inline
104 #  else
105 #    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
106 #  endif
107 #else
108 #  define XXH_PUBLIC_API   /* do nothing */
109 #endif /* XXH_PRIVATE_API */
110 
111 /*!XXH_NAMESPACE, aka Namespace Emulation :
112 
113 If you want to include _and expose_ xxHash functions from within your own library,
114 but also want to avoid symbol collisions with another library which also includes xxHash,
115 
116 you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
117 with the value of XXH_NAMESPACE (so do not leave it empty (NULL), and avoid numeric values).
118 
119 Note that no change is required within the calling program as long as it includes `xxhash.h` :
120 regular symbol name will be automatically translated by this header.
121 */
122 #ifdef XXH_NAMESPACE
123 #  define XXH_CAT(A,B) A##B
124 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
125 #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
126 #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
127 #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
128 #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
129 #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
130 #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
131 #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
132 #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
133 #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
134 #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
135 #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
136 #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
137 #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
138 #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
139 #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
140 #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
141 #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
142 #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
143 #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
144 #endif
145 
146 
147 /* *************************************
148 *  Version
149 ***************************************/
150 #define XXH_VERSION_MAJOR    0   /* version of this header is MAJOR.MINOR.RELEASE = 0.6.2 */
151 #define XXH_VERSION_MINOR    6
152 #define XXH_VERSION_RELEASE  2
153 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)   /* single integer MAJOR*10000 + MINOR*100 + RELEASE; evaluates to 602 here */
154 XXH_PUBLIC_API unsigned XXH_versionNumber (void);   /* presumably returns the XXH_VERSION_NUMBER the implementation was built with -- confirm in xxhash.c */
155 
156 
157 /* ****************************
158 *  Simple Hash Functions
159 ******************************/
160 typedef unsigned int       XXH32_hash_t;   /* result type of XXH32() and XXH32_digest(); NOTE(review): relies on unsigned int being 32 bits wide on the target */
161 typedef unsigned long long XXH64_hash_t;   /* result type of XXH64() and XXH64_digest() */
162 
163 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);   /* one-shot 32-bit hash of the `length` bytes at `input`, altered predictably by `seed` */
164 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);   /* one-shot 64-bit hash of the `length` bytes at `input`, altered predictably by `seed` */
165 
166 /*!
167 XXH32() :
168     Calculate the 32-bits hash of a sequence of "length" bytes stored at memory address "input".
169     The memory between input & input+length must be valid (allocated and read-accessible).
170     "seed" can be used to alter the result predictably.
171     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
172 XXH64() :
173     Calculate the 64-bits hash of a sequence of "length" bytes stored at memory address "input".
174     "seed" can be used to alter the result predictably.
175     This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
176 */
177 
178 
179 /* ****************************
180 *  Streaming Hash Functions
181 ******************************/
182 typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
183 typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
184 
185 /*! State allocation, compatible with dynamic libraries */
186 
187 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
188 XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
189 
190 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
191 XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
192 
193 
194 /* hash streaming */
195 
196 XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
197 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
198 XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
199 
200 XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
201 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
202 XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
203 
204 /*
205 These functions generate the xxHash of an input provided in multiple segments.
206 Note that, for small input, they are slower than single-call functions, due to state management.
207 For small input, prefer `XXH32()` and `XXH64()` .
208 
209 XXH state must first be allocated, using XXH*_createState() .
210 
211 Start a new hash by initializing state with a seed, using XXH*_reset().
212 
213 Then, feed the hash state by calling XXH*_update() as many times as necessary.
214 Obviously, input must be allocated and read accessible.
215 The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
216 
217 Finally, a hash value can be produced anytime, by using XXH*_digest().
218 This function returns the nn-bits hash as an unsigned int or unsigned long long.
219 
220 It's still possible to continue inserting input into the hash state after a digest,
221 and generate some new hashes later on, by calling again XXH*_digest().
222 
223 When done, free XXH state space if it was allocated dynamically.
224 */
225 
226 
227 /* **************************
228 *  Utils
229 ****************************/
230 #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* not C99 or later: pre-C99 C, and typically C++ as well */
231 #  define restrict   /* `restrict` is not a keyword in this mode, so expand it to nothing. NOTE(review): no #undef is visible in this header, so the macro stays defined for code included afterwards */
232 #endif
233 
234 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);   /* presumably copies *src_state into *dst_state (confirm in xxhash.c); `restrict` means the two pointers must not alias */
235 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);   /* 64-bit counterpart of XXH32_copyState; same no-alias requirement */
236 
237 
238 /* **************************
239 *  Canonical representation
240 ****************************/
241 /* Default result types for XXH functions are primitive unsigned 32-bit and 64-bit integers.
242 *  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
243 *  These functions allow transformation of hash result into and from its canonical format.
244 *  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
245 */
246 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;   /* 4 raw bytes holding a 32-bit hash in canonical (big-endian) form */
247 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;   /* 8 raw bytes holding a 64-bit hash in canonical (big-endian) form */
248 
249 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);   /* hash value -> canonical bytes, written to *dst */
250 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);   /* hash value -> canonical bytes, written to *dst */
251 
252 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);   /* canonical bytes at *src -> hash value */
253 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);   /* canonical bytes at *src -> hash value */
254 
255 #endif /* XXHASH_H_5627135585666179 */
256 
257 
258 
259 /* ================================================================================================
260    This section contains definitions which are not guaranteed to remain stable.
261    They may change in future versions, becoming incompatible with a different version of the library.
262    They shall only be used with static linking.
263    Never use these definitions in association with dynamic linking !
264 =================================================================================================== */
265 #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
266 #define XXH_STATIC_H_3543687687345
267 
268 /* These definitions are only meant to allow allocation of XXH state
269    statically, on stack, or in a struct for example.
270    Do not use members directly. */
271 
272    struct XXH32_state_s {   /* complete definition of the state passed to XXH32_reset(), XXH32_update() and XXH32_digest() */
273        unsigned total_len_32;   /* presumably the running input length kept in 32 bits -- confirm in xxhash.c */
274        unsigned large_len;   /* NOTE(review): meaning is not visible in this header; see xxhash.c */
275        unsigned v1;   /* v1..v4: presumably the four running hash accumulators -- confirm in xxhash.c */
276        unsigned v2;
277        unsigned v3;
278        unsigned v4;
279        unsigned mem32[4];   /* buffer declared as unsigned (U32) words, rather than bytes, for alignment */
280        unsigned memsize;   /* presumably the number of bytes currently held in mem32 -- confirm in xxhash.c */
281        unsigned reserved;   /* never read nor written, will be removed in a future version */
282    };   /* typedef'd to XXH32_state_t */
283 
284    struct XXH64_state_s {   /* complete definition of the state passed to XXH64_reset(), XXH64_update() and XXH64_digest() */
285        unsigned long long total_len;   /* presumably the running input length -- confirm in xxhash.c */
286        unsigned long long v1;   /* v1..v4: presumably the four running hash accumulators -- confirm in xxhash.c */
287        unsigned long long v2;
288        unsigned long long v3;
289        unsigned long long v4;
290        unsigned long long mem64[4];   /* buffer declared as unsigned long long (U64) words, rather than bytes, for alignment */
291        unsigned memsize;   /* presumably the number of bytes currently held in mem64 -- confirm in xxhash.c */
292        unsigned reserved[2];          /* never read nor written, will be removed in a future version */
293    };   /* typedef'd to XXH64_state_t */
294 
295 
296 #  ifdef XXH_PRIVATE_API
297 #    include "xxhash.c"   /* include xxhash functions as `static`, for inlining */
298 #  endif
299 
300 #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
301 
302 
303 #if defined (__cplusplus)
304 }
305 #endif
306