1*0c16b537SWarner Losh /* 2*0c16b537SWarner Losh xxHash - Extremely Fast Hash algorithm 3*0c16b537SWarner Losh Header File 4*0c16b537SWarner Losh Copyright (C) 2012-2016, Yann Collet. 5*0c16b537SWarner Losh 6*0c16b537SWarner Losh BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 7*0c16b537SWarner Losh 8*0c16b537SWarner Losh Redistribution and use in source and binary forms, with or without 9*0c16b537SWarner Losh modification, are permitted provided that the following conditions are 10*0c16b537SWarner Losh met: 11*0c16b537SWarner Losh 12*0c16b537SWarner Losh * Redistributions of source code must retain the above copyright 13*0c16b537SWarner Losh notice, this list of conditions and the following disclaimer. 14*0c16b537SWarner Losh * Redistributions in binary form must reproduce the above 15*0c16b537SWarner Losh copyright notice, this list of conditions and the following disclaimer 16*0c16b537SWarner Losh in the documentation and/or other materials provided with the 17*0c16b537SWarner Losh distribution. 18*0c16b537SWarner Losh 19*0c16b537SWarner Losh THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20*0c16b537SWarner Losh "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21*0c16b537SWarner Losh LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22*0c16b537SWarner Losh A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23*0c16b537SWarner Losh OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24*0c16b537SWarner Losh SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25*0c16b537SWarner Losh LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26*0c16b537SWarner Losh DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27*0c16b537SWarner Losh THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28*0c16b537SWarner Losh (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29*0c16b537SWarner Losh OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30*0c16b537SWarner Losh 31*0c16b537SWarner Losh You can contact the author at : 32*0c16b537SWarner Losh - xxHash source repository : https://github.com/Cyan4973/xxHash 33*0c16b537SWarner Losh */ 34*0c16b537SWarner Losh 35*0c16b537SWarner Losh /* Notice extracted from xxHash homepage : 36*0c16b537SWarner Losh 37*0c16b537SWarner Losh xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 38*0c16b537SWarner Losh It also successfully passes all tests from the SMHasher suite. 39*0c16b537SWarner Losh 40*0c16b537SWarner Losh Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 41*0c16b537SWarner Losh 42*0c16b537SWarner Losh Name Speed Q.Score Author 43*0c16b537SWarner Losh xxHash 5.4 GB/s 10 44*0c16b537SWarner Losh CrapWow 3.2 GB/s 2 Andrew 45*0c16b537SWarner Losh MumurHash 3a 2.7 GB/s 10 Austin Appleby 46*0c16b537SWarner Losh SpookyHash 2.0 GB/s 10 Bob Jenkins 47*0c16b537SWarner Losh SBox 1.4 GB/s 9 Bret Mulvey 48*0c16b537SWarner Losh Lookup3 1.2 GB/s 9 Bob Jenkins 49*0c16b537SWarner Losh SuperFastHash 1.2 GB/s 1 Paul Hsieh 50*0c16b537SWarner Losh CityHash64 1.05 GB/s 10 Pike & Alakuijala 51*0c16b537SWarner Losh FNV 0.55 GB/s 5 Fowler, Noll, Vo 52*0c16b537SWarner Losh CRC32 0.43 GB/s 9 53*0c16b537SWarner Losh MD5-32 0.33 GB/s 10 Ronald L. Rivest 54*0c16b537SWarner Losh SHA1-32 0.28 GB/s 10 55*0c16b537SWarner Losh 56*0c16b537SWarner Losh Q.Score is a measure of quality of the hash function. 57*0c16b537SWarner Losh It depends on successfully passing SMHasher test set. 58*0c16b537SWarner Losh 10 is a perfect score. 59*0c16b537SWarner Losh 60*0c16b537SWarner Losh A 64-bits version, named XXH64, is available since r35. 61*0c16b537SWarner Losh It offers much better speed, but for 64-bits applications only. 62*0c16b537SWarner Losh Name Speed on 64 bits Speed on 32 bits 63*0c16b537SWarner Losh XXH64 13.8 GB/s 1.9 GB/s 64*0c16b537SWarner Losh XXH32 6.8 GB/s 6.0 GB/s 65*0c16b537SWarner Losh */ 66*0c16b537SWarner Losh 67*0c16b537SWarner Losh #if defined (__cplusplus) 68*0c16b537SWarner Losh extern "C" { 69*0c16b537SWarner Losh #endif 70*0c16b537SWarner Losh 71*0c16b537SWarner Losh #ifndef XXHASH_H_5627135585666179 72*0c16b537SWarner Losh #define XXHASH_H_5627135585666179 1 73*0c16b537SWarner Losh 74*0c16b537SWarner Losh 75*0c16b537SWarner Losh /* **************************** 76*0c16b537SWarner Losh * Definitions 77*0c16b537SWarner Losh ******************************/ 78*0c16b537SWarner Losh #include <stddef.h> /* size_t */ 79*0c16b537SWarner Losh typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; 80*0c16b537SWarner Losh 81*0c16b537SWarner Losh 82*0c16b537SWarner Losh /* **************************** 83*0c16b537SWarner Losh * API modifier 84*0c16b537SWarner Losh ******************************/ 85*0c16b537SWarner Losh /** XXH_PRIVATE_API 86*0c16b537SWarner Losh * This is useful if you want to include xxhash functions in `static` mode 87*0c16b537SWarner Losh * in order to inline them, and remove their symbol from the public list. 88*0c16b537SWarner Losh * Methodology : 89*0c16b537SWarner Losh * #define XXH_PRIVATE_API 90*0c16b537SWarner Losh * #include "xxhash.h" 91*0c16b537SWarner Losh * `xxhash.c` is automatically included. 92*0c16b537SWarner Losh * It's not useful to compile and link it as a separate module anymore. 93*0c16b537SWarner Losh */ 94*0c16b537SWarner Losh #ifdef XXH_PRIVATE_API 95*0c16b537SWarner Losh # ifndef XXH_STATIC_LINKING_ONLY 96*0c16b537SWarner Losh # define XXH_STATIC_LINKING_ONLY 97*0c16b537SWarner Losh # endif 98*0c16b537SWarner Losh # if defined(__GNUC__) 99*0c16b537SWarner Losh # define XXH_PUBLIC_API static __inline __attribute__((unused)) 100*0c16b537SWarner Losh # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) 101*0c16b537SWarner Losh # define XXH_PUBLIC_API static inline 102*0c16b537SWarner Losh # elif defined(_MSC_VER) 103*0c16b537SWarner Losh # define XXH_PUBLIC_API static __inline 104*0c16b537SWarner Losh # else 105*0c16b537SWarner Losh # define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ 106*0c16b537SWarner Losh # endif 107*0c16b537SWarner Losh #else 108*0c16b537SWarner Losh # define XXH_PUBLIC_API /* do nothing */ 109*0c16b537SWarner Losh #endif /* XXH_PRIVATE_API */ 110*0c16b537SWarner Losh 111*0c16b537SWarner Losh /*!XXH_NAMESPACE, aka Namespace Emulation : 112*0c16b537SWarner Losh 113*0c16b537SWarner Losh If you want to include _and expose_ xxHash functions from within your own library, 114*0c16b537SWarner Losh but also want to avoid symbol collisions with another library which also includes xxHash, 115*0c16b537SWarner Losh 116*0c16b537SWarner Losh you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library 117*0c16b537SWarner Losh with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 118*0c16b537SWarner Losh 119*0c16b537SWarner Losh Note that no change is required within the calling program as long as it includes `xxhash.h` : 120*0c16b537SWarner Losh regular symbol name will be automatically translated by this header. 121*0c16b537SWarner Losh */ 122*0c16b537SWarner Losh #ifdef XXH_NAMESPACE 123*0c16b537SWarner Losh # define XXH_CAT(A,B) A##B 124*0c16b537SWarner Losh # define XXH_NAME2(A,B) XXH_CAT(A,B) 125*0c16b537SWarner Losh # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) 126*0c16b537SWarner Losh # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) 127*0c16b537SWarner Losh # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) 128*0c16b537SWarner Losh # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) 129*0c16b537SWarner Losh # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) 130*0c16b537SWarner Losh # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) 131*0c16b537SWarner Losh # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) 132*0c16b537SWarner Losh # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) 133*0c16b537SWarner Losh # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) 134*0c16b537SWarner Losh # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) 135*0c16b537SWarner Losh # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) 136*0c16b537SWarner Losh # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) 137*0c16b537SWarner Losh # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) 138*0c16b537SWarner Losh # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) 139*0c16b537SWarner Losh # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) 140*0c16b537SWarner Losh # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) 141*0c16b537SWarner Losh # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) 142*0c16b537SWarner Losh # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) 143*0c16b537SWarner Losh # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) 144*0c16b537SWarner Losh #endif 145*0c16b537SWarner Losh 146*0c16b537SWarner Losh 147*0c16b537SWarner Losh /* ************************************* 148*0c16b537SWarner Losh * Version 149*0c16b537SWarner Losh ***************************************/ 150*0c16b537SWarner Losh #define XXH_VERSION_MAJOR 0 151*0c16b537SWarner Losh #define XXH_VERSION_MINOR 6 152*0c16b537SWarner Losh #define XXH_VERSION_RELEASE 2 153*0c16b537SWarner Losh #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) 154*0c16b537SWarner Losh XXH_PUBLIC_API unsigned XXH_versionNumber (void); 155*0c16b537SWarner Losh 156*0c16b537SWarner Losh 157*0c16b537SWarner Losh /* **************************** 158*0c16b537SWarner Losh * Simple Hash Functions 159*0c16b537SWarner Losh ******************************/ 160*0c16b537SWarner Losh typedef unsigned int XXH32_hash_t; 161*0c16b537SWarner Losh typedef unsigned long long XXH64_hash_t; 162*0c16b537SWarner Losh 163*0c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 164*0c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); 165*0c16b537SWarner Losh 166*0c16b537SWarner Losh /*! 167*0c16b537SWarner Losh XXH32() : 168*0c16b537SWarner Losh Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 169*0c16b537SWarner Losh The memory between input & input+length must be valid (allocated and read-accessible). 170*0c16b537SWarner Losh "seed" can be used to alter the result predictably. 171*0c16b537SWarner Losh Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s 172*0c16b537SWarner Losh XXH64() : 173*0c16b537SWarner Losh Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". 174*0c16b537SWarner Losh "seed" can be used to alter the result predictably. 175*0c16b537SWarner Losh This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 176*0c16b537SWarner Losh */ 177*0c16b537SWarner Losh 178*0c16b537SWarner Losh 179*0c16b537SWarner Losh /* **************************** 180*0c16b537SWarner Losh * Streaming Hash Functions 181*0c16b537SWarner Losh ******************************/ 182*0c16b537SWarner Losh typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ 183*0c16b537SWarner Losh typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ 184*0c16b537SWarner Losh 185*0c16b537SWarner Losh /*! State allocation, compatible with dynamic libraries */ 186*0c16b537SWarner Losh 187*0c16b537SWarner Losh XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); 188*0c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); 189*0c16b537SWarner Losh 190*0c16b537SWarner Losh XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); 191*0c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); 192*0c16b537SWarner Losh 193*0c16b537SWarner Losh 194*0c16b537SWarner Losh /* hash streaming */ 195*0c16b537SWarner Losh 196*0c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); 197*0c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); 198*0c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); 199*0c16b537SWarner Losh 200*0c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); 201*0c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); 202*0c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); 203*0c16b537SWarner Losh 204*0c16b537SWarner Losh /* 205*0c16b537SWarner Losh These functions generate the xxHash of an input provided in multiple segments. 206*0c16b537SWarner Losh Note that, for small input, they are slower than single-call functions, due to state management. 207*0c16b537SWarner Losh For small input, prefer `XXH32()` and `XXH64()` . 208*0c16b537SWarner Losh 209*0c16b537SWarner Losh XXH state must first be allocated, using XXH*_createState() . 210*0c16b537SWarner Losh 211*0c16b537SWarner Losh Start a new hash by initializing state with a seed, using XXH*_reset(). 212*0c16b537SWarner Losh 213*0c16b537SWarner Losh Then, feed the hash state by calling XXH*_update() as many times as necessary. 214*0c16b537SWarner Losh Obviously, input must be allocated and read accessible. 215*0c16b537SWarner Losh The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. 216*0c16b537SWarner Losh 217*0c16b537SWarner Losh Finally, a hash value can be produced anytime, by using XXH*_digest(). 218*0c16b537SWarner Losh This function returns the nn-bits hash as an int or long long. 219*0c16b537SWarner Losh 220*0c16b537SWarner Losh It's still possible to continue inserting input into the hash state after a digest, 221*0c16b537SWarner Losh and generate some new hashes later on, by calling again XXH*_digest(). 222*0c16b537SWarner Losh 223*0c16b537SWarner Losh When done, free XXH state space if it was allocated dynamically. 224*0c16b537SWarner Losh */ 225*0c16b537SWarner Losh 226*0c16b537SWarner Losh 227*0c16b537SWarner Losh /* ************************** 228*0c16b537SWarner Losh * Utils 229*0c16b537SWarner Losh ****************************/ 230*0c16b537SWarner Losh #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ 231*0c16b537SWarner Losh # define restrict /* disable restrict */ 232*0c16b537SWarner Losh #endif 233*0c16b537SWarner Losh 234*0c16b537SWarner Losh XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); 235*0c16b537SWarner Losh XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); 236*0c16b537SWarner Losh 237*0c16b537SWarner Losh 238*0c16b537SWarner Losh /* ************************** 239*0c16b537SWarner Losh * Canonical representation 240*0c16b537SWarner Losh ****************************/ 241*0c16b537SWarner Losh /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. 242*0c16b537SWarner Losh * The canonical representation uses human-readable write convention, aka big-endian (large digits first). 243*0c16b537SWarner Losh * These functions allow transformation of hash result into and from its canonical format. 244*0c16b537SWarner Losh * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 245*0c16b537SWarner Losh */ 246*0c16b537SWarner Losh typedef struct { unsigned char digest[4]; } XXH32_canonical_t; 247*0c16b537SWarner Losh typedef struct { unsigned char digest[8]; } XXH64_canonical_t; 248*0c16b537SWarner Losh 249*0c16b537SWarner Losh XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); 250*0c16b537SWarner Losh XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); 251*0c16b537SWarner Losh 252*0c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); 253*0c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); 254*0c16b537SWarner Losh 255*0c16b537SWarner Losh #endif /* XXHASH_H_5627135585666179 */ 256*0c16b537SWarner Losh 257*0c16b537SWarner Losh 258*0c16b537SWarner Losh 259*0c16b537SWarner Losh /* ================================================================================================ 260*0c16b537SWarner Losh This section contains definitions which are not guaranteed to remain stable. 261*0c16b537SWarner Losh They may change in future versions, becoming incompatible with a different version of the library. 262*0c16b537SWarner Losh They shall only be used with static linking. 263*0c16b537SWarner Losh Never use these definitions in association with dynamic linking ! 264*0c16b537SWarner Losh =================================================================================================== */ 265*0c16b537SWarner Losh #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) 266*0c16b537SWarner Losh #define XXH_STATIC_H_3543687687345 267*0c16b537SWarner Losh 268*0c16b537SWarner Losh /* These definitions are only meant to allow allocation of XXH state 269*0c16b537SWarner Losh statically, on stack, or in a struct for example. 270*0c16b537SWarner Losh Do not use members directly. */ 271*0c16b537SWarner Losh 272*0c16b537SWarner Losh struct XXH32_state_s { 273*0c16b537SWarner Losh unsigned total_len_32; 274*0c16b537SWarner Losh unsigned large_len; 275*0c16b537SWarner Losh unsigned v1; 276*0c16b537SWarner Losh unsigned v2; 277*0c16b537SWarner Losh unsigned v3; 278*0c16b537SWarner Losh unsigned v4; 279*0c16b537SWarner Losh unsigned mem32[4]; /* buffer defined as U32 for alignment */ 280*0c16b537SWarner Losh unsigned memsize; 281*0c16b537SWarner Losh unsigned reserved; /* never read nor write, will be removed in a future version */ 282*0c16b537SWarner Losh }; /* typedef'd to XXH32_state_t */ 283*0c16b537SWarner Losh 284*0c16b537SWarner Losh struct XXH64_state_s { 285*0c16b537SWarner Losh unsigned long long total_len; 286*0c16b537SWarner Losh unsigned long long v1; 287*0c16b537SWarner Losh unsigned long long v2; 288*0c16b537SWarner Losh unsigned long long v3; 289*0c16b537SWarner Losh unsigned long long v4; 290*0c16b537SWarner Losh unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ 291*0c16b537SWarner Losh unsigned memsize; 292*0c16b537SWarner Losh unsigned reserved[2]; /* never read nor write, will be removed in a future version */ 293*0c16b537SWarner Losh }; /* typedef'd to XXH64_state_t */ 294*0c16b537SWarner Losh 295*0c16b537SWarner Losh 296*0c16b537SWarner Losh # ifdef XXH_PRIVATE_API 297*0c16b537SWarner Losh # include "xxhash.c" /* include xxhash functions as `static`, for inlining */ 298*0c16b537SWarner Losh # endif 299*0c16b537SWarner Losh 300*0c16b537SWarner Losh #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ 301*0c16b537SWarner Losh 302*0c16b537SWarner Losh 303*0c16b537SWarner Losh #if defined (__cplusplus) 304*0c16b537SWarner Losh } 305*0c16b537SWarner Losh #endif 306