10c16b537SWarner Losh /* 237f1f268SConrad Meyer * xxHash - Extremely Fast Hash algorithm 337f1f268SConrad Meyer * Header File 437f1f268SConrad Meyer * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. 537f1f268SConrad Meyer * 637f1f268SConrad Meyer * You can contact the author at : 737f1f268SConrad Meyer * - xxHash source repository : https://github.com/Cyan4973/xxHash 837f1f268SConrad Meyer * 937f1f268SConrad Meyer * This source code is licensed under both the BSD-style license (found in the 1037f1f268SConrad Meyer * LICENSE file in the root directory of this source tree) and the GPLv2 (found 1137f1f268SConrad Meyer * in the COPYING file in the root directory of this source tree). 1237f1f268SConrad Meyer * You may select, at your option, one of the above-listed licenses. 130c16b537SWarner Losh */ 140c16b537SWarner Losh 150c16b537SWarner Losh /* Notice extracted from xxHash homepage : 160c16b537SWarner Losh 170c16b537SWarner Losh xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 180c16b537SWarner Losh It also successfully passes all tests from the SMHasher suite. 190c16b537SWarner Losh 200c16b537SWarner Losh Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 210c16b537SWarner Losh 220c16b537SWarner Losh Name Speed Q.Score Author 230c16b537SWarner Losh xxHash 5.4 GB/s 10 240c16b537SWarner Losh CrapWow 3.2 GB/s 2 Andrew 250c16b537SWarner Losh MumurHash 3a 2.7 GB/s 10 Austin Appleby 260c16b537SWarner Losh SpookyHash 2.0 GB/s 10 Bob Jenkins 270c16b537SWarner Losh SBox 1.4 GB/s 9 Bret Mulvey 280c16b537SWarner Losh Lookup3 1.2 GB/s 9 Bob Jenkins 290c16b537SWarner Losh SuperFastHash 1.2 GB/s 1 Paul Hsieh 300c16b537SWarner Losh CityHash64 1.05 GB/s 10 Pike & Alakuijala 310c16b537SWarner Losh FNV 0.55 GB/s 5 Fowler, Noll, Vo 320c16b537SWarner Losh CRC32 0.43 GB/s 9 330c16b537SWarner Losh MD5-32 0.33 GB/s 10 Ronald L. Rivest 340c16b537SWarner Losh SHA1-32 0.28 GB/s 10 350c16b537SWarner Losh 360c16b537SWarner Losh Q.Score is a measure of quality of the hash function. 370c16b537SWarner Losh It depends on successfully passing SMHasher test set. 380c16b537SWarner Losh 10 is a perfect score. 390c16b537SWarner Losh 400c16b537SWarner Losh A 64-bits version, named XXH64, is available since r35. 410c16b537SWarner Losh It offers much better speed, but for 64-bits applications only. 420c16b537SWarner Losh Name Speed on 64 bits Speed on 32 bits 430c16b537SWarner Losh XXH64 13.8 GB/s 1.9 GB/s 440c16b537SWarner Losh XXH32 6.8 GB/s 6.0 GB/s 450c16b537SWarner Losh */ 460c16b537SWarner Losh 470c16b537SWarner Losh #if defined (__cplusplus) 480c16b537SWarner Losh extern "C" { 490c16b537SWarner Losh #endif 500c16b537SWarner Losh 510c16b537SWarner Losh #ifndef XXHASH_H_5627135585666179 520c16b537SWarner Losh #define XXHASH_H_5627135585666179 1 530c16b537SWarner Losh 540c16b537SWarner Losh 550c16b537SWarner Losh /* **************************** 560c16b537SWarner Losh * Definitions 570c16b537SWarner Losh ******************************/ 58f7cd7fe5SConrad Meyer #include "zstd_deps.h" 590c16b537SWarner Losh typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; 600c16b537SWarner Losh 610c16b537SWarner Losh 620c16b537SWarner Losh /* **************************** 630c16b537SWarner Losh * API modifier 640c16b537SWarner Losh ******************************/ 650c16b537SWarner Losh /** XXH_PRIVATE_API 660c16b537SWarner Losh * This is useful if you want to include xxhash functions in `static` mode 670c16b537SWarner Losh * in order to inline them, and remove their symbol from the public list. 680c16b537SWarner Losh * Methodology : 690c16b537SWarner Losh * #define XXH_PRIVATE_API 700c16b537SWarner Losh * #include "xxhash.h" 710c16b537SWarner Losh * `xxhash.c` is automatically included. 720c16b537SWarner Losh * It's not useful to compile and link it as a separate module anymore. 730c16b537SWarner Losh */ 740c16b537SWarner Losh #ifdef XXH_PRIVATE_API 750c16b537SWarner Losh # ifndef XXH_STATIC_LINKING_ONLY 760c16b537SWarner Losh # define XXH_STATIC_LINKING_ONLY 770c16b537SWarner Losh # endif 780c16b537SWarner Losh # if defined(__GNUC__) 790c16b537SWarner Losh # define XXH_PUBLIC_API static __inline __attribute__((unused)) 800c16b537SWarner Losh # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) 810c16b537SWarner Losh # define XXH_PUBLIC_API static inline 820c16b537SWarner Losh # elif defined(_MSC_VER) 830c16b537SWarner Losh # define XXH_PUBLIC_API static __inline 840c16b537SWarner Losh # else 850c16b537SWarner Losh # define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ 860c16b537SWarner Losh # endif 870c16b537SWarner Losh #else 880c16b537SWarner Losh # define XXH_PUBLIC_API /* do nothing */ 890c16b537SWarner Losh #endif /* XXH_PRIVATE_API */ 900c16b537SWarner Losh 910c16b537SWarner Losh /*!XXH_NAMESPACE, aka Namespace Emulation : 920c16b537SWarner Losh 930c16b537SWarner Losh If you want to include _and expose_ xxHash functions from within your own library, 940c16b537SWarner Losh but also want to avoid symbol collisions with another library which also includes xxHash, 950c16b537SWarner Losh 960c16b537SWarner Losh you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library 970c16b537SWarner Losh with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). 980c16b537SWarner Losh 990c16b537SWarner Losh Note that no change is required within the calling program as long as it includes `xxhash.h` : 1000c16b537SWarner Losh regular symbol name will be automatically translated by this header. 1010c16b537SWarner Losh */ 1020c16b537SWarner Losh #ifdef XXH_NAMESPACE 1030c16b537SWarner Losh # define XXH_CAT(A,B) A##B 1040c16b537SWarner Losh # define XXH_NAME2(A,B) XXH_CAT(A,B) 1050c16b537SWarner Losh # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) 1060c16b537SWarner Losh # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) 1070c16b537SWarner Losh # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) 1080c16b537SWarner Losh # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) 1090c16b537SWarner Losh # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) 1100c16b537SWarner Losh # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) 1110c16b537SWarner Losh # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) 1120c16b537SWarner Losh # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) 1130c16b537SWarner Losh # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) 1140c16b537SWarner Losh # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) 1150c16b537SWarner Losh # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) 1160c16b537SWarner Losh # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) 1170c16b537SWarner Losh # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) 1180c16b537SWarner Losh # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) 1190c16b537SWarner Losh # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) 1200c16b537SWarner Losh # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) 1210c16b537SWarner Losh # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) 1220c16b537SWarner Losh # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) 1230c16b537SWarner Losh # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) 1240c16b537SWarner Losh #endif 1250c16b537SWarner Losh 1260c16b537SWarner Losh 1270c16b537SWarner Losh /* ************************************* 1280c16b537SWarner Losh * Version 1290c16b537SWarner Losh ***************************************/ 1300c16b537SWarner Losh #define XXH_VERSION_MAJOR 0 1310c16b537SWarner Losh #define XXH_VERSION_MINOR 6 1320c16b537SWarner Losh #define XXH_VERSION_RELEASE 2 1330c16b537SWarner Losh #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) 1340c16b537SWarner Losh XXH_PUBLIC_API unsigned XXH_versionNumber (void); 1350c16b537SWarner Losh 1360c16b537SWarner Losh 1370c16b537SWarner Losh /* **************************** 1380c16b537SWarner Losh * Simple Hash Functions 1390c16b537SWarner Losh ******************************/ 1400c16b537SWarner Losh typedef unsigned int XXH32_hash_t; 1410c16b537SWarner Losh typedef unsigned long long XXH64_hash_t; 1420c16b537SWarner Losh 1430c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); 144*98689d0fSConrad Meyer /* Begin FreeBSD - This symbol is needed by dll-linked CLI zstd(1). */ 145*98689d0fSConrad Meyer __attribute__((visibility ("default"))) 146*98689d0fSConrad Meyer /* End FreeBSD */ 1470c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); 1480c16b537SWarner Losh 1490c16b537SWarner Losh /*! 1500c16b537SWarner Losh XXH32() : 1510c16b537SWarner Losh Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 1520c16b537SWarner Losh The memory between input & input+length must be valid (allocated and read-accessible). 1530c16b537SWarner Losh "seed" can be used to alter the result predictably. 1540c16b537SWarner Losh Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s 1550c16b537SWarner Losh XXH64() : 1560c16b537SWarner Losh Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". 1570c16b537SWarner Losh "seed" can be used to alter the result predictably. 1580c16b537SWarner Losh This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 1590c16b537SWarner Losh */ 1600c16b537SWarner Losh 1610c16b537SWarner Losh 1620c16b537SWarner Losh /* **************************** 1630c16b537SWarner Losh * Streaming Hash Functions 1640c16b537SWarner Losh ******************************/ 1650c16b537SWarner Losh typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ 1660c16b537SWarner Losh typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ 1670c16b537SWarner Losh 1680c16b537SWarner Losh /*! State allocation, compatible with dynamic libraries */ 1690c16b537SWarner Losh 1700c16b537SWarner Losh XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); 1710c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); 1720c16b537SWarner Losh 1730c16b537SWarner Losh XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); 1740c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); 1750c16b537SWarner Losh 1760c16b537SWarner Losh 1770c16b537SWarner Losh /* hash streaming */ 1780c16b537SWarner Losh 1790c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); 1800c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); 1810c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); 1820c16b537SWarner Losh 1830c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); 1840c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); 1850c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); 1860c16b537SWarner Losh 1870c16b537SWarner Losh /* 1880c16b537SWarner Losh These functions generate the xxHash of an input provided in multiple segments. 1890c16b537SWarner Losh Note that, for small input, they are slower than single-call functions, due to state management. 1900c16b537SWarner Losh For small input, prefer `XXH32()` and `XXH64()` . 1910c16b537SWarner Losh 1920c16b537SWarner Losh XXH state must first be allocated, using XXH*_createState() . 1930c16b537SWarner Losh 1940c16b537SWarner Losh Start a new hash by initializing state with a seed, using XXH*_reset(). 1950c16b537SWarner Losh 1960c16b537SWarner Losh Then, feed the hash state by calling XXH*_update() as many times as necessary. 1970c16b537SWarner Losh Obviously, input must be allocated and read accessible. 1980c16b537SWarner Losh The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. 1990c16b537SWarner Losh 2000c16b537SWarner Losh Finally, a hash value can be produced anytime, by using XXH*_digest(). 2010c16b537SWarner Losh This function returns the nn-bits hash as an int or long long. 2020c16b537SWarner Losh 2030c16b537SWarner Losh It's still possible to continue inserting input into the hash state after a digest, 2040c16b537SWarner Losh and generate some new hashes later on, by calling again XXH*_digest(). 2050c16b537SWarner Losh 2060c16b537SWarner Losh When done, free XXH state space if it was allocated dynamically. 2070c16b537SWarner Losh */ 2080c16b537SWarner Losh 2090c16b537SWarner Losh 2100c16b537SWarner Losh /* ************************** 2110c16b537SWarner Losh * Utils 2120c16b537SWarner Losh ****************************/ 2130c16b537SWarner Losh #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ 2140c16b537SWarner Losh # define restrict /* disable restrict */ 2150c16b537SWarner Losh #endif 2160c16b537SWarner Losh 2170c16b537SWarner Losh XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); 2180c16b537SWarner Losh XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); 2190c16b537SWarner Losh 2200c16b537SWarner Losh 2210c16b537SWarner Losh /* ************************** 2220c16b537SWarner Losh * Canonical representation 2230c16b537SWarner Losh ****************************/ 2240c16b537SWarner Losh /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. 2250c16b537SWarner Losh * The canonical representation uses human-readable write convention, aka big-endian (large digits first). 2260c16b537SWarner Losh * These functions allow transformation of hash result into and from its canonical format. 2270c16b537SWarner Losh * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 2280c16b537SWarner Losh */ 2290c16b537SWarner Losh typedef struct { unsigned char digest[4]; } XXH32_canonical_t; 2300c16b537SWarner Losh typedef struct { unsigned char digest[8]; } XXH64_canonical_t; 2310c16b537SWarner Losh 2320c16b537SWarner Losh XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); 2330c16b537SWarner Losh XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); 2340c16b537SWarner Losh 2350c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); 2360c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); 2370c16b537SWarner Losh 2380c16b537SWarner Losh #endif /* XXHASH_H_5627135585666179 */ 2390c16b537SWarner Losh 2400c16b537SWarner Losh 2410c16b537SWarner Losh 2420c16b537SWarner Losh /* ================================================================================================ 2430c16b537SWarner Losh This section contains definitions which are not guaranteed to remain stable. 2440c16b537SWarner Losh They may change in future versions, becoming incompatible with a different version of the library. 2450c16b537SWarner Losh They shall only be used with static linking. 2460c16b537SWarner Losh Never use these definitions in association with dynamic linking ! 2470c16b537SWarner Losh =================================================================================================== */ 2480c16b537SWarner Losh #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) 2490c16b537SWarner Losh #define XXH_STATIC_H_3543687687345 2500c16b537SWarner Losh 2510c16b537SWarner Losh /* These definitions are only meant to allow allocation of XXH state 2520c16b537SWarner Losh statically, on stack, or in a struct for example. 2530c16b537SWarner Losh Do not use members directly. */ 2540c16b537SWarner Losh 2550c16b537SWarner Losh struct XXH32_state_s { 2560c16b537SWarner Losh unsigned total_len_32; 2570c16b537SWarner Losh unsigned large_len; 2580c16b537SWarner Losh unsigned v1; 2590c16b537SWarner Losh unsigned v2; 2600c16b537SWarner Losh unsigned v3; 2610c16b537SWarner Losh unsigned v4; 2620c16b537SWarner Losh unsigned mem32[4]; /* buffer defined as U32 for alignment */ 2630c16b537SWarner Losh unsigned memsize; 2640c16b537SWarner Losh unsigned reserved; /* never read nor write, will be removed in a future version */ 2650c16b537SWarner Losh }; /* typedef'd to XXH32_state_t */ 2660c16b537SWarner Losh 2670c16b537SWarner Losh struct XXH64_state_s { 2680c16b537SWarner Losh unsigned long long total_len; 2690c16b537SWarner Losh unsigned long long v1; 2700c16b537SWarner Losh unsigned long long v2; 2710c16b537SWarner Losh unsigned long long v3; 2720c16b537SWarner Losh unsigned long long v4; 2730c16b537SWarner Losh unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ 2740c16b537SWarner Losh unsigned memsize; 2750c16b537SWarner Losh unsigned reserved[2]; /* never read nor write, will be removed in a future version */ 2760c16b537SWarner Losh }; /* typedef'd to XXH64_state_t */ 2770c16b537SWarner Losh 2780c16b537SWarner Losh 2790c16b537SWarner Losh # ifdef XXH_PRIVATE_API 2800c16b537SWarner Losh # include "xxhash.c" /* include xxhash functions as `static`, for inlining */ 2810c16b537SWarner Losh # endif 2820c16b537SWarner Losh 2830c16b537SWarner Losh #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ 2840c16b537SWarner Losh 2850c16b537SWarner Losh 2860c16b537SWarner Losh #if defined (__cplusplus) 2870c16b537SWarner Losh } 2880c16b537SWarner Losh #endif 289