xref: /freebsd/sys/contrib/zstd/lib/common/xxhash.h (revision 98689d0ffb0a0e218d3b32201df59be9fa771830)
10c16b537SWarner Losh /*
237f1f268SConrad Meyer  * xxHash - Extremely Fast Hash algorithm
337f1f268SConrad Meyer  * Header File
437f1f268SConrad Meyer  * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc.
537f1f268SConrad Meyer  *
637f1f268SConrad Meyer  * You can contact the author at :
737f1f268SConrad Meyer  * - xxHash source repository : https://github.com/Cyan4973/xxHash
837f1f268SConrad Meyer  *
937f1f268SConrad Meyer  * This source code is licensed under both the BSD-style license (found in the
1037f1f268SConrad Meyer  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1137f1f268SConrad Meyer  * in the COPYING file in the root directory of this source tree).
1237f1f268SConrad Meyer  * You may select, at your option, one of the above-listed licenses.
130c16b537SWarner Losh */
140c16b537SWarner Losh 
150c16b537SWarner Losh /* Notice extracted from xxHash homepage :
160c16b537SWarner Losh 
170c16b537SWarner Losh xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
180c16b537SWarner Losh It also successfully passes all tests from the SMHasher suite.
190c16b537SWarner Losh 
200c16b537SWarner Losh Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
210c16b537SWarner Losh 
220c16b537SWarner Losh Name            Speed       Q.Score   Author
230c16b537SWarner Losh xxHash          5.4 GB/s     10
240c16b537SWarner Losh CrapWow         3.2 GB/s      2       Andrew
250c16b537SWarner Losh MumurHash 3a    2.7 GB/s     10       Austin Appleby
260c16b537SWarner Losh SpookyHash      2.0 GB/s     10       Bob Jenkins
270c16b537SWarner Losh SBox            1.4 GB/s      9       Bret Mulvey
280c16b537SWarner Losh Lookup3         1.2 GB/s      9       Bob Jenkins
290c16b537SWarner Losh SuperFastHash   1.2 GB/s      1       Paul Hsieh
300c16b537SWarner Losh CityHash64      1.05 GB/s    10       Pike & Alakuijala
310c16b537SWarner Losh FNV             0.55 GB/s     5       Fowler, Noll, Vo
320c16b537SWarner Losh CRC32           0.43 GB/s     9
330c16b537SWarner Losh MD5-32          0.33 GB/s    10       Ronald L. Rivest
340c16b537SWarner Losh SHA1-32         0.28 GB/s    10
350c16b537SWarner Losh 
360c16b537SWarner Losh Q.Score is a measure of quality of the hash function.
370c16b537SWarner Losh It depends on successfully passing SMHasher test set.
380c16b537SWarner Losh 10 is a perfect score.
390c16b537SWarner Losh 
400c16b537SWarner Losh A 64-bits version, named XXH64, is available since r35.
410c16b537SWarner Losh It offers much better speed, but for 64-bits applications only.
420c16b537SWarner Losh Name     Speed on 64 bits    Speed on 32 bits
430c16b537SWarner Losh XXH64       13.8 GB/s            1.9 GB/s
440c16b537SWarner Losh XXH32        6.8 GB/s            6.0 GB/s
450c16b537SWarner Losh */
460c16b537SWarner Losh 
470c16b537SWarner Losh #if defined (__cplusplus)
480c16b537SWarner Losh extern "C" {
490c16b537SWarner Losh #endif
500c16b537SWarner Losh 
510c16b537SWarner Losh #ifndef XXHASH_H_5627135585666179
520c16b537SWarner Losh #define XXHASH_H_5627135585666179 1
530c16b537SWarner Losh 
540c16b537SWarner Losh 
550c16b537SWarner Losh /* ****************************
560c16b537SWarner Losh *  Definitions
570c16b537SWarner Losh ******************************/
58f7cd7fe5SConrad Meyer #include "zstd_deps.h"
590c16b537SWarner Losh typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
600c16b537SWarner Losh 
610c16b537SWarner Losh 
620c16b537SWarner Losh /* ****************************
630c16b537SWarner Losh *  API modifier
640c16b537SWarner Losh ******************************/
650c16b537SWarner Losh /** XXH_PRIVATE_API
660c16b537SWarner Losh *   This is useful if you want to include xxhash functions in `static` mode
670c16b537SWarner Losh *   in order to inline them, and remove their symbol from the public list.
680c16b537SWarner Losh *   Methodology :
690c16b537SWarner Losh *     #define XXH_PRIVATE_API
700c16b537SWarner Losh *     #include "xxhash.h"
710c16b537SWarner Losh *   `xxhash.c` is automatically included.
720c16b537SWarner Losh *   It's not useful to compile and link it as a separate module anymore.
730c16b537SWarner Losh */
740c16b537SWarner Losh #ifdef XXH_PRIVATE_API
750c16b537SWarner Losh #  ifndef XXH_STATIC_LINKING_ONLY
760c16b537SWarner Losh #    define XXH_STATIC_LINKING_ONLY
770c16b537SWarner Losh #  endif
780c16b537SWarner Losh #  if defined(__GNUC__)
790c16b537SWarner Losh #    define XXH_PUBLIC_API static __inline __attribute__((unused))
800c16b537SWarner Losh #  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
810c16b537SWarner Losh #    define XXH_PUBLIC_API static inline
820c16b537SWarner Losh #  elif defined(_MSC_VER)
830c16b537SWarner Losh #    define XXH_PUBLIC_API static __inline
840c16b537SWarner Losh #  else
850c16b537SWarner Losh #    define XXH_PUBLIC_API static   /* this version may generate warnings for unused static functions; disable the relevant warning */
860c16b537SWarner Losh #  endif
870c16b537SWarner Losh #else
880c16b537SWarner Losh #  define XXH_PUBLIC_API   /* do nothing */
890c16b537SWarner Losh #endif /* XXH_PRIVATE_API */
900c16b537SWarner Losh 
910c16b537SWarner Losh /*!XXH_NAMESPACE, aka Namespace Emulation :
920c16b537SWarner Losh 
930c16b537SWarner Losh If you want to include _and expose_ xxHash functions from within your own library,
940c16b537SWarner Losh but also want to avoid symbol collisions with another library which also includes xxHash,
950c16b537SWarner Losh 
960c16b537SWarner Losh you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
970c16b537SWarner Losh with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values).
980c16b537SWarner Losh 
990c16b537SWarner Losh Note that no change is required within the calling program as long as it includes `xxhash.h` :
1000c16b537SWarner Losh regular symbol name will be automatically translated by this header.
1010c16b537SWarner Losh */
1020c16b537SWarner Losh #ifdef XXH_NAMESPACE
1030c16b537SWarner Losh #  define XXH_CAT(A,B) A##B
1040c16b537SWarner Losh #  define XXH_NAME2(A,B) XXH_CAT(A,B)
1050c16b537SWarner Losh #  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
1060c16b537SWarner Losh #  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
1070c16b537SWarner Losh #  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
1080c16b537SWarner Losh #  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
1090c16b537SWarner Losh #  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
1100c16b537SWarner Losh #  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
1110c16b537SWarner Losh #  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
1120c16b537SWarner Losh #  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
1130c16b537SWarner Losh #  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
1140c16b537SWarner Losh #  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
1150c16b537SWarner Losh #  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
1160c16b537SWarner Losh #  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
1170c16b537SWarner Losh #  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
1180c16b537SWarner Losh #  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
1190c16b537SWarner Losh #  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
1200c16b537SWarner Losh #  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
1210c16b537SWarner Losh #  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
1220c16b537SWarner Losh #  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
1230c16b537SWarner Losh #  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
1240c16b537SWarner Losh #endif
1250c16b537SWarner Losh 
1260c16b537SWarner Losh 
1270c16b537SWarner Losh /* *************************************
1280c16b537SWarner Losh *  Version
1290c16b537SWarner Losh ***************************************/
1300c16b537SWarner Losh #define XXH_VERSION_MAJOR    0
1310c16b537SWarner Losh #define XXH_VERSION_MINOR    6
1320c16b537SWarner Losh #define XXH_VERSION_RELEASE  2
1330c16b537SWarner Losh #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
1340c16b537SWarner Losh XXH_PUBLIC_API unsigned XXH_versionNumber (void);
1350c16b537SWarner Losh 
1360c16b537SWarner Losh 
1370c16b537SWarner Losh /* ****************************
1380c16b537SWarner Losh *  Simple Hash Functions
1390c16b537SWarner Losh ******************************/
1400c16b537SWarner Losh typedef unsigned int       XXH32_hash_t;
1410c16b537SWarner Losh typedef unsigned long long XXH64_hash_t;
1420c16b537SWarner Losh 
1430c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
144*98689d0fSConrad Meyer /* Begin FreeBSD - This symbol is needed by dll-linked CLI zstd(1). */
145*98689d0fSConrad Meyer __attribute__((visibility ("default")))
146*98689d0fSConrad Meyer /* End FreeBSD */
1470c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
1480c16b537SWarner Losh 
1490c16b537SWarner Losh /*!
1500c16b537SWarner Losh XXH32() :
1510c16b537SWarner Losh     Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
1520c16b537SWarner Losh     The memory between input & input+length must be valid (allocated and read-accessible).
1530c16b537SWarner Losh     "seed" can be used to alter the result predictably.
1540c16b537SWarner Losh     Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
1550c16b537SWarner Losh XXH64() :
1560c16b537SWarner Losh     Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
1570c16b537SWarner Losh     "seed" can be used to alter the result predictably.
1580c16b537SWarner Losh     This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
1590c16b537SWarner Losh */
1600c16b537SWarner Losh 
1610c16b537SWarner Losh 
1620c16b537SWarner Losh /* ****************************
1630c16b537SWarner Losh *  Streaming Hash Functions
1640c16b537SWarner Losh ******************************/
1650c16b537SWarner Losh typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
1660c16b537SWarner Losh typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
1670c16b537SWarner Losh 
1680c16b537SWarner Losh /*! State allocation, compatible with dynamic libraries */
1690c16b537SWarner Losh 
1700c16b537SWarner Losh XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
1710c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
1720c16b537SWarner Losh 
1730c16b537SWarner Losh XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
1740c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
1750c16b537SWarner Losh 
1760c16b537SWarner Losh 
1770c16b537SWarner Losh /* hash streaming */
1780c16b537SWarner Losh 
1790c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
1800c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
1810c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
1820c16b537SWarner Losh 
1830c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
1840c16b537SWarner Losh XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
1850c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
1860c16b537SWarner Losh 
1870c16b537SWarner Losh /*
1880c16b537SWarner Losh These functions generate the xxHash of an input provided in multiple segments.
1890c16b537SWarner Losh Note that, for small input, they are slower than single-call functions, due to state management.
1900c16b537SWarner Losh For small input, prefer `XXH32()` and `XXH64()` .
1910c16b537SWarner Losh 
1920c16b537SWarner Losh XXH state must first be allocated, using XXH*_createState() .
1930c16b537SWarner Losh 
1940c16b537SWarner Losh Start a new hash by initializing state with a seed, using XXH*_reset().
1950c16b537SWarner Losh 
1960c16b537SWarner Losh Then, feed the hash state by calling XXH*_update() as many times as necessary.
1970c16b537SWarner Losh Obviously, input must be allocated and read accessible.
1980c16b537SWarner Losh The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
1990c16b537SWarner Losh 
2000c16b537SWarner Losh Finally, a hash value can be produced anytime, by using XXH*_digest().
2010c16b537SWarner Losh This function returns the nn-bits hash as an int or long long.
2020c16b537SWarner Losh 
2030c16b537SWarner Losh It's still possible to continue inserting input into the hash state after a digest,
2040c16b537SWarner Losh and generate some new hashes later on, by calling again XXH*_digest().
2050c16b537SWarner Losh 
2060c16b537SWarner Losh When done, free XXH state space if it was allocated dynamically.
2070c16b537SWarner Losh */
2080c16b537SWarner Losh 
2090c16b537SWarner Losh 
2100c16b537SWarner Losh /* **************************
2110c16b537SWarner Losh *  Utils
2120c16b537SWarner Losh ****************************/
2130c16b537SWarner Losh #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* ! C99 */
2140c16b537SWarner Losh #  define restrict   /* disable restrict */
2150c16b537SWarner Losh #endif
2160c16b537SWarner Losh 
2170c16b537SWarner Losh XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);
2180c16b537SWarner Losh XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);
2190c16b537SWarner Losh 
2200c16b537SWarner Losh 
2210c16b537SWarner Losh /* **************************
2220c16b537SWarner Losh *  Canonical representation
2230c16b537SWarner Losh ****************************/
2240c16b537SWarner Losh /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
2250c16b537SWarner Losh *  The canonical representation uses human-readable write convention, aka big-endian (large digits first).
2260c16b537SWarner Losh *  These functions allow transformation of hash result into and from its canonical format.
2270c16b537SWarner Losh *  This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
2280c16b537SWarner Losh */
2290c16b537SWarner Losh typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
2300c16b537SWarner Losh typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
2310c16b537SWarner Losh 
2320c16b537SWarner Losh XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
2330c16b537SWarner Losh XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
2340c16b537SWarner Losh 
2350c16b537SWarner Losh XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
2360c16b537SWarner Losh XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
2370c16b537SWarner Losh 
2380c16b537SWarner Losh #endif /* XXHASH_H_5627135585666179 */
2390c16b537SWarner Losh 
2400c16b537SWarner Losh 
2410c16b537SWarner Losh 
2420c16b537SWarner Losh /* ================================================================================================
2430c16b537SWarner Losh    This section contains definitions which are not guaranteed to remain stable.
2440c16b537SWarner Losh    They may change in future versions, becoming incompatible with a different version of the library.
2450c16b537SWarner Losh    They shall only be used with static linking.
2460c16b537SWarner Losh    Never use these definitions in association with dynamic linking !
2470c16b537SWarner Losh =================================================================================================== */
2480c16b537SWarner Losh #if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345)
2490c16b537SWarner Losh #define XXH_STATIC_H_3543687687345
2500c16b537SWarner Losh 
2510c16b537SWarner Losh /* These definitions are only meant to allow allocation of XXH state
2520c16b537SWarner Losh    statically, on stack, or in a struct for example.
2530c16b537SWarner Losh    Do not use members directly. */
2540c16b537SWarner Losh 
2550c16b537SWarner Losh    struct XXH32_state_s {
2560c16b537SWarner Losh        unsigned total_len_32;
2570c16b537SWarner Losh        unsigned large_len;
2580c16b537SWarner Losh        unsigned v1;
2590c16b537SWarner Losh        unsigned v2;
2600c16b537SWarner Losh        unsigned v3;
2610c16b537SWarner Losh        unsigned v4;
2620c16b537SWarner Losh        unsigned mem32[4];   /* buffer defined as U32 for alignment */
2630c16b537SWarner Losh        unsigned memsize;
2640c16b537SWarner Losh        unsigned reserved;   /* never read nor write, will be removed in a future version */
2650c16b537SWarner Losh    };   /* typedef'd to XXH32_state_t */
2660c16b537SWarner Losh 
2670c16b537SWarner Losh    struct XXH64_state_s {
2680c16b537SWarner Losh        unsigned long long total_len;
2690c16b537SWarner Losh        unsigned long long v1;
2700c16b537SWarner Losh        unsigned long long v2;
2710c16b537SWarner Losh        unsigned long long v3;
2720c16b537SWarner Losh        unsigned long long v4;
2730c16b537SWarner Losh        unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
2740c16b537SWarner Losh        unsigned memsize;
2750c16b537SWarner Losh        unsigned reserved[2];          /* never read nor write, will be removed in a future version */
2760c16b537SWarner Losh    };   /* typedef'd to XXH64_state_t */
2770c16b537SWarner Losh 
2780c16b537SWarner Losh 
2790c16b537SWarner Losh #  ifdef XXH_PRIVATE_API
2800c16b537SWarner Losh #    include "xxhash.c"   /* include xxhash functions as `static`, for inlining */
2810c16b537SWarner Losh #  endif
2820c16b537SWarner Losh 
2830c16b537SWarner Losh #endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */
2840c16b537SWarner Losh 
2850c16b537SWarner Losh 
2860c16b537SWarner Losh #if defined (__cplusplus)
2870c16b537SWarner Losh }
2880c16b537SWarner Losh #endif
289