xref: /freebsd/contrib/libucl/src/mum.h (revision a0409676120c1e558d0ade943019934e0f15118d)
16525738fSBaptiste Daroussin /* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>
26525738fSBaptiste Daroussin 
36525738fSBaptiste Daroussin    Permission is hereby granted, free of charge, to any person
46525738fSBaptiste Daroussin    obtaining a copy of this software and associated documentation
56525738fSBaptiste Daroussin    files (the "Software"), to deal in the Software without
66525738fSBaptiste Daroussin    restriction, including without limitation the rights to use, copy,
76525738fSBaptiste Daroussin    modify, merge, publish, distribute, sublicense, and/or sell copies
86525738fSBaptiste Daroussin    of the Software, and to permit persons to whom the Software is
96525738fSBaptiste Daroussin    furnished to do so, subject to the following conditions:
106525738fSBaptiste Daroussin 
116525738fSBaptiste Daroussin    The above copyright notice and this permission notice shall be
126525738fSBaptiste Daroussin    included in all copies or substantial portions of the Software.
136525738fSBaptiste Daroussin 
146525738fSBaptiste Daroussin    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
156525738fSBaptiste Daroussin    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
166525738fSBaptiste Daroussin    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
176525738fSBaptiste Daroussin    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
186525738fSBaptiste Daroussin    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
196525738fSBaptiste Daroussin    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
206525738fSBaptiste Daroussin    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
216525738fSBaptiste Daroussin    SOFTWARE.
226525738fSBaptiste Daroussin */
236525738fSBaptiste Daroussin 
246525738fSBaptiste Daroussin /* This file implements MUM (MUltiply and Mix) hashing.  We randomize
256525738fSBaptiste Daroussin    input data by 64x64-bit multiplication and mixing hi- and low-parts
266525738fSBaptiste Daroussin    of the multiplication result by using an addition and then mix it
276525738fSBaptiste Daroussin    into the current state.  We use prime numbers randomly generated
286525738fSBaptiste Daroussin    with the equal probability of their bit values for the
296525738fSBaptiste Daroussin    multiplication.  When all primes are used once, the state is
306525738fSBaptiste Daroussin    randomized and the same prime numbers are used again for data
316525738fSBaptiste Daroussin    randomization.
326525738fSBaptiste Daroussin 
336525738fSBaptiste Daroussin    The MUM hashing passes all SMHasher tests.  Pseudo Random Number
346525738fSBaptiste Daroussin    Generator based on MUM also passes NIST Statistical Test Suite for
356525738fSBaptiste Daroussin    Random and Pseudorandom Number Generators for Cryptographic
366525738fSBaptiste Daroussin    Applications (version 2.2.1) with 1000 bitstreams each containing
376525738fSBaptiste Daroussin    1M bits.  MUM hashing is also faster Spooky64 and City64 on small
386525738fSBaptiste Daroussin    strings (at least up to 512-bit) on Haswell and Power7.  The MUM bulk
396525738fSBaptiste Daroussin    speed (speed on very long data) is bigger than Spooky and City on
406525738fSBaptiste Daroussin    Power7.  On Haswell the bulk speed is bigger than Spooky one and
416525738fSBaptiste Daroussin    close to City speed.  */
426525738fSBaptiste Daroussin 
436525738fSBaptiste Daroussin #ifndef __MUM_HASH__
446525738fSBaptiste Daroussin #define __MUM_HASH__
456525738fSBaptiste Daroussin 
466525738fSBaptiste Daroussin #include <stddef.h>
476525738fSBaptiste Daroussin #include <stdlib.h>
486525738fSBaptiste Daroussin #include <string.h>
496525738fSBaptiste Daroussin #include <limits.h>
506525738fSBaptiste Daroussin 
516525738fSBaptiste Daroussin #ifdef _MSC_VER
526525738fSBaptiste Daroussin typedef unsigned __int16 uint16_t;
536525738fSBaptiste Daroussin typedef unsigned __int32 uint32_t;
546525738fSBaptiste Daroussin typedef unsigned __int64 uint64_t;
556525738fSBaptiste Daroussin #else
566525738fSBaptiste Daroussin #include <stdint.h>
576525738fSBaptiste Daroussin #endif
586525738fSBaptiste Daroussin 
596525738fSBaptiste Daroussin /* Macro saying to use 128-bit integers implemented by GCC for some
606525738fSBaptiste Daroussin    targets.  */
616525738fSBaptiste Daroussin #ifndef _MUM_USE_INT128
626525738fSBaptiste Daroussin /* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
636525738fSBaptiste Daroussin    HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
646525738fSBaptiste Daroussin    otherwise int. */
656525738fSBaptiste Daroussin #if defined(__GNUC__) && UINT_MAX != ULONG_MAX
666525738fSBaptiste Daroussin #define _MUM_USE_INT128 1
676525738fSBaptiste Daroussin #else
686525738fSBaptiste Daroussin #define _MUM_USE_INT128 0
696525738fSBaptiste Daroussin #endif
706525738fSBaptiste Daroussin #endif
716525738fSBaptiste Daroussin 
723933ba6bSBaptiste Daroussin #if 0
736525738fSBaptiste Daroussin #if defined(__GNUC__) && ((__GNUC__ == 4) &&  (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
746525738fSBaptiste Daroussin #define _MUM_FRESH_GCC
756525738fSBaptiste Daroussin #endif
763933ba6bSBaptiste Daroussin #endif
776525738fSBaptiste Daroussin 
78912517a7SBaptiste Daroussin #if defined(__GNUC__) && !defined(__llvm__) && defined(_MUM_FRESH_GCC)
796525738fSBaptiste Daroussin #define _MUM_ATTRIBUTE_UNUSED  __attribute__((unused))
806525738fSBaptiste Daroussin #define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
816525738fSBaptiste Daroussin #define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
826525738fSBaptiste Daroussin #else
836525738fSBaptiste Daroussin #define _MUM_ATTRIBUTE_UNUSED
846525738fSBaptiste Daroussin #define _MUM_OPTIMIZE(opts)
856525738fSBaptiste Daroussin #define _MUM_TARGET(opts)
866525738fSBaptiste Daroussin #endif
876525738fSBaptiste Daroussin 
886525738fSBaptiste Daroussin 
896525738fSBaptiste Daroussin /* Here are different primes randomly generated with the equal
906525738fSBaptiste Daroussin    probability of their bit values.  They are used to randomize input
916525738fSBaptiste Daroussin    values.  */
926525738fSBaptiste Daroussin static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
936525738fSBaptiste Daroussin static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
946525738fSBaptiste Daroussin static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
956525738fSBaptiste Daroussin static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
966525738fSBaptiste Daroussin static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
976525738fSBaptiste Daroussin static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
986525738fSBaptiste Daroussin static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
996525738fSBaptiste Daroussin 
1006525738fSBaptiste Daroussin static uint64_t _mum_primes [] = {
1016525738fSBaptiste Daroussin   0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
1026525738fSBaptiste Daroussin   0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
1036525738fSBaptiste Daroussin   0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f,
1046525738fSBaptiste Daroussin   0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9,
1056525738fSBaptiste Daroussin };
1066525738fSBaptiste Daroussin 
1076525738fSBaptiste Daroussin /* Multiply 64-bit V and P and return sum of high and low parts of the
1086525738fSBaptiste Daroussin    result.  */
1096525738fSBaptiste Daroussin static inline uint64_t
_mum(uint64_t v,uint64_t p)1106525738fSBaptiste Daroussin _mum (uint64_t v, uint64_t p) {
1116525738fSBaptiste Daroussin   uint64_t hi, lo;
1126525738fSBaptiste Daroussin #if _MUM_USE_INT128
1136525738fSBaptiste Daroussin #if defined(__aarch64__)
1146525738fSBaptiste Daroussin   /* AARCH64 needs 2 insns to calculate 128-bit result of the
1156525738fSBaptiste Daroussin      multiplication.  If we use a generic code we actually call a
1166525738fSBaptiste Daroussin      function doing 128x128->128 bit multiplication.  The function is
1176525738fSBaptiste Daroussin      very slow.  */
1186525738fSBaptiste Daroussin   lo = v * p, hi;
1196525738fSBaptiste Daroussin   asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
1206525738fSBaptiste Daroussin #else
1216525738fSBaptiste Daroussin   __uint128_t r = (__uint128_t) v * (__uint128_t) p;
1226525738fSBaptiste Daroussin   hi = (uint64_t) (r >> 64);
1236525738fSBaptiste Daroussin   lo = (uint64_t) r;
1246525738fSBaptiste Daroussin #endif
1256525738fSBaptiste Daroussin #else
1266525738fSBaptiste Daroussin   /* Implementation of 64x64->128-bit multiplication by four 32x32->64
1276525738fSBaptiste Daroussin      bit multiplication.  */
1286525738fSBaptiste Daroussin   uint64_t hv = v >> 32, hp = p >> 32;
1296525738fSBaptiste Daroussin   uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
1306525738fSBaptiste Daroussin   uint64_t rh =  hv * hp;
1316525738fSBaptiste Daroussin   uint64_t rm_0 = hv * lp;
1326525738fSBaptiste Daroussin   uint64_t rm_1 = hp * lv;
1336525738fSBaptiste Daroussin   uint64_t rl =  lv * lp;
1346525738fSBaptiste Daroussin   uint64_t t, carry = 0;
1356525738fSBaptiste Daroussin 
1366525738fSBaptiste Daroussin   /* We could ignore a carry bit here if we did not care about the
1376525738fSBaptiste Daroussin      same hash for 32-bit and 64-bit targets.  */
1386525738fSBaptiste Daroussin   t = rl + (rm_0 << 32);
1396525738fSBaptiste Daroussin #ifdef MUM_TARGET_INDEPENDENT_HASH
1406525738fSBaptiste Daroussin   carry = t < rl;
1416525738fSBaptiste Daroussin #endif
1426525738fSBaptiste Daroussin   lo = t + (rm_1 << 32);
1436525738fSBaptiste Daroussin #ifdef MUM_TARGET_INDEPENDENT_HASH
1446525738fSBaptiste Daroussin   carry += lo < t;
1456525738fSBaptiste Daroussin #endif
1466525738fSBaptiste Daroussin   hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
1476525738fSBaptiste Daroussin #endif
1486525738fSBaptiste Daroussin   /* We could use XOR here too but, for some reasons, on Haswell and
1496525738fSBaptiste Daroussin      Power7 using an addition improves hashing performance by 10% for
1506525738fSBaptiste Daroussin      small strings.  */
1516525738fSBaptiste Daroussin   return hi + lo;
1526525738fSBaptiste Daroussin }
1536525738fSBaptiste Daroussin 
1546525738fSBaptiste Daroussin #if defined(_MSC_VER)
1556525738fSBaptiste Daroussin #define _mum_bswap_32(x) _byteswap_uint32_t (x)
1566525738fSBaptiste Daroussin #define _mum_bswap_64(x) _byteswap_uint64_t (x)
1576525738fSBaptiste Daroussin #elif defined(__APPLE__)
1586525738fSBaptiste Daroussin #include <libkern/OSByteOrder.h>
1596525738fSBaptiste Daroussin #define _mum_bswap_32(x) OSSwapInt32 (x)
1606525738fSBaptiste Daroussin #define _mum_bswap_64(x) OSSwapInt64 (x)
1616525738fSBaptiste Daroussin #elif defined(__GNUC__)
1626525738fSBaptiste Daroussin #define _mum_bswap32(x) __builtin_bswap32 (x)
1636525738fSBaptiste Daroussin #define _mum_bswap64(x) __builtin_bswap64 (x)
1646525738fSBaptiste Daroussin #else
1656525738fSBaptiste Daroussin #include <byteswap.h>
1666525738fSBaptiste Daroussin #define _mum_bswap32(x) bswap32 (x)
1676525738fSBaptiste Daroussin #define _mum_bswap64(x) bswap64 (x)
1686525738fSBaptiste Daroussin #endif
1696525738fSBaptiste Daroussin 
1706525738fSBaptiste Daroussin static inline uint64_t
_mum_le(uint64_t v)1716525738fSBaptiste Daroussin _mum_le (uint64_t v) {
1726525738fSBaptiste Daroussin #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
1736525738fSBaptiste Daroussin   return v;
1746525738fSBaptiste Daroussin #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1756525738fSBaptiste Daroussin   return _mum_bswap64 (v);
1766525738fSBaptiste Daroussin #else
177*a0409676SBaptiste Daroussin #error "Unknown endianness"
1786525738fSBaptiste Daroussin #endif
1796525738fSBaptiste Daroussin }
1806525738fSBaptiste Daroussin 
1816525738fSBaptiste Daroussin static inline uint32_t
_mum_le32(uint32_t v)1826525738fSBaptiste Daroussin _mum_le32 (uint32_t v) {
1836525738fSBaptiste Daroussin #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
1846525738fSBaptiste Daroussin   return v;
1856525738fSBaptiste Daroussin #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1866525738fSBaptiste Daroussin   return _mum_bswap32 (v);
1876525738fSBaptiste Daroussin #else
188*a0409676SBaptiste Daroussin #error "Unknown endianness"
1896525738fSBaptiste Daroussin #endif
1906525738fSBaptiste Daroussin }
1916525738fSBaptiste Daroussin 
1926525738fSBaptiste Daroussin /* Macro defining how many times the most nested loop in
1936525738fSBaptiste Daroussin    _mum_hash_aligned will be unrolled by the compiler (although it can
1946525738fSBaptiste Daroussin    make an own decision:).  Use only a constant here to help a
1956525738fSBaptiste Daroussin    compiler to unroll a major loop.
1966525738fSBaptiste Daroussin 
1976525738fSBaptiste Daroussin    The macro value affects the result hash for strings > 128 bit.  The
1986525738fSBaptiste Daroussin    unroll factor greatly affects the hashing speed.  We prefer the
1996525738fSBaptiste Daroussin    speed.  */
2006525738fSBaptiste Daroussin #ifndef _MUM_UNROLL_FACTOR_POWER
2016525738fSBaptiste Daroussin #if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
2026525738fSBaptiste Daroussin #define _MUM_UNROLL_FACTOR_POWER 3
2036525738fSBaptiste Daroussin #elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
2046525738fSBaptiste Daroussin #define _MUM_UNROLL_FACTOR_POWER 4
2056525738fSBaptiste Daroussin #else
2066525738fSBaptiste Daroussin #define _MUM_UNROLL_FACTOR_POWER 2
2076525738fSBaptiste Daroussin #endif
2086525738fSBaptiste Daroussin #endif
2096525738fSBaptiste Daroussin 
2106525738fSBaptiste Daroussin #if _MUM_UNROLL_FACTOR_POWER < 1
2116525738fSBaptiste Daroussin #error "too small unroll factor"
2126525738fSBaptiste Daroussin #elif _MUM_UNROLL_FACTOR_POWER > 4
2136525738fSBaptiste Daroussin #error "We have not enough primes for such unroll factor"
2146525738fSBaptiste Daroussin #endif
2156525738fSBaptiste Daroussin 
2166525738fSBaptiste Daroussin #define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
2176525738fSBaptiste Daroussin 
2186525738fSBaptiste Daroussin static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
_mum_hash_aligned(uint64_t start,const void * key,size_t len)2196525738fSBaptiste Daroussin _mum_hash_aligned (uint64_t start, const void *key, size_t len) {
2206525738fSBaptiste Daroussin   uint64_t result = start;
2216525738fSBaptiste Daroussin   const unsigned char *str = (const unsigned char *) key;
2226525738fSBaptiste Daroussin   uint64_t u64;
2236525738fSBaptiste Daroussin   int i;
2246525738fSBaptiste Daroussin   size_t n;
2256525738fSBaptiste Daroussin 
2266525738fSBaptiste Daroussin   result = _mum (result, _mum_block_start_prime);
2276525738fSBaptiste Daroussin   while  (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
2286525738fSBaptiste Daroussin     /* This loop could be vectorized when we have vector insns for
2296525738fSBaptiste Daroussin        64x64->128-bit multiplication.  AVX2 currently only have a
2306525738fSBaptiste Daroussin        vector insn for 4 32x32->64-bit multiplication.  */
2316525738fSBaptiste Daroussin     for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
2326525738fSBaptiste Daroussin       result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
2336525738fSBaptiste Daroussin     len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
2346525738fSBaptiste Daroussin     str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
2356525738fSBaptiste Daroussin     /* We will use the same prime numbers on the next iterations --
2366525738fSBaptiste Daroussin        randomize the state.  */
2376525738fSBaptiste Daroussin     result = _mum (result, _mum_unroll_prime);
2386525738fSBaptiste Daroussin   }
2396525738fSBaptiste Daroussin   n = len / sizeof (uint64_t);
2406525738fSBaptiste Daroussin   for (i = 0; i < (int)n; i++)
2416525738fSBaptiste Daroussin     result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
2426525738fSBaptiste Daroussin   len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
2436525738fSBaptiste Daroussin   switch (len) {
2446525738fSBaptiste Daroussin   case 7:
2456525738fSBaptiste Daroussin     u64 = _mum_le32 (*(uint32_t *) str);
2466525738fSBaptiste Daroussin     u64 |= (uint64_t) str[4] << 32;
2476525738fSBaptiste Daroussin     u64 |= (uint64_t) str[5] << 40;
2486525738fSBaptiste Daroussin     u64 |= (uint64_t) str[6] << 48;
2496525738fSBaptiste Daroussin     return result ^ _mum (u64, _mum_tail_prime);
2506525738fSBaptiste Daroussin   case 6:
2516525738fSBaptiste Daroussin     u64 = _mum_le32 (*(uint32_t *) str);
2526525738fSBaptiste Daroussin     u64 |= (uint64_t) str[4] << 32;
2536525738fSBaptiste Daroussin     u64 |= (uint64_t) str[5] << 40;
2546525738fSBaptiste Daroussin     return result ^ _mum (u64, _mum_tail_prime);
2556525738fSBaptiste Daroussin   case 5:
2566525738fSBaptiste Daroussin     u64 = _mum_le32 (*(uint32_t *) str);
2576525738fSBaptiste Daroussin     u64 |= (uint64_t) str[4] << 32;
2586525738fSBaptiste Daroussin     return result ^ _mum (u64, _mum_tail_prime);
2596525738fSBaptiste Daroussin   case 4:
2606525738fSBaptiste Daroussin     u64 = _mum_le32 (*(uint32_t *) str);
2616525738fSBaptiste Daroussin     return result ^ _mum (u64, _mum_tail_prime);
2626525738fSBaptiste Daroussin   case 3:
2636525738fSBaptiste Daroussin     u64 = str[0];
2646525738fSBaptiste Daroussin     u64 |= (uint64_t) str[1] << 8;
2656525738fSBaptiste Daroussin     u64 |= (uint64_t) str[2] << 16;
2666525738fSBaptiste Daroussin     return result ^ _mum (u64, _mum_tail_prime);
2676525738fSBaptiste Daroussin   case 2:
2686525738fSBaptiste Daroussin     u64 = str[0];
2696525738fSBaptiste Daroussin     u64 |= (uint64_t) str[1] << 8;
2706525738fSBaptiste Daroussin     return result ^ _mum (u64, _mum_tail_prime);
2716525738fSBaptiste Daroussin   case 1:
2726525738fSBaptiste Daroussin     u64 = str[0];
2736525738fSBaptiste Daroussin     return result ^ _mum (u64, _mum_tail_prime);
2746525738fSBaptiste Daroussin   }
2756525738fSBaptiste Daroussin   return result;
2766525738fSBaptiste Daroussin }
2776525738fSBaptiste Daroussin 
2786525738fSBaptiste Daroussin /* Final randomization of H.  */
2796525738fSBaptiste Daroussin static inline uint64_t
_mum_final(uint64_t h)2806525738fSBaptiste Daroussin _mum_final (uint64_t h) {
2816525738fSBaptiste Daroussin   h ^= _mum (h, _mum_finish_prime1);
2826525738fSBaptiste Daroussin   h ^= _mum (h, _mum_finish_prime2);
2836525738fSBaptiste Daroussin   return h;
2846525738fSBaptiste Daroussin }
2856525738fSBaptiste Daroussin 
2866525738fSBaptiste Daroussin #if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
2876525738fSBaptiste Daroussin 
2886525738fSBaptiste Daroussin /* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
2896525738fSBaptiste Daroussin    it is possible.  Although on modern Intel processors MULQ takes
2906525738fSBaptiste Daroussin    3-cycles vs. 4 for MULX, MULX permits more freedom in insn
2916525738fSBaptiste Daroussin    scheduling as it uses less fixed registers.  */
2926525738fSBaptiste Daroussin static inline uint64_t _MUM_TARGET("arch=haswell")
_mum_hash_avx2(const void * key,size_t len,uint64_t seed)2936525738fSBaptiste Daroussin _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
2946525738fSBaptiste Daroussin   return _mum_final (_mum_hash_aligned (seed + len, key, len));
2956525738fSBaptiste Daroussin }
2966525738fSBaptiste Daroussin #endif
2976525738fSBaptiste Daroussin 
2986525738fSBaptiste Daroussin #ifndef _MUM_UNALIGNED_ACCESS
2996525738fSBaptiste Daroussin #if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
3006525738fSBaptiste Daroussin     || defined(__s390__) || defined(__m32c__) || defined(cris)     \
3016525738fSBaptiste Daroussin     || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
3026525738fSBaptiste Daroussin     || defined(__aarch64__)
3036525738fSBaptiste Daroussin #define _MUM_UNALIGNED_ACCESS 1
3046525738fSBaptiste Daroussin #else
3056525738fSBaptiste Daroussin #define _MUM_UNALIGNED_ACCESS 0
3066525738fSBaptiste Daroussin #endif
3076525738fSBaptiste Daroussin #endif
3086525738fSBaptiste Daroussin 
3096525738fSBaptiste Daroussin /* When we need an aligned access to data being hashed we move part of
3106525738fSBaptiste Daroussin    the unaligned data to an aligned block of given size and then
3116525738fSBaptiste Daroussin    process it, repeating processing the data by the block.  */
3126525738fSBaptiste Daroussin #ifndef _MUM_BLOCK_LEN
3136525738fSBaptiste Daroussin #define _MUM_BLOCK_LEN 1024
3146525738fSBaptiste Daroussin #endif
3156525738fSBaptiste Daroussin 
3166525738fSBaptiste Daroussin #if _MUM_BLOCK_LEN < 8
3176525738fSBaptiste Daroussin #error "too small block length"
3186525738fSBaptiste Daroussin #endif
3196525738fSBaptiste Daroussin 
3206525738fSBaptiste Daroussin static inline uint64_t
3216525738fSBaptiste Daroussin #if defined(__x86_64__)
3226525738fSBaptiste Daroussin _MUM_TARGET("inline-all-stringops")
3236525738fSBaptiste Daroussin #endif
_mum_hash_default(const void * key,size_t len,uint64_t seed)3246525738fSBaptiste Daroussin _mum_hash_default (const void *key, size_t len, uint64_t seed) {
3256525738fSBaptiste Daroussin   uint64_t result;
3266525738fSBaptiste Daroussin   const unsigned char *str = (const unsigned char *) key;
3276525738fSBaptiste Daroussin   size_t block_len;
3286525738fSBaptiste Daroussin   uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
3296525738fSBaptiste Daroussin 
3306525738fSBaptiste Daroussin   result = seed + len;
3316525738fSBaptiste Daroussin   if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
3326525738fSBaptiste Daroussin     result = _mum_hash_aligned (result, key, len);
3336525738fSBaptiste Daroussin   else {
3346525738fSBaptiste Daroussin     while (len != 0) {
3356525738fSBaptiste Daroussin       block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
3366525738fSBaptiste Daroussin       memmove (buf, str, block_len);
3376525738fSBaptiste Daroussin       result = _mum_hash_aligned (result, buf, block_len);
3386525738fSBaptiste Daroussin       len -= block_len;
3396525738fSBaptiste Daroussin       str += block_len;
3406525738fSBaptiste Daroussin     }
3416525738fSBaptiste Daroussin   }
3426525738fSBaptiste Daroussin   return _mum_final (result);
3436525738fSBaptiste Daroussin }
3446525738fSBaptiste Daroussin 
3456525738fSBaptiste Daroussin static inline uint64_t
_mum_next_factor(void)3466525738fSBaptiste Daroussin _mum_next_factor (void) {
3476525738fSBaptiste Daroussin   uint64_t start = 0;
3486525738fSBaptiste Daroussin   int i;
3496525738fSBaptiste Daroussin 
3506525738fSBaptiste Daroussin   for (i = 0; i < 8; i++)
3516525738fSBaptiste Daroussin     start = (start << 8) | rand() % 256;
3526525738fSBaptiste Daroussin   return start;
3536525738fSBaptiste Daroussin }
3546525738fSBaptiste Daroussin 
3556525738fSBaptiste Daroussin /* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++  */
3566525738fSBaptiste Daroussin 
3576525738fSBaptiste Daroussin /* Set random multiplicators depending on SEED.  */
3586525738fSBaptiste Daroussin static inline void
mum_hash_randomize(uint64_t seed)3596525738fSBaptiste Daroussin mum_hash_randomize (uint64_t seed) {
3606525738fSBaptiste Daroussin   int i;
3616525738fSBaptiste Daroussin 
3626525738fSBaptiste Daroussin   srand (seed);
3636525738fSBaptiste Daroussin   _mum_hash_step_prime = _mum_next_factor ();
3646525738fSBaptiste Daroussin   _mum_key_step_prime = _mum_next_factor ();
3656525738fSBaptiste Daroussin   _mum_finish_prime1 = _mum_next_factor ();
3666525738fSBaptiste Daroussin   _mum_finish_prime2 = _mum_next_factor ();
3676525738fSBaptiste Daroussin   _mum_block_start_prime = _mum_next_factor ();
3686525738fSBaptiste Daroussin   _mum_unroll_prime = _mum_next_factor ();
3696525738fSBaptiste Daroussin   _mum_tail_prime = _mum_next_factor ();
3706525738fSBaptiste Daroussin   for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
3716525738fSBaptiste Daroussin     _mum_primes[i] = _mum_next_factor ();
3726525738fSBaptiste Daroussin }
3736525738fSBaptiste Daroussin 
3746525738fSBaptiste Daroussin /* Start hashing data with SEED.  Return the state.  */
3756525738fSBaptiste Daroussin static inline uint64_t
mum_hash_init(uint64_t seed)3766525738fSBaptiste Daroussin mum_hash_init (uint64_t seed) {
3776525738fSBaptiste Daroussin   return seed;
3786525738fSBaptiste Daroussin }
3796525738fSBaptiste Daroussin 
3806525738fSBaptiste Daroussin /* Process data KEY with the state H and return the updated state.  */
3816525738fSBaptiste Daroussin static inline uint64_t
mum_hash_step(uint64_t h,uint64_t key)3826525738fSBaptiste Daroussin mum_hash_step (uint64_t h, uint64_t key)
3836525738fSBaptiste Daroussin {
3846525738fSBaptiste Daroussin   return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
3856525738fSBaptiste Daroussin }
3866525738fSBaptiste Daroussin 
3876525738fSBaptiste Daroussin /* Return the result of hashing using the current state H.  */
3886525738fSBaptiste Daroussin static inline uint64_t
mum_hash_finish(uint64_t h)3896525738fSBaptiste Daroussin mum_hash_finish (uint64_t h) {
3906525738fSBaptiste Daroussin   return _mum_final (h);
3916525738fSBaptiste Daroussin }
3926525738fSBaptiste Daroussin 
3936525738fSBaptiste Daroussin /* Fast hashing of KEY with SEED.  The hash is always the same for the
3946525738fSBaptiste Daroussin    same key on any target. */
3956525738fSBaptiste Daroussin static inline size_t
mum_hash64(uint64_t key,uint64_t seed)3966525738fSBaptiste Daroussin mum_hash64 (uint64_t key, uint64_t seed) {
3976525738fSBaptiste Daroussin   return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
3986525738fSBaptiste Daroussin }
3996525738fSBaptiste Daroussin 
4006525738fSBaptiste Daroussin /* Hash data KEY of length LEN and SEED.  The hash depends on the
401*a0409676SBaptiste Daroussin    target endianness and the unroll factor.  */
4026525738fSBaptiste Daroussin static inline uint64_t
mum_hash(const void * key,size_t len,uint64_t seed)4036525738fSBaptiste Daroussin mum_hash (const void *key, size_t len, uint64_t seed) {
4046525738fSBaptiste Daroussin #if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
4056525738fSBaptiste Daroussin   static int avx2_support = 0;
4066525738fSBaptiste Daroussin 
4076525738fSBaptiste Daroussin   if (avx2_support > 0)
4086525738fSBaptiste Daroussin     return _mum_hash_avx2 (key, len, seed);
4096525738fSBaptiste Daroussin   else if (! avx2_support) {
4106525738fSBaptiste Daroussin     __builtin_cpu_init ();
4116525738fSBaptiste Daroussin     avx2_support =  __builtin_cpu_supports ("avx2") ? 1 : -1;
4126525738fSBaptiste Daroussin     if (avx2_support > 0)
4136525738fSBaptiste Daroussin       return _mum_hash_avx2 (key, len, seed);
4146525738fSBaptiste Daroussin   }
4156525738fSBaptiste Daroussin #endif
4166525738fSBaptiste Daroussin   return _mum_hash_default (key, len, seed);
4176525738fSBaptiste Daroussin }
4186525738fSBaptiste Daroussin 
4196525738fSBaptiste Daroussin #endif
420