16525738fSBaptiste Daroussin /* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>
26525738fSBaptiste Daroussin
36525738fSBaptiste Daroussin Permission is hereby granted, free of charge, to any person
46525738fSBaptiste Daroussin obtaining a copy of this software and associated documentation
56525738fSBaptiste Daroussin files (the "Software"), to deal in the Software without
66525738fSBaptiste Daroussin restriction, including without limitation the rights to use, copy,
76525738fSBaptiste Daroussin modify, merge, publish, distribute, sublicense, and/or sell copies
86525738fSBaptiste Daroussin of the Software, and to permit persons to whom the Software is
96525738fSBaptiste Daroussin furnished to do so, subject to the following conditions:
106525738fSBaptiste Daroussin
116525738fSBaptiste Daroussin The above copyright notice and this permission notice shall be
126525738fSBaptiste Daroussin included in all copies or substantial portions of the Software.
136525738fSBaptiste Daroussin
146525738fSBaptiste Daroussin THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
156525738fSBaptiste Daroussin EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
166525738fSBaptiste Daroussin MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
176525738fSBaptiste Daroussin NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
186525738fSBaptiste Daroussin BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
196525738fSBaptiste Daroussin ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
206525738fSBaptiste Daroussin CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
216525738fSBaptiste Daroussin SOFTWARE.
226525738fSBaptiste Daroussin */
236525738fSBaptiste Daroussin
246525738fSBaptiste Daroussin /* This file implements MUM (MUltiply and Mix) hashing. We randomize
256525738fSBaptiste Daroussin input data by 64x64-bit multiplication and mixing hi- and low-parts
266525738fSBaptiste Daroussin of the multiplication result by using an addition and then mix it
276525738fSBaptiste Daroussin into the current state. We use prime numbers randomly generated
286525738fSBaptiste Daroussin with the equal probability of their bit values for the
296525738fSBaptiste Daroussin multiplication. When all primes are used once, the state is
306525738fSBaptiste Daroussin randomized and the same prime numbers are used again for data
316525738fSBaptiste Daroussin randomization.
326525738fSBaptiste Daroussin
336525738fSBaptiste Daroussin The MUM hashing passes all SMHasher tests. Pseudo Random Number
346525738fSBaptiste Daroussin Generator based on MUM also passes NIST Statistical Test Suite for
356525738fSBaptiste Daroussin Random and Pseudorandom Number Generators for Cryptographic
366525738fSBaptiste Daroussin Applications (version 2.2.1) with 1000 bitstreams each containing
376525738fSBaptiste Daroussin 1M bits. MUM hashing is also faster Spooky64 and City64 on small
386525738fSBaptiste Daroussin strings (at least up to 512-bit) on Haswell and Power7. The MUM bulk
396525738fSBaptiste Daroussin speed (speed on very long data) is bigger than Spooky and City on
406525738fSBaptiste Daroussin Power7. On Haswell the bulk speed is bigger than Spooky one and
416525738fSBaptiste Daroussin close to City speed. */
426525738fSBaptiste Daroussin
436525738fSBaptiste Daroussin #ifndef __MUM_HASH__
446525738fSBaptiste Daroussin #define __MUM_HASH__
456525738fSBaptiste Daroussin
466525738fSBaptiste Daroussin #include <stddef.h>
476525738fSBaptiste Daroussin #include <stdlib.h>
486525738fSBaptiste Daroussin #include <string.h>
496525738fSBaptiste Daroussin #include <limits.h>
506525738fSBaptiste Daroussin
516525738fSBaptiste Daroussin #ifdef _MSC_VER
526525738fSBaptiste Daroussin typedef unsigned __int16 uint16_t;
536525738fSBaptiste Daroussin typedef unsigned __int32 uint32_t;
546525738fSBaptiste Daroussin typedef unsigned __int64 uint64_t;
556525738fSBaptiste Daroussin #else
566525738fSBaptiste Daroussin #include <stdint.h>
576525738fSBaptiste Daroussin #endif
586525738fSBaptiste Daroussin
596525738fSBaptiste Daroussin /* Macro saying to use 128-bit integers implemented by GCC for some
606525738fSBaptiste Daroussin targets. */
616525738fSBaptiste Daroussin #ifndef _MUM_USE_INT128
626525738fSBaptiste Daroussin /* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
636525738fSBaptiste Daroussin HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
646525738fSBaptiste Daroussin otherwise int. */
656525738fSBaptiste Daroussin #if defined(__GNUC__) && UINT_MAX != ULONG_MAX
666525738fSBaptiste Daroussin #define _MUM_USE_INT128 1
676525738fSBaptiste Daroussin #else
686525738fSBaptiste Daroussin #define _MUM_USE_INT128 0
696525738fSBaptiste Daroussin #endif
706525738fSBaptiste Daroussin #endif
716525738fSBaptiste Daroussin
723933ba6bSBaptiste Daroussin #if 0
736525738fSBaptiste Daroussin #if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
746525738fSBaptiste Daroussin #define _MUM_FRESH_GCC
756525738fSBaptiste Daroussin #endif
763933ba6bSBaptiste Daroussin #endif
776525738fSBaptiste Daroussin
78912517a7SBaptiste Daroussin #if defined(__GNUC__) && !defined(__llvm__) && defined(_MUM_FRESH_GCC)
796525738fSBaptiste Daroussin #define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
806525738fSBaptiste Daroussin #define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
816525738fSBaptiste Daroussin #define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
826525738fSBaptiste Daroussin #else
836525738fSBaptiste Daroussin #define _MUM_ATTRIBUTE_UNUSED
846525738fSBaptiste Daroussin #define _MUM_OPTIMIZE(opts)
856525738fSBaptiste Daroussin #define _MUM_TARGET(opts)
866525738fSBaptiste Daroussin #endif
876525738fSBaptiste Daroussin
886525738fSBaptiste Daroussin
896525738fSBaptiste Daroussin /* Here are different primes randomly generated with the equal
906525738fSBaptiste Daroussin probability of their bit values. They are used to randomize input
916525738fSBaptiste Daroussin values. */
926525738fSBaptiste Daroussin static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
936525738fSBaptiste Daroussin static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
946525738fSBaptiste Daroussin static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
956525738fSBaptiste Daroussin static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
966525738fSBaptiste Daroussin static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
976525738fSBaptiste Daroussin static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
986525738fSBaptiste Daroussin static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
996525738fSBaptiste Daroussin
1006525738fSBaptiste Daroussin static uint64_t _mum_primes [] = {
1016525738fSBaptiste Daroussin 0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
1026525738fSBaptiste Daroussin 0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
1036525738fSBaptiste Daroussin 0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f,
1046525738fSBaptiste Daroussin 0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9,
1056525738fSBaptiste Daroussin };
1066525738fSBaptiste Daroussin
1076525738fSBaptiste Daroussin /* Multiply 64-bit V and P and return sum of high and low parts of the
1086525738fSBaptiste Daroussin result. */
1096525738fSBaptiste Daroussin static inline uint64_t
_mum(uint64_t v,uint64_t p)1106525738fSBaptiste Daroussin _mum (uint64_t v, uint64_t p) {
1116525738fSBaptiste Daroussin uint64_t hi, lo;
1126525738fSBaptiste Daroussin #if _MUM_USE_INT128
1136525738fSBaptiste Daroussin #if defined(__aarch64__)
1146525738fSBaptiste Daroussin /* AARCH64 needs 2 insns to calculate 128-bit result of the
1156525738fSBaptiste Daroussin multiplication. If we use a generic code we actually call a
1166525738fSBaptiste Daroussin function doing 128x128->128 bit multiplication. The function is
1176525738fSBaptiste Daroussin very slow. */
1186525738fSBaptiste Daroussin lo = v * p, hi;
1196525738fSBaptiste Daroussin asm ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
1206525738fSBaptiste Daroussin #else
1216525738fSBaptiste Daroussin __uint128_t r = (__uint128_t) v * (__uint128_t) p;
1226525738fSBaptiste Daroussin hi = (uint64_t) (r >> 64);
1236525738fSBaptiste Daroussin lo = (uint64_t) r;
1246525738fSBaptiste Daroussin #endif
1256525738fSBaptiste Daroussin #else
1266525738fSBaptiste Daroussin /* Implementation of 64x64->128-bit multiplication by four 32x32->64
1276525738fSBaptiste Daroussin bit multiplication. */
1286525738fSBaptiste Daroussin uint64_t hv = v >> 32, hp = p >> 32;
1296525738fSBaptiste Daroussin uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
1306525738fSBaptiste Daroussin uint64_t rh = hv * hp;
1316525738fSBaptiste Daroussin uint64_t rm_0 = hv * lp;
1326525738fSBaptiste Daroussin uint64_t rm_1 = hp * lv;
1336525738fSBaptiste Daroussin uint64_t rl = lv * lp;
1346525738fSBaptiste Daroussin uint64_t t, carry = 0;
1356525738fSBaptiste Daroussin
1366525738fSBaptiste Daroussin /* We could ignore a carry bit here if we did not care about the
1376525738fSBaptiste Daroussin same hash for 32-bit and 64-bit targets. */
1386525738fSBaptiste Daroussin t = rl + (rm_0 << 32);
1396525738fSBaptiste Daroussin #ifdef MUM_TARGET_INDEPENDENT_HASH
1406525738fSBaptiste Daroussin carry = t < rl;
1416525738fSBaptiste Daroussin #endif
1426525738fSBaptiste Daroussin lo = t + (rm_1 << 32);
1436525738fSBaptiste Daroussin #ifdef MUM_TARGET_INDEPENDENT_HASH
1446525738fSBaptiste Daroussin carry += lo < t;
1456525738fSBaptiste Daroussin #endif
1466525738fSBaptiste Daroussin hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
1476525738fSBaptiste Daroussin #endif
1486525738fSBaptiste Daroussin /* We could use XOR here too but, for some reasons, on Haswell and
1496525738fSBaptiste Daroussin Power7 using an addition improves hashing performance by 10% for
1506525738fSBaptiste Daroussin small strings. */
1516525738fSBaptiste Daroussin return hi + lo;
1526525738fSBaptiste Daroussin }
1536525738fSBaptiste Daroussin
1546525738fSBaptiste Daroussin #if defined(_MSC_VER)
1556525738fSBaptiste Daroussin #define _mum_bswap_32(x) _byteswap_uint32_t (x)
1566525738fSBaptiste Daroussin #define _mum_bswap_64(x) _byteswap_uint64_t (x)
1576525738fSBaptiste Daroussin #elif defined(__APPLE__)
1586525738fSBaptiste Daroussin #include <libkern/OSByteOrder.h>
1596525738fSBaptiste Daroussin #define _mum_bswap_32(x) OSSwapInt32 (x)
1606525738fSBaptiste Daroussin #define _mum_bswap_64(x) OSSwapInt64 (x)
1616525738fSBaptiste Daroussin #elif defined(__GNUC__)
1626525738fSBaptiste Daroussin #define _mum_bswap32(x) __builtin_bswap32 (x)
1636525738fSBaptiste Daroussin #define _mum_bswap64(x) __builtin_bswap64 (x)
1646525738fSBaptiste Daroussin #else
1656525738fSBaptiste Daroussin #include <byteswap.h>
1666525738fSBaptiste Daroussin #define _mum_bswap32(x) bswap32 (x)
1676525738fSBaptiste Daroussin #define _mum_bswap64(x) bswap64 (x)
1686525738fSBaptiste Daroussin #endif
1696525738fSBaptiste Daroussin
1706525738fSBaptiste Daroussin static inline uint64_t
_mum_le(uint64_t v)1716525738fSBaptiste Daroussin _mum_le (uint64_t v) {
1726525738fSBaptiste Daroussin #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
1736525738fSBaptiste Daroussin return v;
1746525738fSBaptiste Daroussin #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1756525738fSBaptiste Daroussin return _mum_bswap64 (v);
1766525738fSBaptiste Daroussin #else
177*a0409676SBaptiste Daroussin #error "Unknown endianness"
1786525738fSBaptiste Daroussin #endif
1796525738fSBaptiste Daroussin }
1806525738fSBaptiste Daroussin
1816525738fSBaptiste Daroussin static inline uint32_t
_mum_le32(uint32_t v)1826525738fSBaptiste Daroussin _mum_le32 (uint32_t v) {
1836525738fSBaptiste Daroussin #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
1846525738fSBaptiste Daroussin return v;
1856525738fSBaptiste Daroussin #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1866525738fSBaptiste Daroussin return _mum_bswap32 (v);
1876525738fSBaptiste Daroussin #else
188*a0409676SBaptiste Daroussin #error "Unknown endianness"
1896525738fSBaptiste Daroussin #endif
1906525738fSBaptiste Daroussin }
1916525738fSBaptiste Daroussin
1926525738fSBaptiste Daroussin /* Macro defining how many times the most nested loop in
1936525738fSBaptiste Daroussin _mum_hash_aligned will be unrolled by the compiler (although it can
1946525738fSBaptiste Daroussin make an own decision:). Use only a constant here to help a
1956525738fSBaptiste Daroussin compiler to unroll a major loop.
1966525738fSBaptiste Daroussin
1976525738fSBaptiste Daroussin The macro value affects the result hash for strings > 128 bit. The
1986525738fSBaptiste Daroussin unroll factor greatly affects the hashing speed. We prefer the
1996525738fSBaptiste Daroussin speed. */
2006525738fSBaptiste Daroussin #ifndef _MUM_UNROLL_FACTOR_POWER
2016525738fSBaptiste Daroussin #if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
2026525738fSBaptiste Daroussin #define _MUM_UNROLL_FACTOR_POWER 3
2036525738fSBaptiste Daroussin #elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
2046525738fSBaptiste Daroussin #define _MUM_UNROLL_FACTOR_POWER 4
2056525738fSBaptiste Daroussin #else
2066525738fSBaptiste Daroussin #define _MUM_UNROLL_FACTOR_POWER 2
2076525738fSBaptiste Daroussin #endif
2086525738fSBaptiste Daroussin #endif
2096525738fSBaptiste Daroussin
2106525738fSBaptiste Daroussin #if _MUM_UNROLL_FACTOR_POWER < 1
2116525738fSBaptiste Daroussin #error "too small unroll factor"
2126525738fSBaptiste Daroussin #elif _MUM_UNROLL_FACTOR_POWER > 4
2136525738fSBaptiste Daroussin #error "We have not enough primes for such unroll factor"
2146525738fSBaptiste Daroussin #endif
2156525738fSBaptiste Daroussin
2166525738fSBaptiste Daroussin #define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
2176525738fSBaptiste Daroussin
2186525738fSBaptiste Daroussin static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
_mum_hash_aligned(uint64_t start,const void * key,size_t len)2196525738fSBaptiste Daroussin _mum_hash_aligned (uint64_t start, const void *key, size_t len) {
2206525738fSBaptiste Daroussin uint64_t result = start;
2216525738fSBaptiste Daroussin const unsigned char *str = (const unsigned char *) key;
2226525738fSBaptiste Daroussin uint64_t u64;
2236525738fSBaptiste Daroussin int i;
2246525738fSBaptiste Daroussin size_t n;
2256525738fSBaptiste Daroussin
2266525738fSBaptiste Daroussin result = _mum (result, _mum_block_start_prime);
2276525738fSBaptiste Daroussin while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
2286525738fSBaptiste Daroussin /* This loop could be vectorized when we have vector insns for
2296525738fSBaptiste Daroussin 64x64->128-bit multiplication. AVX2 currently only have a
2306525738fSBaptiste Daroussin vector insn for 4 32x32->64-bit multiplication. */
2316525738fSBaptiste Daroussin for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
2326525738fSBaptiste Daroussin result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
2336525738fSBaptiste Daroussin len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
2346525738fSBaptiste Daroussin str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
2356525738fSBaptiste Daroussin /* We will use the same prime numbers on the next iterations --
2366525738fSBaptiste Daroussin randomize the state. */
2376525738fSBaptiste Daroussin result = _mum (result, _mum_unroll_prime);
2386525738fSBaptiste Daroussin }
2396525738fSBaptiste Daroussin n = len / sizeof (uint64_t);
2406525738fSBaptiste Daroussin for (i = 0; i < (int)n; i++)
2416525738fSBaptiste Daroussin result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
2426525738fSBaptiste Daroussin len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
2436525738fSBaptiste Daroussin switch (len) {
2446525738fSBaptiste Daroussin case 7:
2456525738fSBaptiste Daroussin u64 = _mum_le32 (*(uint32_t *) str);
2466525738fSBaptiste Daroussin u64 |= (uint64_t) str[4] << 32;
2476525738fSBaptiste Daroussin u64 |= (uint64_t) str[5] << 40;
2486525738fSBaptiste Daroussin u64 |= (uint64_t) str[6] << 48;
2496525738fSBaptiste Daroussin return result ^ _mum (u64, _mum_tail_prime);
2506525738fSBaptiste Daroussin case 6:
2516525738fSBaptiste Daroussin u64 = _mum_le32 (*(uint32_t *) str);
2526525738fSBaptiste Daroussin u64 |= (uint64_t) str[4] << 32;
2536525738fSBaptiste Daroussin u64 |= (uint64_t) str[5] << 40;
2546525738fSBaptiste Daroussin return result ^ _mum (u64, _mum_tail_prime);
2556525738fSBaptiste Daroussin case 5:
2566525738fSBaptiste Daroussin u64 = _mum_le32 (*(uint32_t *) str);
2576525738fSBaptiste Daroussin u64 |= (uint64_t) str[4] << 32;
2586525738fSBaptiste Daroussin return result ^ _mum (u64, _mum_tail_prime);
2596525738fSBaptiste Daroussin case 4:
2606525738fSBaptiste Daroussin u64 = _mum_le32 (*(uint32_t *) str);
2616525738fSBaptiste Daroussin return result ^ _mum (u64, _mum_tail_prime);
2626525738fSBaptiste Daroussin case 3:
2636525738fSBaptiste Daroussin u64 = str[0];
2646525738fSBaptiste Daroussin u64 |= (uint64_t) str[1] << 8;
2656525738fSBaptiste Daroussin u64 |= (uint64_t) str[2] << 16;
2666525738fSBaptiste Daroussin return result ^ _mum (u64, _mum_tail_prime);
2676525738fSBaptiste Daroussin case 2:
2686525738fSBaptiste Daroussin u64 = str[0];
2696525738fSBaptiste Daroussin u64 |= (uint64_t) str[1] << 8;
2706525738fSBaptiste Daroussin return result ^ _mum (u64, _mum_tail_prime);
2716525738fSBaptiste Daroussin case 1:
2726525738fSBaptiste Daroussin u64 = str[0];
2736525738fSBaptiste Daroussin return result ^ _mum (u64, _mum_tail_prime);
2746525738fSBaptiste Daroussin }
2756525738fSBaptiste Daroussin return result;
2766525738fSBaptiste Daroussin }
2776525738fSBaptiste Daroussin
2786525738fSBaptiste Daroussin /* Final randomization of H. */
2796525738fSBaptiste Daroussin static inline uint64_t
_mum_final(uint64_t h)2806525738fSBaptiste Daroussin _mum_final (uint64_t h) {
2816525738fSBaptiste Daroussin h ^= _mum (h, _mum_finish_prime1);
2826525738fSBaptiste Daroussin h ^= _mum (h, _mum_finish_prime2);
2836525738fSBaptiste Daroussin return h;
2846525738fSBaptiste Daroussin }
2856525738fSBaptiste Daroussin
2866525738fSBaptiste Daroussin #if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
2876525738fSBaptiste Daroussin
2886525738fSBaptiste Daroussin /* We want to use AVX2 insn MULX instead of generic x86-64 MULQ where
2896525738fSBaptiste Daroussin it is possible. Although on modern Intel processors MULQ takes
2906525738fSBaptiste Daroussin 3-cycles vs. 4 for MULX, MULX permits more freedom in insn
2916525738fSBaptiste Daroussin scheduling as it uses less fixed registers. */
2926525738fSBaptiste Daroussin static inline uint64_t _MUM_TARGET("arch=haswell")
_mum_hash_avx2(const void * key,size_t len,uint64_t seed)2936525738fSBaptiste Daroussin _mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
2946525738fSBaptiste Daroussin return _mum_final (_mum_hash_aligned (seed + len, key, len));
2956525738fSBaptiste Daroussin }
2966525738fSBaptiste Daroussin #endif
2976525738fSBaptiste Daroussin
2986525738fSBaptiste Daroussin #ifndef _MUM_UNALIGNED_ACCESS
2996525738fSBaptiste Daroussin #if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
3006525738fSBaptiste Daroussin || defined(__s390__) || defined(__m32c__) || defined(cris) \
3016525738fSBaptiste Daroussin || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
3026525738fSBaptiste Daroussin || defined(__aarch64__)
3036525738fSBaptiste Daroussin #define _MUM_UNALIGNED_ACCESS 1
3046525738fSBaptiste Daroussin #else
3056525738fSBaptiste Daroussin #define _MUM_UNALIGNED_ACCESS 0
3066525738fSBaptiste Daroussin #endif
3076525738fSBaptiste Daroussin #endif
3086525738fSBaptiste Daroussin
3096525738fSBaptiste Daroussin /* When we need an aligned access to data being hashed we move part of
3106525738fSBaptiste Daroussin the unaligned data to an aligned block of given size and then
3116525738fSBaptiste Daroussin process it, repeating processing the data by the block. */
3126525738fSBaptiste Daroussin #ifndef _MUM_BLOCK_LEN
3136525738fSBaptiste Daroussin #define _MUM_BLOCK_LEN 1024
3146525738fSBaptiste Daroussin #endif
3156525738fSBaptiste Daroussin
3166525738fSBaptiste Daroussin #if _MUM_BLOCK_LEN < 8
3176525738fSBaptiste Daroussin #error "too small block length"
3186525738fSBaptiste Daroussin #endif
3196525738fSBaptiste Daroussin
3206525738fSBaptiste Daroussin static inline uint64_t
3216525738fSBaptiste Daroussin #if defined(__x86_64__)
3226525738fSBaptiste Daroussin _MUM_TARGET("inline-all-stringops")
3236525738fSBaptiste Daroussin #endif
_mum_hash_default(const void * key,size_t len,uint64_t seed)3246525738fSBaptiste Daroussin _mum_hash_default (const void *key, size_t len, uint64_t seed) {
3256525738fSBaptiste Daroussin uint64_t result;
3266525738fSBaptiste Daroussin const unsigned char *str = (const unsigned char *) key;
3276525738fSBaptiste Daroussin size_t block_len;
3286525738fSBaptiste Daroussin uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
3296525738fSBaptiste Daroussin
3306525738fSBaptiste Daroussin result = seed + len;
3316525738fSBaptiste Daroussin if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
3326525738fSBaptiste Daroussin result = _mum_hash_aligned (result, key, len);
3336525738fSBaptiste Daroussin else {
3346525738fSBaptiste Daroussin while (len != 0) {
3356525738fSBaptiste Daroussin block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
3366525738fSBaptiste Daroussin memmove (buf, str, block_len);
3376525738fSBaptiste Daroussin result = _mum_hash_aligned (result, buf, block_len);
3386525738fSBaptiste Daroussin len -= block_len;
3396525738fSBaptiste Daroussin str += block_len;
3406525738fSBaptiste Daroussin }
3416525738fSBaptiste Daroussin }
3426525738fSBaptiste Daroussin return _mum_final (result);
3436525738fSBaptiste Daroussin }
3446525738fSBaptiste Daroussin
3456525738fSBaptiste Daroussin static inline uint64_t
_mum_next_factor(void)3466525738fSBaptiste Daroussin _mum_next_factor (void) {
3476525738fSBaptiste Daroussin uint64_t start = 0;
3486525738fSBaptiste Daroussin int i;
3496525738fSBaptiste Daroussin
3506525738fSBaptiste Daroussin for (i = 0; i < 8; i++)
3516525738fSBaptiste Daroussin start = (start << 8) | rand() % 256;
3526525738fSBaptiste Daroussin return start;
3536525738fSBaptiste Daroussin }
3546525738fSBaptiste Daroussin
3556525738fSBaptiste Daroussin /* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */
3566525738fSBaptiste Daroussin
3576525738fSBaptiste Daroussin /* Set random multiplicators depending on SEED. */
3586525738fSBaptiste Daroussin static inline void
mum_hash_randomize(uint64_t seed)3596525738fSBaptiste Daroussin mum_hash_randomize (uint64_t seed) {
3606525738fSBaptiste Daroussin int i;
3616525738fSBaptiste Daroussin
3626525738fSBaptiste Daroussin srand (seed);
3636525738fSBaptiste Daroussin _mum_hash_step_prime = _mum_next_factor ();
3646525738fSBaptiste Daroussin _mum_key_step_prime = _mum_next_factor ();
3656525738fSBaptiste Daroussin _mum_finish_prime1 = _mum_next_factor ();
3666525738fSBaptiste Daroussin _mum_finish_prime2 = _mum_next_factor ();
3676525738fSBaptiste Daroussin _mum_block_start_prime = _mum_next_factor ();
3686525738fSBaptiste Daroussin _mum_unroll_prime = _mum_next_factor ();
3696525738fSBaptiste Daroussin _mum_tail_prime = _mum_next_factor ();
3706525738fSBaptiste Daroussin for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
3716525738fSBaptiste Daroussin _mum_primes[i] = _mum_next_factor ();
3726525738fSBaptiste Daroussin }
3736525738fSBaptiste Daroussin
3746525738fSBaptiste Daroussin /* Start hashing data with SEED. Return the state. */
3756525738fSBaptiste Daroussin static inline uint64_t
mum_hash_init(uint64_t seed)3766525738fSBaptiste Daroussin mum_hash_init (uint64_t seed) {
3776525738fSBaptiste Daroussin return seed;
3786525738fSBaptiste Daroussin }
3796525738fSBaptiste Daroussin
3806525738fSBaptiste Daroussin /* Process data KEY with the state H and return the updated state. */
3816525738fSBaptiste Daroussin static inline uint64_t
mum_hash_step(uint64_t h,uint64_t key)3826525738fSBaptiste Daroussin mum_hash_step (uint64_t h, uint64_t key)
3836525738fSBaptiste Daroussin {
3846525738fSBaptiste Daroussin return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
3856525738fSBaptiste Daroussin }
3866525738fSBaptiste Daroussin
3876525738fSBaptiste Daroussin /* Return the result of hashing using the current state H. */
3886525738fSBaptiste Daroussin static inline uint64_t
mum_hash_finish(uint64_t h)3896525738fSBaptiste Daroussin mum_hash_finish (uint64_t h) {
3906525738fSBaptiste Daroussin return _mum_final (h);
3916525738fSBaptiste Daroussin }
3926525738fSBaptiste Daroussin
3936525738fSBaptiste Daroussin /* Fast hashing of KEY with SEED. The hash is always the same for the
3946525738fSBaptiste Daroussin same key on any target. */
3956525738fSBaptiste Daroussin static inline size_t
mum_hash64(uint64_t key,uint64_t seed)3966525738fSBaptiste Daroussin mum_hash64 (uint64_t key, uint64_t seed) {
3976525738fSBaptiste Daroussin return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
3986525738fSBaptiste Daroussin }
3996525738fSBaptiste Daroussin
4006525738fSBaptiste Daroussin /* Hash data KEY of length LEN and SEED. The hash depends on the
401*a0409676SBaptiste Daroussin target endianness and the unroll factor. */
4026525738fSBaptiste Daroussin static inline uint64_t
mum_hash(const void * key,size_t len,uint64_t seed)4036525738fSBaptiste Daroussin mum_hash (const void *key, size_t len, uint64_t seed) {
4046525738fSBaptiste Daroussin #if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
4056525738fSBaptiste Daroussin static int avx2_support = 0;
4066525738fSBaptiste Daroussin
4076525738fSBaptiste Daroussin if (avx2_support > 0)
4086525738fSBaptiste Daroussin return _mum_hash_avx2 (key, len, seed);
4096525738fSBaptiste Daroussin else if (! avx2_support) {
4106525738fSBaptiste Daroussin __builtin_cpu_init ();
4116525738fSBaptiste Daroussin avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
4126525738fSBaptiste Daroussin if (avx2_support > 0)
4136525738fSBaptiste Daroussin return _mum_hash_avx2 (key, len, seed);
4146525738fSBaptiste Daroussin }
4156525738fSBaptiste Daroussin #endif
4166525738fSBaptiste Daroussin return _mum_hash_default (key, len, seed);
4176525738fSBaptiste Daroussin }
4186525738fSBaptiste Daroussin
4196525738fSBaptiste Daroussin #endif
420