11d2baefcSGeorge V. Neville-Neil /*-
21d2baefcSGeorge V. Neville-Neil * Copyright (c) 2014 Dag-Erling Smørgrav
31d2baefcSGeorge V. Neville-Neil * All rights reserved.
41d2baefcSGeorge V. Neville-Neil *
51d2baefcSGeorge V. Neville-Neil * Redistribution and use in source and binary forms, with or without
61d2baefcSGeorge V. Neville-Neil * modification, are permitted provided that the following conditions
71d2baefcSGeorge V. Neville-Neil * are met:
81d2baefcSGeorge V. Neville-Neil * 1. Redistributions of source code must retain the above copyright
91d2baefcSGeorge V. Neville-Neil * notice, this list of conditions and the following disclaimer.
101d2baefcSGeorge V. Neville-Neil * 2. Redistributions in binary form must reproduce the above copyright
111d2baefcSGeorge V. Neville-Neil * notice, this list of conditions and the following disclaimer in the
121d2baefcSGeorge V. Neville-Neil * documentation and/or other materials provided with the distribution.
131d2baefcSGeorge V. Neville-Neil *
141d2baefcSGeorge V. Neville-Neil * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
151d2baefcSGeorge V. Neville-Neil * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
161d2baefcSGeorge V. Neville-Neil * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
171d2baefcSGeorge V. Neville-Neil * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
181d2baefcSGeorge V. Neville-Neil * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
191d2baefcSGeorge V. Neville-Neil * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
201d2baefcSGeorge V. Neville-Neil * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
211d2baefcSGeorge V. Neville-Neil * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
221d2baefcSGeorge V. Neville-Neil * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
231d2baefcSGeorge V. Neville-Neil * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
241d2baefcSGeorge V. Neville-Neil * SUCH DAMAGE.
251d2baefcSGeorge V. Neville-Neil */
261d2baefcSGeorge V. Neville-Neil
271d2baefcSGeorge V. Neville-Neil #include <sys/hash.h>
281d2baefcSGeorge V. Neville-Neil #include <sys/endian.h>
291d2baefcSGeorge V. Neville-Neil #include <sys/stdint.h>
301d2baefcSGeorge V. Neville-Neil #include <sys/types.h>
311d2baefcSGeorge V. Neville-Neil
321d2baefcSGeorge V. Neville-Neil #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
331d2baefcSGeorge V. Neville-Neil
341d2baefcSGeorge V. Neville-Neil /*
35*99e9de87SDag-Erling Smørgrav * Simple implementation of the Murmur3-32 hash function.
36*99e9de87SDag-Erling Smørgrav *
37*99e9de87SDag-Erling Smørgrav * This implementation is slow but safe. It can be made significantly
38*99e9de87SDag-Erling Smørgrav * faster if the caller guarantees that the input is correctly aligned for
39*99e9de87SDag-Erling Smørgrav * 32-bit reads, and slightly faster yet if the caller guarantees that the
40*99e9de87SDag-Erling Smørgrav * length of the input is always a multiple of 4 bytes.
411d2baefcSGeorge V. Neville-Neil */
421d2baefcSGeorge V. Neville-Neil uint32_t
murmur3_32_hash(const void * data,size_t len,uint32_t seed)43*99e9de87SDag-Erling Smørgrav murmur3_32_hash(const void *data, size_t len, uint32_t seed)
441d2baefcSGeorge V. Neville-Neil {
45*99e9de87SDag-Erling Smørgrav const uint8_t *bytes;
461d2baefcSGeorge V. Neville-Neil uint32_t hash, k;
471d2baefcSGeorge V. Neville-Neil size_t res;
481d2baefcSGeorge V. Neville-Neil
49*99e9de87SDag-Erling Smørgrav /* initialization */
50*99e9de87SDag-Erling Smørgrav bytes = data;
511d2baefcSGeorge V. Neville-Neil res = len;
521d2baefcSGeorge V. Neville-Neil hash = seed;
531d2baefcSGeorge V. Neville-Neil
54*99e9de87SDag-Erling Smørgrav /* main loop */
55*99e9de87SDag-Erling Smørgrav while (res >= 4) {
56*99e9de87SDag-Erling Smørgrav /* replace with le32toh() if input is aligned */
57*99e9de87SDag-Erling Smørgrav k = le32dec(bytes);
58*99e9de87SDag-Erling Smørgrav bytes += 4;
59*99e9de87SDag-Erling Smørgrav res -= 4;
60*99e9de87SDag-Erling Smørgrav k *= 0xcc9e2d51;
61*99e9de87SDag-Erling Smørgrav k = rol32(k, 15);
62*99e9de87SDag-Erling Smørgrav k *= 0x1b873593;
63*99e9de87SDag-Erling Smørgrav hash ^= k;
64*99e9de87SDag-Erling Smørgrav hash = rol32(hash, 13);
65*99e9de87SDag-Erling Smørgrav hash *= 5;
66*99e9de87SDag-Erling Smørgrav hash += 0xe6546b64;
67*99e9de87SDag-Erling Smørgrav }
68*99e9de87SDag-Erling Smørgrav
69*99e9de87SDag-Erling Smørgrav /* remainder */
70*99e9de87SDag-Erling Smørgrav /* remove if input length is a multiple of 4 */
71*99e9de87SDag-Erling Smørgrav if (res > 0) {
72*99e9de87SDag-Erling Smørgrav k = 0;
73*99e9de87SDag-Erling Smørgrav switch (res) {
74*99e9de87SDag-Erling Smørgrav case 3:
75*99e9de87SDag-Erling Smørgrav k |= bytes[2] << 16;
76*99e9de87SDag-Erling Smørgrav case 2:
77*99e9de87SDag-Erling Smørgrav k |= bytes[1] << 8;
78*99e9de87SDag-Erling Smørgrav case 1:
79*99e9de87SDag-Erling Smørgrav k |= bytes[0];
80*99e9de87SDag-Erling Smørgrav k *= 0xcc9e2d51;
81*99e9de87SDag-Erling Smørgrav k = rol32(k, 15);
82*99e9de87SDag-Erling Smørgrav k *= 0x1b873593;
83*99e9de87SDag-Erling Smørgrav hash ^= k;
84*99e9de87SDag-Erling Smørgrav break;
85*99e9de87SDag-Erling Smørgrav }
86*99e9de87SDag-Erling Smørgrav }
87*99e9de87SDag-Erling Smørgrav
88*99e9de87SDag-Erling Smørgrav /* finalize */
89*99e9de87SDag-Erling Smørgrav hash ^= (uint32_t)len;
90*99e9de87SDag-Erling Smørgrav hash ^= hash >> 16;
91*99e9de87SDag-Erling Smørgrav hash *= 0x85ebca6b;
92*99e9de87SDag-Erling Smørgrav hash ^= hash >> 13;
93*99e9de87SDag-Erling Smørgrav hash *= 0xc2b2ae35;
94*99e9de87SDag-Erling Smørgrav hash ^= hash >> 16;
95*99e9de87SDag-Erling Smørgrav return (hash);
96*99e9de87SDag-Erling Smørgrav }
97*99e9de87SDag-Erling Smørgrav
98*99e9de87SDag-Erling Smørgrav /*
99*99e9de87SDag-Erling Smørgrav * Simplified version of the above optimized for aligned sequences of
100*99e9de87SDag-Erling Smørgrav * 32-bit words. The count argument is the number of words, not the
101*99e9de87SDag-Erling Smørgrav * length in bytes.
102*99e9de87SDag-Erling Smørgrav */
103*99e9de87SDag-Erling Smørgrav uint32_t
murmur3_32_hash32(const uint32_t * data,size_t count,uint32_t seed)104*99e9de87SDag-Erling Smørgrav murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
105*99e9de87SDag-Erling Smørgrav {
106*99e9de87SDag-Erling Smørgrav uint32_t hash, k;
107*99e9de87SDag-Erling Smørgrav size_t res;
108*99e9de87SDag-Erling Smørgrav
1091d2baefcSGeorge V. Neville-Neil /* iterate */
110*99e9de87SDag-Erling Smørgrav for (res = count, hash = seed; res > 0; res--, data++) {
111*99e9de87SDag-Erling Smørgrav k = le32toh(*data);
1121d2baefcSGeorge V. Neville-Neil k *= 0xcc9e2d51;
1131d2baefcSGeorge V. Neville-Neil k = rol32(k, 15);
1141d2baefcSGeorge V. Neville-Neil k *= 0x1b873593;
1151d2baefcSGeorge V. Neville-Neil hash ^= k;
1161d2baefcSGeorge V. Neville-Neil hash = rol32(hash, 13);
1171d2baefcSGeorge V. Neville-Neil hash *= 5;
1181d2baefcSGeorge V. Neville-Neil hash += 0xe6546b64;
1191d2baefcSGeorge V. Neville-Neil }
1201d2baefcSGeorge V. Neville-Neil
1211d2baefcSGeorge V. Neville-Neil /* finalize */
122*99e9de87SDag-Erling Smørgrav hash ^= (uint32_t)count;
1231d2baefcSGeorge V. Neville-Neil hash ^= hash >> 16;
1241d2baefcSGeorge V. Neville-Neil hash *= 0x85ebca6b;
1251d2baefcSGeorge V. Neville-Neil hash ^= hash >> 13;
1261d2baefcSGeorge V. Neville-Neil hash *= 0xc2b2ae35;
1271d2baefcSGeorge V. Neville-Neil hash ^= hash >> 16;
1281d2baefcSGeorge V. Neville-Neil return (hash);
1291d2baefcSGeorge V. Neville-Neil }
130