xref: /freebsd/sys/libkern/murmur3_32.c (revision 99e9de871aee402a1166cdecb957fe6c8c99da13)
11d2baefcSGeorge V. Neville-Neil /*-
21d2baefcSGeorge V. Neville-Neil  * Copyright (c) 2014 Dag-Erling Smørgrav
31d2baefcSGeorge V. Neville-Neil  * All rights reserved.
41d2baefcSGeorge V. Neville-Neil  *
51d2baefcSGeorge V. Neville-Neil  * Redistribution and use in source and binary forms, with or without
61d2baefcSGeorge V. Neville-Neil  * modification, are permitted provided that the following conditions
71d2baefcSGeorge V. Neville-Neil  * are met:
81d2baefcSGeorge V. Neville-Neil  * 1. Redistributions of source code must retain the above copyright
91d2baefcSGeorge V. Neville-Neil  *    notice, this list of conditions and the following disclaimer.
101d2baefcSGeorge V. Neville-Neil  * 2. Redistributions in binary form must reproduce the above copyright
111d2baefcSGeorge V. Neville-Neil  *    notice, this list of conditions and the following disclaimer in the
121d2baefcSGeorge V. Neville-Neil  *    documentation and/or other materials provided with the distribution.
131d2baefcSGeorge V. Neville-Neil  *
141d2baefcSGeorge V. Neville-Neil  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
151d2baefcSGeorge V. Neville-Neil  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
161d2baefcSGeorge V. Neville-Neil  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
171d2baefcSGeorge V. Neville-Neil  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
181d2baefcSGeorge V. Neville-Neil  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
191d2baefcSGeorge V. Neville-Neil  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
201d2baefcSGeorge V. Neville-Neil  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
211d2baefcSGeorge V. Neville-Neil  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
221d2baefcSGeorge V. Neville-Neil  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
231d2baefcSGeorge V. Neville-Neil  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
241d2baefcSGeorge V. Neville-Neil  * SUCH DAMAGE.
25*99e9de87SDag-Erling Smørgrav  *
26*99e9de87SDag-Erling Smørgrav  * $FreeBSD$
271d2baefcSGeorge V. Neville-Neil  */
281d2baefcSGeorge V. Neville-Neil 
291d2baefcSGeorge V. Neville-Neil #include <sys/hash.h>
301d2baefcSGeorge V. Neville-Neil #include <sys/endian.h>
311d2baefcSGeorge V. Neville-Neil #include <sys/stdint.h>
321d2baefcSGeorge V. Neville-Neil #include <sys/types.h>
331d2baefcSGeorge V. Neville-Neil 
341d2baefcSGeorge V. Neville-Neil #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
351d2baefcSGeorge V. Neville-Neil 
361d2baefcSGeorge V. Neville-Neil /*
37*99e9de87SDag-Erling Smørgrav  * Simple implementation of the Murmur3-32 hash function.
38*99e9de87SDag-Erling Smørgrav  *
39*99e9de87SDag-Erling Smørgrav  * This implementation is slow but safe.  It can be made significantly
40*99e9de87SDag-Erling Smørgrav  * faster if the caller guarantees that the input is correctly aligned for
41*99e9de87SDag-Erling Smørgrav  * 32-bit reads, and slightly faster yet if the caller guarantees that the
42*99e9de87SDag-Erling Smørgrav  * length of the input is always a multiple of 4 bytes.
431d2baefcSGeorge V. Neville-Neil  */
441d2baefcSGeorge V. Neville-Neil uint32_t
45*99e9de87SDag-Erling Smørgrav murmur3_32_hash(const void *data, size_t len, uint32_t seed)
461d2baefcSGeorge V. Neville-Neil {
47*99e9de87SDag-Erling Smørgrav 	const uint8_t *bytes;
481d2baefcSGeorge V. Neville-Neil 	uint32_t hash, k;
491d2baefcSGeorge V. Neville-Neil 	size_t res;
501d2baefcSGeorge V. Neville-Neil 
51*99e9de87SDag-Erling Smørgrav 	/* initialization */
52*99e9de87SDag-Erling Smørgrav 	bytes = data;
531d2baefcSGeorge V. Neville-Neil 	res = len;
541d2baefcSGeorge V. Neville-Neil 	hash = seed;
551d2baefcSGeorge V. Neville-Neil 
56*99e9de87SDag-Erling Smørgrav 	/* main loop */
57*99e9de87SDag-Erling Smørgrav 	while (res >= 4) {
58*99e9de87SDag-Erling Smørgrav 		/* replace with le32toh() if input is aligned */
59*99e9de87SDag-Erling Smørgrav 		k = le32dec(bytes);
60*99e9de87SDag-Erling Smørgrav 		bytes += 4;
61*99e9de87SDag-Erling Smørgrav 		res -= 4;
62*99e9de87SDag-Erling Smørgrav 		k *= 0xcc9e2d51;
63*99e9de87SDag-Erling Smørgrav 		k = rol32(k, 15);
64*99e9de87SDag-Erling Smørgrav 		k *= 0x1b873593;
65*99e9de87SDag-Erling Smørgrav 		hash ^= k;
66*99e9de87SDag-Erling Smørgrav 		hash = rol32(hash, 13);
67*99e9de87SDag-Erling Smørgrav 		hash *= 5;
68*99e9de87SDag-Erling Smørgrav 		hash += 0xe6546b64;
69*99e9de87SDag-Erling Smørgrav 	}
70*99e9de87SDag-Erling Smørgrav 
71*99e9de87SDag-Erling Smørgrav 	/* remainder */
72*99e9de87SDag-Erling Smørgrav 	/* remove if input length is a multiple of 4 */
73*99e9de87SDag-Erling Smørgrav 	if (res > 0) {
74*99e9de87SDag-Erling Smørgrav 		k = 0;
75*99e9de87SDag-Erling Smørgrav 		switch (res) {
76*99e9de87SDag-Erling Smørgrav 		case 3:
77*99e9de87SDag-Erling Smørgrav 			k |= bytes[2] << 16;
78*99e9de87SDag-Erling Smørgrav 		case 2:
79*99e9de87SDag-Erling Smørgrav 			k |= bytes[1] << 8;
80*99e9de87SDag-Erling Smørgrav 		case 1:
81*99e9de87SDag-Erling Smørgrav 			k |= bytes[0];
82*99e9de87SDag-Erling Smørgrav 			k *= 0xcc9e2d51;
83*99e9de87SDag-Erling Smørgrav 			k = rol32(k, 15);
84*99e9de87SDag-Erling Smørgrav 			k *= 0x1b873593;
85*99e9de87SDag-Erling Smørgrav 			hash ^= k;
86*99e9de87SDag-Erling Smørgrav 			break;
87*99e9de87SDag-Erling Smørgrav 		}
88*99e9de87SDag-Erling Smørgrav 	}
89*99e9de87SDag-Erling Smørgrav 
90*99e9de87SDag-Erling Smørgrav 	/* finalize */
91*99e9de87SDag-Erling Smørgrav 	hash ^= (uint32_t)len;
92*99e9de87SDag-Erling Smørgrav 	hash ^= hash >> 16;
93*99e9de87SDag-Erling Smørgrav 	hash *= 0x85ebca6b;
94*99e9de87SDag-Erling Smørgrav 	hash ^= hash >> 13;
95*99e9de87SDag-Erling Smørgrav 	hash *= 0xc2b2ae35;
96*99e9de87SDag-Erling Smørgrav 	hash ^= hash >> 16;
97*99e9de87SDag-Erling Smørgrav 	return (hash);
98*99e9de87SDag-Erling Smørgrav }
99*99e9de87SDag-Erling Smørgrav 
100*99e9de87SDag-Erling Smørgrav /*
101*99e9de87SDag-Erling Smørgrav  * Simplified version of the above optimized for aligned sequences of
102*99e9de87SDag-Erling Smørgrav  * 32-bit words.  The count argument is the number of words, not the
103*99e9de87SDag-Erling Smørgrav  * length in bytes.
104*99e9de87SDag-Erling Smørgrav  */
105*99e9de87SDag-Erling Smørgrav uint32_t
106*99e9de87SDag-Erling Smørgrav murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
107*99e9de87SDag-Erling Smørgrav {
108*99e9de87SDag-Erling Smørgrav 	uint32_t hash, k;
109*99e9de87SDag-Erling Smørgrav 	size_t res;
110*99e9de87SDag-Erling Smørgrav 
1111d2baefcSGeorge V. Neville-Neil 	/* iterate */
112*99e9de87SDag-Erling Smørgrav 	for (res = count, hash = seed; res > 0; res--, data++) {
113*99e9de87SDag-Erling Smørgrav 		k = le32toh(*data);
1141d2baefcSGeorge V. Neville-Neil 		k *= 0xcc9e2d51;
1151d2baefcSGeorge V. Neville-Neil 		k = rol32(k, 15);
1161d2baefcSGeorge V. Neville-Neil 		k *= 0x1b873593;
1171d2baefcSGeorge V. Neville-Neil 		hash ^= k;
1181d2baefcSGeorge V. Neville-Neil 		hash = rol32(hash, 13);
1191d2baefcSGeorge V. Neville-Neil 		hash *= 5;
1201d2baefcSGeorge V. Neville-Neil 		hash += 0xe6546b64;
1211d2baefcSGeorge V. Neville-Neil 	}
1221d2baefcSGeorge V. Neville-Neil 
1231d2baefcSGeorge V. Neville-Neil 	/* finalize */
124*99e9de87SDag-Erling Smørgrav 	hash ^= (uint32_t)count;
1251d2baefcSGeorge V. Neville-Neil 	hash ^= hash >> 16;
1261d2baefcSGeorge V. Neville-Neil 	hash *= 0x85ebca6b;
1271d2baefcSGeorge V. Neville-Neil 	hash ^= hash >> 13;
1281d2baefcSGeorge V. Neville-Neil 	hash *= 0xc2b2ae35;
1291d2baefcSGeorge V. Neville-Neil 	hash ^= hash >> 16;
1301d2baefcSGeorge V. Neville-Neil 	return (hash);
1311d2baefcSGeorge V. Neville-Neil }
1321d2baefcSGeorge V. Neville-Neil 
133