xref: /freebsd/sys/libkern/murmur3_32.c (revision 95ee2897e98f5d444f26ed2334cc7c439f9c16c6)
11d2baefcSGeorge V. Neville-Neil /*-
21d2baefcSGeorge V. Neville-Neil  * Copyright (c) 2014 Dag-Erling Smørgrav
31d2baefcSGeorge V. Neville-Neil  * All rights reserved.
41d2baefcSGeorge V. Neville-Neil  *
51d2baefcSGeorge V. Neville-Neil  * Redistribution and use in source and binary forms, with or without
61d2baefcSGeorge V. Neville-Neil  * modification, are permitted provided that the following conditions
71d2baefcSGeorge V. Neville-Neil  * are met:
81d2baefcSGeorge V. Neville-Neil  * 1. Redistributions of source code must retain the above copyright
91d2baefcSGeorge V. Neville-Neil  *    notice, this list of conditions and the following disclaimer.
101d2baefcSGeorge V. Neville-Neil  * 2. Redistributions in binary form must reproduce the above copyright
111d2baefcSGeorge V. Neville-Neil  *    notice, this list of conditions and the following disclaimer in the
121d2baefcSGeorge V. Neville-Neil  *    documentation and/or other materials provided with the distribution.
131d2baefcSGeorge V. Neville-Neil  *
141d2baefcSGeorge V. Neville-Neil  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
151d2baefcSGeorge V. Neville-Neil  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
161d2baefcSGeorge V. Neville-Neil  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
171d2baefcSGeorge V. Neville-Neil  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
181d2baefcSGeorge V. Neville-Neil  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
191d2baefcSGeorge V. Neville-Neil  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
201d2baefcSGeorge V. Neville-Neil  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
211d2baefcSGeorge V. Neville-Neil  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
221d2baefcSGeorge V. Neville-Neil  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
231d2baefcSGeorge V. Neville-Neil  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
241d2baefcSGeorge V. Neville-Neil  * SUCH DAMAGE.
251d2baefcSGeorge V. Neville-Neil  */
261d2baefcSGeorge V. Neville-Neil 
271d2baefcSGeorge V. Neville-Neil #include <sys/hash.h>
281d2baefcSGeorge V. Neville-Neil #include <sys/endian.h>
291d2baefcSGeorge V. Neville-Neil #include <sys/stdint.h>
301d2baefcSGeorge V. Neville-Neil #include <sys/types.h>
311d2baefcSGeorge V. Neville-Neil 
321d2baefcSGeorge V. Neville-Neil #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
331d2baefcSGeorge V. Neville-Neil 
341d2baefcSGeorge V. Neville-Neil /*
35*99e9de87SDag-Erling Smørgrav  * Simple implementation of the Murmur3-32 hash function.
36*99e9de87SDag-Erling Smørgrav  *
37*99e9de87SDag-Erling Smørgrav  * This implementation is slow but safe.  It can be made significantly
38*99e9de87SDag-Erling Smørgrav  * faster if the caller guarantees that the input is correctly aligned for
39*99e9de87SDag-Erling Smørgrav  * 32-bit reads, and slightly faster yet if the caller guarantees that the
40*99e9de87SDag-Erling Smørgrav  * length of the input is always a multiple of 4 bytes.
411d2baefcSGeorge V. Neville-Neil  */
421d2baefcSGeorge V. Neville-Neil uint32_t
murmur3_32_hash(const void * data,size_t len,uint32_t seed)43*99e9de87SDag-Erling Smørgrav murmur3_32_hash(const void *data, size_t len, uint32_t seed)
441d2baefcSGeorge V. Neville-Neil {
45*99e9de87SDag-Erling Smørgrav 	const uint8_t *bytes;
461d2baefcSGeorge V. Neville-Neil 	uint32_t hash, k;
471d2baefcSGeorge V. Neville-Neil 	size_t res;
481d2baefcSGeorge V. Neville-Neil 
49*99e9de87SDag-Erling Smørgrav 	/* initialization */
50*99e9de87SDag-Erling Smørgrav 	bytes = data;
511d2baefcSGeorge V. Neville-Neil 	res = len;
521d2baefcSGeorge V. Neville-Neil 	hash = seed;
531d2baefcSGeorge V. Neville-Neil 
54*99e9de87SDag-Erling Smørgrav 	/* main loop */
55*99e9de87SDag-Erling Smørgrav 	while (res >= 4) {
56*99e9de87SDag-Erling Smørgrav 		/* replace with le32toh() if input is aligned */
57*99e9de87SDag-Erling Smørgrav 		k = le32dec(bytes);
58*99e9de87SDag-Erling Smørgrav 		bytes += 4;
59*99e9de87SDag-Erling Smørgrav 		res -= 4;
60*99e9de87SDag-Erling Smørgrav 		k *= 0xcc9e2d51;
61*99e9de87SDag-Erling Smørgrav 		k = rol32(k, 15);
62*99e9de87SDag-Erling Smørgrav 		k *= 0x1b873593;
63*99e9de87SDag-Erling Smørgrav 		hash ^= k;
64*99e9de87SDag-Erling Smørgrav 		hash = rol32(hash, 13);
65*99e9de87SDag-Erling Smørgrav 		hash *= 5;
66*99e9de87SDag-Erling Smørgrav 		hash += 0xe6546b64;
67*99e9de87SDag-Erling Smørgrav 	}
68*99e9de87SDag-Erling Smørgrav 
69*99e9de87SDag-Erling Smørgrav 	/* remainder */
70*99e9de87SDag-Erling Smørgrav 	/* remove if input length is a multiple of 4 */
71*99e9de87SDag-Erling Smørgrav 	if (res > 0) {
72*99e9de87SDag-Erling Smørgrav 		k = 0;
73*99e9de87SDag-Erling Smørgrav 		switch (res) {
74*99e9de87SDag-Erling Smørgrav 		case 3:
75*99e9de87SDag-Erling Smørgrav 			k |= bytes[2] << 16;
76*99e9de87SDag-Erling Smørgrav 		case 2:
77*99e9de87SDag-Erling Smørgrav 			k |= bytes[1] << 8;
78*99e9de87SDag-Erling Smørgrav 		case 1:
79*99e9de87SDag-Erling Smørgrav 			k |= bytes[0];
80*99e9de87SDag-Erling Smørgrav 			k *= 0xcc9e2d51;
81*99e9de87SDag-Erling Smørgrav 			k = rol32(k, 15);
82*99e9de87SDag-Erling Smørgrav 			k *= 0x1b873593;
83*99e9de87SDag-Erling Smørgrav 			hash ^= k;
84*99e9de87SDag-Erling Smørgrav 			break;
85*99e9de87SDag-Erling Smørgrav 		}
86*99e9de87SDag-Erling Smørgrav 	}
87*99e9de87SDag-Erling Smørgrav 
88*99e9de87SDag-Erling Smørgrav 	/* finalize */
89*99e9de87SDag-Erling Smørgrav 	hash ^= (uint32_t)len;
90*99e9de87SDag-Erling Smørgrav 	hash ^= hash >> 16;
91*99e9de87SDag-Erling Smørgrav 	hash *= 0x85ebca6b;
92*99e9de87SDag-Erling Smørgrav 	hash ^= hash >> 13;
93*99e9de87SDag-Erling Smørgrav 	hash *= 0xc2b2ae35;
94*99e9de87SDag-Erling Smørgrav 	hash ^= hash >> 16;
95*99e9de87SDag-Erling Smørgrav 	return (hash);
96*99e9de87SDag-Erling Smørgrav }
97*99e9de87SDag-Erling Smørgrav 
98*99e9de87SDag-Erling Smørgrav /*
99*99e9de87SDag-Erling Smørgrav  * Simplified version of the above optimized for aligned sequences of
100*99e9de87SDag-Erling Smørgrav  * 32-bit words.  The count argument is the number of words, not the
101*99e9de87SDag-Erling Smørgrav  * length in bytes.
102*99e9de87SDag-Erling Smørgrav  */
103*99e9de87SDag-Erling Smørgrav uint32_t
murmur3_32_hash32(const uint32_t * data,size_t count,uint32_t seed)104*99e9de87SDag-Erling Smørgrav murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
105*99e9de87SDag-Erling Smørgrav {
106*99e9de87SDag-Erling Smørgrav 	uint32_t hash, k;
107*99e9de87SDag-Erling Smørgrav 	size_t res;
108*99e9de87SDag-Erling Smørgrav 
1091d2baefcSGeorge V. Neville-Neil 	/* iterate */
110*99e9de87SDag-Erling Smørgrav 	for (res = count, hash = seed; res > 0; res--, data++) {
111*99e9de87SDag-Erling Smørgrav 		k = le32toh(*data);
1121d2baefcSGeorge V. Neville-Neil 		k *= 0xcc9e2d51;
1131d2baefcSGeorge V. Neville-Neil 		k = rol32(k, 15);
1141d2baefcSGeorge V. Neville-Neil 		k *= 0x1b873593;
1151d2baefcSGeorge V. Neville-Neil 		hash ^= k;
1161d2baefcSGeorge V. Neville-Neil 		hash = rol32(hash, 13);
1171d2baefcSGeorge V. Neville-Neil 		hash *= 5;
1181d2baefcSGeorge V. Neville-Neil 		hash += 0xe6546b64;
1191d2baefcSGeorge V. Neville-Neil 	}
1201d2baefcSGeorge V. Neville-Neil 
1211d2baefcSGeorge V. Neville-Neil 	/* finalize */
122*99e9de87SDag-Erling Smørgrav 	hash ^= (uint32_t)count;
1231d2baefcSGeorge V. Neville-Neil 	hash ^= hash >> 16;
1241d2baefcSGeorge V. Neville-Neil 	hash *= 0x85ebca6b;
1251d2baefcSGeorge V. Neville-Neil 	hash ^= hash >> 13;
1261d2baefcSGeorge V. Neville-Neil 	hash *= 0xc2b2ae35;
1271d2baefcSGeorge V. Neville-Neil 	hash ^= hash >> 16;
1281d2baefcSGeorge V. Neville-Neil 	return (hash);
1291d2baefcSGeorge V. Neville-Neil }
130