xref: /freebsd/sys/libkern/murmur3_32.c (revision 2008043f386721d58158e37e0d7e50df8095942d)
1 /*-
2  * Copyright (c) 2014 Dag-Erling Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/hash.h>
28 #include <sys/endian.h>
29 #include <sys/stdint.h>
30 #include <sys/types.h>
31 
32 #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
33 
34 /*
35  * Simple implementation of the Murmur3-32 hash function.
36  *
37  * This implementation is slow but safe.  It can be made significantly
38  * faster if the caller guarantees that the input is correctly aligned for
39  * 32-bit reads, and slightly faster yet if the caller guarantees that the
40  * length of the input is always a multiple of 4 bytes.
41  */
42 uint32_t
43 murmur3_32_hash(const void *data, size_t len, uint32_t seed)
44 {
45 	const uint8_t *bytes;
46 	uint32_t hash, k;
47 	size_t res;
48 
49 	/* initialization */
50 	bytes = data;
51 	res = len;
52 	hash = seed;
53 
54 	/* main loop */
55 	while (res >= 4) {
56 		/* replace with le32toh() if input is aligned */
57 		k = le32dec(bytes);
58 		bytes += 4;
59 		res -= 4;
60 		k *= 0xcc9e2d51;
61 		k = rol32(k, 15);
62 		k *= 0x1b873593;
63 		hash ^= k;
64 		hash = rol32(hash, 13);
65 		hash *= 5;
66 		hash += 0xe6546b64;
67 	}
68 
69 	/* remainder */
70 	/* remove if input length is a multiple of 4 */
71 	if (res > 0) {
72 		k = 0;
73 		switch (res) {
74 		case 3:
75 			k |= bytes[2] << 16;
76 		case 2:
77 			k |= bytes[1] << 8;
78 		case 1:
79 			k |= bytes[0];
80 			k *= 0xcc9e2d51;
81 			k = rol32(k, 15);
82 			k *= 0x1b873593;
83 			hash ^= k;
84 			break;
85 		}
86 	}
87 
88 	/* finalize */
89 	hash ^= (uint32_t)len;
90 	hash ^= hash >> 16;
91 	hash *= 0x85ebca6b;
92 	hash ^= hash >> 13;
93 	hash *= 0xc2b2ae35;
94 	hash ^= hash >> 16;
95 	return (hash);
96 }
97 
98 /*
99  * Simplified version of the above optimized for aligned sequences of
100  * 32-bit words.  The count argument is the number of words, not the
101  * length in bytes.
102  */
103 uint32_t
104 murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
105 {
106 	uint32_t hash, k;
107 	size_t res;
108 
109 	/* iterate */
110 	for (res = count, hash = seed; res > 0; res--, data++) {
111 		k = le32toh(*data);
112 		k *= 0xcc9e2d51;
113 		k = rol32(k, 15);
114 		k *= 0x1b873593;
115 		hash ^= k;
116 		hash = rol32(hash, 13);
117 		hash *= 5;
118 		hash += 0xe6546b64;
119 	}
120 
121 	/* finalize */
122 	hash ^= (uint32_t)count;
123 	hash ^= hash >> 16;
124 	hash *= 0x85ebca6b;
125 	hash ^= hash >> 13;
126 	hash *= 0xc2b2ae35;
127 	hash ^= hash >> 16;
128 	return (hash);
129 }
130