xref: /freebsd/sys/libkern/murmur3_32.c (revision 7a0c41d5d7d4e9770ef6f5d56f893efc8f18ab7c)
1 /*-
2  * Copyright (c) 2014 Dag-Erling Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/hash.h>
30 #include <sys/endian.h>
31 #include <sys/stdint.h>
32 #include <sys/types.h>
33 
34 #define rol32(i32, n) ((i32) << (n) | (i32) >> (32 - (n)))
35 
36 /*
37  * Simple implementation of the Murmur3-32 hash function.
38  *
39  * This implementation is slow but safe.  It can be made significantly
40  * faster if the caller guarantees that the input is correctly aligned for
41  * 32-bit reads, and slightly faster yet if the caller guarantees that the
42  * length of the input is always a multiple of 4 bytes.
43  */
44 uint32_t
45 murmur3_32_hash(const void *data, size_t len, uint32_t seed)
46 {
47 	const uint8_t *bytes;
48 	uint32_t hash, k;
49 	size_t res;
50 
51 	/* initialization */
52 	bytes = data;
53 	res = len;
54 	hash = seed;
55 
56 	/* main loop */
57 	while (res >= 4) {
58 		/* replace with le32toh() if input is aligned */
59 		k = le32dec(bytes);
60 		bytes += 4;
61 		res -= 4;
62 		k *= 0xcc9e2d51;
63 		k = rol32(k, 15);
64 		k *= 0x1b873593;
65 		hash ^= k;
66 		hash = rol32(hash, 13);
67 		hash *= 5;
68 		hash += 0xe6546b64;
69 	}
70 
71 	/* remainder */
72 	/* remove if input length is a multiple of 4 */
73 	if (res > 0) {
74 		k = 0;
75 		switch (res) {
76 		case 3:
77 			k |= bytes[2] << 16;
78 		case 2:
79 			k |= bytes[1] << 8;
80 		case 1:
81 			k |= bytes[0];
82 			k *= 0xcc9e2d51;
83 			k = rol32(k, 15);
84 			k *= 0x1b873593;
85 			hash ^= k;
86 			break;
87 		}
88 	}
89 
90 	/* finalize */
91 	hash ^= (uint32_t)len;
92 	hash ^= hash >> 16;
93 	hash *= 0x85ebca6b;
94 	hash ^= hash >> 13;
95 	hash *= 0xc2b2ae35;
96 	hash ^= hash >> 16;
97 	return (hash);
98 }
99 
100 /*
101  * Simplified version of the above optimized for aligned sequences of
102  * 32-bit words.  The count argument is the number of words, not the
103  * length in bytes.
104  */
105 uint32_t
106 murmur3_32_hash32(const uint32_t *data, size_t count, uint32_t seed)
107 {
108 	uint32_t hash, k;
109 	size_t res;
110 
111 	/* iterate */
112 	for (res = count, hash = seed; res > 0; res--, data++) {
113 		k = le32toh(*data);
114 		k *= 0xcc9e2d51;
115 		k = rol32(k, 15);
116 		k *= 0x1b873593;
117 		hash ^= k;
118 		hash = rol32(hash, 13);
119 		hash *= 5;
120 		hash += 0xe6546b64;
121 	}
122 
123 	/* finalize */
124 	hash ^= (uint32_t)count;
125 	hash ^= hash >> 16;
126 	hash *= 0x85ebca6b;
127 	hash ^= hash >> 13;
128 	hash *= 0xc2b2ae35;
129 	hash ^= hash >> 16;
130 	return (hash);
131 }
132 
133