xref: /freebsd/sys/libkern/jenkins_hash.c (revision 0e8011faf58b743cc652e3b2ad0f7671227610df)
1 /*
2  * Taken from http://burtleburtle.net/bob/c/lookup3.c
3  */
4 
5 #include <sys/hash.h>
6 #include <machine/endian.h>
7 
8 /*
9 -------------------------------------------------------------------------------
10 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
11 
12 These are functions for producing 32-bit hashes for hash table lookup.
13 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
14 are externally useful functions.  Routines to test the hash are included
15 if SELF_TEST is defined.  You can use this free for any purpose.  It's in
16 the public domain.  It has no warranty.
17 
18 You probably want to use hashlittle().  hashlittle() and hashbig()
19 hash byte arrays.  hashlittle() is faster than hashbig() on
20 little-endian machines.  Intel and AMD are little-endian machines.
21 On second thought, you probably want hashlittle2(), which is identical to
22 hashlittle() except it returns two 32-bit hashes for the price of one.
23 You could implement hashbig2() if you wanted but I haven't bothered here.
24 
25 If you want to find a hash of, say, exactly 7 integers, do
26   a = i1;  b = i2;  c = i3;
27   mix(a,b,c);
28   a += i4; b += i5; c += i6;
29   mix(a,b,c);
30   a += i7;
31   final(a,b,c);
32 then use c as the hash value.  If you have a variable length array of
33 4-byte integers to hash, use hashword().  If you have a byte array (like
34 a character string), use hashlittle().  If you have several byte arrays, or
35 a mix of things, see the comments above hashlittle().
36 
37 Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
38 then mix those integers.  This is fast (you can do a lot more thorough
39 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
40 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
41 -------------------------------------------------------------------------------
42 */
43 
44 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
45 
46 /*
47 -------------------------------------------------------------------------------
48 mix -- mix 3 32-bit values reversibly.
49 
50 This is reversible, so any information in (a,b,c) before mix() is
51 still in (a,b,c) after mix().
52 
53 If four pairs of (a,b,c) inputs are run through mix(), or through
54 mix() in reverse, there are at least 32 bits of the output that
55 are sometimes the same for one pair and different for another pair.
56 This was tested for:
57 * pairs that differed by one bit, by two bits, in any combination
58   of top bits of (a,b,c), or in any combination of bottom bits of
59   (a,b,c).
60 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
61   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
62   is commonly produced by subtraction) look like a single 1-bit
63   difference.
64 * the base values were pseudorandom, all zero but one bit set, or
65   all zero plus a counter that starts at zero.
66 
67 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
68 satisfy this are
69     4  6  8 16 19  4
70     9 15  3 18 27 15
71    14  9  3  7 17  3
72 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
73 for "differ" defined as + with a one-bit base and a two-bit delta.  I
74 used http://burtleburtle.net/bob/hash/avalanche.html to choose
75 the operations, constants, and arrangements of the variables.
76 
77 This does not achieve avalanche.  There are input bits of (a,b,c)
78 that fail to affect some output bits of (a,b,c), especially of a.  The
79 most thoroughly mixed value is c, but it doesn't really even achieve
80 avalanche in c.
81 
82 This allows some parallelism.  Read-after-writes are good at doubling
83 the number of bits affected, so the goal of mixing pulls in the opposite
84 direction as the goal of parallelism.  I did what I could.  Rotates
85 seem to cost as much as shifts on every machine I could lay my hands
86 on, and rotates are much kinder to the top and bottom bits, so I used
87 rotates.
88 -------------------------------------------------------------------------------
89 */
90 #define mix(a,b,c) \
91 { \
92   a -= c;  a ^= rot(c, 4);  c += b; \
93   b -= a;  b ^= rot(a, 6);  a += c; \
94   c -= b;  c ^= rot(b, 8);  b += a; \
95   a -= c;  a ^= rot(c,16);  c += b; \
96   b -= a;  b ^= rot(a,19);  a += c; \
97   c -= b;  c ^= rot(b, 4);  b += a; \
98 }
99 
100 /*
101 -------------------------------------------------------------------------------
102 final -- final mixing of 3 32-bit values (a,b,c) into c
103 
104 Pairs of (a,b,c) values differing in only a few bits will usually
105 produce values of c that look totally different.  This was tested for
106 * pairs that differed by one bit, by two bits, in any combination
107   of top bits of (a,b,c), or in any combination of bottom bits of
108   (a,b,c).
109 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
110   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
111   is commonly produced by subtraction) look like a single 1-bit
112   difference.
113 * the base values were pseudorandom, all zero but one bit set, or
114   all zero plus a counter that starts at zero.
115 
116 These constants passed:
117  14 11 25 16 4 14 24
118  12 14 25 16 4 14 24
119 and these came close:
120   4  8 15 26 3 22 24
121  10  8 15 26 3 22 24
122  11  8 15 26 3 22 24
123 -------------------------------------------------------------------------------
124 */
125 #define final(a,b,c) \
126 { \
127   c ^= b; c -= rot(b,14); \
128   a ^= c; a -= rot(c,11); \
129   b ^= a; b -= rot(a,25); \
130   c ^= b; c -= rot(b,16); \
131   a ^= c; a -= rot(c,4);  \
132   b ^= a; b -= rot(a,14); \
133   c ^= b; c -= rot(b,24); \
134 }
135 
136 /*
137 --------------------------------------------------------------------
138  This works on all machines.  To be useful, it requires
139  -- that the key be an array of uint32_t's, and
140  -- that the length be the number of uint32_t's in the key
141 
142  The function hashword() is identical to hashlittle() on little-endian
143  machines, and identical to hashbig() on big-endian machines,
144  except that the length has to be measured in uint32_ts rather than in
145  bytes.  hashlittle() is more complicated than hashword() only because
146  hashlittle() has to dance around fitting the key bytes into registers.
147 --------------------------------------------------------------------
148 */
149 uint32_t jenkins_hash32(
150 const uint32_t *k,                   /* the key, an array of uint32_t values */
151 size_t          length,               /* the length of the key, in uint32_ts */
152 uint32_t        initval)         /* the previous hash, or an arbitrary value */
153 {
154   uint32_t a,b,c;
155 
156   /* Set up the internal state */
157   a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
158 
159   /*------------------------------------------------- handle most of the key */
160   while (length > 3)
161   {
162     a += k[0];
163     b += k[1];
164     c += k[2];
165     mix(a,b,c);
166     length -= 3;
167     k += 3;
168   }
169 
170   /*------------------------------------------- handle the last 3 uint32_t's */
171   switch(length)                     /* all the case statements fall through */
172   {
173   case 3 : c+=k[2];
174   case 2 : b+=k[1];
175   case 1 : a+=k[0];
176     final(a,b,c);
177   case 0:     /* case 0: nothing left to add */
178     break;
179   }
180   /*------------------------------------------------------ report the result */
181   return c;
182 }
183 
184 #if BYTE_ORDER == LITTLE_ENDIAN
185 /*
186 -------------------------------------------------------------------------------
187 hashlittle() -- hash a variable-length key into a 32-bit value
188   k       : the key (the unaligned variable-length array of bytes)
189   length  : the length of the key, counting by bytes
190   initval : can be any 4-byte value
191 Returns a 32-bit value.  Every bit of the key affects every bit of
192 the return value.  Two keys differing by one or two bits will have
193 totally different hash values.
194 
195 The best hash table sizes are powers of 2.  There is no need to do
196 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
197 use a bitmask.  For example, if you need only 10 bits, do
198   h = (h & hashmask(10));
199 In which case, the hash table should have hashsize(10) elements.
200 
201 If you are hashing n strings (uint8_t **)k, do it like this:
202   for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
203 
204 By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
205 code any way you wish, private, educational, or commercial.  It's free.
206 
207 Use for hash table lookup, or anything where one collision in 2^^32 is
208 acceptable.  Do NOT use for cryptographic purposes.
209 -------------------------------------------------------------------------------
210 */
211 
212 uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
213 {
214   uint32_t a,b,c;                                          /* internal state */
215   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
216 
217   /* Set up the internal state */
218   a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
219 
220   u.ptr = key;
221   if ((u.i & 0x3) == 0) {
222     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
223 
224     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
225     while (length > 12)
226     {
227       a += k[0];
228       b += k[1];
229       c += k[2];
230       mix(a,b,c);
231       length -= 12;
232       k += 3;
233     }
234 
235     /*----------------------------- handle the last (probably partial) block */
236     /*
237      * "k[2]&0xffffff" actually reads beyond the end of the string, but
238      * then masks off the part it's not allowed to read.  Because the
239      * string is aligned, the masked-off tail is in the same word as the
240      * rest of the string.  Every machine with memory protection I've seen
241      * does it on word boundaries, so is OK with this.  But VALGRIND will
242      * still catch it and complain.  The masking trick does make the hash
243      * noticeably faster for short strings (like English words).
244      */
245 
246     switch(length)
247     {
248     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
249     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
250     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
251     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
252     case 8 : b+=k[1]; a+=k[0]; break;
253     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
254     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
255     case 5 : b+=k[1]&0xff; a+=k[0]; break;
256     case 4 : a+=k[0]; break;
257     case 3 : a+=k[0]&0xffffff; break;
258     case 2 : a+=k[0]&0xffff; break;
259     case 1 : a+=k[0]&0xff; break;
260     case 0 : return c;              /* zero length strings require no mixing */
261     }
262 
263   } else if ((u.i & 0x1) == 0) {
264     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
265     const uint8_t  *k8;
266 
267     /*--------------- all but last block: aligned reads and different mixing */
268     while (length > 12)
269     {
270       a += k[0] + (((uint32_t)k[1])<<16);
271       b += k[2] + (((uint32_t)k[3])<<16);
272       c += k[4] + (((uint32_t)k[5])<<16);
273       mix(a,b,c);
274       length -= 12;
275       k += 6;
276     }
277 
278     /*----------------------------- handle the last (probably partial) block */
279     k8 = (const uint8_t *)k;
280     switch(length)
281     {
282     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
283              b+=k[2]+(((uint32_t)k[3])<<16);
284              a+=k[0]+(((uint32_t)k[1])<<16);
285              break;
286     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
287     case 10: c+=k[4];
288              b+=k[2]+(((uint32_t)k[3])<<16);
289              a+=k[0]+(((uint32_t)k[1])<<16);
290              break;
291     case 9 : c+=k8[8];                      /* fall through */
292     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
293              a+=k[0]+(((uint32_t)k[1])<<16);
294              break;
295     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
296     case 6 : b+=k[2];
297              a+=k[0]+(((uint32_t)k[1])<<16);
298              break;
299     case 5 : b+=k8[4];                      /* fall through */
300     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
301              break;
302     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
303     case 2 : a+=k[0];
304              break;
305     case 1 : a+=k8[0];
306              break;
307     case 0 : return c;                     /* zero length requires no mixing */
308     }
309 
310   } else {                        /* need to read the key one byte at a time */
311     const uint8_t *k = (const uint8_t *)key;
312 
313     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
314     while (length > 12)
315     {
316       a += k[0];
317       a += ((uint32_t)k[1])<<8;
318       a += ((uint32_t)k[2])<<16;
319       a += ((uint32_t)k[3])<<24;
320       b += k[4];
321       b += ((uint32_t)k[5])<<8;
322       b += ((uint32_t)k[6])<<16;
323       b += ((uint32_t)k[7])<<24;
324       c += k[8];
325       c += ((uint32_t)k[9])<<8;
326       c += ((uint32_t)k[10])<<16;
327       c += ((uint32_t)k[11])<<24;
328       mix(a,b,c);
329       length -= 12;
330       k += 12;
331     }
332 
333     /*-------------------------------- last block: affect all 32 bits of (c) */
334     switch(length)                   /* all the case statements fall through */
335     {
336     case 12: c+=((uint32_t)k[11])<<24;
337     case 11: c+=((uint32_t)k[10])<<16;
338     case 10: c+=((uint32_t)k[9])<<8;
339     case 9 : c+=k[8];
340     case 8 : b+=((uint32_t)k[7])<<24;
341     case 7 : b+=((uint32_t)k[6])<<16;
342     case 6 : b+=((uint32_t)k[5])<<8;
343     case 5 : b+=k[4];
344     case 4 : a+=((uint32_t)k[3])<<24;
345     case 3 : a+=((uint32_t)k[2])<<16;
346     case 2 : a+=((uint32_t)k[1])<<8;
347     case 1 : a+=k[0];
348              break;
349     case 0 : return c;
350     }
351   }
352 
353   final(a,b,c);
354   return c;
355 }
356 
357 #else /* !(BYTE_ORDER == LITTLE_ENDIAN) */
358 
359 /*
360  * hashbig():
361  * This is the same as hashword() on big-endian machines.  It is different
362  * from hashlittle() on all machines.  hashbig() takes advantage of
363  * big-endian byte ordering.
364  */
365 uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
366 {
367   uint32_t a,b,c;
368   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
369 
370   /* Set up the internal state */
371   a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
372 
373   u.ptr = key;
374   if ((u.i & 0x3) == 0) {
375     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
376 
377     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
378     while (length > 12)
379     {
380       a += k[0];
381       b += k[1];
382       c += k[2];
383       mix(a,b,c);
384       length -= 12;
385       k += 3;
386     }
387 
388     /*----------------------------- handle the last (probably partial) block */
389     /*
390      * "k[2]<<8" actually reads beyond the end of the string, but
391      * then shifts out the part it's not allowed to read.  Because the
392      * string is aligned, the illegal read is in the same word as the
393      * rest of the string.  Every machine with memory protection I've seen
394      * does it on word boundaries, so is OK with this.  But VALGRIND will
395      * still catch it and complain.  The masking trick does make the hash
396      * noticeably faster for short strings (like English words).
397      */
398 
399     switch(length)
400     {
401     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
402     case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
403     case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
404     case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
405     case 8 : b+=k[1]; a+=k[0]; break;
406     case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
407     case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
408     case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
409     case 4 : a+=k[0]; break;
410     case 3 : a+=k[0]&0xffffff00; break;
411     case 2 : a+=k[0]&0xffff0000; break;
412     case 1 : a+=k[0]&0xff000000; break;
413     case 0 : return c;              /* zero length strings require no mixing */
414     }
415 
416   } else {                        /* need to read the key one byte at a time */
417     const uint8_t *k = (const uint8_t *)key;
418 
419     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
420     while (length > 12)
421     {
422       a += ((uint32_t)k[0])<<24;
423       a += ((uint32_t)k[1])<<16;
424       a += ((uint32_t)k[2])<<8;
425       a += ((uint32_t)k[3]);
426       b += ((uint32_t)k[4])<<24;
427       b += ((uint32_t)k[5])<<16;
428       b += ((uint32_t)k[6])<<8;
429       b += ((uint32_t)k[7]);
430       c += ((uint32_t)k[8])<<24;
431       c += ((uint32_t)k[9])<<16;
432       c += ((uint32_t)k[10])<<8;
433       c += ((uint32_t)k[11]);
434       mix(a,b,c);
435       length -= 12;
436       k += 12;
437     }
438 
439     /*-------------------------------- last block: affect all 32 bits of (c) */
440     switch(length)                   /* all the case statements fall through */
441     {
442     case 12: c+=k[11];
443     case 11: c+=((uint32_t)k[10])<<8;
444     case 10: c+=((uint32_t)k[9])<<16;
445     case 9 : c+=((uint32_t)k[8])<<24;
446     case 8 : b+=k[7];
447     case 7 : b+=((uint32_t)k[6])<<8;
448     case 6 : b+=((uint32_t)k[5])<<16;
449     case 5 : b+=((uint32_t)k[4])<<24;
450     case 4 : a+=k[3];
451     case 3 : a+=((uint32_t)k[2])<<8;
452     case 2 : a+=((uint32_t)k[1])<<16;
453     case 1 : a+=((uint32_t)k[0])<<24;
454              break;
455     case 0 : return c;
456     }
457   }
458 
459   final(a,b,c);
460   return c;
461 }
462 #endif
463