162208ca5SGleb Smirnoff /*
262208ca5SGleb Smirnoff * Taken from http://burtleburtle.net/bob/c/lookup3.c
362208ca5SGleb Smirnoff */
462208ca5SGleb Smirnoff
562208ca5SGleb Smirnoff #include <sys/hash.h>
662208ca5SGleb Smirnoff #include <machine/endian.h>
762208ca5SGleb Smirnoff
862208ca5SGleb Smirnoff /*
962208ca5SGleb Smirnoff -------------------------------------------------------------------------------
1062208ca5SGleb Smirnoff lookup3.c, by Bob Jenkins, May 2006, Public Domain.
1162208ca5SGleb Smirnoff
1262208ca5SGleb Smirnoff These are functions for producing 32-bit hashes for hash table lookup.
1362208ca5SGleb Smirnoff hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
1462208ca5SGleb Smirnoff are externally useful functions. Routines to test the hash are included
1562208ca5SGleb Smirnoff if SELF_TEST is defined. You can use this free for any purpose. It's in
1662208ca5SGleb Smirnoff the public domain. It has no warranty.
1762208ca5SGleb Smirnoff
1862208ca5SGleb Smirnoff You probably want to use hashlittle(). hashlittle() and hashbig()
190af1b472SEitan Adler hash byte arrays. hashlittle() is faster than hashbig() on
2062208ca5SGleb Smirnoff little-endian machines. Intel and AMD are little-endian machines.
2162208ca5SGleb Smirnoff On second thought, you probably want hashlittle2(), which is identical to
2262208ca5SGleb Smirnoff hashlittle() except it returns two 32-bit hashes for the price of one.
2362208ca5SGleb Smirnoff You could implement hashbig2() if you wanted but I haven't bothered here.
2462208ca5SGleb Smirnoff
2562208ca5SGleb Smirnoff If you want to find a hash of, say, exactly 7 integers, do
2662208ca5SGleb Smirnoff a = i1; b = i2; c = i3;
2762208ca5SGleb Smirnoff mix(a,b,c);
2862208ca5SGleb Smirnoff a += i4; b += i5; c += i6;
2962208ca5SGleb Smirnoff mix(a,b,c);
3062208ca5SGleb Smirnoff a += i7;
3162208ca5SGleb Smirnoff final(a,b,c);
3262208ca5SGleb Smirnoff then use c as the hash value. If you have a variable length array of
3362208ca5SGleb Smirnoff 4-byte integers to hash, use hashword(). If you have a byte array (like
3462208ca5SGleb Smirnoff a character string), use hashlittle(). If you have several byte arrays, or
3562208ca5SGleb Smirnoff a mix of things, see the comments above hashlittle().
3662208ca5SGleb Smirnoff
3762208ca5SGleb Smirnoff Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
3862208ca5SGleb Smirnoff then mix those integers. This is fast (you can do a lot more thorough
3962208ca5SGleb Smirnoff mixing with 12*3 instructions on 3 integers than you can with 3 instructions
4062208ca5SGleb Smirnoff on 1 byte), but shoehorning those bytes into integers efficiently is messy.
4162208ca5SGleb Smirnoff -------------------------------------------------------------------------------
4262208ca5SGleb Smirnoff */
4362208ca5SGleb Smirnoff
4462208ca5SGleb Smirnoff #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
4562208ca5SGleb Smirnoff
4662208ca5SGleb Smirnoff /*
4762208ca5SGleb Smirnoff -------------------------------------------------------------------------------
4862208ca5SGleb Smirnoff mix -- mix 3 32-bit values reversibly.
4962208ca5SGleb Smirnoff
5062208ca5SGleb Smirnoff This is reversible, so any information in (a,b,c) before mix() is
5162208ca5SGleb Smirnoff still in (a,b,c) after mix().
5262208ca5SGleb Smirnoff
5362208ca5SGleb Smirnoff If four pairs of (a,b,c) inputs are run through mix(), or through
5462208ca5SGleb Smirnoff mix() in reverse, there are at least 32 bits of the output that
5562208ca5SGleb Smirnoff are sometimes the same for one pair and different for another pair.
5662208ca5SGleb Smirnoff This was tested for:
5762208ca5SGleb Smirnoff * pairs that differed by one bit, by two bits, in any combination
5862208ca5SGleb Smirnoff of top bits of (a,b,c), or in any combination of bottom bits of
5962208ca5SGleb Smirnoff (a,b,c).
6062208ca5SGleb Smirnoff * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
6162208ca5SGleb Smirnoff the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
6262208ca5SGleb Smirnoff is commonly produced by subtraction) look like a single 1-bit
6362208ca5SGleb Smirnoff difference.
6462208ca5SGleb Smirnoff * the base values were pseudorandom, all zero but one bit set, or
6562208ca5SGleb Smirnoff all zero plus a counter that starts at zero.
6662208ca5SGleb Smirnoff
6762208ca5SGleb Smirnoff Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
6862208ca5SGleb Smirnoff satisfy this are
6962208ca5SGleb Smirnoff 4 6 8 16 19 4
7062208ca5SGleb Smirnoff 9 15 3 18 27 15
7162208ca5SGleb Smirnoff 14 9 3 7 17 3
7262208ca5SGleb Smirnoff Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
7362208ca5SGleb Smirnoff for "differ" defined as + with a one-bit base and a two-bit delta. I
7462208ca5SGleb Smirnoff used http://burtleburtle.net/bob/hash/avalanche.html to choose
7562208ca5SGleb Smirnoff the operations, constants, and arrangements of the variables.
7662208ca5SGleb Smirnoff
7762208ca5SGleb Smirnoff This does not achieve avalanche. There are input bits of (a,b,c)
7862208ca5SGleb Smirnoff that fail to affect some output bits of (a,b,c), especially of a. The
7962208ca5SGleb Smirnoff most thoroughly mixed value is c, but it doesn't really even achieve
8062208ca5SGleb Smirnoff avalanche in c.
8162208ca5SGleb Smirnoff
8262208ca5SGleb Smirnoff This allows some parallelism. Read-after-writes are good at doubling
8362208ca5SGleb Smirnoff the number of bits affected, so the goal of mixing pulls in the opposite
8462208ca5SGleb Smirnoff direction as the goal of parallelism. I did what I could. Rotates
8562208ca5SGleb Smirnoff seem to cost as much as shifts on every machine I could lay my hands
8662208ca5SGleb Smirnoff on, and rotates are much kinder to the top and bottom bits, so I used
8762208ca5SGleb Smirnoff rotates.
8862208ca5SGleb Smirnoff -------------------------------------------------------------------------------
8962208ca5SGleb Smirnoff */
9062208ca5SGleb Smirnoff #define mix(a,b,c) \
9162208ca5SGleb Smirnoff { \
9262208ca5SGleb Smirnoff a -= c; a ^= rot(c, 4); c += b; \
9362208ca5SGleb Smirnoff b -= a; b ^= rot(a, 6); a += c; \
9462208ca5SGleb Smirnoff c -= b; c ^= rot(b, 8); b += a; \
9562208ca5SGleb Smirnoff a -= c; a ^= rot(c,16); c += b; \
9662208ca5SGleb Smirnoff b -= a; b ^= rot(a,19); a += c; \
9762208ca5SGleb Smirnoff c -= b; c ^= rot(b, 4); b += a; \
9862208ca5SGleb Smirnoff }
9962208ca5SGleb Smirnoff
10062208ca5SGleb Smirnoff /*
10162208ca5SGleb Smirnoff -------------------------------------------------------------------------------
10262208ca5SGleb Smirnoff final -- final mixing of 3 32-bit values (a,b,c) into c
10362208ca5SGleb Smirnoff
10462208ca5SGleb Smirnoff Pairs of (a,b,c) values differing in only a few bits will usually
10562208ca5SGleb Smirnoff produce values of c that look totally different. This was tested for
10662208ca5SGleb Smirnoff * pairs that differed by one bit, by two bits, in any combination
10762208ca5SGleb Smirnoff of top bits of (a,b,c), or in any combination of bottom bits of
10862208ca5SGleb Smirnoff (a,b,c).
10962208ca5SGleb Smirnoff * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
11062208ca5SGleb Smirnoff the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
11162208ca5SGleb Smirnoff is commonly produced by subtraction) look like a single 1-bit
11262208ca5SGleb Smirnoff difference.
11362208ca5SGleb Smirnoff * the base values were pseudorandom, all zero but one bit set, or
11462208ca5SGleb Smirnoff all zero plus a counter that starts at zero.
11562208ca5SGleb Smirnoff
11662208ca5SGleb Smirnoff These constants passed:
11762208ca5SGleb Smirnoff 14 11 25 16 4 14 24
11862208ca5SGleb Smirnoff 12 14 25 16 4 14 24
11962208ca5SGleb Smirnoff and these came close:
12062208ca5SGleb Smirnoff 4 8 15 26 3 22 24
12162208ca5SGleb Smirnoff 10 8 15 26 3 22 24
12262208ca5SGleb Smirnoff 11 8 15 26 3 22 24
12362208ca5SGleb Smirnoff -------------------------------------------------------------------------------
12462208ca5SGleb Smirnoff */
12562208ca5SGleb Smirnoff #define final(a,b,c) \
12662208ca5SGleb Smirnoff { \
12762208ca5SGleb Smirnoff c ^= b; c -= rot(b,14); \
12862208ca5SGleb Smirnoff a ^= c; a -= rot(c,11); \
12962208ca5SGleb Smirnoff b ^= a; b -= rot(a,25); \
13062208ca5SGleb Smirnoff c ^= b; c -= rot(b,16); \
13162208ca5SGleb Smirnoff a ^= c; a -= rot(c,4); \
13262208ca5SGleb Smirnoff b ^= a; b -= rot(a,14); \
13362208ca5SGleb Smirnoff c ^= b; c -= rot(b,24); \
13462208ca5SGleb Smirnoff }
13562208ca5SGleb Smirnoff
13662208ca5SGleb Smirnoff /*
13762208ca5SGleb Smirnoff --------------------------------------------------------------------
13862208ca5SGleb Smirnoff This works on all machines. To be useful, it requires
13962208ca5SGleb Smirnoff -- that the key be an array of uint32_t's, and
14062208ca5SGleb Smirnoff -- that the length be the number of uint32_t's in the key
14162208ca5SGleb Smirnoff
14262208ca5SGleb Smirnoff The function hashword() is identical to hashlittle() on little-endian
14362208ca5SGleb Smirnoff machines, and identical to hashbig() on big-endian machines,
14462208ca5SGleb Smirnoff except that the length has to be measured in uint32_ts rather than in
14562208ca5SGleb Smirnoff bytes. hashlittle() is more complicated than hashword() only because
14662208ca5SGleb Smirnoff hashlittle() has to dance around fitting the key bytes into registers.
14762208ca5SGleb Smirnoff --------------------------------------------------------------------
14862208ca5SGleb Smirnoff */
jenkins_hash32(const uint32_t * k,size_t length,uint32_t initval)14962208ca5SGleb Smirnoff uint32_t jenkins_hash32(
15062208ca5SGleb Smirnoff const uint32_t *k, /* the key, an array of uint32_t values */
15162208ca5SGleb Smirnoff size_t length, /* the length of the key, in uint32_ts */
15262208ca5SGleb Smirnoff uint32_t initval) /* the previous hash, or an arbitrary value */
15362208ca5SGleb Smirnoff {
15462208ca5SGleb Smirnoff uint32_t a,b,c;
15562208ca5SGleb Smirnoff
15662208ca5SGleb Smirnoff /* Set up the internal state */
15762208ca5SGleb Smirnoff a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
15862208ca5SGleb Smirnoff
15962208ca5SGleb Smirnoff /*------------------------------------------------- handle most of the key */
16062208ca5SGleb Smirnoff while (length > 3)
16162208ca5SGleb Smirnoff {
16262208ca5SGleb Smirnoff a += k[0];
16362208ca5SGleb Smirnoff b += k[1];
16462208ca5SGleb Smirnoff c += k[2];
16562208ca5SGleb Smirnoff mix(a,b,c);
16662208ca5SGleb Smirnoff length -= 3;
16762208ca5SGleb Smirnoff k += 3;
16862208ca5SGleb Smirnoff }
16962208ca5SGleb Smirnoff
17062208ca5SGleb Smirnoff /*------------------------------------------- handle the last 3 uint32_t's */
17162208ca5SGleb Smirnoff switch(length) /* all the case statements fall through */
17262208ca5SGleb Smirnoff {
17362208ca5SGleb Smirnoff case 3 : c+=k[2];
17462208ca5SGleb Smirnoff case 2 : b+=k[1];
17562208ca5SGleb Smirnoff case 1 : a+=k[0];
17662208ca5SGleb Smirnoff final(a,b,c);
17762208ca5SGleb Smirnoff case 0: /* case 0: nothing left to add */
17862208ca5SGleb Smirnoff break;
17962208ca5SGleb Smirnoff }
18062208ca5SGleb Smirnoff /*------------------------------------------------------ report the result */
18162208ca5SGleb Smirnoff return c;
18262208ca5SGleb Smirnoff }
18362208ca5SGleb Smirnoff
18462208ca5SGleb Smirnoff #if BYTE_ORDER == LITTLE_ENDIAN
18562208ca5SGleb Smirnoff /*
18662208ca5SGleb Smirnoff -------------------------------------------------------------------------------
18762208ca5SGleb Smirnoff hashlittle() -- hash a variable-length key into a 32-bit value
18862208ca5SGleb Smirnoff k : the key (the unaligned variable-length array of bytes)
18962208ca5SGleb Smirnoff length : the length of the key, counting by bytes
19062208ca5SGleb Smirnoff initval : can be any 4-byte value
19162208ca5SGleb Smirnoff Returns a 32-bit value. Every bit of the key affects every bit of
19262208ca5SGleb Smirnoff the return value. Two keys differing by one or two bits will have
19362208ca5SGleb Smirnoff totally different hash values.
19462208ca5SGleb Smirnoff
19562208ca5SGleb Smirnoff The best hash table sizes are powers of 2. There is no need to do
19662208ca5SGleb Smirnoff mod a prime (mod is sooo slow!). If you need less than 32 bits,
19762208ca5SGleb Smirnoff use a bitmask. For example, if you need only 10 bits, do
19862208ca5SGleb Smirnoff h = (h & hashmask(10));
19962208ca5SGleb Smirnoff In which case, the hash table should have hashsize(10) elements.
20062208ca5SGleb Smirnoff
20162208ca5SGleb Smirnoff If you are hashing n strings (uint8_t **)k, do it like this:
20262208ca5SGleb Smirnoff for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
20362208ca5SGleb Smirnoff
20462208ca5SGleb Smirnoff By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
20562208ca5SGleb Smirnoff code any way you wish, private, educational, or commercial. It's free.
20662208ca5SGleb Smirnoff
20762208ca5SGleb Smirnoff Use for hash table lookup, or anything where one collision in 2^^32 is
20862208ca5SGleb Smirnoff acceptable. Do NOT use for cryptographic purposes.
20962208ca5SGleb Smirnoff -------------------------------------------------------------------------------
21062208ca5SGleb Smirnoff */
21162208ca5SGleb Smirnoff
jenkins_hash(const void * key,size_t length,uint32_t initval)21262208ca5SGleb Smirnoff uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
21362208ca5SGleb Smirnoff {
21462208ca5SGleb Smirnoff uint32_t a,b,c; /* internal state */
21562208ca5SGleb Smirnoff union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
21662208ca5SGleb Smirnoff
21762208ca5SGleb Smirnoff /* Set up the internal state */
21862208ca5SGleb Smirnoff a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
21962208ca5SGleb Smirnoff
22062208ca5SGleb Smirnoff u.ptr = key;
22162208ca5SGleb Smirnoff if ((u.i & 0x3) == 0) {
22262208ca5SGleb Smirnoff const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
22362208ca5SGleb Smirnoff
22462208ca5SGleb Smirnoff /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
22562208ca5SGleb Smirnoff while (length > 12)
22662208ca5SGleb Smirnoff {
22762208ca5SGleb Smirnoff a += k[0];
22862208ca5SGleb Smirnoff b += k[1];
22962208ca5SGleb Smirnoff c += k[2];
23062208ca5SGleb Smirnoff mix(a,b,c);
23162208ca5SGleb Smirnoff length -= 12;
23262208ca5SGleb Smirnoff k += 3;
23362208ca5SGleb Smirnoff }
23462208ca5SGleb Smirnoff
23562208ca5SGleb Smirnoff /*----------------------------- handle the last (probably partial) block */
23662208ca5SGleb Smirnoff /*
23762208ca5SGleb Smirnoff * "k[2]&0xffffff" actually reads beyond the end of the string, but
23862208ca5SGleb Smirnoff * then masks off the part it's not allowed to read. Because the
23962208ca5SGleb Smirnoff * string is aligned, the masked-off tail is in the same word as the
24062208ca5SGleb Smirnoff * rest of the string. Every machine with memory protection I've seen
24162208ca5SGleb Smirnoff * does it on word boundaries, so is OK with this. But VALGRIND will
24262208ca5SGleb Smirnoff * still catch it and complain. The masking trick does make the hash
243*d10abf84SGordon Bergling * noticeably faster for short strings (like English words).
24462208ca5SGleb Smirnoff */
24562208ca5SGleb Smirnoff
24662208ca5SGleb Smirnoff switch(length)
24762208ca5SGleb Smirnoff {
24862208ca5SGleb Smirnoff case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
24962208ca5SGleb Smirnoff case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
25062208ca5SGleb Smirnoff case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
25162208ca5SGleb Smirnoff case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
25262208ca5SGleb Smirnoff case 8 : b+=k[1]; a+=k[0]; break;
25362208ca5SGleb Smirnoff case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
25462208ca5SGleb Smirnoff case 6 : b+=k[1]&0xffff; a+=k[0]; break;
25562208ca5SGleb Smirnoff case 5 : b+=k[1]&0xff; a+=k[0]; break;
25662208ca5SGleb Smirnoff case 4 : a+=k[0]; break;
25762208ca5SGleb Smirnoff case 3 : a+=k[0]&0xffffff; break;
25862208ca5SGleb Smirnoff case 2 : a+=k[0]&0xffff; break;
25962208ca5SGleb Smirnoff case 1 : a+=k[0]&0xff; break;
26062208ca5SGleb Smirnoff case 0 : return c; /* zero length strings require no mixing */
26162208ca5SGleb Smirnoff }
26262208ca5SGleb Smirnoff
26362208ca5SGleb Smirnoff } else if ((u.i & 0x1) == 0) {
26462208ca5SGleb Smirnoff const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
26562208ca5SGleb Smirnoff const uint8_t *k8;
26662208ca5SGleb Smirnoff
26762208ca5SGleb Smirnoff /*--------------- all but last block: aligned reads and different mixing */
26862208ca5SGleb Smirnoff while (length > 12)
26962208ca5SGleb Smirnoff {
27062208ca5SGleb Smirnoff a += k[0] + (((uint32_t)k[1])<<16);
27162208ca5SGleb Smirnoff b += k[2] + (((uint32_t)k[3])<<16);
27262208ca5SGleb Smirnoff c += k[4] + (((uint32_t)k[5])<<16);
27362208ca5SGleb Smirnoff mix(a,b,c);
27462208ca5SGleb Smirnoff length -= 12;
27562208ca5SGleb Smirnoff k += 6;
27662208ca5SGleb Smirnoff }
27762208ca5SGleb Smirnoff
27862208ca5SGleb Smirnoff /*----------------------------- handle the last (probably partial) block */
27962208ca5SGleb Smirnoff k8 = (const uint8_t *)k;
28062208ca5SGleb Smirnoff switch(length)
28162208ca5SGleb Smirnoff {
28262208ca5SGleb Smirnoff case 12: c+=k[4]+(((uint32_t)k[5])<<16);
28362208ca5SGleb Smirnoff b+=k[2]+(((uint32_t)k[3])<<16);
28462208ca5SGleb Smirnoff a+=k[0]+(((uint32_t)k[1])<<16);
28562208ca5SGleb Smirnoff break;
28662208ca5SGleb Smirnoff case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
28762208ca5SGleb Smirnoff case 10: c+=k[4];
28862208ca5SGleb Smirnoff b+=k[2]+(((uint32_t)k[3])<<16);
28962208ca5SGleb Smirnoff a+=k[0]+(((uint32_t)k[1])<<16);
29062208ca5SGleb Smirnoff break;
29162208ca5SGleb Smirnoff case 9 : c+=k8[8]; /* fall through */
29262208ca5SGleb Smirnoff case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
29362208ca5SGleb Smirnoff a+=k[0]+(((uint32_t)k[1])<<16);
29462208ca5SGleb Smirnoff break;
29562208ca5SGleb Smirnoff case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
29662208ca5SGleb Smirnoff case 6 : b+=k[2];
29762208ca5SGleb Smirnoff a+=k[0]+(((uint32_t)k[1])<<16);
29862208ca5SGleb Smirnoff break;
29962208ca5SGleb Smirnoff case 5 : b+=k8[4]; /* fall through */
30062208ca5SGleb Smirnoff case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
30162208ca5SGleb Smirnoff break;
30262208ca5SGleb Smirnoff case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
30362208ca5SGleb Smirnoff case 2 : a+=k[0];
30462208ca5SGleb Smirnoff break;
30562208ca5SGleb Smirnoff case 1 : a+=k8[0];
30662208ca5SGleb Smirnoff break;
30762208ca5SGleb Smirnoff case 0 : return c; /* zero length requires no mixing */
30862208ca5SGleb Smirnoff }
30962208ca5SGleb Smirnoff
31062208ca5SGleb Smirnoff } else { /* need to read the key one byte at a time */
31162208ca5SGleb Smirnoff const uint8_t *k = (const uint8_t *)key;
31262208ca5SGleb Smirnoff
31362208ca5SGleb Smirnoff /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
31462208ca5SGleb Smirnoff while (length > 12)
31562208ca5SGleb Smirnoff {
31662208ca5SGleb Smirnoff a += k[0];
31762208ca5SGleb Smirnoff a += ((uint32_t)k[1])<<8;
31862208ca5SGleb Smirnoff a += ((uint32_t)k[2])<<16;
31962208ca5SGleb Smirnoff a += ((uint32_t)k[3])<<24;
32062208ca5SGleb Smirnoff b += k[4];
32162208ca5SGleb Smirnoff b += ((uint32_t)k[5])<<8;
32262208ca5SGleb Smirnoff b += ((uint32_t)k[6])<<16;
32362208ca5SGleb Smirnoff b += ((uint32_t)k[7])<<24;
32462208ca5SGleb Smirnoff c += k[8];
32562208ca5SGleb Smirnoff c += ((uint32_t)k[9])<<8;
32662208ca5SGleb Smirnoff c += ((uint32_t)k[10])<<16;
32762208ca5SGleb Smirnoff c += ((uint32_t)k[11])<<24;
32862208ca5SGleb Smirnoff mix(a,b,c);
32962208ca5SGleb Smirnoff length -= 12;
33062208ca5SGleb Smirnoff k += 12;
33162208ca5SGleb Smirnoff }
33262208ca5SGleb Smirnoff
33362208ca5SGleb Smirnoff /*-------------------------------- last block: affect all 32 bits of (c) */
33462208ca5SGleb Smirnoff switch(length) /* all the case statements fall through */
33562208ca5SGleb Smirnoff {
33662208ca5SGleb Smirnoff case 12: c+=((uint32_t)k[11])<<24;
33762208ca5SGleb Smirnoff case 11: c+=((uint32_t)k[10])<<16;
33862208ca5SGleb Smirnoff case 10: c+=((uint32_t)k[9])<<8;
33962208ca5SGleb Smirnoff case 9 : c+=k[8];
34062208ca5SGleb Smirnoff case 8 : b+=((uint32_t)k[7])<<24;
34162208ca5SGleb Smirnoff case 7 : b+=((uint32_t)k[6])<<16;
34262208ca5SGleb Smirnoff case 6 : b+=((uint32_t)k[5])<<8;
34362208ca5SGleb Smirnoff case 5 : b+=k[4];
34462208ca5SGleb Smirnoff case 4 : a+=((uint32_t)k[3])<<24;
34562208ca5SGleb Smirnoff case 3 : a+=((uint32_t)k[2])<<16;
34662208ca5SGleb Smirnoff case 2 : a+=((uint32_t)k[1])<<8;
34762208ca5SGleb Smirnoff case 1 : a+=k[0];
34862208ca5SGleb Smirnoff break;
34962208ca5SGleb Smirnoff case 0 : return c;
35062208ca5SGleb Smirnoff }
35162208ca5SGleb Smirnoff }
35262208ca5SGleb Smirnoff
35362208ca5SGleb Smirnoff final(a,b,c);
35462208ca5SGleb Smirnoff return c;
35562208ca5SGleb Smirnoff }
35662208ca5SGleb Smirnoff
35762208ca5SGleb Smirnoff #else /* !(BYTE_ORDER == LITTLE_ENDIAN) */
35862208ca5SGleb Smirnoff
35962208ca5SGleb Smirnoff /*
36062208ca5SGleb Smirnoff * hashbig():
36162208ca5SGleb Smirnoff * This is the same as hashword() on big-endian machines. It is different
36262208ca5SGleb Smirnoff * from hashlittle() on all machines. hashbig() takes advantage of
36362208ca5SGleb Smirnoff * big-endian byte ordering.
36462208ca5SGleb Smirnoff */
jenkins_hash(const void * key,size_t length,uint32_t initval)36562208ca5SGleb Smirnoff uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
36662208ca5SGleb Smirnoff {
36762208ca5SGleb Smirnoff uint32_t a,b,c;
36862208ca5SGleb Smirnoff union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
36962208ca5SGleb Smirnoff
37062208ca5SGleb Smirnoff /* Set up the internal state */
37162208ca5SGleb Smirnoff a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
37262208ca5SGleb Smirnoff
37362208ca5SGleb Smirnoff u.ptr = key;
37462208ca5SGleb Smirnoff if ((u.i & 0x3) == 0) {
37562208ca5SGleb Smirnoff const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
37662208ca5SGleb Smirnoff
37762208ca5SGleb Smirnoff /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
37862208ca5SGleb Smirnoff while (length > 12)
37962208ca5SGleb Smirnoff {
38062208ca5SGleb Smirnoff a += k[0];
38162208ca5SGleb Smirnoff b += k[1];
38262208ca5SGleb Smirnoff c += k[2];
38362208ca5SGleb Smirnoff mix(a,b,c);
38462208ca5SGleb Smirnoff length -= 12;
38562208ca5SGleb Smirnoff k += 3;
38662208ca5SGleb Smirnoff }
38762208ca5SGleb Smirnoff
38862208ca5SGleb Smirnoff /*----------------------------- handle the last (probably partial) block */
38962208ca5SGleb Smirnoff /*
39062208ca5SGleb Smirnoff * "k[2]<<8" actually reads beyond the end of the string, but
39162208ca5SGleb Smirnoff * then shifts out the part it's not allowed to read. Because the
39262208ca5SGleb Smirnoff * string is aligned, the illegal read is in the same word as the
39362208ca5SGleb Smirnoff * rest of the string. Every machine with memory protection I've seen
39462208ca5SGleb Smirnoff * does it on word boundaries, so is OK with this. But VALGRIND will
39562208ca5SGleb Smirnoff * still catch it and complain. The masking trick does make the hash
396*d10abf84SGordon Bergling * noticeably faster for short strings (like English words).
39762208ca5SGleb Smirnoff */
39862208ca5SGleb Smirnoff
39962208ca5SGleb Smirnoff switch(length)
40062208ca5SGleb Smirnoff {
40162208ca5SGleb Smirnoff case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
40262208ca5SGleb Smirnoff case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
40362208ca5SGleb Smirnoff case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
40462208ca5SGleb Smirnoff case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
40562208ca5SGleb Smirnoff case 8 : b+=k[1]; a+=k[0]; break;
40662208ca5SGleb Smirnoff case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
40762208ca5SGleb Smirnoff case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
40862208ca5SGleb Smirnoff case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
40962208ca5SGleb Smirnoff case 4 : a+=k[0]; break;
41062208ca5SGleb Smirnoff case 3 : a+=k[0]&0xffffff00; break;
41162208ca5SGleb Smirnoff case 2 : a+=k[0]&0xffff0000; break;
41262208ca5SGleb Smirnoff case 1 : a+=k[0]&0xff000000; break;
41362208ca5SGleb Smirnoff case 0 : return c; /* zero length strings require no mixing */
41462208ca5SGleb Smirnoff }
41562208ca5SGleb Smirnoff
41662208ca5SGleb Smirnoff } else { /* need to read the key one byte at a time */
41762208ca5SGleb Smirnoff const uint8_t *k = (const uint8_t *)key;
41862208ca5SGleb Smirnoff
41962208ca5SGleb Smirnoff /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
42062208ca5SGleb Smirnoff while (length > 12)
42162208ca5SGleb Smirnoff {
42262208ca5SGleb Smirnoff a += ((uint32_t)k[0])<<24;
42362208ca5SGleb Smirnoff a += ((uint32_t)k[1])<<16;
42462208ca5SGleb Smirnoff a += ((uint32_t)k[2])<<8;
42562208ca5SGleb Smirnoff a += ((uint32_t)k[3]);
42662208ca5SGleb Smirnoff b += ((uint32_t)k[4])<<24;
42762208ca5SGleb Smirnoff b += ((uint32_t)k[5])<<16;
42862208ca5SGleb Smirnoff b += ((uint32_t)k[6])<<8;
42962208ca5SGleb Smirnoff b += ((uint32_t)k[7]);
43062208ca5SGleb Smirnoff c += ((uint32_t)k[8])<<24;
43162208ca5SGleb Smirnoff c += ((uint32_t)k[9])<<16;
43262208ca5SGleb Smirnoff c += ((uint32_t)k[10])<<8;
43362208ca5SGleb Smirnoff c += ((uint32_t)k[11]);
43462208ca5SGleb Smirnoff mix(a,b,c);
43562208ca5SGleb Smirnoff length -= 12;
43662208ca5SGleb Smirnoff k += 12;
43762208ca5SGleb Smirnoff }
43862208ca5SGleb Smirnoff
43962208ca5SGleb Smirnoff /*-------------------------------- last block: affect all 32 bits of (c) */
44062208ca5SGleb Smirnoff switch(length) /* all the case statements fall through */
44162208ca5SGleb Smirnoff {
44262208ca5SGleb Smirnoff case 12: c+=k[11];
44362208ca5SGleb Smirnoff case 11: c+=((uint32_t)k[10])<<8;
44462208ca5SGleb Smirnoff case 10: c+=((uint32_t)k[9])<<16;
44562208ca5SGleb Smirnoff case 9 : c+=((uint32_t)k[8])<<24;
44662208ca5SGleb Smirnoff case 8 : b+=k[7];
44762208ca5SGleb Smirnoff case 7 : b+=((uint32_t)k[6])<<8;
44862208ca5SGleb Smirnoff case 6 : b+=((uint32_t)k[5])<<16;
44962208ca5SGleb Smirnoff case 5 : b+=((uint32_t)k[4])<<24;
45062208ca5SGleb Smirnoff case 4 : a+=k[3];
45162208ca5SGleb Smirnoff case 3 : a+=((uint32_t)k[2])<<8;
45262208ca5SGleb Smirnoff case 2 : a+=((uint32_t)k[1])<<16;
45362208ca5SGleb Smirnoff case 1 : a+=((uint32_t)k[0])<<24;
45462208ca5SGleb Smirnoff break;
45562208ca5SGleb Smirnoff case 0 : return c;
45662208ca5SGleb Smirnoff }
45762208ca5SGleb Smirnoff }
45862208ca5SGleb Smirnoff
45962208ca5SGleb Smirnoff final(a,b,c);
46062208ca5SGleb Smirnoff return c;
46162208ca5SGleb Smirnoff }
46262208ca5SGleb Smirnoff #endif
463