/*
 * Taken from http://burtleburtle.net/bob/c/lookup3.c
 * $FreeBSD$
 */

#include <sys/hash.h>
#include <machine/endian.h>

/*
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.

These are functions for producing 32-bit hashes for hash table lookup.
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions.  Routines to test the hash are included
if SELF_TEST is defined.  You can use this free for any purpose.  It's in
the public domain.  It has no warranty.

You probably want to use hashlittle().  hashlittle() and hashbig()
hash byte arrays.  hashlittle() is faster than hashbig() on
little-endian machines.  Intel and AMD are little-endian machines.
On second thought, you probably want hashlittle2(), which is identical to
hashlittle() except it returns two 32-bit hashes for the price of one.
You could implement hashbig2() if you wanted but I haven't bothered here.

If you want to find a hash of, say, exactly 7 integers, do
  a = i1;  b = i2;  c = i3;
  mix(a,b,c);
  a += i4; b += i5; c += i6;
  mix(a,b,c);
  a += i7;
  final(a,b,c);
then use c as the hash value.  If you have a variable length array of
4-byte integers to hash, use hashword().  If you have a byte array (like
a character string), use hashlittle().  If you have several byte arrays, or
a mix of things, see the comments above hashlittle().

Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
then mix those integers.  This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-------------------------------------------------------------------------------
*/

#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
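/*
 * Illustrative check (not part of the original file): rot() is a plain
 * 32-bit left rotate, demonstrated here on one concrete value.  The
 * macro assumes 0 < k < 32; a rotate count of 0 or 32 would make the
 * complementary shift undefined behaviour in C.
 */
_Static_assert(rot(0x80000001U, 1) == 0x00000003U, "rot() rotates left");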
/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().

If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) looks like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
     4  6  8 16 19  4
     9 15  3 18 27 15
    14  9  3  7 17  3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta.  I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.

This does not achieve avalanche.  There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a.  The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.

This allows some parallelism.  Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction from the goal of parallelism.  I did what I could.  Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
}
/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  This was tested for
* pairs that differed by one bit, by two bits, in any combination
  of top bits of (a,b,c), or in any combination of bottom bits of
  (a,b,c).
* "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  is commonly produced by subtraction) looks like a single 1-bit
  difference.
* the base values were pseudorandom, all zero but one bit set, or
  all zero plus a counter that starts at zero.

These constants passed:
 14 11 25 16 4 14 24
 12 14 25 16 4 14 24
and these came close:
  4  8 15 26 3 22 24
 10  8 15 26 3 22 24
 11  8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}
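/*
 * Example (a sketch, not part of the original file): the "exactly 7
 * integers" recipe from the header comment above, written out as a
 * compilable function.  The name jenkins_hash7 is invented for this
 * illustration.
 */
static inline uint32_t
jenkins_hash7(uint32_t i1, uint32_t i2, uint32_t i3, uint32_t i4,
    uint32_t i5, uint32_t i6, uint32_t i7)
{
  uint32_t a = i1, b = i2, c = i3;

  mix(a,b,c);
  a += i4; b += i5; c += i6;
  mix(a,b,c);
  a += i7;
  final(a,b,c);
  return c;                                    /* use c as the hash value */
}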
/*
--------------------------------------------------------------------
This works on all machines.  To be useful, it requires
-- that the key be an array of uint32_t's, and
-- that the length be the number of uint32_t's in the key

The function hashword() is identical to hashlittle() on little-endian
machines, and identical to hashbig() on big-endian machines,
except that the length has to be measured in uint32_ts rather than in
bytes.  hashlittle() is more complicated than hashword() only because
hashlittle() has to dance around fitting the key bytes into registers.
--------------------------------------------------------------------
*/
uint32_t jenkins_hash32(
  const uint32_t *k,                 /* the key, an array of uint32_t values */
  size_t length,                      /* the length of the key, in uint32_ts */
  uint32_t initval)              /* the previous hash, or an arbitrary value */
{
  uint32_t a,b,c;

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;

  /*------------------------------------------------- handle most of the key */
  while (length > 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
    length -= 3;
    k += 3;
  }

  /*------------------------------------------- handle the last 3 uint32_t's */
  switch(length)                     /* all the case statements fall through */
  {
  case 3 : c+=k[2];
  case 2 : b+=k[1];
  case 1 : a+=k[0];
    final(a,b,c);
  case 0:                                     /* case 0: nothing left to add */
    break;
  }
  /*------------------------------------------------------ report the result */
  return c;
}
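/*
 * Usage sketch (illustrative only; the variable names are invented):
 * hashing a fixed pair of 32-bit words, seeded with an arbitrary value,
 * in the style the comment above describes for uint32_t-array keys.
 *
 *   uint32_t key[2] = { src_addr, dst_addr };
 *   uint32_t h = jenkins_hash32(key, 2, seed);
 */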
#if BYTE_ORDER == LITTLE_ENDIAN
/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Two keys differing by one or two bits will have
totally different hash values.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/

uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;                                          /* internal state */
  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  u.ptr = key;
  if ((u.i & 0x3) == 0) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticeably faster for short strings (like English words).
     */

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

  } else if ((u.i & 0x1) == 0) {
    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;                    /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                                     /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;                     /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                                     /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;                     /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;                     /* zero length requires no mixing */
    }

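    /*
     * Added note (not in the original comments): unlike the 4-byte-aligned
     * path above, this 16-bit tail never reads past the end of the key;
     * whole halfwords are read only where they fit, and an odd trailing
     * byte is picked up through k8 instead of being masked off.
     */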
  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;
    case 11: c+=((uint32_t)k[10])<<16;
    case 10: c+=((uint32_t)k[9])<<8;
    case 9 : c+=k[8];
    case 8 : b+=((uint32_t)k[7])<<24;
    case 7 : b+=((uint32_t)k[6])<<16;
    case 6 : b+=((uint32_t)k[5])<<8;
    case 5 : b+=k[4];
    case 4 : a+=((uint32_t)k[3])<<24;
    case 3 : a+=((uint32_t)k[2])<<16;
    case 2 : a+=((uint32_t)k[1])<<8;
    case 1 : a+=k[0];
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}

#else /* !(BYTE_ORDER == LITTLE_ENDIAN) */

/*
 * hashbig():
 * This is the same as hashword() on big-endian machines.  It is different
 * from hashlittle() on all machines.  hashbig() takes advantage of
 * big-endian byte ordering.
 */
uint32_t jenkins_hash( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;
  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  u.ptr = key;
  if ((u.i & 0x3) == 0) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]<<8" actually reads beyond the end of the string, but
     * then shifts out the part it's not allowed to read.  Because the
     * string is aligned, the illegal read is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticeably faster for short strings (like English words).
     */

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff00; break;
    case 2 : a+=k[0]&0xffff0000; break;
    case 1 : a+=k[0]&0xff000000; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += ((uint32_t)k[0])<<24;
      a += ((uint32_t)k[1])<<16;
      a += ((uint32_t)k[2])<<8;
      a += ((uint32_t)k[3]);
      b += ((uint32_t)k[4])<<24;
      b += ((uint32_t)k[5])<<16;
      b += ((uint32_t)k[6])<<8;
      b += ((uint32_t)k[7]);
      c += ((uint32_t)k[8])<<24;
      c += ((uint32_t)k[9])<<16;
      c += ((uint32_t)k[10])<<8;
      c += ((uint32_t)k[11]);
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=k[11];
    case 11: c+=((uint32_t)k[10])<<8;
    case 10: c+=((uint32_t)k[9])<<16;
    case 9 : c+=((uint32_t)k[8])<<24;
    case 8 : b+=k[7];
    case 7 : b+=((uint32_t)k[6])<<8;
    case 6 : b+=((uint32_t)k[5])<<16;
    case 5 : b+=((uint32_t)k[4])<<24;
    case 4 : a+=k[3];
    case 3 : a+=((uint32_t)k[2])<<8;
    case 2 : a+=((uint32_t)k[1])<<16;
    case 1 : a+=((uint32_t)k[0])<<24;
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}
#endif
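/*
 * Usage sketch (illustrative only, not part of the original file).  The
 * hashlittle() comment above suggests masking the result down to a
 * power-of-two table size and chaining the hash across several byte
 * arrays; this spells both out.  The function name, and the inline
 * expansion of the lookup3.c hashmask(10) idiom, are assumptions made
 * for the example.
 */
static inline uint32_t
jenkins_hash_example(const uint8_t **keys, const size_t *lens, size_t n)
{
  uint32_t h = 0;
  size_t i;

  /* Chain the hash through each byte array, seeding with the previous hash. */
  for (i = 0; i < n; i++)
    h = jenkins_hash(keys[i], lens[i], h);

  /* Keep only 10 bits, for a table of 1<<10 (== hashsize(10)) buckets. */
  return (h & ((1U << 10) - 1));
}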