1 /* 2 February 2013(Wouter) patch defines for BSD endianness, from Brad Smith. 3 January 2012(Wouter) added randomised initial value, fallout from 28c3. 4 March 2007(Wouter) adapted from lookup3.c original, add config.h include. 5 added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings. 6 added include of lookup3.h to check definitions match declarations. 7 removed include of stdint - config.h takes care of platform independence. 8 url http://burtleburtle.net/bob/hash/index.html. 9 */ 10 /* 11 ------------------------------------------------------------------------------- 12 lookup3.c, by Bob Jenkins, May 2006, Public Domain. 13 14 These are functions for producing 32-bit hashes for hash table lookup. 15 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 16 are externally useful functions. Routines to test the hash are included 17 if SELF_TEST is defined. You can use this free for any purpose. It's in 18 the public domain. It has no warranty. 19 20 You probably want to use hashlittle(). hashlittle() and hashbig() 21 hash byte arrays. hashlittle() is is faster than hashbig() on 22 little-endian machines. Intel and AMD are little-endian machines. 23 On second thought, you probably want hashlittle2(), which is identical to 24 hashlittle() except it returns two 32-bit hashes for the price of one. 25 You could implement hashbig2() if you wanted but I haven't bothered here. 26 27 If you want to find a hash of, say, exactly 7 integers, do 28 a = i1; b = i2; c = i3; 29 mix(a,b,c); 30 a += i4; b += i5; c += i6; 31 mix(a,b,c); 32 a += i7; 33 final(a,b,c); 34 then use c as the hash value. If you have a variable length array of 35 4-byte integers to hash, use hashword(). If you have a byte array (like 36 a character string), use hashlittle(). If you have several byte arrays, or 37 a mix of things, see the comments above hashlittle(). 38 39 Why is this so big? I read 12 bytes at a time into 3 4-byte integers, 40 then mix those integers. This is fast (you can do a lot more thorough 41 mixing with 12*3 instructions on 3 integers than you can with 3 instructions 42 on 1 byte), but shoehorning those bytes into integers efficiently is messy. 43 ------------------------------------------------------------------------------- 44 */ 45 /*#define SELF_TEST 1*/ 46 47 #include "config.h" 48 #include "util/storage/lookup3.h" 49 #include <stdio.h> /* defines printf for tests */ 50 #include <time.h> /* defines time_t for timings in the test */ 51 /*#include <stdint.h> defines uint32_t etc (from config.h) */ 52 #include <sys/param.h> /* attempt to define endianness */ 53 #ifdef HAVE_SYS_TYPES_H 54 # include <sys/types.h> /* attempt to define endianness (solaris) */ 55 #endif 56 #ifdef linux 57 # include <endian.h> /* attempt to define endianness */ 58 #endif 59 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) 60 #include <sys/endian.h> /* attempt to define endianness */ 61 #endif 62 #ifdef __OpenBSD__ 63 #include <machine/endian.h> /* attempt to define endianness */ 64 #endif 65 66 /* random initial value */ 67 static uint32_t raninit = (uint32_t)0xdeadbeef; 68 69 void 70 hash_set_raninit(uint32_t v) 71 { 72 raninit = v; 73 } 74 75 /* 76 * My best guess at if you are big-endian or little-endian. This may 77 * need adjustment. 78 */ 79 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ 80 __BYTE_ORDER == __LITTLE_ENDIAN) || \ 81 (defined(i386) || defined(__i386__) || defined(__i486__) || \ 82 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || defined(__x86)) 83 # define HASH_LITTLE_ENDIAN 1 84 # define HASH_BIG_ENDIAN 0 85 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ 86 __BYTE_ORDER == __BIG_ENDIAN) || \ 87 (defined(sparc) || defined(__sparc) || defined(__sparc__) || defined(POWERPC) || defined(mc68000) || defined(sel)) 88 # define HASH_LITTLE_ENDIAN 0 89 # define HASH_BIG_ENDIAN 1 90 #elif defined(_MACHINE_ENDIAN_H_) 91 /* test for machine_endian_h protects failure if some are empty strings */ 92 # if defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN 93 # define HASH_LITTLE_ENDIAN 0 94 # define HASH_BIG_ENDIAN 1 95 # endif 96 # if defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN 97 # define HASH_LITTLE_ENDIAN 1 98 # define HASH_BIG_ENDIAN 0 99 # endif /* _MACHINE_ENDIAN_H_ */ 100 #else 101 # define HASH_LITTLE_ENDIAN 0 102 # define HASH_BIG_ENDIAN 0 103 #endif 104 105 #define hashsize(n) ((uint32_t)1<<(n)) 106 #define hashmask(n) (hashsize(n)-1) 107 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) 108 109 /* 110 ------------------------------------------------------------------------------- 111 mix -- mix 3 32-bit values reversibly. 112 113 This is reversible, so any information in (a,b,c) before mix() is 114 still in (a,b,c) after mix(). 115 116 If four pairs of (a,b,c) inputs are run through mix(), or through 117 mix() in reverse, there are at least 32 bits of the output that 118 are sometimes the same for one pair and different for another pair. 119 This was tested for: 120 * pairs that differed by one bit, by two bits, in any combination 121 of top bits of (a,b,c), or in any combination of bottom bits of 122 (a,b,c). 123 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 124 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 125 is commonly produced by subtraction) look like a single 1-bit 126 difference. 127 * the base values were pseudorandom, all zero but one bit set, or 128 all zero plus a counter that starts at zero. 129 130 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that 131 satisfy this are 132 4 6 8 16 19 4 133 9 15 3 18 27 15 134 14 9 3 7 17 3 135 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing 136 for "differ" defined as + with a one-bit base and a two-bit delta. I 137 used http://burtleburtle.net/bob/hash/avalanche.html to choose 138 the operations, constants, and arrangements of the variables. 139 140 This does not achieve avalanche. There are input bits of (a,b,c) 141 that fail to affect some output bits of (a,b,c), especially of a. The 142 most thoroughly mixed value is c, but it doesn't really even achieve 143 avalanche in c. 144 145 This allows some parallelism. Read-after-writes are good at doubling 146 the number of bits affected, so the goal of mixing pulls in the opposite 147 direction as the goal of parallelism. I did what I could. Rotates 148 seem to cost as much as shifts on every machine I could lay my hands 149 on, and rotates are much kinder to the top and bottom bits, so I used 150 rotates. 151 ------------------------------------------------------------------------------- 152 */ 153 #define mix(a,b,c) \ 154 { \ 155 a -= c; a ^= rot(c, 4); c += b; \ 156 b -= a; b ^= rot(a, 6); a += c; \ 157 c -= b; c ^= rot(b, 8); b += a; \ 158 a -= c; a ^= rot(c,16); c += b; \ 159 b -= a; b ^= rot(a,19); a += c; \ 160 c -= b; c ^= rot(b, 4); b += a; \ 161 } 162 163 /* 164 ------------------------------------------------------------------------------- 165 final -- final mixing of 3 32-bit values (a,b,c) into c 166 167 Pairs of (a,b,c) values differing in only a few bits will usually 168 produce values of c that look totally different. This was tested for 169 * pairs that differed by one bit, by two bits, in any combination 170 of top bits of (a,b,c), or in any combination of bottom bits of 171 (a,b,c). 172 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed 173 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as 174 is commonly produced by subtraction) look like a single 1-bit 175 difference. 176 * the base values were pseudorandom, all zero but one bit set, or 177 all zero plus a counter that starts at zero. 178 179 These constants passed: 180 14 11 25 16 4 14 24 181 12 14 25 16 4 14 24 182 and these came close: 183 4 8 15 26 3 22 24 184 10 8 15 26 3 22 24 185 11 8 15 26 3 22 24 186 ------------------------------------------------------------------------------- 187 */ 188 #define final(a,b,c) \ 189 { \ 190 c ^= b; c -= rot(b,14); \ 191 a ^= c; a -= rot(c,11); \ 192 b ^= a; b -= rot(a,25); \ 193 c ^= b; c -= rot(b,16); \ 194 a ^= c; a -= rot(c,4); \ 195 b ^= a; b -= rot(a,14); \ 196 c ^= b; c -= rot(b,24); \ 197 } 198 199 /* 200 -------------------------------------------------------------------- 201 This works on all machines. To be useful, it requires 202 -- that the key be an array of uint32_t's, and 203 -- that the length be the number of uint32_t's in the key 204 205 The function hashword() is identical to hashlittle() on little-endian 206 machines, and identical to hashbig() on big-endian machines, 207 except that the length has to be measured in uint32_ts rather than in 208 bytes. hashlittle() is more complicated than hashword() only because 209 hashlittle() has to dance around fitting the key bytes into registers. 210 -------------------------------------------------------------------- 211 */ 212 uint32_t hashword( 213 const uint32_t *k, /* the key, an array of uint32_t values */ 214 size_t length, /* the length of the key, in uint32_ts */ 215 uint32_t initval) /* the previous hash, or an arbitrary value */ 216 { 217 uint32_t a,b,c; 218 219 /* Set up the internal state */ 220 a = b = c = raninit + (((uint32_t)length)<<2) + initval; 221 222 /*------------------------------------------------- handle most of the key */ 223 while (length > 3) 224 { 225 a += k[0]; 226 b += k[1]; 227 c += k[2]; 228 mix(a,b,c); 229 length -= 3; 230 k += 3; 231 } 232 233 /*------------------------------------------- handle the last 3 uint32_t's */ 234 switch(length) /* all the case statements fall through */ 235 { 236 case 3 : c+=k[2]; 237 case 2 : b+=k[1]; 238 case 1 : a+=k[0]; 239 final(a,b,c); 240 case 0: /* case 0: nothing left to add */ 241 break; 242 } 243 /*------------------------------------------------------ report the result */ 244 return c; 245 } 246 247 248 #ifdef SELF_TEST 249 250 /* 251 -------------------------------------------------------------------- 252 hashword2() -- same as hashword(), but take two seeds and return two 253 32-bit values. pc and pb must both be nonnull, and *pc and *pb must 254 both be initialized with seeds. If you pass in (*pb)==0, the output 255 (*pc) will be the same as the return value from hashword(). 256 -------------------------------------------------------------------- 257 */ 258 void hashword2 ( 259 const uint32_t *k, /* the key, an array of uint32_t values */ 260 size_t length, /* the length of the key, in uint32_ts */ 261 uint32_t *pc, /* IN: seed OUT: primary hash value */ 262 uint32_t *pb) /* IN: more seed OUT: secondary hash value */ 263 { 264 uint32_t a,b,c; 265 266 /* Set up the internal state */ 267 a = b = c = raninit + ((uint32_t)(length<<2)) + *pc; 268 c += *pb; 269 270 /*------------------------------------------------- handle most of the key */ 271 while (length > 3) 272 { 273 a += k[0]; 274 b += k[1]; 275 c += k[2]; 276 mix(a,b,c); 277 length -= 3; 278 k += 3; 279 } 280 281 /*------------------------------------------- handle the last 3 uint32_t's */ 282 switch(length) /* all the case statements fall through */ 283 { 284 case 3 : c+=k[2]; 285 case 2 : b+=k[1]; 286 case 1 : a+=k[0]; 287 final(a,b,c); 288 case 0: /* case 0: nothing left to add */ 289 break; 290 } 291 /*------------------------------------------------------ report the result */ 292 *pc=c; *pb=b; 293 } 294 295 #endif /* SELF_TEST */ 296 297 /* 298 ------------------------------------------------------------------------------- 299 hashlittle() -- hash a variable-length key into a 32-bit value 300 k : the key (the unaligned variable-length array of bytes) 301 length : the length of the key, counting by bytes 302 initval : can be any 4-byte value 303 Returns a 32-bit value. Every bit of the key affects every bit of 304 the return value. Two keys differing by one or two bits will have 305 totally different hash values. 306 307 The best hash table sizes are powers of 2. There is no need to do 308 mod a prime (mod is sooo slow!). If you need less than 32 bits, 309 use a bitmask. For example, if you need only 10 bits, do 310 h = (h & hashmask(10)); 311 In which case, the hash table should have hashsize(10) elements. 312 313 If you are hashing n strings (uint8_t **)k, do it like this: 314 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h); 315 316 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this 317 code any way you wish, private, educational, or commercial. It's free. 318 319 Use for hash table lookup, or anything where one collision in 2^^32 is 320 acceptable. Do NOT use for cryptographic purposes. 321 ------------------------------------------------------------------------------- 322 */ 323 324 uint32_t hashlittle( const void *key, size_t length, uint32_t initval) 325 { 326 uint32_t a,b,c; /* internal state */ 327 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ 328 329 /* Set up the internal state */ 330 a = b = c = raninit + ((uint32_t)length) + initval; 331 332 u.ptr = key; 333 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 334 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 335 #ifdef VALGRIND 336 const uint8_t *k8; 337 #endif 338 339 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 340 while (length > 12) 341 { 342 a += k[0]; 343 b += k[1]; 344 c += k[2]; 345 mix(a,b,c); 346 length -= 12; 347 k += 3; 348 } 349 350 /*----------------------------- handle the last (probably partial) block */ 351 /* 352 * "k[2]&0xffffff" actually reads beyond the end of the string, but 353 * then masks off the part it's not allowed to read. Because the 354 * string is aligned, the masked-off tail is in the same word as the 355 * rest of the string. Every machine with memory protection I've seen 356 * does it on word boundaries, so is OK with this. But VALGRIND will 357 * still catch it and complain. The masking trick does make the hash 358 * noticably faster for short strings (like English words). 359 */ 360 #ifndef VALGRIND 361 362 switch(length) 363 { 364 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 365 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; 366 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; 367 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; 368 case 8 : b+=k[1]; a+=k[0]; break; 369 case 7 : b+=k[1]&0xffffff; a+=k[0]; break; 370 case 6 : b+=k[1]&0xffff; a+=k[0]; break; 371 case 5 : b+=k[1]&0xff; a+=k[0]; break; 372 case 4 : a+=k[0]; break; 373 case 3 : a+=k[0]&0xffffff; break; 374 case 2 : a+=k[0]&0xffff; break; 375 case 1 : a+=k[0]&0xff; break; 376 case 0 : return c; /* zero length strings require no mixing */ 377 } 378 379 #else /* make valgrind happy */ 380 381 k8 = (const uint8_t *)k; 382 switch(length) 383 { 384 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 385 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 386 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ 387 case 9 : c+=k8[8]; /* fall through */ 388 case 8 : b+=k[1]; a+=k[0]; break; 389 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 390 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ 391 case 5 : b+=k8[4]; /* fall through */ 392 case 4 : a+=k[0]; break; 393 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 394 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ 395 case 1 : a+=k8[0]; break; 396 case 0 : return c; 397 } 398 399 #endif /* !valgrind */ 400 401 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 402 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ 403 const uint8_t *k8; 404 405 /*--------------- all but last block: aligned reads and different mixing */ 406 while (length > 12) 407 { 408 a += k[0] + (((uint32_t)k[1])<<16); 409 b += k[2] + (((uint32_t)k[3])<<16); 410 c += k[4] + (((uint32_t)k[5])<<16); 411 mix(a,b,c); 412 length -= 12; 413 k += 6; 414 } 415 416 /*----------------------------- handle the last (probably partial) block */ 417 k8 = (const uint8_t *)k; 418 switch(length) 419 { 420 case 12: c+=k[4]+(((uint32_t)k[5])<<16); 421 b+=k[2]+(((uint32_t)k[3])<<16); 422 a+=k[0]+(((uint32_t)k[1])<<16); 423 break; 424 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 425 case 10: c+=k[4]; 426 b+=k[2]+(((uint32_t)k[3])<<16); 427 a+=k[0]+(((uint32_t)k[1])<<16); 428 break; 429 case 9 : c+=k8[8]; /* fall through */ 430 case 8 : b+=k[2]+(((uint32_t)k[3])<<16); 431 a+=k[0]+(((uint32_t)k[1])<<16); 432 break; 433 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 434 case 6 : b+=k[2]; 435 a+=k[0]+(((uint32_t)k[1])<<16); 436 break; 437 case 5 : b+=k8[4]; /* fall through */ 438 case 4 : a+=k[0]+(((uint32_t)k[1])<<16); 439 break; 440 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 441 case 2 : a+=k[0]; 442 break; 443 case 1 : a+=k8[0]; 444 break; 445 case 0 : return c; /* zero length requires no mixing */ 446 } 447 448 } else { /* need to read the key one byte at a time */ 449 const uint8_t *k = (const uint8_t *)key; 450 451 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 452 while (length > 12) 453 { 454 a += k[0]; 455 a += ((uint32_t)k[1])<<8; 456 a += ((uint32_t)k[2])<<16; 457 a += ((uint32_t)k[3])<<24; 458 b += k[4]; 459 b += ((uint32_t)k[5])<<8; 460 b += ((uint32_t)k[6])<<16; 461 b += ((uint32_t)k[7])<<24; 462 c += k[8]; 463 c += ((uint32_t)k[9])<<8; 464 c += ((uint32_t)k[10])<<16; 465 c += ((uint32_t)k[11])<<24; 466 mix(a,b,c); 467 length -= 12; 468 k += 12; 469 } 470 471 /*-------------------------------- last block: affect all 32 bits of (c) */ 472 switch(length) /* all the case statements fall through */ 473 { 474 case 12: c+=((uint32_t)k[11])<<24; 475 case 11: c+=((uint32_t)k[10])<<16; 476 case 10: c+=((uint32_t)k[9])<<8; 477 case 9 : c+=k[8]; 478 case 8 : b+=((uint32_t)k[7])<<24; 479 case 7 : b+=((uint32_t)k[6])<<16; 480 case 6 : b+=((uint32_t)k[5])<<8; 481 case 5 : b+=k[4]; 482 case 4 : a+=((uint32_t)k[3])<<24; 483 case 3 : a+=((uint32_t)k[2])<<16; 484 case 2 : a+=((uint32_t)k[1])<<8; 485 case 1 : a+=k[0]; 486 break; 487 case 0 : return c; 488 } 489 } 490 491 final(a,b,c); 492 return c; 493 } 494 495 #ifdef SELF_TEST 496 497 /* 498 * hashlittle2: return 2 32-bit hash values 499 * 500 * This is identical to hashlittle(), except it returns two 32-bit hash 501 * values instead of just one. This is good enough for hash table 502 * lookup with 2^^64 buckets, or if you want a second hash if you're not 503 * happy with the first, or if you want a probably-unique 64-bit ID for 504 * the key. *pc is better mixed than *pb, so use *pc first. If you want 505 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". 506 */ 507 void hashlittle2( 508 const void *key, /* the key to hash */ 509 size_t length, /* length of the key */ 510 uint32_t *pc, /* IN: primary initval, OUT: primary hash */ 511 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ 512 { 513 uint32_t a,b,c; /* internal state */ 514 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ 515 516 /* Set up the internal state */ 517 a = b = c = raninit + ((uint32_t)length) + *pc; 518 c += *pb; 519 520 u.ptr = key; 521 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { 522 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 523 #ifdef VALGRIND 524 const uint8_t *k8; 525 #endif 526 527 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 528 while (length > 12) 529 { 530 a += k[0]; 531 b += k[1]; 532 c += k[2]; 533 mix(a,b,c); 534 length -= 12; 535 k += 3; 536 } 537 538 /*----------------------------- handle the last (probably partial) block */ 539 /* 540 * "k[2]&0xffffff" actually reads beyond the end of the string, but 541 * then masks off the part it's not allowed to read. Because the 542 * string is aligned, the masked-off tail is in the same word as the 543 * rest of the string. Every machine with memory protection I've seen 544 * does it on word boundaries, so is OK with this. But VALGRIND will 545 * still catch it and complain. The masking trick does make the hash 546 * noticably faster for short strings (like English words). 547 */ 548 #ifndef VALGRIND 549 550 switch(length) 551 { 552 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 553 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; 554 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; 555 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; 556 case 8 : b+=k[1]; a+=k[0]; break; 557 case 7 : b+=k[1]&0xffffff; a+=k[0]; break; 558 case 6 : b+=k[1]&0xffff; a+=k[0]; break; 559 case 5 : b+=k[1]&0xff; a+=k[0]; break; 560 case 4 : a+=k[0]; break; 561 case 3 : a+=k[0]&0xffffff; break; 562 case 2 : a+=k[0]&0xffff; break; 563 case 1 : a+=k[0]&0xff; break; 564 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ 565 } 566 567 #else /* make valgrind happy */ 568 569 k8 = (const uint8_t *)k; 570 switch(length) 571 { 572 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 573 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 574 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ 575 case 9 : c+=k8[8]; /* fall through */ 576 case 8 : b+=k[1]; a+=k[0]; break; 577 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 578 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ 579 case 5 : b+=k8[4]; /* fall through */ 580 case 4 : a+=k[0]; break; 581 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 582 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ 583 case 1 : a+=k8[0]; break; 584 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ 585 } 586 587 #endif /* !valgrind */ 588 589 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { 590 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ 591 const uint8_t *k8; 592 593 /*--------------- all but last block: aligned reads and different mixing */ 594 while (length > 12) 595 { 596 a += k[0] + (((uint32_t)k[1])<<16); 597 b += k[2] + (((uint32_t)k[3])<<16); 598 c += k[4] + (((uint32_t)k[5])<<16); 599 mix(a,b,c); 600 length -= 12; 601 k += 6; 602 } 603 604 /*----------------------------- handle the last (probably partial) block */ 605 k8 = (const uint8_t *)k; 606 switch(length) 607 { 608 case 12: c+=k[4]+(((uint32_t)k[5])<<16); 609 b+=k[2]+(((uint32_t)k[3])<<16); 610 a+=k[0]+(((uint32_t)k[1])<<16); 611 break; 612 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ 613 case 10: c+=k[4]; 614 b+=k[2]+(((uint32_t)k[3])<<16); 615 a+=k[0]+(((uint32_t)k[1])<<16); 616 break; 617 case 9 : c+=k8[8]; /* fall through */ 618 case 8 : b+=k[2]+(((uint32_t)k[3])<<16); 619 a+=k[0]+(((uint32_t)k[1])<<16); 620 break; 621 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ 622 case 6 : b+=k[2]; 623 a+=k[0]+(((uint32_t)k[1])<<16); 624 break; 625 case 5 : b+=k8[4]; /* fall through */ 626 case 4 : a+=k[0]+(((uint32_t)k[1])<<16); 627 break; 628 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ 629 case 2 : a+=k[0]; 630 break; 631 case 1 : a+=k8[0]; 632 break; 633 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ 634 } 635 636 } else { /* need to read the key one byte at a time */ 637 const uint8_t *k = (const uint8_t *)key; 638 639 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 640 while (length > 12) 641 { 642 a += k[0]; 643 a += ((uint32_t)k[1])<<8; 644 a += ((uint32_t)k[2])<<16; 645 a += ((uint32_t)k[3])<<24; 646 b += k[4]; 647 b += ((uint32_t)k[5])<<8; 648 b += ((uint32_t)k[6])<<16; 649 b += ((uint32_t)k[7])<<24; 650 c += k[8]; 651 c += ((uint32_t)k[9])<<8; 652 c += ((uint32_t)k[10])<<16; 653 c += ((uint32_t)k[11])<<24; 654 mix(a,b,c); 655 length -= 12; 656 k += 12; 657 } 658 659 /*-------------------------------- last block: affect all 32 bits of (c) */ 660 switch(length) /* all the case statements fall through */ 661 { 662 case 12: c+=((uint32_t)k[11])<<24; 663 case 11: c+=((uint32_t)k[10])<<16; 664 case 10: c+=((uint32_t)k[9])<<8; 665 case 9 : c+=k[8]; 666 case 8 : b+=((uint32_t)k[7])<<24; 667 case 7 : b+=((uint32_t)k[6])<<16; 668 case 6 : b+=((uint32_t)k[5])<<8; 669 case 5 : b+=k[4]; 670 case 4 : a+=((uint32_t)k[3])<<24; 671 case 3 : a+=((uint32_t)k[2])<<16; 672 case 2 : a+=((uint32_t)k[1])<<8; 673 case 1 : a+=k[0]; 674 break; 675 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ 676 } 677 } 678 679 final(a,b,c); 680 *pc=c; *pb=b; 681 } 682 683 #endif /* SELF_TEST */ 684 685 #if 0 /* currently not used */ 686 687 /* 688 * hashbig(): 689 * This is the same as hashword() on big-endian machines. It is different 690 * from hashlittle() on all machines. hashbig() takes advantage of 691 * big-endian byte ordering. 692 */ 693 uint32_t hashbig( const void *key, size_t length, uint32_t initval) 694 { 695 uint32_t a,b,c; 696 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ 697 698 /* Set up the internal state */ 699 a = b = c = raninit + ((uint32_t)length) + initval; 700 701 u.ptr = key; 702 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { 703 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ 704 #ifdef VALGRIND 705 const uint8_t *k8; 706 #endif 707 708 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ 709 while (length > 12) 710 { 711 a += k[0]; 712 b += k[1]; 713 c += k[2]; 714 mix(a,b,c); 715 length -= 12; 716 k += 3; 717 } 718 719 /*----------------------------- handle the last (probably partial) block */ 720 /* 721 * "k[2]<<8" actually reads beyond the end of the string, but 722 * then shifts out the part it's not allowed to read. Because the 723 * string is aligned, the illegal read is in the same word as the 724 * rest of the string. Every machine with memory protection I've seen 725 * does it on word boundaries, so is OK with this. But VALGRIND will 726 * still catch it and complain. The masking trick does make the hash 727 * noticably faster for short strings (like English words). 728 */ 729 #ifndef VALGRIND 730 731 switch(length) 732 { 733 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 734 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; 735 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; 736 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; 737 case 8 : b+=k[1]; a+=k[0]; break; 738 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; 739 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; 740 case 5 : b+=k[1]&0xff000000; a+=k[0]; break; 741 case 4 : a+=k[0]; break; 742 case 3 : a+=k[0]&0xffffff00; break; 743 case 2 : a+=k[0]&0xffff0000; break; 744 case 1 : a+=k[0]&0xff000000; break; 745 case 0 : return c; /* zero length strings require no mixing */ 746 } 747 748 #else /* make valgrind happy */ 749 750 k8 = (const uint8_t *)k; 751 switch(length) /* all the case statements fall through */ 752 { 753 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; 754 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ 755 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ 756 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ 757 case 8 : b+=k[1]; a+=k[0]; break; 758 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ 759 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ 760 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ 761 case 4 : a+=k[0]; break; 762 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ 763 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ 764 case 1 : a+=((uint32_t)k8[0])<<24; break; 765 case 0 : return c; 766 } 767 768 #endif /* !VALGRIND */ 769 770 } else { /* need to read the key one byte at a time */ 771 const uint8_t *k = (const uint8_t *)key; 772 773 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ 774 while (length > 12) 775 { 776 a += ((uint32_t)k[0])<<24; 777 a += ((uint32_t)k[1])<<16; 778 a += ((uint32_t)k[2])<<8; 779 a += ((uint32_t)k[3]); 780 b += ((uint32_t)k[4])<<24; 781 b += ((uint32_t)k[5])<<16; 782 b += ((uint32_t)k[6])<<8; 783 b += ((uint32_t)k[7]); 784 c += ((uint32_t)k[8])<<24; 785 c += ((uint32_t)k[9])<<16; 786 c += ((uint32_t)k[10])<<8; 787 c += ((uint32_t)k[11]); 788 mix(a,b,c); 789 length -= 12; 790 k += 12; 791 } 792 793 /*-------------------------------- last block: affect all 32 bits of (c) */ 794 switch(length) /* all the case statements fall through */ 795 { 796 case 12: c+=k[11]; 797 case 11: c+=((uint32_t)k[10])<<8; 798 case 10: c+=((uint32_t)k[9])<<16; 799 case 9 : c+=((uint32_t)k[8])<<24; 800 case 8 : b+=k[7]; 801 case 7 : b+=((uint32_t)k[6])<<8; 802 case 6 : b+=((uint32_t)k[5])<<16; 803 case 5 : b+=((uint32_t)k[4])<<24; 804 case 4 : a+=k[3]; 805 case 3 : a+=((uint32_t)k[2])<<8; 806 case 2 : a+=((uint32_t)k[1])<<16; 807 case 1 : a+=((uint32_t)k[0])<<24; 808 break; 809 case 0 : return c; 810 } 811 } 812 813 final(a,b,c); 814 return c; 815 } 816 817 #endif /* 0 == currently not used */ 818 819 #ifdef SELF_TEST 820 821 /* used for timings */ 822 void driver1() 823 { 824 uint8_t buf[256]; 825 uint32_t i; 826 uint32_t h=0; 827 time_t a,z; 828 829 time(&a); 830 for (i=0; i<256; ++i) buf[i] = 'x'; 831 for (i=0; i<1; ++i) 832 { 833 h = hashlittle(&buf[0],1,h); 834 } 835 time(&z); 836 if (z-a > 0) printf("time %d %.8x\n", z-a, h); 837 } 838 839 /* check that every input bit changes every output bit half the time */ 840 #define HASHSTATE 1 841 #define HASHLEN 1 842 #define MAXPAIR 60 843 #define MAXLEN 70 844 void driver2() 845 { 846 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1]; 847 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z; 848 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE]; 849 uint32_t x[HASHSTATE],y[HASHSTATE]; 850 uint32_t hlen; 851 852 printf("No more than %d trials should ever be needed \n",MAXPAIR/2); 853 for (hlen=0; hlen < MAXLEN; ++hlen) 854 { 855 z=0; 856 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */ 857 { 858 for (j=0; j<8; ++j) /*------------------------ for each input bit, */ 859 { 860 for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */ 861 { 862 for (l=0; l<HASHSTATE; ++l) 863 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0); 864 865 /*---- check that every output bit is affected by that input bit */ 866 for (k=0; k<MAXPAIR; k+=2) 867 { 868 uint32_t finished=1; 869 /* keys have one bit different */ 870 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;} 871 /* have a and b be two keys differing in only one bit */ 872 a[i] ^= (k<<j); 873 a[i] ^= (k>>(8-j)); 874 c[0] = hashlittle(a, hlen, m); 875 b[i] ^= ((k+1)<<j); 876 b[i] ^= ((k+1)>>(8-j)); 877 d[0] = hashlittle(b, hlen, m); 878 /* check every bit is 1, 0, set, and not set at least once */ 879 for (l=0; l<HASHSTATE; ++l) 880 { 881 e[l] &= (c[l]^d[l]); 882 f[l] &= ~(c[l]^d[l]); 883 g[l] &= c[l]; 884 h[l] &= ~c[l]; 885 x[l] &= d[l]; 886 y[l] &= ~d[l]; 887 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0; 888 } 889 if (finished) break; 890 } 891 if (k>z) z=k; 892 if (k==MAXPAIR) 893 { 894 printf("Some bit didn't change: "); 895 printf("%.8x %.8x %.8x %.8x %.8x %.8x ", 896 e[0],f[0],g[0],h[0],x[0],y[0]); 897 printf("i %d j %d m %d len %d\n", i, j, m, hlen); 898 } 899 if (z==MAXPAIR) goto done; 900 } 901 } 902 } 903 done: 904 if (z < MAXPAIR) 905 { 906 printf("Mix success %2d bytes %2d initvals ",i,m); 907 printf("required %d trials\n", z/2); 908 } 909 } 910 printf("\n"); 911 } 912 913 /* Check for reading beyond the end of the buffer and alignment problems */ 914 void driver3() 915 { 916 uint8_t buf[MAXLEN+20], *b; 917 uint32_t len; 918 uint8_t q[] = "This is the time for all good men to come to the aid of their country..."; 919 uint32_t h; 920 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country..."; 921 uint32_t i; 922 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country..."; 923 uint32_t j; 924 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country..."; 925 uint32_t ref,x,y; 926 uint8_t *p; 927 928 printf("Endianness. These lines should all be the same (for values filled in):\n"); 929 printf("%.8x %.8x %.8x\n", 930 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13), 931 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13), 932 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13)); 933 p = q; 934 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", 935 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), 936 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), 937 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), 938 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), 939 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), 940 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); 941 p = &qq[1]; 942 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", 943 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), 944 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), 945 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), 946 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), 947 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), 948 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); 949 p = &qqq[2]; 950 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", 951 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), 952 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), 953 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), 954 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), 955 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), 956 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); 957 p = &qqqq[3]; 958 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", 959 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), 960 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), 961 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), 962 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), 963 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), 964 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); 965 printf("\n"); 966 967 /* check that hashlittle2 and hashlittle produce the same results */ 968 i=47; j=0; 969 hashlittle2(q, sizeof(q), &i, &j); 970 if (hashlittle(q, sizeof(q), 47) != i) 971 printf("hashlittle2 and hashlittle mismatch\n"); 972 973 /* check that hashword2 and hashword produce the same results */ 974 len = raninit; 975 i=47, j=0; 976 hashword2(&len, 1, &i, &j); 977 if (hashword(&len, 1, 47) != i) 978 printf("hashword2 and hashword mismatch %x %x\n", 979 i, hashword(&len, 1, 47)); 980 981 /* check hashlittle doesn't read before or after the ends of the string */ 982 for (h=0, b=buf+1; h<8; ++h, ++b) 983 { 984 for (i=0; i<MAXLEN; ++i) 985 { 986 len = i; 987 for (j=0; j<i; ++j) *(b+j)=0; 988 989 /* these should all be equal */ 990 ref = hashlittle(b, len, (uint32_t)1); 991 *(b+i)=(uint8_t)~0; 992 *(b-1)=(uint8_t)~0; 993 x = hashlittle(b, len, (uint32_t)1); 994 y = hashlittle(b, len, (uint32_t)1); 995 if ((ref != x) || (ref != y)) 996 { 997 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y, 998 h, i); 999 } 1000 } 1001 } 1002 } 1003 1004 /* check for problems with nulls */ 1005 void driver4() 1006 { 1007 uint8_t buf[1]; 1008 uint32_t h,i,state[HASHSTATE]; 1009 1010 1011 buf[0] = ~0; 1012 for (i=0; i<HASHSTATE; ++i) state[i] = 1; 1013 printf("These should all be different\n"); 1014 for (i=0, h=0; i<8; ++i) 1015 { 1016 h = hashlittle(buf, 0, h); 1017 printf("%2ld 0-byte strings, hash is %.8x\n", i, h); 1018 } 1019 } 1020 1021 1022 int main() 1023 { 1024 driver1(); /* test that the key is hashed: used for timings */ 1025 driver2(); /* test that whole key is hashed thoroughly */ 1026 driver3(); /* test that nothing but the key is hashed */ 1027 driver4(); /* test hashing multiple buffers (all buffers are null) */ 1028 return 1; 1029 } 1030 1031 #endif /* SELF_TEST */ 1032