xref: /freebsd/contrib/unbound/util/storage/lookup3.c (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /*
2   May 2019(Wouter) patch to enable the valgrind clean implementation all the
3      time.  This enables better security audit and checks, which is better
4      than the speedup.  Git issue #30.  Renamed the define ARRAY_CLEAN_ACCESS.
5   February 2013(Wouter) patch defines for BSD endianness, from Brad Smith.
6   January 2012(Wouter) added randomised initial value, fallout from 28c3.
7   March 2007(Wouter) adapted from lookup3.c original, add config.h include.
8      added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings.
9      added include of lookup3.h to check definitions match declarations.
10      removed include of stdint - config.h takes care of platform independence.
11      added fallthrough comments for new gcc warning suppression.
12   url http://burtleburtle.net/bob/hash/index.html.
13 */
14 /*
15 -------------------------------------------------------------------------------
16 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
17 
18 These are functions for producing 32-bit hashes for hash table lookup.
19 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
20 are externally useful functions.  Routines to test the hash are included
21 if SELF_TEST is defined.  You can use this free for any purpose.  It's in
22 the public domain.  It has no warranty.
23 
24 You probably want to use hashlittle().  hashlittle() and hashbig()
25 hash byte arrays.  hashlittle() is is faster than hashbig() on
26 little-endian machines.  Intel and AMD are little-endian machines.
27 On second thought, you probably want hashlittle2(), which is identical to
28 hashlittle() except it returns two 32-bit hashes for the price of one.
29 You could implement hashbig2() if you wanted but I haven't bothered here.
30 
31 If you want to find a hash of, say, exactly 7 integers, do
32   a = i1;  b = i2;  c = i3;
33   mix(a,b,c);
34   a += i4; b += i5; c += i6;
35   mix(a,b,c);
36   a += i7;
37   final(a,b,c);
38 then use c as the hash value.  If you have a variable length array of
39 4-byte integers to hash, use hashword().  If you have a byte array (like
40 a character string), use hashlittle().  If you have several byte arrays, or
41 a mix of things, see the comments above hashlittle().
42 
43 Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
44 then mix those integers.  This is fast (you can do a lot more thorough
45 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
46 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
47 -------------------------------------------------------------------------------
48 */
49 /*#define SELF_TEST 1*/
50 #define ARRAY_CLEAN_ACCESS 1
51 
52 #include "config.h"
53 #include "util/storage/lookup3.h"
54 #include <stdio.h>      /* defines printf for tests */
55 #include <time.h>       /* defines time_t for timings in the test */
56 
57 /*
58  * If our build system provides endianness info, signalled by
59  * HAVE_TARGET_ENDIANNESS and the presence or absence of TARGET_IS_BIG_ENDIAN,
60  * use that. Otherwise try to work out the endianness.
61  */
62 #if defined(HAVE_TARGET_ENDIANNESS)
63 # if defined(TARGET_IS_BIG_ENDIAN)
64 #  define HASH_LITTLE_ENDIAN 0
65 #  define HASH_BIG_ENDIAN 1
66 # else
67 #  define HASH_LITTLE_ENDIAN 1
68 #  define HASH_BIG_ENDIAN 0
69 # endif
70 #else
71 # include <sys/param.h>  /* attempt to define endianness */
72 # ifdef HAVE_SYS_TYPES_H
73 #  include <sys/types.h> /* attempt to define endianness (solaris) */
74 # endif
75 # if defined(linux) || defined(__OpenBSD__)
76 #  ifdef HAVE_ENDIAN_H
77 #    include <endian.h>    /* attempt to define endianness */
78 #  else
79 #    include <machine/endian.h> /* on older OpenBSD */
80 #  endif
81 # endif
82 # if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
83 #  include <sys/endian.h> /* attempt to define endianness */
84 # endif
85   /*
86    * My best guess at if you are big-endian or little-endian.  This may
87    * need adjustment.
88    */
89 # if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
90       __BYTE_ORDER == __LITTLE_ENDIAN) || \
91      (defined(i386) || defined(__i386__) || defined(__i486__) || \
92       defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || defined(__x86))
93 #  define HASH_LITTLE_ENDIAN 1
94 #  define HASH_BIG_ENDIAN 0
95 # elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
96         __BYTE_ORDER == __BIG_ENDIAN) || \
97        (defined(sparc) || defined(__sparc) || defined(__sparc__) || defined(POWERPC) || defined(mc68000) || defined(sel))
98 #  define HASH_LITTLE_ENDIAN 0
99 #  define HASH_BIG_ENDIAN 1
100 # elif defined(_MACHINE_ENDIAN_H_)
101   /* test for machine_endian_h protects failure if some are empty strings */
102 #  if defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN
103 #   define HASH_LITTLE_ENDIAN 0
104 #   define HASH_BIG_ENDIAN 1
105 #  endif
106 #  if defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN
107 #   define HASH_LITTLE_ENDIAN 1
108 #   define HASH_BIG_ENDIAN 0
109 #  endif /* _MACHINE_ENDIAN_H_ */
110 # else
111 #  define HASH_LITTLE_ENDIAN 0
112 #  define HASH_BIG_ENDIAN 0
113 # endif
114 #endif /* defined(HAVE_TARGET_ENDIANNESS) */
115 
116 #define hashsize(n) ((uint32_t)1<<(n))
117 #define hashmask(n) (hashsize(n)-1)
118 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
119 
120 /* random initial value */
121 static uint32_t raninit = (uint32_t)0xdeadbeef;
122 
123 void
124 hash_set_raninit(uint32_t v)
125 {
126 	raninit = v;
127 }
128 
129 /*
130 -------------------------------------------------------------------------------
131 mix -- mix 3 32-bit values reversibly.
132 
133 This is reversible, so any information in (a,b,c) before mix() is
134 still in (a,b,c) after mix().
135 
136 If four pairs of (a,b,c) inputs are run through mix(), or through
137 mix() in reverse, there are at least 32 bits of the output that
138 are sometimes the same for one pair and different for another pair.
139 This was tested for:
140 * pairs that differed by one bit, by two bits, in any combination
141   of top bits of (a,b,c), or in any combination of bottom bits of
142   (a,b,c).
143 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
144   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
145   is commonly produced by subtraction) look like a single 1-bit
146   difference.
147 * the base values were pseudorandom, all zero but one bit set, or
148   all zero plus a counter that starts at zero.
149 
150 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
151 satisfy this are
152     4  6  8 16 19  4
153     9 15  3 18 27 15
154    14  9  3  7 17  3
155 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
156 for "differ" defined as + with a one-bit base and a two-bit delta.  I
157 used http://burtleburtle.net/bob/hash/avalanche.html to choose
158 the operations, constants, and arrangements of the variables.
159 
160 This does not achieve avalanche.  There are input bits of (a,b,c)
161 that fail to affect some output bits of (a,b,c), especially of a.  The
162 most thoroughly mixed value is c, but it doesn't really even achieve
163 avalanche in c.
164 
165 This allows some parallelism.  Read-after-writes are good at doubling
166 the number of bits affected, so the goal of mixing pulls in the opposite
167 direction as the goal of parallelism.  I did what I could.  Rotates
168 seem to cost as much as shifts on every machine I could lay my hands
169 on, and rotates are much kinder to the top and bottom bits, so I used
170 rotates.
171 -------------------------------------------------------------------------------
172 */
173 #define mix(a,b,c) \
174 { \
175   a -= c;  a ^= rot(c, 4);  c += b; \
176   b -= a;  b ^= rot(a, 6);  a += c; \
177   c -= b;  c ^= rot(b, 8);  b += a; \
178   a -= c;  a ^= rot(c,16);  c += b; \
179   b -= a;  b ^= rot(a,19);  a += c; \
180   c -= b;  c ^= rot(b, 4);  b += a; \
181 }
182 
183 /*
184 -------------------------------------------------------------------------------
185 final -- final mixing of 3 32-bit values (a,b,c) into c
186 
187 Pairs of (a,b,c) values differing in only a few bits will usually
188 produce values of c that look totally different.  This was tested for
189 * pairs that differed by one bit, by two bits, in any combination
190   of top bits of (a,b,c), or in any combination of bottom bits of
191   (a,b,c).
192 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
193   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
194   is commonly produced by subtraction) look like a single 1-bit
195   difference.
196 * the base values were pseudorandom, all zero but one bit set, or
197   all zero plus a counter that starts at zero.
198 
199 These constants passed:
200  14 11 25 16 4 14 24
201  12 14 25 16 4 14 24
202 and these came close:
203   4  8 15 26 3 22 24
204  10  8 15 26 3 22 24
205  11  8 15 26 3 22 24
206 -------------------------------------------------------------------------------
207 */
208 #define final(a,b,c) \
209 { \
210   c ^= b; c -= rot(b,14); \
211   a ^= c; a -= rot(c,11); \
212   b ^= a; b -= rot(a,25); \
213   c ^= b; c -= rot(b,16); \
214   a ^= c; a -= rot(c,4);  \
215   b ^= a; b -= rot(a,14); \
216   c ^= b; c -= rot(b,24); \
217 }
218 
219 /*
220 --------------------------------------------------------------------
221  This works on all machines.  To be useful, it requires
222  -- that the key be an array of uint32_t's, and
223  -- that the length be the number of uint32_t's in the key
224 
225  The function hashword() is identical to hashlittle() on little-endian
226  machines, and identical to hashbig() on big-endian machines,
227  except that the length has to be measured in uint32_ts rather than in
228  bytes.  hashlittle() is more complicated than hashword() only because
229  hashlittle() has to dance around fitting the key bytes into registers.
230 --------------------------------------------------------------------
231 */
232 uint32_t hashword(
233 const uint32_t *k,                   /* the key, an array of uint32_t values */
234 size_t          length,               /* the length of the key, in uint32_ts */
235 uint32_t        initval)         /* the previous hash, or an arbitrary value */
236 {
237   uint32_t a,b,c;
238 
239   /* Set up the internal state */
240   a = b = c = raninit + (((uint32_t)length)<<2) + initval;
241 
242   /*------------------------------------------------- handle most of the key */
243   while (length > 3)
244   {
245     a += k[0];
246     b += k[1];
247     c += k[2];
248     mix(a,b,c);
249     length -= 3;
250     k += 3;
251   }
252 
253   /*------------------------------------------- handle the last 3 uint32_t's */
254   switch(length)                     /* all the case statements fall through */
255   {
256   case 3 : c+=k[2];
257   	/* fallthrough */
258   case 2 : b+=k[1];
259   	/* fallthrough */
260   case 1 : a+=k[0];
261     final(a,b,c);
262   case 0:     /* case 0: nothing left to add */
263     break;
264   }
265   /*------------------------------------------------------ report the result */
266   return c;
267 }
268 
269 
270 #ifdef SELF_TEST
271 
272 /*
273 --------------------------------------------------------------------
274 hashword2() -- same as hashword(), but take two seeds and return two
275 32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
276 both be initialized with seeds.  If you pass in (*pb)==0, the output
277 (*pc) will be the same as the return value from hashword().
278 --------------------------------------------------------------------
279 */
280 void hashword2 (
281 const uint32_t *k,                   /* the key, an array of uint32_t values */
282 size_t          length,               /* the length of the key, in uint32_ts */
283 uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
284 uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
285 {
286   uint32_t a,b,c;
287 
288   /* Set up the internal state */
289   a = b = c = raninit + ((uint32_t)(length<<2)) + *pc;
290   c += *pb;
291 
292   /*------------------------------------------------- handle most of the key */
293   while (length > 3)
294   {
295     a += k[0];
296     b += k[1];
297     c += k[2];
298     mix(a,b,c);
299     length -= 3;
300     k += 3;
301   }
302 
303   /*------------------------------------------- handle the last 3 uint32_t's */
304   switch(length)                     /* all the case statements fall through */
305   {
306   case 3 : c+=k[2];
307   case 2 : b+=k[1];
308   case 1 : a+=k[0];
309     final(a,b,c);
310   case 0:     /* case 0: nothing left to add */
311     break;
312   }
313   /*------------------------------------------------------ report the result */
314   *pc=c; *pb=b;
315 }
316 
317 #endif /* SELF_TEST */
318 
319 /*
320 -------------------------------------------------------------------------------
321 hashlittle() -- hash a variable-length key into a 32-bit value
322   k       : the key (the unaligned variable-length array of bytes)
323   length  : the length of the key, counting by bytes
324   initval : can be any 4-byte value
325 Returns a 32-bit value.  Every bit of the key affects every bit of
326 the return value.  Two keys differing by one or two bits will have
327 totally different hash values.
328 
329 The best hash table sizes are powers of 2.  There is no need to do
330 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
331 use a bitmask.  For example, if you need only 10 bits, do
332   h = (h & hashmask(10));
333 In which case, the hash table should have hashsize(10) elements.
334 
335 If you are hashing n strings (uint8_t **)k, do it like this:
336   for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
337 
338 By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
339 code any way you wish, private, educational, or commercial.  It's free.
340 
341 Use for hash table lookup, or anything where one collision in 2^^32 is
342 acceptable.  Do NOT use for cryptographic purposes.
343 -------------------------------------------------------------------------------
344 */
345 
346 uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
347 {
348   uint32_t a,b,c;                                          /* internal state */
349   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
350 
351   /* Set up the internal state */
352   a = b = c = raninit + ((uint32_t)length) + initval;
353 
354   u.ptr = key;
355   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
356     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
357 #ifdef ARRAY_CLEAN_ACCESS
358     const uint8_t  *k8;
359 #endif
360 
361     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
362     while (length > 12)
363     {
364       a += k[0];
365       b += k[1];
366       c += k[2];
367       mix(a,b,c);
368       length -= 12;
369       k += 3;
370     }
371 
372     /*----------------------------- handle the last (probably partial) block */
373     /*
374      * "k[2]&0xffffff" actually reads beyond the end of the string, but
375      * then masks off the part it's not allowed to read.  Because the
376      * string is aligned, the masked-off tail is in the same word as the
377      * rest of the string.  Every machine with memory protection I've seen
378      * does it on word boundaries, so is OK with this.  But VALGRIND will
379      * still catch it and complain.  The masking trick does make the hash
380      * noticeably faster for short strings (like English words).
381      */
382 #ifndef ARRAY_CLEAN_ACCESS
383 
384     switch(length)
385     {
386     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
387     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
388     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
389     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
390     case 8 : b+=k[1]; a+=k[0]; break;
391     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
392     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
393     case 5 : b+=k[1]&0xff; a+=k[0]; break;
394     case 4 : a+=k[0]; break;
395     case 3 : a+=k[0]&0xffffff; break;
396     case 2 : a+=k[0]&0xffff; break;
397     case 1 : a+=k[0]&0xff; break;
398     case 0 : return c;              /* zero length strings require no mixing */
399     }
400 
401 #else /* make valgrind happy */
402 
403     k8 = (const uint8_t *)k;
404     switch(length)
405     {
406     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
407     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
408     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
409     case 9 : c+=k8[8];                   /* fall through */
410     case 8 : b+=k[1]; a+=k[0]; break;
411     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
412     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
413     case 5 : b+=k8[4];                   /* fall through */
414     case 4 : a+=k[0]; break;
415     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
416     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
417     case 1 : a+=k8[0]; break;
418     case 0 : return c;
419     }
420 
421 #endif /* !valgrind */
422 
423   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
424     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
425     const uint8_t  *k8;
426 
427     /*--------------- all but last block: aligned reads and different mixing */
428     while (length > 12)
429     {
430       a += k[0] + (((uint32_t)k[1])<<16);
431       b += k[2] + (((uint32_t)k[3])<<16);
432       c += k[4] + (((uint32_t)k[5])<<16);
433       mix(a,b,c);
434       length -= 12;
435       k += 6;
436     }
437 
438     /*----------------------------- handle the last (probably partial) block */
439     k8 = (const uint8_t *)k;
440     switch(length)
441     {
442     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
443              b+=k[2]+(((uint32_t)k[3])<<16);
444              a+=k[0]+(((uint32_t)k[1])<<16);
445              break;
446     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
447     case 10: c+=k[4];
448              b+=k[2]+(((uint32_t)k[3])<<16);
449              a+=k[0]+(((uint32_t)k[1])<<16);
450              break;
451     case 9 : c+=k8[8];                      /* fall through */
452     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
453              a+=k[0]+(((uint32_t)k[1])<<16);
454              break;
455     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
456     case 6 : b+=k[2];
457              a+=k[0]+(((uint32_t)k[1])<<16);
458              break;
459     case 5 : b+=k8[4];                      /* fall through */
460     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
461              break;
462     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
463     case 2 : a+=k[0];
464              break;
465     case 1 : a+=k8[0];
466              break;
467     case 0 : return c;                     /* zero length requires no mixing */
468     }
469 
470   } else {                        /* need to read the key one byte at a time */
471     const uint8_t *k = (const uint8_t *)key;
472 
473     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
474     while (length > 12)
475     {
476       a += k[0];
477       a += ((uint32_t)k[1])<<8;
478       a += ((uint32_t)k[2])<<16;
479       a += ((uint32_t)k[3])<<24;
480       b += k[4];
481       b += ((uint32_t)k[5])<<8;
482       b += ((uint32_t)k[6])<<16;
483       b += ((uint32_t)k[7])<<24;
484       c += k[8];
485       c += ((uint32_t)k[9])<<8;
486       c += ((uint32_t)k[10])<<16;
487       c += ((uint32_t)k[11])<<24;
488       mix(a,b,c);
489       length -= 12;
490       k += 12;
491     }
492 
493     /*-------------------------------- last block: affect all 32 bits of (c) */
494     switch(length)                   /* all the case statements fall through */
495     {
496     case 12: c+=((uint32_t)k[11])<<24;
497   	/* fallthrough */
498     case 11: c+=((uint32_t)k[10])<<16;
499   	/* fallthrough */
500     case 10: c+=((uint32_t)k[9])<<8;
501   	/* fallthrough */
502     case 9 : c+=k[8];
503   	/* fallthrough */
504     case 8 : b+=((uint32_t)k[7])<<24;
505   	/* fallthrough */
506     case 7 : b+=((uint32_t)k[6])<<16;
507   	/* fallthrough */
508     case 6 : b+=((uint32_t)k[5])<<8;
509   	/* fallthrough */
510     case 5 : b+=k[4];
511   	/* fallthrough */
512     case 4 : a+=((uint32_t)k[3])<<24;
513   	/* fallthrough */
514     case 3 : a+=((uint32_t)k[2])<<16;
515   	/* fallthrough */
516     case 2 : a+=((uint32_t)k[1])<<8;
517   	/* fallthrough */
518     case 1 : a+=k[0];
519              break;
520     case 0 : return c;
521     }
522   }
523 
524   final(a,b,c);
525   return c;
526 }
527 
528 #ifdef SELF_TEST
529 
530 /*
531  * hashlittle2: return 2 32-bit hash values
532  *
533  * This is identical to hashlittle(), except it returns two 32-bit hash
534  * values instead of just one.  This is good enough for hash table
535  * lookup with 2^^64 buckets, or if you want a second hash if you're not
536  * happy with the first, or if you want a probably-unique 64-bit ID for
537  * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
538  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
539  */
540 void hashlittle2(
541   const void *key,       /* the key to hash */
542   size_t      length,    /* length of the key */
543   uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
544   uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
545 {
546   uint32_t a,b,c;                                          /* internal state */
547   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
548 
549   /* Set up the internal state */
550   a = b = c = raninit + ((uint32_t)length) + *pc;
551   c += *pb;
552 
553   u.ptr = key;
554   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
555     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
556 #ifdef VALGRIND
557     const uint8_t  *k8;
558 #endif
559 
560     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
561     while (length > 12)
562     {
563       a += k[0];
564       b += k[1];
565       c += k[2];
566       mix(a,b,c);
567       length -= 12;
568       k += 3;
569     }
570 
571     /*----------------------------- handle the last (probably partial) block */
572     /*
573      * "k[2]&0xffffff" actually reads beyond the end of the string, but
574      * then masks off the part it's not allowed to read.  Because the
575      * string is aligned, the masked-off tail is in the same word as the
576      * rest of the string.  Every machine with memory protection I've seen
577      * does it on word boundaries, so is OK with this.  But VALGRIND will
578      * still catch it and complain.  The masking trick does make the hash
579      * noticeably faster for short strings (like English words).
580      */
581 #ifndef VALGRIND
582 
583     switch(length)
584     {
585     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
586     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
587     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
588     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
589     case 8 : b+=k[1]; a+=k[0]; break;
590     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
591     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
592     case 5 : b+=k[1]&0xff; a+=k[0]; break;
593     case 4 : a+=k[0]; break;
594     case 3 : a+=k[0]&0xffffff; break;
595     case 2 : a+=k[0]&0xffff; break;
596     case 1 : a+=k[0]&0xff; break;
597     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
598     }
599 
600 #else /* make valgrind happy */
601 
602     k8 = (const uint8_t *)k;
603     switch(length)
604     {
605     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
606     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
607     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
608     case 9 : c+=k8[8];                   /* fall through */
609     case 8 : b+=k[1]; a+=k[0]; break;
610     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
611     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
612     case 5 : b+=k8[4];                   /* fall through */
613     case 4 : a+=k[0]; break;
614     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
615     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
616     case 1 : a+=k8[0]; break;
617     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
618     }
619 
620 #endif /* !valgrind */
621 
622   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
623     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
624     const uint8_t  *k8;
625 
626     /*--------------- all but last block: aligned reads and different mixing */
627     while (length > 12)
628     {
629       a += k[0] + (((uint32_t)k[1])<<16);
630       b += k[2] + (((uint32_t)k[3])<<16);
631       c += k[4] + (((uint32_t)k[5])<<16);
632       mix(a,b,c);
633       length -= 12;
634       k += 6;
635     }
636 
637     /*----------------------------- handle the last (probably partial) block */
638     k8 = (const uint8_t *)k;
639     switch(length)
640     {
641     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
642              b+=k[2]+(((uint32_t)k[3])<<16);
643              a+=k[0]+(((uint32_t)k[1])<<16);
644              break;
645     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
646     case 10: c+=k[4];
647              b+=k[2]+(((uint32_t)k[3])<<16);
648              a+=k[0]+(((uint32_t)k[1])<<16);
649              break;
650     case 9 : c+=k8[8];                      /* fall through */
651     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
652              a+=k[0]+(((uint32_t)k[1])<<16);
653              break;
654     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
655     case 6 : b+=k[2];
656              a+=k[0]+(((uint32_t)k[1])<<16);
657              break;
658     case 5 : b+=k8[4];                      /* fall through */
659     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
660              break;
661     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
662     case 2 : a+=k[0];
663              break;
664     case 1 : a+=k8[0];
665              break;
666     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
667     }
668 
669   } else {                        /* need to read the key one byte at a time */
670     const uint8_t *k = (const uint8_t *)key;
671 
672     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
673     while (length > 12)
674     {
675       a += k[0];
676       a += ((uint32_t)k[1])<<8;
677       a += ((uint32_t)k[2])<<16;
678       a += ((uint32_t)k[3])<<24;
679       b += k[4];
680       b += ((uint32_t)k[5])<<8;
681       b += ((uint32_t)k[6])<<16;
682       b += ((uint32_t)k[7])<<24;
683       c += k[8];
684       c += ((uint32_t)k[9])<<8;
685       c += ((uint32_t)k[10])<<16;
686       c += ((uint32_t)k[11])<<24;
687       mix(a,b,c);
688       length -= 12;
689       k += 12;
690     }
691 
692     /*-------------------------------- last block: affect all 32 bits of (c) */
693     switch(length)                   /* all the case statements fall through */
694     {
695     case 12: c+=((uint32_t)k[11])<<24;
696     case 11: c+=((uint32_t)k[10])<<16;
697     case 10: c+=((uint32_t)k[9])<<8;
698     case 9 : c+=k[8];
699     case 8 : b+=((uint32_t)k[7])<<24;
700     case 7 : b+=((uint32_t)k[6])<<16;
701     case 6 : b+=((uint32_t)k[5])<<8;
702     case 5 : b+=k[4];
703     case 4 : a+=((uint32_t)k[3])<<24;
704     case 3 : a+=((uint32_t)k[2])<<16;
705     case 2 : a+=((uint32_t)k[1])<<8;
706     case 1 : a+=k[0];
707              break;
708     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
709     }
710   }
711 
712   final(a,b,c);
713   *pc=c; *pb=b;
714 }
715 
716 #endif /* SELF_TEST */
717 
718 #if 0	/* currently not used */
719 
720 /*
721  * hashbig():
722  * This is the same as hashword() on big-endian machines.  It is different
723  * from hashlittle() on all machines.  hashbig() takes advantage of
724  * big-endian byte ordering.
725  */
726 uint32_t hashbig( const void *key, size_t length, uint32_t initval)
727 {
728   uint32_t a,b,c;
729   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
730 
731   /* Set up the internal state */
732   a = b = c = raninit + ((uint32_t)length) + initval;
733 
734   u.ptr = key;
735   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
736     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
737 #ifdef VALGRIND
738     const uint8_t  *k8;
739 #endif
740 
741     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
742     while (length > 12)
743     {
744       a += k[0];
745       b += k[1];
746       c += k[2];
747       mix(a,b,c);
748       length -= 12;
749       k += 3;
750     }
751 
752     /*----------------------------- handle the last (probably partial) block */
753     /*
754      * "k[2]<<8" actually reads beyond the end of the string, but
755      * then shifts out the part it's not allowed to read.  Because the
756      * string is aligned, the illegal read is in the same word as the
757      * rest of the string.  Every machine with memory protection I've seen
758      * does it on word boundaries, so is OK with this.  But VALGRIND will
759      * still catch it and complain.  The masking trick does make the hash
760      * noticeably faster for short strings (like English words).
761      */
762 #ifndef VALGRIND
763 
764     switch(length)
765     {
766     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
767     case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
768     case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
769     case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
770     case 8 : b+=k[1]; a+=k[0]; break;
771     case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
772     case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
773     case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
774     case 4 : a+=k[0]; break;
775     case 3 : a+=k[0]&0xffffff00; break;
776     case 2 : a+=k[0]&0xffff0000; break;
777     case 1 : a+=k[0]&0xff000000; break;
778     case 0 : return c;              /* zero length strings require no mixing */
779     }
780 
781 #else  /* make valgrind happy */
782 
783     k8 = (const uint8_t *)k;
784     switch(length)                   /* all the case statements fall through */
785     {
786     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
787     case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
788     case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
789     case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
790     case 8 : b+=k[1]; a+=k[0]; break;
791     case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
792     case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
793     case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
794     case 4 : a+=k[0]; break;
795     case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
796     case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
797     case 1 : a+=((uint32_t)k8[0])<<24; break;
798     case 0 : return c;
799     }
800 
801 #endif /* !VALGRIND */
802 
803   } else {                        /* need to read the key one byte at a time */
804     const uint8_t *k = (const uint8_t *)key;
805 
806     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
807     while (length > 12)
808     {
809       a += ((uint32_t)k[0])<<24;
810       a += ((uint32_t)k[1])<<16;
811       a += ((uint32_t)k[2])<<8;
812       a += ((uint32_t)k[3]);
813       b += ((uint32_t)k[4])<<24;
814       b += ((uint32_t)k[5])<<16;
815       b += ((uint32_t)k[6])<<8;
816       b += ((uint32_t)k[7]);
817       c += ((uint32_t)k[8])<<24;
818       c += ((uint32_t)k[9])<<16;
819       c += ((uint32_t)k[10])<<8;
820       c += ((uint32_t)k[11]);
821       mix(a,b,c);
822       length -= 12;
823       k += 12;
824     }
825 
826     /*-------------------------------- last block: affect all 32 bits of (c) */
827     switch(length)                   /* all the case statements fall through */
828     {
829     case 12: c+=k[11];
830     case 11: c+=((uint32_t)k[10])<<8;
831     case 10: c+=((uint32_t)k[9])<<16;
832     case 9 : c+=((uint32_t)k[8])<<24;
833     case 8 : b+=k[7];
834     case 7 : b+=((uint32_t)k[6])<<8;
835     case 6 : b+=((uint32_t)k[5])<<16;
836     case 5 : b+=((uint32_t)k[4])<<24;
837     case 4 : a+=k[3];
838     case 3 : a+=((uint32_t)k[2])<<8;
839     case 2 : a+=((uint32_t)k[1])<<16;
840     case 1 : a+=((uint32_t)k[0])<<24;
841              break;
842     case 0 : return c;
843     }
844   }
845 
846   final(a,b,c);
847   return c;
848 }
849 
850 #endif /* 0 == currently not used */
851 
852 #ifdef SELF_TEST
853 
854 /* used for timings */
855 void driver1(void)
856 {
857   uint8_t buf[256];
858   uint32_t i;
859   uint32_t h=0;
860   time_t a,z;
861 
862   time(&a);
863   for (i=0; i<256; ++i) buf[i] = 'x';
864   for (i=0; i<1; ++i)
865   {
866     h = hashlittle(&buf[0],1,h);
867   }
868   time(&z);
869   if (z-a > 0) printf("time %d %.8x\n", z-a, h);
870 }
871 
872 /* check that every input bit changes every output bit half the time */
873 #define HASHSTATE 1
874 #define HASHLEN   1
875 #define MAXPAIR 60
876 #define MAXLEN  70
877 void driver2(void)
878 {
879   uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
880   uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
881   uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
882   uint32_t x[HASHSTATE],y[HASHSTATE];
883   uint32_t hlen;
884 
885   printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
886   for (hlen=0; hlen < MAXLEN; ++hlen)
887   {
888     z=0;
889     for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
890     {
891       for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
892       {
893 	for (m=1; m<8; ++m) /*------------ for several possible initvals, */
894 	{
895 	  for (l=0; l<HASHSTATE; ++l)
896 	    e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
897 
898       	  /*---- check that every output bit is affected by that input bit */
899 	  for (k=0; k<MAXPAIR; k+=2)
900 	  {
901 	    uint32_t finished=1;
902 	    /* keys have one bit different */
903 	    for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
904 	    /* have a and b be two keys differing in only one bit */
905 	    a[i] ^= (k<<j);
906 	    a[i] ^= (k>>(8-j));
907 	     c[0] = hashlittle(a, hlen, m);
908 	    b[i] ^= ((k+1)<<j);
909 	    b[i] ^= ((k+1)>>(8-j));
910 	     d[0] = hashlittle(b, hlen, m);
911 	    /* check every bit is 1, 0, set, and not set at least once */
912 	    for (l=0; l<HASHSTATE; ++l)
913 	    {
914 	      e[l] &= (c[l]^d[l]);
915 	      f[l] &= ~(c[l]^d[l]);
916 	      g[l] &= c[l];
917 	      h[l] &= ~c[l];
918 	      x[l] &= d[l];
919 	      y[l] &= ~d[l];
920 	      if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
921 	    }
922 	    if (finished) break;
923 	  }
924 	  if (k>z) z=k;
925 	  if (k==MAXPAIR)
926 	  {
927 	     printf("Some bit didn't change: ");
928 	     printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
929 	            e[0],f[0],g[0],h[0],x[0],y[0]);
930 	     printf("i %d j %d m %d len %d\n", i, j, m, hlen);
931 	  }
932 	  if (z==MAXPAIR) goto done;
933 	}
934       }
935     }
936    done:
937     if (z < MAXPAIR)
938     {
939       printf("Mix success  %2d bytes  %2d initvals  ",i,m);
940       printf("required  %d  trials\n", z/2);
941     }
942   }
943   printf("\n");
944 }
945 
946 /* Check for reading beyond the end of the buffer and alignment problems */
947 void driver3(void)
948 {
949   uint8_t buf[MAXLEN+20], *b;
950   uint32_t len;
951   uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
952   uint32_t h;
953   uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
954   uint32_t i;
955   uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
956   uint32_t j;
957   uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
958   uint32_t ref,x,y;
959   uint8_t *p;
960 
961   printf("Endianness.  These lines should all be the same (for values filled in):\n");
962   printf("%.8x                            %.8x                            %.8x\n",
963          hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
964          hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
965          hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
966   p = q;
967   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
968          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
969          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
970          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
971          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
972          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
973          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
974   p = &qq[1];
975   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
976          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
977          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
978          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
979          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
980          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
981          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
982   p = &qqq[2];
983   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
984          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
985          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
986          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
987          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
988          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
989          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
990   p = &qqqq[3];
991   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
992          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
993          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
994          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
995          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
996          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
997          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
998   printf("\n");
999 
1000   /* check that hashlittle2 and hashlittle produce the same results */
1001   i=47; j=0;
1002   hashlittle2(q, sizeof(q), &i, &j);
1003   if (hashlittle(q, sizeof(q), 47) != i)
1004     printf("hashlittle2 and hashlittle mismatch\n");
1005 
1006   /* check that hashword2 and hashword produce the same results */
1007   len = raninit;
1008   i=47, j=0;
1009   hashword2(&len, 1, &i, &j);
1010   if (hashword(&len, 1, 47) != i)
1011     printf("hashword2 and hashword mismatch %x %x\n",
1012 	   i, hashword(&len, 1, 47));
1013 
1014   /* check hashlittle doesn't read before or after the ends of the string */
1015   for (h=0, b=buf+1; h<8; ++h, ++b)
1016   {
1017     for (i=0; i<MAXLEN; ++i)
1018     {
1019       len = i;
1020       for (j=0; j<i; ++j) *(b+j)=0;
1021 
1022       /* these should all be equal */
1023       ref = hashlittle(b, len, (uint32_t)1);
1024       *(b+i)=(uint8_t)~0;
1025       *(b-1)=(uint8_t)~0;
1026       x = hashlittle(b, len, (uint32_t)1);
1027       y = hashlittle(b, len, (uint32_t)1);
1028       if ((ref != x) || (ref != y))
1029       {
1030 	printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
1031                h, i);
1032       }
1033     }
1034   }
1035 }
1036 
1037 /* check for problems with nulls */
1038  void driver4(void)
1039 {
1040   uint8_t buf[1];
1041   uint32_t h,i,state[HASHSTATE];
1042 
1043 
1044   buf[0] = ~0;
1045   for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1046   printf("These should all be different\n");
1047   for (i=0, h=0; i<8; ++i)
1048   {
1049     h = hashlittle(buf, 0, h);
1050     printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
1051   }
1052 }
1053 
1054 
1055 int main(void)
1056 {
1057   driver1();   /* test that the key is hashed: used for timings */
1058   driver2();   /* test that whole key is hashed thoroughly */
1059   driver3();   /* test that nothing but the key is hashed */
1060   driver4();   /* test hashing multiple buffers (all buffers are null) */
1061   return 1;
1062 }
1063 
1064 #endif  /* SELF_TEST */
1065