xref: /freebsd/contrib/unbound/util/storage/lookup3.c (revision e2eeea75eb8b6dd50c1298067a0655880d186734)
1 /*
2   May 2019(Wouter) patch to enable the valgrind clean implementation all the
3      time.  This enables better security audit and checks, which is better
4      than the speedup.  Git issue #30.  Renamed the define ARRAY_CLEAN_ACCESS.
5   February 2013(Wouter) patch defines for BSD endianness, from Brad Smith.
6   January 2012(Wouter) added randomised initial value, fallout from 28c3.
7   March 2007(Wouter) adapted from lookup3.c original, add config.h include.
8      added #ifdef VALGRIND to remove 298,384,660 'unused variable k8' warnings.
9      added include of lookup3.h to check definitions match declarations.
10      removed include of stdint - config.h takes care of platform independence.
11      added fallthrough comments for new gcc warning suppression.
12   url http://burtleburtle.net/bob/hash/index.html.
13 */
14 /*
15 -------------------------------------------------------------------------------
16 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
17 
18 These are functions for producing 32-bit hashes for hash table lookup.
19 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
20 are externally useful functions.  Routines to test the hash are included
21 if SELF_TEST is defined.  You can use this free for any purpose.  It's in
22 the public domain.  It has no warranty.
23 
24 You probably want to use hashlittle().  hashlittle() and hashbig()
25 hash byte arrays.  hashlittle() is is faster than hashbig() on
26 little-endian machines.  Intel and AMD are little-endian machines.
27 On second thought, you probably want hashlittle2(), which is identical to
28 hashlittle() except it returns two 32-bit hashes for the price of one.
29 You could implement hashbig2() if you wanted but I haven't bothered here.
30 
31 If you want to find a hash of, say, exactly 7 integers, do
32   a = i1;  b = i2;  c = i3;
33   mix(a,b,c);
34   a += i4; b += i5; c += i6;
35   mix(a,b,c);
36   a += i7;
37   final(a,b,c);
38 then use c as the hash value.  If you have a variable length array of
39 4-byte integers to hash, use hashword().  If you have a byte array (like
40 a character string), use hashlittle().  If you have several byte arrays, or
41 a mix of things, see the comments above hashlittle().
42 
43 Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
44 then mix those integers.  This is fast (you can do a lot more thorough
45 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
46 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
47 -------------------------------------------------------------------------------
48 */
49 /*#define SELF_TEST 1*/
50 #define ARRAY_CLEAN_ACCESS 1
51 
52 #include "config.h"
53 #include "util/storage/lookup3.h"
54 #include <stdio.h>      /* defines printf for tests */
55 #include <time.h>       /* defines time_t for timings in the test */
56 /*#include <stdint.h>     defines uint32_t etc  (from config.h) */
57 #include <sys/param.h>  /* attempt to define endianness */
58 #ifdef HAVE_SYS_TYPES_H
59 # include <sys/types.h> /* attempt to define endianness (solaris) */
60 #endif
61 #if defined(linux) || defined(__OpenBSD__)
62 #  ifdef HAVE_ENDIAN_H
63 #    include <endian.h>    /* attempt to define endianness */
64 #  else
65 #    include <machine/endian.h> /* on older OpenBSD */
66 #  endif
67 #endif
68 #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
69 #include <sys/endian.h> /* attempt to define endianness */
70 #endif
71 
72 /* random initial value */
73 static uint32_t raninit = (uint32_t)0xdeadbeef;
74 
75 void
76 hash_set_raninit(uint32_t v)
77 {
78 	raninit = v;
79 }
80 
81 /*
82  * My best guess at if you are big-endian or little-endian.  This may
83  * need adjustment.
84  */
85 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
86      __BYTE_ORDER == __LITTLE_ENDIAN) || \
87     (defined(i386) || defined(__i386__) || defined(__i486__) || \
88      defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL) || defined(__x86))
89 # define HASH_LITTLE_ENDIAN 1
90 # define HASH_BIG_ENDIAN 0
91 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
92        __BYTE_ORDER == __BIG_ENDIAN) || \
93       (defined(sparc) || defined(__sparc) || defined(__sparc__) || defined(POWERPC) || defined(mc68000) || defined(sel))
94 # define HASH_LITTLE_ENDIAN 0
95 # define HASH_BIG_ENDIAN 1
96 #elif defined(_MACHINE_ENDIAN_H_)
97 /* test for machine_endian_h protects failure if some are empty strings */
98 # if defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) && _BYTE_ORDER == _BIG_ENDIAN
99 #  define HASH_LITTLE_ENDIAN 0
100 #  define HASH_BIG_ENDIAN 1
101 # endif
102 # if defined(_BYTE_ORDER) && defined(_LITTLE_ENDIAN) && _BYTE_ORDER == _LITTLE_ENDIAN
103 #  define HASH_LITTLE_ENDIAN 1
104 #  define HASH_BIG_ENDIAN 0
105 # endif /* _MACHINE_ENDIAN_H_ */
106 #else
107 # define HASH_LITTLE_ENDIAN 0
108 # define HASH_BIG_ENDIAN 0
109 #endif
110 
111 #define hashsize(n) ((uint32_t)1<<(n))
112 #define hashmask(n) (hashsize(n)-1)
113 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
114 
115 /*
116 -------------------------------------------------------------------------------
117 mix -- mix 3 32-bit values reversibly.
118 
119 This is reversible, so any information in (a,b,c) before mix() is
120 still in (a,b,c) after mix().
121 
122 If four pairs of (a,b,c) inputs are run through mix(), or through
123 mix() in reverse, there are at least 32 bits of the output that
124 are sometimes the same for one pair and different for another pair.
125 This was tested for:
126 * pairs that differed by one bit, by two bits, in any combination
127   of top bits of (a,b,c), or in any combination of bottom bits of
128   (a,b,c).
129 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
130   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
131   is commonly produced by subtraction) look like a single 1-bit
132   difference.
133 * the base values were pseudorandom, all zero but one bit set, or
134   all zero plus a counter that starts at zero.
135 
136 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
137 satisfy this are
138     4  6  8 16 19  4
139     9 15  3 18 27 15
140    14  9  3  7 17  3
141 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
142 for "differ" defined as + with a one-bit base and a two-bit delta.  I
143 used http://burtleburtle.net/bob/hash/avalanche.html to choose
144 the operations, constants, and arrangements of the variables.
145 
146 This does not achieve avalanche.  There are input bits of (a,b,c)
147 that fail to affect some output bits of (a,b,c), especially of a.  The
148 most thoroughly mixed value is c, but it doesn't really even achieve
149 avalanche in c.
150 
151 This allows some parallelism.  Read-after-writes are good at doubling
152 the number of bits affected, so the goal of mixing pulls in the opposite
153 direction as the goal of parallelism.  I did what I could.  Rotates
154 seem to cost as much as shifts on every machine I could lay my hands
155 on, and rotates are much kinder to the top and bottom bits, so I used
156 rotates.
157 -------------------------------------------------------------------------------
158 */
159 #define mix(a,b,c) \
160 { \
161   a -= c;  a ^= rot(c, 4);  c += b; \
162   b -= a;  b ^= rot(a, 6);  a += c; \
163   c -= b;  c ^= rot(b, 8);  b += a; \
164   a -= c;  a ^= rot(c,16);  c += b; \
165   b -= a;  b ^= rot(a,19);  a += c; \
166   c -= b;  c ^= rot(b, 4);  b += a; \
167 }
168 
169 /*
170 -------------------------------------------------------------------------------
171 final -- final mixing of 3 32-bit values (a,b,c) into c
172 
173 Pairs of (a,b,c) values differing in only a few bits will usually
174 produce values of c that look totally different.  This was tested for
175 * pairs that differed by one bit, by two bits, in any combination
176   of top bits of (a,b,c), or in any combination of bottom bits of
177   (a,b,c).
178 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
179   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
180   is commonly produced by subtraction) look like a single 1-bit
181   difference.
182 * the base values were pseudorandom, all zero but one bit set, or
183   all zero plus a counter that starts at zero.
184 
185 These constants passed:
186  14 11 25 16 4 14 24
187  12 14 25 16 4 14 24
188 and these came close:
189   4  8 15 26 3 22 24
190  10  8 15 26 3 22 24
191  11  8 15 26 3 22 24
192 -------------------------------------------------------------------------------
193 */
194 #define final(a,b,c) \
195 { \
196   c ^= b; c -= rot(b,14); \
197   a ^= c; a -= rot(c,11); \
198   b ^= a; b -= rot(a,25); \
199   c ^= b; c -= rot(b,16); \
200   a ^= c; a -= rot(c,4);  \
201   b ^= a; b -= rot(a,14); \
202   c ^= b; c -= rot(b,24); \
203 }
204 
205 /*
206 --------------------------------------------------------------------
207  This works on all machines.  To be useful, it requires
208  -- that the key be an array of uint32_t's, and
209  -- that the length be the number of uint32_t's in the key
210 
211  The function hashword() is identical to hashlittle() on little-endian
212  machines, and identical to hashbig() on big-endian machines,
213  except that the length has to be measured in uint32_ts rather than in
214  bytes.  hashlittle() is more complicated than hashword() only because
215  hashlittle() has to dance around fitting the key bytes into registers.
216 --------------------------------------------------------------------
217 */
218 uint32_t hashword(
219 const uint32_t *k,                   /* the key, an array of uint32_t values */
220 size_t          length,               /* the length of the key, in uint32_ts */
221 uint32_t        initval)         /* the previous hash, or an arbitrary value */
222 {
223   uint32_t a,b,c;
224 
225   /* Set up the internal state */
226   a = b = c = raninit + (((uint32_t)length)<<2) + initval;
227 
228   /*------------------------------------------------- handle most of the key */
229   while (length > 3)
230   {
231     a += k[0];
232     b += k[1];
233     c += k[2];
234     mix(a,b,c);
235     length -= 3;
236     k += 3;
237   }
238 
239   /*------------------------------------------- handle the last 3 uint32_t's */
240   switch(length)                     /* all the case statements fall through */
241   {
242   case 3 : c+=k[2];
243   	/* fallthrough */
244   case 2 : b+=k[1];
245   	/* fallthrough */
246   case 1 : a+=k[0];
247     final(a,b,c);
248   case 0:     /* case 0: nothing left to add */
249     break;
250   }
251   /*------------------------------------------------------ report the result */
252   return c;
253 }
254 
255 
256 #ifdef SELF_TEST
257 
258 /*
259 --------------------------------------------------------------------
260 hashword2() -- same as hashword(), but take two seeds and return two
261 32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
262 both be initialized with seeds.  If you pass in (*pb)==0, the output
263 (*pc) will be the same as the return value from hashword().
264 --------------------------------------------------------------------
265 */
266 void hashword2 (
267 const uint32_t *k,                   /* the key, an array of uint32_t values */
268 size_t          length,               /* the length of the key, in uint32_ts */
269 uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
270 uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
271 {
272   uint32_t a,b,c;
273 
274   /* Set up the internal state */
275   a = b = c = raninit + ((uint32_t)(length<<2)) + *pc;
276   c += *pb;
277 
278   /*------------------------------------------------- handle most of the key */
279   while (length > 3)
280   {
281     a += k[0];
282     b += k[1];
283     c += k[2];
284     mix(a,b,c);
285     length -= 3;
286     k += 3;
287   }
288 
289   /*------------------------------------------- handle the last 3 uint32_t's */
290   switch(length)                     /* all the case statements fall through */
291   {
292   case 3 : c+=k[2];
293   case 2 : b+=k[1];
294   case 1 : a+=k[0];
295     final(a,b,c);
296   case 0:     /* case 0: nothing left to add */
297     break;
298   }
299   /*------------------------------------------------------ report the result */
300   *pc=c; *pb=b;
301 }
302 
303 #endif /* SELF_TEST */
304 
305 /*
306 -------------------------------------------------------------------------------
307 hashlittle() -- hash a variable-length key into a 32-bit value
308   k       : the key (the unaligned variable-length array of bytes)
309   length  : the length of the key, counting by bytes
310   initval : can be any 4-byte value
311 Returns a 32-bit value.  Every bit of the key affects every bit of
312 the return value.  Two keys differing by one or two bits will have
313 totally different hash values.
314 
315 The best hash table sizes are powers of 2.  There is no need to do
316 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
317 use a bitmask.  For example, if you need only 10 bits, do
318   h = (h & hashmask(10));
319 In which case, the hash table should have hashsize(10) elements.
320 
321 If you are hashing n strings (uint8_t **)k, do it like this:
322   for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
323 
324 By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
325 code any way you wish, private, educational, or commercial.  It's free.
326 
327 Use for hash table lookup, or anything where one collision in 2^^32 is
328 acceptable.  Do NOT use for cryptographic purposes.
329 -------------------------------------------------------------------------------
330 */
331 
332 uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
333 {
334   uint32_t a,b,c;                                          /* internal state */
335   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
336 
337   /* Set up the internal state */
338   a = b = c = raninit + ((uint32_t)length) + initval;
339 
340   u.ptr = key;
341   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
342     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
343 #ifdef ARRAY_CLEAN_ACCESS
344     const uint8_t  *k8;
345 #endif
346 
347     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
348     while (length > 12)
349     {
350       a += k[0];
351       b += k[1];
352       c += k[2];
353       mix(a,b,c);
354       length -= 12;
355       k += 3;
356     }
357 
358     /*----------------------------- handle the last (probably partial) block */
359     /*
360      * "k[2]&0xffffff" actually reads beyond the end of the string, but
361      * then masks off the part it's not allowed to read.  Because the
362      * string is aligned, the masked-off tail is in the same word as the
363      * rest of the string.  Every machine with memory protection I've seen
364      * does it on word boundaries, so is OK with this.  But VALGRIND will
365      * still catch it and complain.  The masking trick does make the hash
366      * noticeably faster for short strings (like English words).
367      */
368 #ifndef ARRAY_CLEAN_ACCESS
369 
370     switch(length)
371     {
372     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
373     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
374     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
375     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
376     case 8 : b+=k[1]; a+=k[0]; break;
377     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
378     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
379     case 5 : b+=k[1]&0xff; a+=k[0]; break;
380     case 4 : a+=k[0]; break;
381     case 3 : a+=k[0]&0xffffff; break;
382     case 2 : a+=k[0]&0xffff; break;
383     case 1 : a+=k[0]&0xff; break;
384     case 0 : return c;              /* zero length strings require no mixing */
385     }
386 
387 #else /* make valgrind happy */
388 
389     k8 = (const uint8_t *)k;
390     switch(length)
391     {
392     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
393     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
394     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
395     case 9 : c+=k8[8];                   /* fall through */
396     case 8 : b+=k[1]; a+=k[0]; break;
397     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
398     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
399     case 5 : b+=k8[4];                   /* fall through */
400     case 4 : a+=k[0]; break;
401     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
402     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
403     case 1 : a+=k8[0]; break;
404     case 0 : return c;
405     }
406 
407 #endif /* !valgrind */
408 
409   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
410     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
411     const uint8_t  *k8;
412 
413     /*--------------- all but last block: aligned reads and different mixing */
414     while (length > 12)
415     {
416       a += k[0] + (((uint32_t)k[1])<<16);
417       b += k[2] + (((uint32_t)k[3])<<16);
418       c += k[4] + (((uint32_t)k[5])<<16);
419       mix(a,b,c);
420       length -= 12;
421       k += 6;
422     }
423 
424     /*----------------------------- handle the last (probably partial) block */
425     k8 = (const uint8_t *)k;
426     switch(length)
427     {
428     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
429              b+=k[2]+(((uint32_t)k[3])<<16);
430              a+=k[0]+(((uint32_t)k[1])<<16);
431              break;
432     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
433     case 10: c+=k[4];
434              b+=k[2]+(((uint32_t)k[3])<<16);
435              a+=k[0]+(((uint32_t)k[1])<<16);
436              break;
437     case 9 : c+=k8[8];                      /* fall through */
438     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
439              a+=k[0]+(((uint32_t)k[1])<<16);
440              break;
441     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
442     case 6 : b+=k[2];
443              a+=k[0]+(((uint32_t)k[1])<<16);
444              break;
445     case 5 : b+=k8[4];                      /* fall through */
446     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
447              break;
448     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
449     case 2 : a+=k[0];
450              break;
451     case 1 : a+=k8[0];
452              break;
453     case 0 : return c;                     /* zero length requires no mixing */
454     }
455 
456   } else {                        /* need to read the key one byte at a time */
457     const uint8_t *k = (const uint8_t *)key;
458 
459     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
460     while (length > 12)
461     {
462       a += k[0];
463       a += ((uint32_t)k[1])<<8;
464       a += ((uint32_t)k[2])<<16;
465       a += ((uint32_t)k[3])<<24;
466       b += k[4];
467       b += ((uint32_t)k[5])<<8;
468       b += ((uint32_t)k[6])<<16;
469       b += ((uint32_t)k[7])<<24;
470       c += k[8];
471       c += ((uint32_t)k[9])<<8;
472       c += ((uint32_t)k[10])<<16;
473       c += ((uint32_t)k[11])<<24;
474       mix(a,b,c);
475       length -= 12;
476       k += 12;
477     }
478 
479     /*-------------------------------- last block: affect all 32 bits of (c) */
480     switch(length)                   /* all the case statements fall through */
481     {
482     case 12: c+=((uint32_t)k[11])<<24;
483   	/* fallthrough */
484     case 11: c+=((uint32_t)k[10])<<16;
485   	/* fallthrough */
486     case 10: c+=((uint32_t)k[9])<<8;
487   	/* fallthrough */
488     case 9 : c+=k[8];
489   	/* fallthrough */
490     case 8 : b+=((uint32_t)k[7])<<24;
491   	/* fallthrough */
492     case 7 : b+=((uint32_t)k[6])<<16;
493   	/* fallthrough */
494     case 6 : b+=((uint32_t)k[5])<<8;
495   	/* fallthrough */
496     case 5 : b+=k[4];
497   	/* fallthrough */
498     case 4 : a+=((uint32_t)k[3])<<24;
499   	/* fallthrough */
500     case 3 : a+=((uint32_t)k[2])<<16;
501   	/* fallthrough */
502     case 2 : a+=((uint32_t)k[1])<<8;
503   	/* fallthrough */
504     case 1 : a+=k[0];
505              break;
506     case 0 : return c;
507     }
508   }
509 
510   final(a,b,c);
511   return c;
512 }
513 
514 #ifdef SELF_TEST
515 
516 /*
517  * hashlittle2: return 2 32-bit hash values
518  *
519  * This is identical to hashlittle(), except it returns two 32-bit hash
520  * values instead of just one.  This is good enough for hash table
521  * lookup with 2^^64 buckets, or if you want a second hash if you're not
522  * happy with the first, or if you want a probably-unique 64-bit ID for
523  * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
524  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
525  */
526 void hashlittle2(
527   const void *key,       /* the key to hash */
528   size_t      length,    /* length of the key */
529   uint32_t   *pc,        /* IN: primary initval, OUT: primary hash */
530   uint32_t   *pb)        /* IN: secondary initval, OUT: secondary hash */
531 {
532   uint32_t a,b,c;                                          /* internal state */
533   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
534 
535   /* Set up the internal state */
536   a = b = c = raninit + ((uint32_t)length) + *pc;
537   c += *pb;
538 
539   u.ptr = key;
540   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
541     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
542 #ifdef VALGRIND
543     const uint8_t  *k8;
544 #endif
545 
546     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
547     while (length > 12)
548     {
549       a += k[0];
550       b += k[1];
551       c += k[2];
552       mix(a,b,c);
553       length -= 12;
554       k += 3;
555     }
556 
557     /*----------------------------- handle the last (probably partial) block */
558     /*
559      * "k[2]&0xffffff" actually reads beyond the end of the string, but
560      * then masks off the part it's not allowed to read.  Because the
561      * string is aligned, the masked-off tail is in the same word as the
562      * rest of the string.  Every machine with memory protection I've seen
563      * does it on word boundaries, so is OK with this.  But VALGRIND will
564      * still catch it and complain.  The masking trick does make the hash
565      * noticeably faster for short strings (like English words).
566      */
567 #ifndef VALGRIND
568 
569     switch(length)
570     {
571     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
572     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
573     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
574     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
575     case 8 : b+=k[1]; a+=k[0]; break;
576     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
577     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
578     case 5 : b+=k[1]&0xff; a+=k[0]; break;
579     case 4 : a+=k[0]; break;
580     case 3 : a+=k[0]&0xffffff; break;
581     case 2 : a+=k[0]&0xffff; break;
582     case 1 : a+=k[0]&0xff; break;
583     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
584     }
585 
586 #else /* make valgrind happy */
587 
588     k8 = (const uint8_t *)k;
589     switch(length)
590     {
591     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
592     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
593     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
594     case 9 : c+=k8[8];                   /* fall through */
595     case 8 : b+=k[1]; a+=k[0]; break;
596     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
597     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
598     case 5 : b+=k8[4];                   /* fall through */
599     case 4 : a+=k[0]; break;
600     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
601     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
602     case 1 : a+=k8[0]; break;
603     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
604     }
605 
606 #endif /* !valgrind */
607 
608   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
609     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
610     const uint8_t  *k8;
611 
612     /*--------------- all but last block: aligned reads and different mixing */
613     while (length > 12)
614     {
615       a += k[0] + (((uint32_t)k[1])<<16);
616       b += k[2] + (((uint32_t)k[3])<<16);
617       c += k[4] + (((uint32_t)k[5])<<16);
618       mix(a,b,c);
619       length -= 12;
620       k += 6;
621     }
622 
623     /*----------------------------- handle the last (probably partial) block */
624     k8 = (const uint8_t *)k;
625     switch(length)
626     {
627     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
628              b+=k[2]+(((uint32_t)k[3])<<16);
629              a+=k[0]+(((uint32_t)k[1])<<16);
630              break;
631     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
632     case 10: c+=k[4];
633              b+=k[2]+(((uint32_t)k[3])<<16);
634              a+=k[0]+(((uint32_t)k[1])<<16);
635              break;
636     case 9 : c+=k8[8];                      /* fall through */
637     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
638              a+=k[0]+(((uint32_t)k[1])<<16);
639              break;
640     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
641     case 6 : b+=k[2];
642              a+=k[0]+(((uint32_t)k[1])<<16);
643              break;
644     case 5 : b+=k8[4];                      /* fall through */
645     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
646              break;
647     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
648     case 2 : a+=k[0];
649              break;
650     case 1 : a+=k8[0];
651              break;
652     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
653     }
654 
655   } else {                        /* need to read the key one byte at a time */
656     const uint8_t *k = (const uint8_t *)key;
657 
658     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
659     while (length > 12)
660     {
661       a += k[0];
662       a += ((uint32_t)k[1])<<8;
663       a += ((uint32_t)k[2])<<16;
664       a += ((uint32_t)k[3])<<24;
665       b += k[4];
666       b += ((uint32_t)k[5])<<8;
667       b += ((uint32_t)k[6])<<16;
668       b += ((uint32_t)k[7])<<24;
669       c += k[8];
670       c += ((uint32_t)k[9])<<8;
671       c += ((uint32_t)k[10])<<16;
672       c += ((uint32_t)k[11])<<24;
673       mix(a,b,c);
674       length -= 12;
675       k += 12;
676     }
677 
678     /*-------------------------------- last block: affect all 32 bits of (c) */
679     switch(length)                   /* all the case statements fall through */
680     {
681     case 12: c+=((uint32_t)k[11])<<24;
682     case 11: c+=((uint32_t)k[10])<<16;
683     case 10: c+=((uint32_t)k[9])<<8;
684     case 9 : c+=k[8];
685     case 8 : b+=((uint32_t)k[7])<<24;
686     case 7 : b+=((uint32_t)k[6])<<16;
687     case 6 : b+=((uint32_t)k[5])<<8;
688     case 5 : b+=k[4];
689     case 4 : a+=((uint32_t)k[3])<<24;
690     case 3 : a+=((uint32_t)k[2])<<16;
691     case 2 : a+=((uint32_t)k[1])<<8;
692     case 1 : a+=k[0];
693              break;
694     case 0 : *pc=c; *pb=b; return;  /* zero length strings require no mixing */
695     }
696   }
697 
698   final(a,b,c);
699   *pc=c; *pb=b;
700 }
701 
702 #endif /* SELF_TEST */
703 
704 #if 0	/* currently not used */
705 
706 /*
707  * hashbig():
708  * This is the same as hashword() on big-endian machines.  It is different
709  * from hashlittle() on all machines.  hashbig() takes advantage of
710  * big-endian byte ordering.
711  */
712 uint32_t hashbig( const void *key, size_t length, uint32_t initval)
713 {
714   uint32_t a,b,c;
715   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
716 
717   /* Set up the internal state */
718   a = b = c = raninit + ((uint32_t)length) + initval;
719 
720   u.ptr = key;
721   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
722     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
723 #ifdef VALGRIND
724     const uint8_t  *k8;
725 #endif
726 
727     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
728     while (length > 12)
729     {
730       a += k[0];
731       b += k[1];
732       c += k[2];
733       mix(a,b,c);
734       length -= 12;
735       k += 3;
736     }
737 
738     /*----------------------------- handle the last (probably partial) block */
739     /*
740      * "k[2]<<8" actually reads beyond the end of the string, but
741      * then shifts out the part it's not allowed to read.  Because the
742      * string is aligned, the illegal read is in the same word as the
743      * rest of the string.  Every machine with memory protection I've seen
744      * does it on word boundaries, so is OK with this.  But VALGRIND will
745      * still catch it and complain.  The masking trick does make the hash
746      * noticeably faster for short strings (like English words).
747      */
748 #ifndef VALGRIND
749 
750     switch(length)
751     {
752     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
753     case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
754     case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
755     case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
756     case 8 : b+=k[1]; a+=k[0]; break;
757     case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
758     case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
759     case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
760     case 4 : a+=k[0]; break;
761     case 3 : a+=k[0]&0xffffff00; break;
762     case 2 : a+=k[0]&0xffff0000; break;
763     case 1 : a+=k[0]&0xff000000; break;
764     case 0 : return c;              /* zero length strings require no mixing */
765     }
766 
767 #else  /* make valgrind happy */
768 
769     k8 = (const uint8_t *)k;
770     switch(length)                   /* all the case statements fall through */
771     {
772     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
773     case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
774     case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
775     case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
776     case 8 : b+=k[1]; a+=k[0]; break;
777     case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
778     case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
779     case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
780     case 4 : a+=k[0]; break;
781     case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
782     case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
783     case 1 : a+=((uint32_t)k8[0])<<24; break;
784     case 0 : return c;
785     }
786 
787 #endif /* !VALGRIND */
788 
789   } else {                        /* need to read the key one byte at a time */
790     const uint8_t *k = (const uint8_t *)key;
791 
792     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
793     while (length > 12)
794     {
795       a += ((uint32_t)k[0])<<24;
796       a += ((uint32_t)k[1])<<16;
797       a += ((uint32_t)k[2])<<8;
798       a += ((uint32_t)k[3]);
799       b += ((uint32_t)k[4])<<24;
800       b += ((uint32_t)k[5])<<16;
801       b += ((uint32_t)k[6])<<8;
802       b += ((uint32_t)k[7]);
803       c += ((uint32_t)k[8])<<24;
804       c += ((uint32_t)k[9])<<16;
805       c += ((uint32_t)k[10])<<8;
806       c += ((uint32_t)k[11]);
807       mix(a,b,c);
808       length -= 12;
809       k += 12;
810     }
811 
812     /*-------------------------------- last block: affect all 32 bits of (c) */
813     switch(length)                   /* all the case statements fall through */
814     {
815     case 12: c+=k[11];
816     case 11: c+=((uint32_t)k[10])<<8;
817     case 10: c+=((uint32_t)k[9])<<16;
818     case 9 : c+=((uint32_t)k[8])<<24;
819     case 8 : b+=k[7];
820     case 7 : b+=((uint32_t)k[6])<<8;
821     case 6 : b+=((uint32_t)k[5])<<16;
822     case 5 : b+=((uint32_t)k[4])<<24;
823     case 4 : a+=k[3];
824     case 3 : a+=((uint32_t)k[2])<<8;
825     case 2 : a+=((uint32_t)k[1])<<16;
826     case 1 : a+=((uint32_t)k[0])<<24;
827              break;
828     case 0 : return c;
829     }
830   }
831 
832   final(a,b,c);
833   return c;
834 }
835 
836 #endif /* 0 == currently not used */
837 
838 #ifdef SELF_TEST
839 
840 /* used for timings */
841 void driver1(void)
842 {
843   uint8_t buf[256];
844   uint32_t i;
845   uint32_t h=0;
846   time_t a,z;
847 
848   time(&a);
849   for (i=0; i<256; ++i) buf[i] = 'x';
850   for (i=0; i<1; ++i)
851   {
852     h = hashlittle(&buf[0],1,h);
853   }
854   time(&z);
855   if (z-a > 0) printf("time %d %.8x\n", z-a, h);
856 }
857 
858 /* check that every input bit changes every output bit half the time */
859 #define HASHSTATE 1
860 #define HASHLEN   1
861 #define MAXPAIR 60
862 #define MAXLEN  70
863 void driver2(void)
864 {
865   uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
866   uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
867   uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
868   uint32_t x[HASHSTATE],y[HASHSTATE];
869   uint32_t hlen;
870 
871   printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
872   for (hlen=0; hlen < MAXLEN; ++hlen)
873   {
874     z=0;
875     for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
876     {
877       for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
878       {
879 	for (m=1; m<8; ++m) /*------------ for several possible initvals, */
880 	{
881 	  for (l=0; l<HASHSTATE; ++l)
882 	    e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
883 
884       	  /*---- check that every output bit is affected by that input bit */
885 	  for (k=0; k<MAXPAIR; k+=2)
886 	  {
887 	    uint32_t finished=1;
888 	    /* keys have one bit different */
889 	    for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
890 	    /* have a and b be two keys differing in only one bit */
891 	    a[i] ^= (k<<j);
892 	    a[i] ^= (k>>(8-j));
893 	     c[0] = hashlittle(a, hlen, m);
894 	    b[i] ^= ((k+1)<<j);
895 	    b[i] ^= ((k+1)>>(8-j));
896 	     d[0] = hashlittle(b, hlen, m);
897 	    /* check every bit is 1, 0, set, and not set at least once */
898 	    for (l=0; l<HASHSTATE; ++l)
899 	    {
900 	      e[l] &= (c[l]^d[l]);
901 	      f[l] &= ~(c[l]^d[l]);
902 	      g[l] &= c[l];
903 	      h[l] &= ~c[l];
904 	      x[l] &= d[l];
905 	      y[l] &= ~d[l];
906 	      if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
907 	    }
908 	    if (finished) break;
909 	  }
910 	  if (k>z) z=k;
911 	  if (k==MAXPAIR)
912 	  {
913 	     printf("Some bit didn't change: ");
914 	     printf("%.8x %.8x %.8x %.8x %.8x %.8x  ",
915 	            e[0],f[0],g[0],h[0],x[0],y[0]);
916 	     printf("i %d j %d m %d len %d\n", i, j, m, hlen);
917 	  }
918 	  if (z==MAXPAIR) goto done;
919 	}
920       }
921     }
922    done:
923     if (z < MAXPAIR)
924     {
925       printf("Mix success  %2d bytes  %2d initvals  ",i,m);
926       printf("required  %d  trials\n", z/2);
927     }
928   }
929   printf("\n");
930 }
931 
932 /* Check for reading beyond the end of the buffer and alignment problems */
933 void driver3(void)
934 {
935   uint8_t buf[MAXLEN+20], *b;
936   uint32_t len;
937   uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
938   uint32_t h;
939   uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
940   uint32_t i;
941   uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
942   uint32_t j;
943   uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
944   uint32_t ref,x,y;
945   uint8_t *p;
946 
947   printf("Endianness.  These lines should all be the same (for values filled in):\n");
948   printf("%.8x                            %.8x                            %.8x\n",
949          hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
950          hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
951          hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
952   p = q;
953   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
954          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
955          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
956          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
957          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
958          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
959          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
960   p = &qq[1];
961   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
962          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
963          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
964          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
965          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
966          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
967          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
968   p = &qqq[2];
969   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
970          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
971          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
972          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
973          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
974          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
975          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
976   p = &qqqq[3];
977   printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
978          hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
979          hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
980          hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
981          hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
982          hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
983          hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
984   printf("\n");
985 
986   /* check that hashlittle2 and hashlittle produce the same results */
987   i=47; j=0;
988   hashlittle2(q, sizeof(q), &i, &j);
989   if (hashlittle(q, sizeof(q), 47) != i)
990     printf("hashlittle2 and hashlittle mismatch\n");
991 
992   /* check that hashword2 and hashword produce the same results */
993   len = raninit;
994   i=47, j=0;
995   hashword2(&len, 1, &i, &j);
996   if (hashword(&len, 1, 47) != i)
997     printf("hashword2 and hashword mismatch %x %x\n",
998 	   i, hashword(&len, 1, 47));
999 
1000   /* check hashlittle doesn't read before or after the ends of the string */
1001   for (h=0, b=buf+1; h<8; ++h, ++b)
1002   {
1003     for (i=0; i<MAXLEN; ++i)
1004     {
1005       len = i;
1006       for (j=0; j<i; ++j) *(b+j)=0;
1007 
1008       /* these should all be equal */
1009       ref = hashlittle(b, len, (uint32_t)1);
1010       *(b+i)=(uint8_t)~0;
1011       *(b-1)=(uint8_t)~0;
1012       x = hashlittle(b, len, (uint32_t)1);
1013       y = hashlittle(b, len, (uint32_t)1);
1014       if ((ref != x) || (ref != y))
1015       {
1016 	printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
1017                h, i);
1018       }
1019     }
1020   }
1021 }
1022 
1023 /* check for problems with nulls */
1024  void driver4(void)
1025 {
1026   uint8_t buf[1];
1027   uint32_t h,i,state[HASHSTATE];
1028 
1029 
1030   buf[0] = ~0;
1031   for (i=0; i<HASHSTATE; ++i) state[i] = 1;
1032   printf("These should all be different\n");
1033   for (i=0, h=0; i<8; ++i)
1034   {
1035     h = hashlittle(buf, 0, h);
1036     printf("%2ld  0-byte strings, hash is  %.8x\n", i, h);
1037   }
1038 }
1039 
1040 
1041 int main(void)
1042 {
1043   driver1();   /* test that the key is hashed: used for timings */
1044   driver2();   /* test that whole key is hashed thoroughly */
1045   driver3();   /* test that nothing but the key is hashed */
1046   driver4();   /* test hashing multiple buffers (all buffers are null) */
1047   return 1;
1048 }
1049 
1050 #endif  /* SELF_TEST */
1051