1*7c478bd9Sstevel@tonic-gate /*-
2*7c478bd9Sstevel@tonic-gate * See the file LICENSE for redistribution information.
3*7c478bd9Sstevel@tonic-gate *
4*7c478bd9Sstevel@tonic-gate * Copyright (c) 1996, 1997
5*7c478bd9Sstevel@tonic-gate * Sleepycat Software. All rights reserved.
6*7c478bd9Sstevel@tonic-gate */
7*7c478bd9Sstevel@tonic-gate /*
8*7c478bd9Sstevel@tonic-gate * Copyright (c) 1990, 1993
9*7c478bd9Sstevel@tonic-gate * Margo Seltzer. All rights reserved.
10*7c478bd9Sstevel@tonic-gate */
11*7c478bd9Sstevel@tonic-gate /*
12*7c478bd9Sstevel@tonic-gate * Copyright (c) 1990, 1993
13*7c478bd9Sstevel@tonic-gate * The Regents of the University of California. All rights reserved.
14*7c478bd9Sstevel@tonic-gate *
15*7c478bd9Sstevel@tonic-gate * This code is derived from software contributed to Berkeley by
16*7c478bd9Sstevel@tonic-gate * Margo Seltzer.
17*7c478bd9Sstevel@tonic-gate *
18*7c478bd9Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without
19*7c478bd9Sstevel@tonic-gate * modification, are permitted provided that the following conditions
20*7c478bd9Sstevel@tonic-gate * are met:
21*7c478bd9Sstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright
22*7c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer.
23*7c478bd9Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright
24*7c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the
25*7c478bd9Sstevel@tonic-gate * documentation and/or other materials provided with the distribution.
26*7c478bd9Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software
27*7c478bd9Sstevel@tonic-gate * must display the following acknowledgement:
28*7c478bd9Sstevel@tonic-gate * This product includes software developed by the University of
29*7c478bd9Sstevel@tonic-gate * California, Berkeley and its contributors.
30*7c478bd9Sstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors
31*7c478bd9Sstevel@tonic-gate * may be used to endorse or promote products derived from this software
32*7c478bd9Sstevel@tonic-gate * without specific prior written permission.
33*7c478bd9Sstevel@tonic-gate *
34*7c478bd9Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35*7c478bd9Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36*7c478bd9Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37*7c478bd9Sstevel@tonic-gate * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38*7c478bd9Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39*7c478bd9Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40*7c478bd9Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41*7c478bd9Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42*7c478bd9Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43*7c478bd9Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44*7c478bd9Sstevel@tonic-gate * SUCH DAMAGE.
45*7c478bd9Sstevel@tonic-gate */
46*7c478bd9Sstevel@tonic-gate /*
47*7c478bd9Sstevel@tonic-gate * Copyright (c) 1998 by Sun Microsystems, Inc.
48*7c478bd9Sstevel@tonic-gate * All rights reserved.
49*7c478bd9Sstevel@tonic-gate */
50*7c478bd9Sstevel@tonic-gate
51*7c478bd9Sstevel@tonic-gate #include "config.h"
52*7c478bd9Sstevel@tonic-gate
53*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
54*7c478bd9Sstevel@tonic-gate
55*7c478bd9Sstevel@tonic-gate #ifndef lint
56*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)hash_func.c 10.7 (Sleepycat) 9/16/97";
57*7c478bd9Sstevel@tonic-gate static const char sccsi2[] = "%W% (Sun) %G%";
58*7c478bd9Sstevel@tonic-gate #endif /* not lint */
59*7c478bd9Sstevel@tonic-gate
60*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
61*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
62*7c478bd9Sstevel@tonic-gate #endif
63*7c478bd9Sstevel@tonic-gate
64*7c478bd9Sstevel@tonic-gate #include "db_int.h"
65*7c478bd9Sstevel@tonic-gate #include "db_page.h"
66*7c478bd9Sstevel@tonic-gate #include "hash.h"
67*7c478bd9Sstevel@tonic-gate
68*7c478bd9Sstevel@tonic-gate /*
69*7c478bd9Sstevel@tonic-gate * __ham_func2 --
70*7c478bd9Sstevel@tonic-gate * Phong Vo's linear congruential hash.
71*7c478bd9Sstevel@tonic-gate *
72*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t __ham_func2 __P((const void *, u_int32_t));
73*7c478bd9Sstevel@tonic-gate */
74*7c478bd9Sstevel@tonic-gate #define DCHARHASH(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
75*7c478bd9Sstevel@tonic-gate
76*7c478bd9Sstevel@tonic-gate u_int32_t
__ham_func2(key,len)77*7c478bd9Sstevel@tonic-gate __ham_func2(key, len)
78*7c478bd9Sstevel@tonic-gate const void *key;
79*7c478bd9Sstevel@tonic-gate u_int32_t len;
80*7c478bd9Sstevel@tonic-gate {
81*7c478bd9Sstevel@tonic-gate const u_int8_t *e, *k;
82*7c478bd9Sstevel@tonic-gate u_int32_t h;
83*7c478bd9Sstevel@tonic-gate u_int8_t c;
84*7c478bd9Sstevel@tonic-gate
85*7c478bd9Sstevel@tonic-gate k = key;
86*7c478bd9Sstevel@tonic-gate e = k + len;
87*7c478bd9Sstevel@tonic-gate for (h = 0; k != e;) {
88*7c478bd9Sstevel@tonic-gate c = *k++;
89*7c478bd9Sstevel@tonic-gate if (!c && k > e)
90*7c478bd9Sstevel@tonic-gate break;
91*7c478bd9Sstevel@tonic-gate DCHARHASH(h, c);
92*7c478bd9Sstevel@tonic-gate }
93*7c478bd9Sstevel@tonic-gate return (h);
94*7c478bd9Sstevel@tonic-gate }
95*7c478bd9Sstevel@tonic-gate
96*7c478bd9Sstevel@tonic-gate /*
97*7c478bd9Sstevel@tonic-gate * __ham_func3 --
98*7c478bd9Sstevel@tonic-gate * Ozan Yigit's original sdbm hash.
99*7c478bd9Sstevel@tonic-gate *
100*7c478bd9Sstevel@tonic-gate * Ugly, but fast. Break the string up into 8 byte units. On the first time
101*7c478bd9Sstevel@tonic-gate * through the loop get the "leftover bytes" (strlen % 8). On every other
102*7c478bd9Sstevel@tonic-gate * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this
103*7c478bd9Sstevel@tonic-gate * saves us 7 cmp & branch instructions.
104*7c478bd9Sstevel@tonic-gate *
105*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t __ham_func3 __P((const void *, u_int32_t));
106*7c478bd9Sstevel@tonic-gate */
107*7c478bd9Sstevel@tonic-gate u_int32_t
__ham_func3(key,len)108*7c478bd9Sstevel@tonic-gate __ham_func3(key, len)
109*7c478bd9Sstevel@tonic-gate const void *key;
110*7c478bd9Sstevel@tonic-gate u_int32_t len;
111*7c478bd9Sstevel@tonic-gate {
112*7c478bd9Sstevel@tonic-gate const u_int8_t *k;
113*7c478bd9Sstevel@tonic-gate u_int32_t n, loop;
114*7c478bd9Sstevel@tonic-gate
115*7c478bd9Sstevel@tonic-gate if (len == 0)
116*7c478bd9Sstevel@tonic-gate return (0);
117*7c478bd9Sstevel@tonic-gate
118*7c478bd9Sstevel@tonic-gate #define HASHC n = *k++ + 65599 * n
119*7c478bd9Sstevel@tonic-gate n = 0;
120*7c478bd9Sstevel@tonic-gate k = key;
121*7c478bd9Sstevel@tonic-gate
122*7c478bd9Sstevel@tonic-gate loop = (len + 8 - 1) >> 3;
123*7c478bd9Sstevel@tonic-gate switch (len & (8 - 1)) {
124*7c478bd9Sstevel@tonic-gate case 0:
125*7c478bd9Sstevel@tonic-gate do {
126*7c478bd9Sstevel@tonic-gate HASHC;
127*7c478bd9Sstevel@tonic-gate case 7:
128*7c478bd9Sstevel@tonic-gate HASHC;
129*7c478bd9Sstevel@tonic-gate case 6:
130*7c478bd9Sstevel@tonic-gate HASHC;
131*7c478bd9Sstevel@tonic-gate case 5:
132*7c478bd9Sstevel@tonic-gate HASHC;
133*7c478bd9Sstevel@tonic-gate case 4:
134*7c478bd9Sstevel@tonic-gate HASHC;
135*7c478bd9Sstevel@tonic-gate case 3:
136*7c478bd9Sstevel@tonic-gate HASHC;
137*7c478bd9Sstevel@tonic-gate case 2:
138*7c478bd9Sstevel@tonic-gate HASHC;
139*7c478bd9Sstevel@tonic-gate case 1:
140*7c478bd9Sstevel@tonic-gate HASHC;
141*7c478bd9Sstevel@tonic-gate } while (--loop);
142*7c478bd9Sstevel@tonic-gate }
143*7c478bd9Sstevel@tonic-gate return (n);
144*7c478bd9Sstevel@tonic-gate }
145*7c478bd9Sstevel@tonic-gate
146*7c478bd9Sstevel@tonic-gate /*
147*7c478bd9Sstevel@tonic-gate * __ham_func4 --
148*7c478bd9Sstevel@tonic-gate * Chris Torek's hash function. Although this function performs only
149*7c478bd9Sstevel@tonic-gate * slightly worse than __ham_func5 on strings, it performs horribly on
150*7c478bd9Sstevel@tonic-gate * numbers.
151*7c478bd9Sstevel@tonic-gate *
152*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t __ham_func4 __P((const void *, u_int32_t));
153*7c478bd9Sstevel@tonic-gate */
154*7c478bd9Sstevel@tonic-gate u_int32_t
__ham_func4(key,len)155*7c478bd9Sstevel@tonic-gate __ham_func4(key, len)
156*7c478bd9Sstevel@tonic-gate const void *key;
157*7c478bd9Sstevel@tonic-gate u_int32_t len;
158*7c478bd9Sstevel@tonic-gate {
159*7c478bd9Sstevel@tonic-gate const u_int8_t *k;
160*7c478bd9Sstevel@tonic-gate u_int32_t h, loop;
161*7c478bd9Sstevel@tonic-gate
162*7c478bd9Sstevel@tonic-gate if (len == 0)
163*7c478bd9Sstevel@tonic-gate return (0);
164*7c478bd9Sstevel@tonic-gate
165*7c478bd9Sstevel@tonic-gate #define HASH4a h = (h << 5) - h + *k++;
166*7c478bd9Sstevel@tonic-gate #define HASH4b h = (h << 5) + h + *k++;
167*7c478bd9Sstevel@tonic-gate #define HASH4 HASH4b
168*7c478bd9Sstevel@tonic-gate h = 0;
169*7c478bd9Sstevel@tonic-gate k = key;
170*7c478bd9Sstevel@tonic-gate
171*7c478bd9Sstevel@tonic-gate loop = (len + 8 - 1) >> 3;
172*7c478bd9Sstevel@tonic-gate switch (len & (8 - 1)) {
173*7c478bd9Sstevel@tonic-gate case 0:
174*7c478bd9Sstevel@tonic-gate do {
175*7c478bd9Sstevel@tonic-gate HASH4;
176*7c478bd9Sstevel@tonic-gate case 7:
177*7c478bd9Sstevel@tonic-gate HASH4;
178*7c478bd9Sstevel@tonic-gate case 6:
179*7c478bd9Sstevel@tonic-gate HASH4;
180*7c478bd9Sstevel@tonic-gate case 5:
181*7c478bd9Sstevel@tonic-gate HASH4;
182*7c478bd9Sstevel@tonic-gate case 4:
183*7c478bd9Sstevel@tonic-gate HASH4;
184*7c478bd9Sstevel@tonic-gate case 3:
185*7c478bd9Sstevel@tonic-gate HASH4;
186*7c478bd9Sstevel@tonic-gate case 2:
187*7c478bd9Sstevel@tonic-gate HASH4;
188*7c478bd9Sstevel@tonic-gate case 1:
189*7c478bd9Sstevel@tonic-gate HASH4;
190*7c478bd9Sstevel@tonic-gate } while (--loop);
191*7c478bd9Sstevel@tonic-gate }
192*7c478bd9Sstevel@tonic-gate return (h);
193*7c478bd9Sstevel@tonic-gate }
194*7c478bd9Sstevel@tonic-gate
195*7c478bd9Sstevel@tonic-gate /*
196*7c478bd9Sstevel@tonic-gate * Fowler/Noll/Vo hash
197*7c478bd9Sstevel@tonic-gate *
198*7c478bd9Sstevel@tonic-gate * The basis of the hash algorithm was taken from an idea sent by email to the
199*7c478bd9Sstevel@tonic-gate * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and
200*7c478bd9Sstevel@tonic-gate * Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com)
201*7c478bd9Sstevel@tonic-gate * later improved on their algorithm.
202*7c478bd9Sstevel@tonic-gate *
203*7c478bd9Sstevel@tonic-gate * The magic is in the interesting relationship between the special prime
204*7c478bd9Sstevel@tonic-gate * 16777619 (2^24 + 403) and 2^32 and 2^8.
205*7c478bd9Sstevel@tonic-gate *
206*7c478bd9Sstevel@tonic-gate * This hash produces the fewest collisions of any function that we've seen so
207*7c478bd9Sstevel@tonic-gate * far, and works well on both numbers and strings.
208*7c478bd9Sstevel@tonic-gate *
209*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t __ham_func5 __P((const void *, u_int32_t));
210*7c478bd9Sstevel@tonic-gate */
211*7c478bd9Sstevel@tonic-gate u_int32_t
__ham_func5(key,len)212*7c478bd9Sstevel@tonic-gate __ham_func5(key, len)
213*7c478bd9Sstevel@tonic-gate const void *key;
214*7c478bd9Sstevel@tonic-gate u_int32_t len;
215*7c478bd9Sstevel@tonic-gate {
216*7c478bd9Sstevel@tonic-gate const u_int8_t *k, *e;
217*7c478bd9Sstevel@tonic-gate u_int32_t h;
218*7c478bd9Sstevel@tonic-gate
219*7c478bd9Sstevel@tonic-gate k = key;
220*7c478bd9Sstevel@tonic-gate e = k + len;
221*7c478bd9Sstevel@tonic-gate for (h = 0; k < e; ++k) {
222*7c478bd9Sstevel@tonic-gate h *= 16777619;
223*7c478bd9Sstevel@tonic-gate h ^= *k;
224*7c478bd9Sstevel@tonic-gate }
225*7c478bd9Sstevel@tonic-gate return (h);
226*7c478bd9Sstevel@tonic-gate }
227