1*7c478bd9Sstevel@tonic-gate /*- 2*7c478bd9Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * Copyright (c) 1996, 1997 5*7c478bd9Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*7c478bd9Sstevel@tonic-gate */ 7*7c478bd9Sstevel@tonic-gate /* 8*7c478bd9Sstevel@tonic-gate * Copyright (c) 1990, 1993 9*7c478bd9Sstevel@tonic-gate * Margo Seltzer. All rights reserved. 10*7c478bd9Sstevel@tonic-gate */ 11*7c478bd9Sstevel@tonic-gate /* 12*7c478bd9Sstevel@tonic-gate * Copyright (c) 1990, 1993 13*7c478bd9Sstevel@tonic-gate * The Regents of the University of California. All rights reserved. 14*7c478bd9Sstevel@tonic-gate * 15*7c478bd9Sstevel@tonic-gate * This code is derived from software contributed to Berkeley by 16*7c478bd9Sstevel@tonic-gate * Margo Seltzer. 17*7c478bd9Sstevel@tonic-gate * 18*7c478bd9Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without 19*7c478bd9Sstevel@tonic-gate * modification, are permitted provided that the following conditions 20*7c478bd9Sstevel@tonic-gate * are met: 21*7c478bd9Sstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright 22*7c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer. 23*7c478bd9Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright 24*7c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the 25*7c478bd9Sstevel@tonic-gate * documentation and/or other materials provided with the distribution. 26*7c478bd9Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software 27*7c478bd9Sstevel@tonic-gate * must display the following acknowledgement: 28*7c478bd9Sstevel@tonic-gate * This product includes software developed by the University of 29*7c478bd9Sstevel@tonic-gate * California, Berkeley and its contributors. 30*7c478bd9Sstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors 31*7c478bd9Sstevel@tonic-gate * may be used to endorse or promote products derived from this software 32*7c478bd9Sstevel@tonic-gate * without specific prior written permission. 33*7c478bd9Sstevel@tonic-gate * 34*7c478bd9Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 35*7c478bd9Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 36*7c478bd9Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 37*7c478bd9Sstevel@tonic-gate * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 38*7c478bd9Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 39*7c478bd9Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 40*7c478bd9Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 41*7c478bd9Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 42*7c478bd9Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 43*7c478bd9Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 44*7c478bd9Sstevel@tonic-gate * SUCH DAMAGE. 45*7c478bd9Sstevel@tonic-gate */ 46*7c478bd9Sstevel@tonic-gate /* 47*7c478bd9Sstevel@tonic-gate * Copyright (c) 1998 by Sun Microsystems, Inc. 48*7c478bd9Sstevel@tonic-gate * All rights reserved. 49*7c478bd9Sstevel@tonic-gate */ 50*7c478bd9Sstevel@tonic-gate 51*7c478bd9Sstevel@tonic-gate #include "config.h" 52*7c478bd9Sstevel@tonic-gate 53*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 54*7c478bd9Sstevel@tonic-gate 55*7c478bd9Sstevel@tonic-gate #ifndef lint 56*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)hash_func.c 10.7 (Sleepycat) 9/16/97"; 57*7c478bd9Sstevel@tonic-gate static const char sccsi2[] = "%W% (Sun) %G%"; 58*7c478bd9Sstevel@tonic-gate #endif /* not lint */ 59*7c478bd9Sstevel@tonic-gate 60*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 61*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 62*7c478bd9Sstevel@tonic-gate #endif 63*7c478bd9Sstevel@tonic-gate 64*7c478bd9Sstevel@tonic-gate #include "db_int.h" 65*7c478bd9Sstevel@tonic-gate #include "db_page.h" 66*7c478bd9Sstevel@tonic-gate #include "hash.h" 67*7c478bd9Sstevel@tonic-gate 68*7c478bd9Sstevel@tonic-gate /* 69*7c478bd9Sstevel@tonic-gate * __ham_func2 -- 70*7c478bd9Sstevel@tonic-gate * Phong Vo's linear congruential hash. 71*7c478bd9Sstevel@tonic-gate * 72*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t __ham_func2 __P((const void *, u_int32_t)); 73*7c478bd9Sstevel@tonic-gate */ 74*7c478bd9Sstevel@tonic-gate #define DCHARHASH(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) 75*7c478bd9Sstevel@tonic-gate 76*7c478bd9Sstevel@tonic-gate u_int32_t 77*7c478bd9Sstevel@tonic-gate __ham_func2(key, len) 78*7c478bd9Sstevel@tonic-gate const void *key; 79*7c478bd9Sstevel@tonic-gate u_int32_t len; 80*7c478bd9Sstevel@tonic-gate { 81*7c478bd9Sstevel@tonic-gate const u_int8_t *e, *k; 82*7c478bd9Sstevel@tonic-gate u_int32_t h; 83*7c478bd9Sstevel@tonic-gate u_int8_t c; 84*7c478bd9Sstevel@tonic-gate 85*7c478bd9Sstevel@tonic-gate k = key; 86*7c478bd9Sstevel@tonic-gate e = k + len; 87*7c478bd9Sstevel@tonic-gate for (h = 0; k != e;) { 88*7c478bd9Sstevel@tonic-gate c = *k++; 89*7c478bd9Sstevel@tonic-gate if (!c && k > e) 90*7c478bd9Sstevel@tonic-gate break; 91*7c478bd9Sstevel@tonic-gate DCHARHASH(h, c); 92*7c478bd9Sstevel@tonic-gate } 93*7c478bd9Sstevel@tonic-gate return (h); 94*7c478bd9Sstevel@tonic-gate } 95*7c478bd9Sstevel@tonic-gate 96*7c478bd9Sstevel@tonic-gate /* 97*7c478bd9Sstevel@tonic-gate * __ham_func3 -- 98*7c478bd9Sstevel@tonic-gate * Ozan Yigit's original sdbm hash. 99*7c478bd9Sstevel@tonic-gate * 100*7c478bd9Sstevel@tonic-gate * Ugly, but fast. Break the string up into 8 byte units. On the first time 101*7c478bd9Sstevel@tonic-gate * through the loop get the "leftover bytes" (strlen % 8). On every other 102*7c478bd9Sstevel@tonic-gate * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this 103*7c478bd9Sstevel@tonic-gate * saves us 7 cmp & branch instructions. 104*7c478bd9Sstevel@tonic-gate * 105*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t __ham_func3 __P((const void *, u_int32_t)); 106*7c478bd9Sstevel@tonic-gate */ 107*7c478bd9Sstevel@tonic-gate u_int32_t 108*7c478bd9Sstevel@tonic-gate __ham_func3(key, len) 109*7c478bd9Sstevel@tonic-gate const void *key; 110*7c478bd9Sstevel@tonic-gate u_int32_t len; 111*7c478bd9Sstevel@tonic-gate { 112*7c478bd9Sstevel@tonic-gate const u_int8_t *k; 113*7c478bd9Sstevel@tonic-gate u_int32_t n, loop; 114*7c478bd9Sstevel@tonic-gate 115*7c478bd9Sstevel@tonic-gate if (len == 0) 116*7c478bd9Sstevel@tonic-gate return (0); 117*7c478bd9Sstevel@tonic-gate 118*7c478bd9Sstevel@tonic-gate #define HASHC n = *k++ + 65599 * n 119*7c478bd9Sstevel@tonic-gate n = 0; 120*7c478bd9Sstevel@tonic-gate k = key; 121*7c478bd9Sstevel@tonic-gate 122*7c478bd9Sstevel@tonic-gate loop = (len + 8 - 1) >> 3; 123*7c478bd9Sstevel@tonic-gate switch (len & (8 - 1)) { 124*7c478bd9Sstevel@tonic-gate case 0: 125*7c478bd9Sstevel@tonic-gate do { 126*7c478bd9Sstevel@tonic-gate HASHC; 127*7c478bd9Sstevel@tonic-gate case 7: 128*7c478bd9Sstevel@tonic-gate HASHC; 129*7c478bd9Sstevel@tonic-gate case 6: 130*7c478bd9Sstevel@tonic-gate HASHC; 131*7c478bd9Sstevel@tonic-gate case 5: 132*7c478bd9Sstevel@tonic-gate HASHC; 133*7c478bd9Sstevel@tonic-gate case 4: 134*7c478bd9Sstevel@tonic-gate HASHC; 135*7c478bd9Sstevel@tonic-gate case 3: 136*7c478bd9Sstevel@tonic-gate HASHC; 137*7c478bd9Sstevel@tonic-gate case 2: 138*7c478bd9Sstevel@tonic-gate HASHC; 139*7c478bd9Sstevel@tonic-gate case 1: 140*7c478bd9Sstevel@tonic-gate HASHC; 141*7c478bd9Sstevel@tonic-gate } while (--loop); 142*7c478bd9Sstevel@tonic-gate } 143*7c478bd9Sstevel@tonic-gate return (n); 144*7c478bd9Sstevel@tonic-gate } 145*7c478bd9Sstevel@tonic-gate 146*7c478bd9Sstevel@tonic-gate /* 147*7c478bd9Sstevel@tonic-gate * __ham_func4 -- 148*7c478bd9Sstevel@tonic-gate * Chris Torek's hash function. Although this function performs only 149*7c478bd9Sstevel@tonic-gate * slightly worse than __ham_func5 on strings, it performs horribly on 150*7c478bd9Sstevel@tonic-gate * numbers. 151*7c478bd9Sstevel@tonic-gate * 152*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t __ham_func4 __P((const void *, u_int32_t)); 153*7c478bd9Sstevel@tonic-gate */ 154*7c478bd9Sstevel@tonic-gate u_int32_t 155*7c478bd9Sstevel@tonic-gate __ham_func4(key, len) 156*7c478bd9Sstevel@tonic-gate const void *key; 157*7c478bd9Sstevel@tonic-gate u_int32_t len; 158*7c478bd9Sstevel@tonic-gate { 159*7c478bd9Sstevel@tonic-gate const u_int8_t *k; 160*7c478bd9Sstevel@tonic-gate u_int32_t h, loop; 161*7c478bd9Sstevel@tonic-gate 162*7c478bd9Sstevel@tonic-gate if (len == 0) 163*7c478bd9Sstevel@tonic-gate return (0); 164*7c478bd9Sstevel@tonic-gate 165*7c478bd9Sstevel@tonic-gate #define HASH4a h = (h << 5) - h + *k++; 166*7c478bd9Sstevel@tonic-gate #define HASH4b h = (h << 5) + h + *k++; 167*7c478bd9Sstevel@tonic-gate #define HASH4 HASH4b 168*7c478bd9Sstevel@tonic-gate h = 0; 169*7c478bd9Sstevel@tonic-gate k = key; 170*7c478bd9Sstevel@tonic-gate 171*7c478bd9Sstevel@tonic-gate loop = (len + 8 - 1) >> 3; 172*7c478bd9Sstevel@tonic-gate switch (len & (8 - 1)) { 173*7c478bd9Sstevel@tonic-gate case 0: 174*7c478bd9Sstevel@tonic-gate do { 175*7c478bd9Sstevel@tonic-gate HASH4; 176*7c478bd9Sstevel@tonic-gate case 7: 177*7c478bd9Sstevel@tonic-gate HASH4; 178*7c478bd9Sstevel@tonic-gate case 6: 179*7c478bd9Sstevel@tonic-gate HASH4; 180*7c478bd9Sstevel@tonic-gate case 5: 181*7c478bd9Sstevel@tonic-gate HASH4; 182*7c478bd9Sstevel@tonic-gate case 4: 183*7c478bd9Sstevel@tonic-gate HASH4; 184*7c478bd9Sstevel@tonic-gate case 3: 185*7c478bd9Sstevel@tonic-gate HASH4; 186*7c478bd9Sstevel@tonic-gate case 2: 187*7c478bd9Sstevel@tonic-gate HASH4; 188*7c478bd9Sstevel@tonic-gate case 1: 189*7c478bd9Sstevel@tonic-gate HASH4; 190*7c478bd9Sstevel@tonic-gate } while (--loop); 191*7c478bd9Sstevel@tonic-gate } 192*7c478bd9Sstevel@tonic-gate return (h); 193*7c478bd9Sstevel@tonic-gate } 194*7c478bd9Sstevel@tonic-gate 195*7c478bd9Sstevel@tonic-gate /* 196*7c478bd9Sstevel@tonic-gate * Fowler/Noll/Vo hash 197*7c478bd9Sstevel@tonic-gate * 198*7c478bd9Sstevel@tonic-gate * The basis of the hash algorithm was taken from an idea sent by email to the 199*7c478bd9Sstevel@tonic-gate * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and 200*7c478bd9Sstevel@tonic-gate * Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com) 201*7c478bd9Sstevel@tonic-gate * later improved on their algorithm. 202*7c478bd9Sstevel@tonic-gate * 203*7c478bd9Sstevel@tonic-gate * The magic is in the interesting relationship between the special prime 204*7c478bd9Sstevel@tonic-gate * 16777619 (2^24 + 403) and 2^32 and 2^8. 205*7c478bd9Sstevel@tonic-gate * 206*7c478bd9Sstevel@tonic-gate * This hash produces the fewest collisions of any function that we've seen so 207*7c478bd9Sstevel@tonic-gate * far, and works well on both numbers and strings. 208*7c478bd9Sstevel@tonic-gate * 209*7c478bd9Sstevel@tonic-gate * PUBLIC: u_int32_t __ham_func5 __P((const void *, u_int32_t)); 210*7c478bd9Sstevel@tonic-gate */ 211*7c478bd9Sstevel@tonic-gate u_int32_t 212*7c478bd9Sstevel@tonic-gate __ham_func5(key, len) 213*7c478bd9Sstevel@tonic-gate const void *key; 214*7c478bd9Sstevel@tonic-gate u_int32_t len; 215*7c478bd9Sstevel@tonic-gate { 216*7c478bd9Sstevel@tonic-gate const u_int8_t *k, *e; 217*7c478bd9Sstevel@tonic-gate u_int32_t h; 218*7c478bd9Sstevel@tonic-gate 219*7c478bd9Sstevel@tonic-gate k = key; 220*7c478bd9Sstevel@tonic-gate e = k + len; 221*7c478bd9Sstevel@tonic-gate for (h = 0; k < e; ++k) { 222*7c478bd9Sstevel@tonic-gate h *= 16777619; 223*7c478bd9Sstevel@tonic-gate h ^= *k; 224*7c478bd9Sstevel@tonic-gate } 225*7c478bd9Sstevel@tonic-gate return (h); 226*7c478bd9Sstevel@tonic-gate } 227