xref: /freebsd/lib/libc/db/hash/hash_func.c (revision dc36d6f9bb1753f3808552f3afd30eda9a7b206a)
158f0484fSRodney W. Grimes /*-
2*8a16b7a1SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
3*8a16b7a1SPedro F. Giffuni  *
458f0484fSRodney W. Grimes  * Copyright (c) 1990, 1993
558f0484fSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
658f0484fSRodney W. Grimes  *
758f0484fSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
858f0484fSRodney W. Grimes  * Margo Seltzer.
958f0484fSRodney W. Grimes  *
1058f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
1158f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1258f0484fSRodney W. Grimes  * are met:
1358f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1458f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1558f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1658f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1758f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
1958f0484fSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
2058f0484fSRodney W. Grimes  *    without specific prior written permission.
2158f0484fSRodney W. Grimes  *
2258f0484fSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2358f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2458f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2558f0484fSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2658f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2758f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2858f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2958f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3058f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3158f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3258f0484fSRodney W. Grimes  * SUCH DAMAGE.
3358f0484fSRodney W. Grimes  */
3458f0484fSRodney W. Grimes 
3558f0484fSRodney W. Grimes #include <sys/types.h>
3658f0484fSRodney W. Grimes 
3758f0484fSRodney W. Grimes #include <db.h>
3858f0484fSRodney W. Grimes #include "hash.h"
3958f0484fSRodney W. Grimes #include "page.h"
4058f0484fSRodney W. Grimes #include "extern.h"
4158f0484fSRodney W. Grimes 
/*
 * Prototypes for the hash functions in this file.  hash1, hash2 and hash3
 * are declared only when "notdef" is defined; hash4 is always declared and
 * is the function __default_hash is initialized to.
 */
429fc74a87SXin LI #ifdef notdef
43e0554a53SJacques Vidrine static u_int32_t hash1(const void *, size_t) __unused;
44e0554a53SJacques Vidrine static u_int32_t hash2(const void *, size_t) __unused;
45e0554a53SJacques Vidrine static u_int32_t hash3(const void *, size_t) __unused;
469fc74a87SXin LI #endif
47c05ac53bSDavid E. O'Brien static u_int32_t hash4(const void *, size_t);
4858f0484fSRodney W. Grimes 
499fc74a87SXin LI /* Default hash function. */
501372519bSDavid E. O'Brien u_int32_t (*__default_hash)(const void *, size_t) = hash4;
5158f0484fSRodney W. Grimes 
529fc74a87SXin LI #ifdef notdef
/*
 * EJB's original hsearch hash.
 *
 * Treats the key as a string of len bytes: for every byte the running value
 * is multiplied by PRIME1 and XORed with the byte's offset from ' ', and the
 * final value is reduced modulo PRIME2.
 *
 * key: first byte of the key; it is only read, never written.
 * len: number of bytes at key.
 *
 * Returns a value in the range [0, PRIME2); 0 when len is 0.
 */
#define PRIME1		37
#define PRIME2		1048583

static u_int32_t
hash1(const void *key, size_t len)
{
	const u_int8_t *k;
	u_int32_t h;

	h = 0;
	/*
	 * Convert string to integer.  For bytes below ' ' the int difference
	 * is negative; it is converted to unsigned by the XOR with h.
	 */
	for (k = key; len > 0; len--, k++)
		h = h * PRIME1 ^ (*k - ' ');
	h %= PRIME2;
	return (h);
}
7658f0484fSRodney W. Grimes 
/*
 * Phong Vo's linear congruential hash
 *
 * dcharhash(h, c) updates h in place to 0x63c63cd9 * h + 0x9c39c33d + c.
 * Note that h is evaluated twice by the macro and c once.
 */
#define dcharhash(h, c)	((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))

/*
 * Apply dcharhash to each of the len bytes at key, starting from h = 0.
 * Every byte takes part in the hash, including bytes with value 0.
 *
 * key: first byte of the key; it is only read, never written.
 * len: number of bytes at key.
 *
 * Returns the accumulated hash; 0 when len is 0.
 */
static u_int32_t
hash2(const void *key, size_t len)
{
	const u_int8_t *k;
	u_int32_t h;

	h = 0;
	/* Count len down so no end pointer is formed from key. */
	for (k = key; len > 0; len--, k++)
		dcharhash(h, *k);
	return (h);
}
9858f0484fSRodney W. Grimes 
/*
 * Ozan Yigit's original sdbm hash.
 *
 * This is INCREDIBLY ugly, but fast.  We break the key up into 8 byte
 * units.  On the first time through the loop we get the "leftover bytes"
 * (len % 8, or a full 8 when len is a multiple of 8).  On every other
 * iteration, we perform 8 HASHC's so we handle all 8 bytes.  Essentially,
 * this saves us 7 cmp & branch instructions.  If this routine is heavily
 * used enough, it's worth the ugly coding.
 *
 * key: first byte of the key; it is only read, never written.
 * len: number of bytes at key.
 *
 * Returns the accumulated hash; 0 when len is 0.
 */
static u_int32_t
hash3(const void *key, size_t len)
{
	const u_int8_t *k;
	size_t loop;
	u_int32_t n;

/* One step of the hash: n = next byte + 65599 * n. */
#define HASHC   n = *k++ + 65599 * n

	n = 0;
	k = key;
	if (len > 0) {
		/*
		 * Number of passes through the unrolled body, i.e. len / 8
		 * rounded up.  It is a size_t, and is computed without
		 * adding to len, so it can neither be truncated nor wrap
		 * for any value of len.
		 */
		loop = (len >> 3) + ((len & (8 - 1)) != 0);

		/* Jump into the loop body to consume the leftover bytes. */
		switch (len & (8 - 1)) {
		case 0:
			do {	/* All fall throughs */
				HASHC;
		case 7:
				HASHC;
		case 6:
				HASHC;
		case 5:
				HASHC;
		case 4:
				HASHC;
		case 3:
				HASHC;
		case 2:
				HASHC;
		case 1:
				HASHC;
			} while (--loop);
		}
	}
	return (n);
}
1459fc74a87SXin LI #endif /* notdef */
1469fc74a87SXin LI 
/*
 * Chris Torek's hash function.
 *
 * Starting from 0, computes h = h * 33 + byte over the len bytes at key in
 * 32-bit unsigned arithmetic.  The loop is unrolled eight times: the switch
 * jumps into the middle of the do/while body so that the first pass consumes
 * the len % 8 leftover bytes (a full 8 when len is a multiple of 8) and every
 * later pass consumes 8 bytes.
 *
 * key: first byte of the key; it is only read, never written.
 * len: number of bytes at key.
 *
 * Returns the accumulated hash; 0 when len is 0.
 */
static u_int32_t
hash4(const void *key, size_t len)
{
	const u_int8_t *k;
	size_t loop;
	u_int32_t h;

/*
 * One step of the hash.  HASH4a multiplies by 31 ((h << 5) - h), HASH4b by
 * 33 ((h << 5) + h); HASH4 selects the variant that is actually used.
 */
#define HASH4a   h = (h << 5) - h + *k++
#define HASH4b   h = (h << 5) + h + *k++
#define HASH4 HASH4b

	h = 0;
	k = key;
	if (len > 0) {
		/*
		 * Number of passes through the unrolled body, i.e. len / 8
		 * rounded up.  It is a size_t, and is computed without
		 * adding to len, so it can neither be truncated nor wrap
		 * for any value of len.
		 */
		loop = (len >> 3) + ((len & (8 - 1)) != 0);

		/* Jump into the loop body to consume the leftover bytes. */
		switch (len & (8 - 1)) {
		case 0:
			do {	/* All fall throughs */
				HASH4;
		case 7:
				HASH4;
		case 6:
				HASH4;
		case 5:
				HASH4;
		case 4:
				HASH4;
		case 3:
				HASH4;
		case 2:
				HASH4;
		case 1:
				HASH4;
			} while (--loop);
		}
	}
	return (h);
}
187