xref: /titanic_52/usr/src/lib/krb5/plugins/kdb/db2/libdb2/hash/hash.c (revision 159d09a20817016f09b3ea28d1bdada4a336bb91)
1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Margo Seltzer.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #if defined(LIBC_SCCS) && !defined(lint)
38 static char sccsid[] = "@(#)hash.c	8.12 (Berkeley) 11/7/95";
39 #endif /* LIBC_SCCS and not lint */
40 
41 #undef _TS_ERRNO_
42 #include <sys/param.h>
43 #include <sys/stat.h>
44 
45 #include <errno.h>
46 #include <fcntl.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <libintl.h>
52 #ifdef DEBUG
53 #include <assert.h>
54 #endif
55 
56 #include "db-int.h"
57 #include "hash.h"
58 #include "page.h"
59 #include "extern.h"
60 
61 static int32_t flush_meta __P((HTAB *));
62 static int32_t hash_access __P((HTAB *, ACTION, const DBT *, DBT *));
63 static int32_t hash_close __P((DB *));
64 static int32_t hash_delete __P((const DB *, const DBT *, u_int32_t));
65 static int32_t hash_fd __P((const DB *));
66 static int32_t hash_get __P((const DB *, const DBT *, DBT *, u_int32_t));
67 static int32_t hash_put __P((const DB *, DBT *, const DBT *, u_int32_t));
68 static int32_t hash_seq __P((const DB *, DBT *, DBT *, u_int32_t));
69 static int32_t hash_sync __P((const DB *, u_int32_t));
70 static int32_t hdestroy __P((HTAB *));
71 static int32_t cursor_get __P((const DB *, CURSOR *, DBT *, DBT *, \
72 	u_int32_t));
73 static int32_t cursor_delete __P((const DB *, CURSOR *, u_int32_t));
74 static HTAB *init_hash __P((HTAB *, const char *, const HASHINFO *));
75 static int32_t init_htab __P((HTAB *, int32_t));
76 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
77 static void swap_header __P((HTAB *));
78 static void swap_header_copy __P((HASHHDR *, HASHHDR *));
79 #endif
80 static u_int32_t hget_header __P((HTAB *, u_int32_t));
81 static void hput_header __P((HTAB *));
82 
83 #define RETURN_ERROR(ERR, LOC)	{ save_errno = ERR; goto LOC; }
84 
85 /* Return values */
86 #define	SUCCESS	 (0)
87 #define	ERROR	(-1)
88 #define	ABNORMAL (1)
89 
90 #ifdef HASH_STATISTICS
91 u_int32_t hash_accesses, hash_collisions, hash_expansions, hash_overflows,
92 	hash_bigpages;
93 #endif
94 
95 /************************** INTERFACE ROUTINES ***************************/
96 /* OPEN/CLOSE */
97 
98 extern DB *
99 __kdb2_hash_open(file, flags, mode, info, dflags)
100 	const char *file;
101 	int32_t flags, mode, dflags;
102 	const HASHINFO *info;	/* Special directives for create */
103 {
104 	struct stat statbuf;
105 	DB *dbp;
106 	DBT mpool_key;
107 	HTAB *hashp;
108 	int32_t bpages, csize, new_table, save_errno, specified_file;
109 
110 	if ((flags & O_ACCMODE) == O_WRONLY) {
111 		errno = EINVAL;
112 		return (NULL);
113 	}
114 	if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB))))
115 		return (NULL);
116 	hashp->fp = -1;
117 
118 	/* set this now, before file goes away... */
119 	specified_file = (file != NULL);
120 	if (!file) {
121 		/*
122 		 * If we are root and thus have access to /var/run, then use
123 		 * it, else tempnam(3) will use /var/tmp.
124 		 */
125 		if (!(file = tempnam("/var/run", NULL))) {
126 			/*
127 			 * Out of memory is not the only possible failure
128 			 * here, but it is probably the most likely one.
129 			 */
130 			errno = ENOMEM;
131 			free(hashp);
132 			return(NULL);
133 		}
134 
135 		/* store the file name so that we can unlink it later */
136 		hashp->fname = file;
137 #ifdef DEBUG
138 		fprintf(stderr, dgettext(TEXT_DOMAIN,
139 		    "Using file name %s.\n"), file);
140 #endif
141 	}
142 	/*
143 	 * Even if the user wants write-only access, we need to be able to
144 	 * read the actual file, so we open it read/write.  But the flags
145 	 * field in the hashp structure needs to be accurate so that
146 	 * we can check accesses.
147 	 */
148 	hashp->flags = flags;
149 	hashp->save_file = specified_file && (hashp->flags & O_RDWR);
150 
151 	new_table = 0;
152 	if (!file || (flags & O_TRUNC) ||
153 	    (stat(file, &statbuf) && (errno == ENOENT))) {
154 		if (errno == ENOENT)
155 			errno = 0;	/* In case someone looks at errno. */
156 		new_table = 1;
157 	}
158 	if (file) {
159 		if ((hashp->fp = open(file, flags|O_BINARY, mode)) == -1)
160 			RETURN_ERROR(errno, error0);
161 		(void)fcntl(hashp->fp, F_SETFD, 1);
162 	}
163 
164 	/* Process arguments to set up hash table header. */
165 	if (new_table) {
166 		if (!(hashp = init_hash(hashp, file, info)))
167 			RETURN_ERROR(errno, error1);
168 	} else {
169 		/* Table already exists */
170 		if (info && info->hash)
171 			hashp->hash = info->hash;
172 		else
173 			hashp->hash = __default_hash;
174 
175 		/* copy metadata from page into header */
176 		if (hget_header(hashp,
177 		    (info && info->bsize ? info->bsize : DEF_BUCKET_SIZE)) !=
178 		    sizeof(HASHHDR))
179 			RETURN_ERROR(EFTYPE, error1);
180 
181 		/* Verify file type, versions and hash function */
182 		if (hashp->hdr.magic != HASHMAGIC)
183 			RETURN_ERROR(EFTYPE, error1);
184 #define	OLDHASHVERSION	1
185 		if (hashp->hdr.version != HASHVERSION &&
186 		    hashp->hdr.version != OLDHASHVERSION)
187 			RETURN_ERROR(EFTYPE, error1);
188 		if (hashp->hash(CHARKEY, sizeof(CHARKEY))
189 		    != hashp->hdr.h_charkey)
190 			RETURN_ERROR(EFTYPE, error1);
191 		/*
192 		 * Figure out how many segments we need.  Max_Bucket is the
193 		 * maximum bucket number, so the number of buckets is
194 		 * max_bucket + 1.
195 		 */
196 
197 		/* Read in bitmaps */
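		/*
		 * Each bitmap page tracks one overflow page per bit, so the
		 * expression below is a ceiling division of
		 * hdr.spares[hdr.ovfl_point] by the number of bits in a
		 * bitmap page (bsize << BYTE_SHIFT).  A worked example,
		 * assuming the default 4K bucket size (bshift 12,
		 * BYTE_SHIFT 3): one bitmap page covers 32768 overflow
		 * pages, so any spares total up to 32768 gives bpages == 1.
		 */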
198 		bpages = (hashp->hdr.spares[hashp->hdr.ovfl_point] +
199 		    (hashp->hdr.bsize << BYTE_SHIFT) - 1) >>
200 		    (hashp->hdr.bshift + BYTE_SHIFT);
201 
202 		hashp->nmaps = bpages;
203 		(void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *));
204 	}
205 
206 	/* start up mpool */
207 	mpool_key.data = (u_int8_t *)file;
208 	mpool_key.size = strlen(file);
209 
210 	if (info && info->cachesize)
211 		csize = info->cachesize / hashp->hdr.bsize;
212 	else
213 		csize = DEF_CACHESIZE / hashp->hdr.bsize;
214 	hashp->mp = mpool_open(&mpool_key, hashp->fp, hashp->hdr.bsize, csize);
215 
216 	if (!hashp->mp)
217 		RETURN_ERROR(errno, error1);
218 	mpool_filter(hashp->mp, __pgin_routine, __pgout_routine, hashp);
219 
220 	/*
221 	 * For a new table, set up the bitmaps.
222 	 */
223 	if (new_table &&
224 	   init_htab(hashp, info && info->nelem ? info->nelem : 1))
225 		goto error2;
226 
227 	/* initialize the cursor queue */
228 	TAILQ_INIT(&hashp->curs_queue);
229 	hashp->seq_cursor = NULL;
230 
231 
232 	/* get a chunk of memory for our split buffer */
233 	hashp->split_buf = (PAGE16 *)malloc(hashp->hdr.bsize);
234 	if (!hashp->split_buf)
235 		goto error2;
236 
237 	hashp->new_file = new_table;
238 
239 	if (!(dbp = (DB *)malloc(sizeof(DB))))
240 		goto error2;
241 
242 	dbp->internal = hashp;
243 	dbp->close = hash_close;
244 	dbp->del = hash_delete;
245 	dbp->fd = hash_fd;
246 	dbp->get = hash_get;
247 	dbp->put = hash_put;
248 	dbp->seq = hash_seq;
249 	dbp->sync = hash_sync;
250 	dbp->type = DB_HASH;
251 
252 #ifdef DEBUG
253 	(void)fprintf(stderr,
254 	    "%s\n%s%lx\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
255 	    "init_htab:",
256 	    "TABLE POINTER   ", (void *)hashp,
257 	    "BUCKET SIZE     ", hashp->hdr.bsize,
258 	    "BUCKET SHIFT    ", hashp->hdr.bshift,
259 	    "FILL FACTOR     ", hashp->hdr.ffactor,
260 	    "MAX BUCKET      ", hashp->hdr.max_bucket,
261 	    "OVFL POINT      ", hashp->hdr.ovfl_point,
262 	    "LAST FREED      ", hashp->hdr.last_freed,
263 	    "HIGH MASK       ", hashp->hdr.high_mask,
264 	    "LOW  MASK       ", hashp->hdr.low_mask,
265 	    "NKEYS           ", hashp->hdr.nkeys);
266 #endif
267 #ifdef HASH_STATISTICS
268 	hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
269 	hash_bigpages = 0;
270 #endif
271 	return (dbp);
272 
273 error2:
274 	save_errno = errno;
275 	hdestroy(hashp);
276 	errno = save_errno;
277 	return (NULL);
278 
279 error1:
280 	if (hashp != NULL)
281 		(void)close(hashp->fp);
282 
283 error0:
284 	if (!specified_file)
285 		free((void*)(hashp->fname)); /* SUNW14resync */
286 	free(hashp);
287 	errno = save_errno;
288 	return (NULL);
289 }
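
/*
 * A minimal usage sketch (illustrative only; the file name, mode and flag
 * values below are assumptions, not taken from the original source):
 *
 *	HASHINFO info = { 0 };		(all defaults)
 *	DB *db = __kdb2_hash_open("foo.db", O_RDWR | O_CREAT, 0600, &info, 0);
 *	if (db != NULL) {
 *		DBT key, val;
 *		key.data = (void *)"princ"; key.size = 5;
 *		val.data = (void *)"entry"; val.size = 5;
 *		if (db->put(db, &key, &val, 0) == 0)
 *			(void)db->sync(db, 0);
 *		(void)db->close(db);
 *	}
 */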
290 
291 static int32_t
292 hash_close(dbp)
293 	DB *dbp;
294 {
295 	HTAB *hashp;
296 	int32_t retval;
297 
298 	if (!dbp)
299 		return (ERROR);
300 
301 	hashp = (HTAB *)dbp->internal;
302 	retval = hdestroy(hashp);
303 	free(dbp);
304 	return (retval);
305 }
306 
307 static int32_t
308 hash_fd(dbp)
309 	const DB *dbp;
310 {
311 	HTAB *hashp;
312 
313 	if (!dbp)
314 		return (ERROR);
315 
316 	hashp = (HTAB *)dbp->internal;
317 	if (hashp->fp == -1) {
318 		errno = ENOENT;
319 		return (-1);
320 	}
321 	return (hashp->fp);
322 }
323 
324 /************************** LOCAL CREATION ROUTINES **********************/
325 static HTAB *
326 init_hash(hashp, file, info)
327 	HTAB *hashp;
328 	const char *file;
329 	const HASHINFO *info;
330 {
331 	struct stat statbuf;
332 	int32_t nelem;
333 
334 	nelem = 1;
335 	hashp->hdr.nkeys = 0;
336 	hashp->hdr.lorder = DB_BYTE_ORDER;
337 	hashp->hdr.bsize = DEF_BUCKET_SIZE;
338 	hashp->hdr.bshift = DEF_BUCKET_SHIFT;
339 	hashp->hdr.ffactor = DEF_FFACTOR;
340 	hashp->hash = __default_hash;
341 	memset(hashp->hdr.spares, 0, sizeof(hashp->hdr.spares));
342 	memset(hashp->hdr.bitmaps, 0, sizeof(hashp->hdr.bitmaps));
343 
344 	/* Fix bucket size to be optimal for file system */
345 	if (file != NULL) {
346 		if (stat(file, &statbuf))
347 			return (NULL);
348 		hashp->hdr.bsize = statbuf.st_blksize;
349 		hashp->hdr.bshift = __log2(hashp->hdr.bsize);
350 	}
351 	if (info) {
352 		if (info->bsize) {
353 			/* Round pagesize up to power of 2 */
354 			hashp->hdr.bshift = __log2(info->bsize);
355 			hashp->hdr.bsize = 1 << hashp->hdr.bshift;
356 			if (hashp->hdr.bsize > MAX_BSIZE) {
357 				errno = EINVAL;
358 				return (NULL);
359 			}
360 		}
361 		if (info->ffactor)
362 			hashp->hdr.ffactor = info->ffactor;
363 		if (info->hash)
364 			hashp->hash = info->hash;
365 		if (info->lorder) {
366 			if ((info->lorder != DB_BIG_ENDIAN) &&
367 			    (info->lorder != DB_LITTLE_ENDIAN)) {
368 				errno = EINVAL;
369 				return (NULL);
370 			}
371 			hashp->hdr.lorder = info->lorder;
372 		}
373 	}
374 	return (hashp);
375 }
376 
377 /*
378  * Returns 0 on success, -1 on error.
379  */
380 static int32_t
381 init_htab(hashp, nelem)
382 	HTAB *hashp;
383 	int32_t nelem;
384 {
385 	int32_t l2, nbuckets;
386 
387 	/*
388 	 * Divide number of elements by the fill factor and determine a
389 	 * desired number of buckets.  Allocate space for the next greater
390 	 * power of two number of buckets.
391 	 */
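	/*
	 * A worked example (the fill factor here is assumed for
	 * illustration, not necessarily DEF_FFACTOR): with nelem = 100 and
	 * ffactor = 8, the line below gives (99 / 8) + 1 = 13 desired
	 * buckets; l2 = __log2(13) = 4, so 16 buckets are allocated.
	 */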
392 	nelem = (nelem - 1) / hashp->hdr.ffactor + 1;
393 
394 	l2 = __log2(MAX(nelem, 2));
395 	nbuckets = 1 << l2;
396 
397 	hashp->hdr.spares[l2] = l2 + 1;
398 	hashp->hdr.spares[l2 + 1] = l2 + 1;
399 	hashp->hdr.ovfl_point = l2;
400 	hashp->hdr.last_freed = 2;
401 
402 	hashp->hdr.max_bucket = hashp->hdr.low_mask = nbuckets - 1;
403 	hashp->hdr.high_mask = (nbuckets << 1) - 1;
404 
405 	/*
406 	 * The number of header pages is the size of the header divided by
407 	 * the amount of freespace on header pages (the page size - the
408 	 * size of 1 integer where the length of the header info on that
409 	 * page is stored) plus another page if it didn't divide evenly.
410 	 */
411 	hashp->hdr.hdrpages =
412 	    (sizeof(HASHHDR) / (hashp->hdr.bsize - HEADER_OVERHEAD)) +
413 	    (((sizeof(HASHHDR) % (hashp->hdr.bsize - HEADER_OVERHEAD)) == 0)
414 	    ? 0 : 1);
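	/*
	 * For example, if sizeof(HASHHDR) were 512 bytes and a page held
	 * 4092 bytes of header space, the expression above would give
	 * 0 + 1 = 1 header page; a 5000-byte header would need 1 + 1 = 2.
	 */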
415 
416 	/* Create pages for these buckets */
417 	/*
418 	for (i = 0; i <= hashp->hdr.max_bucket; i++) {
419 		if (__new_page(hashp, (u_int32_t)i, A_BUCKET) != 0)
420 			return (-1);
421 	}
422 	*/
423 
424 	/* First bitmap page is at: splitpoint l2 page offset 1 */
425 	if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0))
426 		return (-1);
427 
428 	return (0);
429 }
430 
431 /*
432  * Functions to get/put hash header.  We access the file directly.
433  */
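/*
 * (Plain read(2)/write(2) is used rather than the memory pool because, on
 * open, the header must be read before mpool_open() is called: the page
 * size handed to mpool_open() comes from the header itself.)
 */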
434 static u_int32_t
435 hget_header(hashp, page_size)
436 	HTAB *hashp;
437 	u_int32_t page_size;
438 {
439 	u_int32_t num_copied, i;
440 	u_int8_t *hdr_dest;
441 
442 	num_copied = 0;
443 	i = 0;
444 
445 	hdr_dest = (u_int8_t *)&hashp->hdr;
446 
447 	/*
448 	 * XXX
449 	 * This should not be printing to stderr on a "normal" error case.
450 	 */
451 	lseek(hashp->fp, 0, SEEK_SET);
452 	num_copied = read(hashp->fp, hdr_dest, sizeof(HASHHDR));
453 	if (num_copied != sizeof(HASHHDR)) {
454 		/* Solaris Kerberos: Make sure to print a newline */
455 		fprintf(stderr, dgettext(TEXT_DOMAIN,
456 			"hash: could not retrieve header\n"));
457 		return (0);
458 	}
459 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
460 	swap_header(hashp);
461 #endif
462 	return (num_copied);
463 }
464 
465 static void
466 hput_header(hashp)
467 	HTAB *hashp;
468 {
469 	HASHHDR *whdrp;
470 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
471 	HASHHDR whdr;
472 #endif
473 	u_int32_t num_copied, i;
474 
475 	num_copied = i = 0;
476 
477 	whdrp = &hashp->hdr;
478 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
479 	whdrp = &whdr;
480 	swap_header_copy(&hashp->hdr, whdrp);
481 #endif
482 
483 	lseek(hashp->fp, 0, SEEK_SET);
484 	num_copied = write(hashp->fp, whdrp, sizeof(HASHHDR));
485 	if (num_copied != sizeof(HASHHDR))
486 		/* Solaris Kerberos: Make sure to print a newline */
487 		(void)fprintf(stderr, dgettext(TEXT_DOMAIN,
488 			"hash: could not write hash header\n"));
489 	return;
490 }
491 
492 /********************** DESTROY/CLOSE ROUTINES ************************/
493 
494 /*
495  * Flushes any changes to the file if necessary and destroys the hashp
496  * structure, freeing all allocated space.
497  */
498 static int32_t
499 hdestroy(hashp)
500 	HTAB *hashp;
501 {
502 	int32_t save_errno;
503 
504 	save_errno = 0;
505 
506 #ifdef HASH_STATISTICS
507 	{ int i;
508 	(void)fprintf(stderr, dgettext(TEXT_DOMAIN,
509 			"hdestroy: accesses %ld collisions %ld\n"),
510 	    hash_accesses, hash_collisions);
511 	(void)fprintf(stderr,
512 	   dgettext(TEXT_DOMAIN,
513 			 "hdestroy: expansions %ld\n"), hash_expansions);
514 	(void)fprintf(stderr,
515 	    dgettext(TEXT_DOMAIN,
516 			"hdestroy: overflows %ld\n"), hash_overflows);
517 	(void)fprintf(stderr,
518 	    dgettext(TEXT_DOMAIN,
519 			"hdestroy: big key/data pages %ld\n"), hash_bigpages);
520 	(void)fprintf(stderr,
521 	    "keys %ld maxp %d\n", hashp->hdr.nkeys, hashp->hdr.max_bucket);
522 
523 	for (i = 0; i < NCACHED; i++)
524 		(void)fprintf(stderr,
525 		    "spares[%d] = %d\n", i, hashp->hdr.spares[i]);
526 	}
527 #endif
528 
529 	if (flush_meta(hashp) && !save_errno)
530 		save_errno = errno;
531 
532 	/* Free the split page */
533 	if (hashp->split_buf)
534 		free(hashp->split_buf);
535 
536 	/* Free the big key and big data returns */
537 	if (hashp->bigkey_buf)
538 		free(hashp->bigkey_buf);
539 	if (hashp->bigdata_buf)
540 		free(hashp->bigdata_buf);
541 
542 	/* XXX This should really iterate over the cursor queue, but
543 	   it's not clear how to do that, and the only cursor a hash
544 	   table ever creates is the one used by hash_seq().  Passing
545 	   NULL as the first arg is also a kludge, but I know that
546 	   it's never used, so I do it.  The intent is to plug the
547 	   memory leak.  Correctness can come later. */
548 
549 	if (hashp->seq_cursor)
550 		hashp->seq_cursor->delete(NULL, hashp->seq_cursor, 0);
551 
552 	/* shut down mpool */
553 	mpool_sync(hashp->mp);
554 	mpool_close(hashp->mp);
555 
556 	if (hashp->fp != -1)
557 		(void)close(hashp->fp);
558 
559 	/*
560 	 * *** This may cause problems if hashp->fname is set in any case
561 	 * other than the case that we are generating a temporary file name.
562 	 * Note that the new version of mpool should support temporary
563 	 * files within mpool itself.
564 	 */
565 	if (hashp->fname && !hashp->save_file) {
566 #ifdef DEBUG
567 		fprintf(stderr, dgettext(TEXT_DOMAIN,
568 			"Unlinking file %s.\n"), hashp->fname);
569 #endif
570 		/* we need to chmod the file to allow it to be deleted... */
571 		chmod(hashp->fname, 0700);
572 		unlink(hashp->fname);
573 		/* destroy the temporary name */
574 		free((void *)(hashp->fname)); /* SUNW14resync */
575 	}
576 	free(hashp);
577 
578 	if (save_errno) {
579 		errno = save_errno;
580 		return (ERROR);
581 	}
582 	return (SUCCESS);
583 }
584 
585 /*
586  * Write modified pages to disk
587  *
588  * Returns:
589  *	 0 == OK
590  *	-1 ERROR
591  */
592 static int32_t
593 hash_sync(dbp, flags)
594 	const DB *dbp;
595 	u_int32_t flags;
596 {
597 	HTAB *hashp;
598 
599 	hashp = (HTAB *)dbp->internal;
600 
601 	/*
602 	 * XXX
603 	 * Check success/failure conditions.
604 	 */
605 	return (flush_meta(hashp) || mpool_sync(hashp->mp));
606 }
607 
608 /*
609  * Returns:
610  *	 0 == OK
611  *	-1 indicates that errno should be set
612  */
613 static int32_t
614 flush_meta(hashp)
615 	HTAB *hashp;
616 {
617 	int32_t i;
618 
619 	if (!hashp->save_file)
620 		return (0);
621 	hashp->hdr.magic = HASHMAGIC;
622 	hashp->hdr.version = HASHVERSION;
623 	hashp->hdr.h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
624 
625 	/* write out metadata */
626 	hput_header(hashp);
627 
628 	for (i = 0; i < NCACHED; i++)
629 		if (hashp->mapp[i]) {
630 			if (__put_page(hashp,
631 			    (PAGE16 *)hashp->mapp[i], A_BITMAP, 1))
632 				return (-1);
633 			hashp->mapp[i] = NULL;
634 		}
635 	return (0);
636 }
637 
638 /****************************** SEARCH ROUTINES *****************************/
639 /*
640  * All the access routines share the same return convention.
641  *
642  * Returns:
643  *	 0 on SUCCESS
644  *	 1 to indicate an external ERROR (i.e. key not found, etc)
645  *	-1 to indicate an internal ERROR (i.e. out of memory, etc)
646  */
647 
648 /* *** make sure this is true! */
649 
650 static int32_t
651 hash_get(dbp, key, data, flag)
652 	const DB *dbp;
653 	const DBT *key;
654 	DBT *data;
655 	u_int32_t flag;
656 {
657 	HTAB *hashp;
658 
659 	hashp = (HTAB *)dbp->internal;
660 	if (flag) {
661 		hashp->local_errno = errno = EINVAL;
662 		return (ERROR);
663 	}
664 	return (hash_access(hashp, HASH_GET, key, data));
665 }
666 
667 static int32_t
668 hash_put(dbp, key, data, flag)
669 	const DB *dbp;
670 	DBT *key;
671 	const DBT *data;
672 	u_int32_t flag;
673 {
674 	HTAB *hashp;
675 
676 	hashp = (HTAB *)dbp->internal;
677 	if (flag && flag != R_NOOVERWRITE) {
678 		hashp->local_errno = errno = EINVAL;
679 		return (ERROR);
680 	}
681 	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
682 		hashp->local_errno = errno = EPERM;
683 		return (ERROR);
684 	}
685 	return (hash_access(hashp, flag == R_NOOVERWRITE ?
686 		HASH_PUTNEW : HASH_PUT, key, (DBT *)data));
687 }
688 
689 static int32_t
690 hash_delete(dbp, key, flag)
691 	const DB *dbp;
692 	const DBT *key;
693 	u_int32_t flag;		/* Ignored */
694 {
695 	HTAB *hashp;
696 
697 	hashp = (HTAB *)dbp->internal;
698 	if (flag) {
699 		hashp->local_errno = errno = EINVAL;
700 		return (ERROR);
701 	}
702 	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
703 		hashp->local_errno = errno = EPERM;
704 		return (ERROR);
705 	}
706 
707 	return (hash_access(hashp, HASH_DELETE, key, NULL));
708 }
709 
710 /*
711  * Assume that hashp has been set in wrapper routine.
712  */
713 static int32_t
714 hash_access(hashp, action, key, val)
715 	HTAB *hashp;
716 	ACTION action;
717 	const DBT *key;
718 	DBT *val;
719 {
720 	DBT page_key, page_val;
721 	CURSOR cursor;
722 	ITEM_INFO item_info;
723 	u_int32_t bucket;
724 	u_int32_t num_items;
725 
726 #ifdef HASH_STATISTICS
727 	hash_accesses++;
728 #endif
729 
730 	num_items = 0;
731 
732 	/*
733 	 * Set up item_info so that we're looking for space to add an item
734 	 * as we cycle through the pages looking for the key.
735 	 */
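	/*
	 * (A pair that is too big for a bucket page lives on separate big
	 * pages, so only the fixed per-entry overhead, PAIR_OVERHEAD, has
	 * to fit on the page we are scanning; a normal pair needs room for
	 * the key and data themselves.)
	 */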
736 	if (action == HASH_PUT || action == HASH_PUTNEW) {
737 		if (ISBIG(key->size + val->size, hashp))
738 			item_info.seek_size = PAIR_OVERHEAD;
739 		else
740 			item_info.seek_size = key->size + val->size;
741 	} else
742 		item_info.seek_size = 0;
743 	item_info.seek_found_page = 0;
744 
745 	bucket = __call_hash(hashp, (int8_t *)key->data, key->size);
746 
747 	cursor.pagep = NULL;
748 	__get_item_reset(hashp, &cursor);
749 
750 	cursor.bucket = bucket;
751 	while (1) {
752 		__get_item_next(hashp, &cursor, &page_key, &page_val, &item_info);
753 		if (item_info.status == ITEM_ERROR)
754 			return (ABNORMAL);
755 		if (item_info.status == ITEM_NO_MORE)
756 			break;
757 		num_items++;
758 		if (item_info.key_off == BIGPAIR) {
759 			/*
760 			 * !!!
761 			 * 0 is a valid index.
762 			 */
763 			if (__find_bigpair(hashp, &cursor, (int8_t *)key->data,
764 			    key->size) > 0)
765 				goto found;
766 		} else if (key->size == page_key.size &&
767 		    !memcmp(key->data, page_key.data, key->size))
768 			goto found;
769 	}
770 #ifdef HASH_STATISTICS
771 	hash_collisions++;
772 #endif
773 	__get_item_done(hashp, &cursor);
774 
775 	/*
776 	 * At this point, item_info will list either the last page in
777 	 * the chain, or the last page in the chain plus a pgno for where
778 	 * to find the first page in the chain with space for the
779 	 * item we wish to add.
780 	 */
781 
782 	/* Not found */
783 	switch (action) {
784 	case HASH_PUT:
785 	case HASH_PUTNEW:
786 		if (__addel(hashp, &item_info, key, val, num_items, 0))
787 			return (ERROR);
788 		break;
789 	case HASH_GET:
790 	case HASH_DELETE:
791 	default:
792 		return (ABNORMAL);
793 	}
794 
795 	if (item_info.caused_expand)
796 		__expand_table(hashp);
797 	return (SUCCESS);
798 
799 found:	__get_item_done(hashp, &cursor);
800 
801 	switch (action) {
802 	case HASH_PUTNEW:
803 		/* mpool_put(hashp->mp, pagep, 0); */
804 		return (ABNORMAL);
805 	case HASH_GET:
806 		if (item_info.key_off == BIGPAIR) {
807 			if (__big_return(hashp, &item_info, val, 0))
808 				return (ERROR);
809 		} else {
810 			val->data = page_val.data;
811 			val->size = page_val.size;
812 		}
813 		/* *** data may not be available! */
814 		break;
815 	case HASH_PUT:
816 		if (__delpair(hashp, &cursor, &item_info) ||
817 		    __addel(hashp, &item_info, key, val, UNKNOWN, 0))
818 			return (ERROR);
819 		__get_item_done(hashp, &cursor);
820 		if (item_info.caused_expand)
821 			__expand_table(hashp);
822 		break;
823 	case HASH_DELETE:
824 		if (__delpair(hashp, &cursor, &item_info))
825 			return (ERROR);
826 		break;
827 	default:
828 		abort();
829 	}
830 	return (SUCCESS);
831 }
832 
833 /* ****************** CURSORS ********************************** */
834 CURSOR *
835 __cursor_creat(dbp)
836 	const DB *dbp;
837 {
838 	CURSOR *new_curs;
839 	HTAB *hashp;
840 
841 	new_curs = (CURSOR *)malloc(sizeof(struct cursor_t));
842 	if (!new_curs)
843 		return NULL;
844 	new_curs->internal =
845 	    (struct item_info *)malloc(sizeof(struct item_info));
846 	if (!new_curs->internal) {
847 		free(new_curs);
848 		return NULL;
849 	}
850 	new_curs->get = cursor_get;
851 	new_curs->delete = cursor_delete;
852 
853 	new_curs->bucket = 0;
854 	new_curs->pgno = INVALID_PGNO;
855 	new_curs->ndx = 0;
856 	new_curs->pgndx = 0;
857 	new_curs->pagep = NULL;
858 
859 	/* place onto queue of cursors */
860 	hashp = (HTAB *)dbp->internal;
861 	TAILQ_INSERT_TAIL(&hashp->curs_queue, new_curs, queue);
862 
863 	return new_curs;
864 }
865 
866 static int32_t
867 cursor_get(dbp, cursorp, key, val, flags)
868 	const DB *dbp;
869 	CURSOR *cursorp;
870 	DBT *key, *val;
871 	u_int32_t flags;
872 {
873 	HTAB *hashp;
874 	ITEM_INFO item_info;
875 
876 	hashp = (HTAB *)dbp->internal;
877 
878 	if (flags && flags != R_FIRST && flags != R_NEXT) {
879 		hashp->local_errno = errno = EINVAL;
880 		return (ERROR);
881 	}
882 #ifdef HASH_STATISTICS
883 	hash_accesses++;
884 #endif
885 
886 	item_info.seek_size = 0;
887 
888 	if (flags == R_FIRST)
889 		__get_item_first(hashp, cursorp, key, val, &item_info);
890 	else
891 		__get_item_next(hashp, cursorp, key, val, &item_info);
892 
893 	/*
894 	 * This needs to be changed around.  As is, get_item_next advances
895 	 * the pointers on the page but this function actually advances
896 	 * bucket pointers.  This works, since the only other place we
897 	 * use get_item_next is in hash_access which only deals with one
898 	 * bucket at a time.  However, there is the problem that certain other
899 	 * functions (such as find_bigpair and delpair) depend on the
900 	 * pgndx member of the cursor.  Right now, they are using pgndx - 1
901 	 * since indices refer to the __next__ item that is to be fetched
902 	 * from the page.  This is ugly, as you may have noticed, whoever
903 	 * you are.  The best solution would be to depend on item_infos to
904 	 * deal with _current_ information, and have the cursors only
905 	 * deal with _next_ information.  In that scheme, get_item_next
906 	 * would also advance buckets.  Version 3...
907 	 */
908 
909 
910 	/*
911 	 * Must always enter this loop to do error handling and
912 	 * check for big key/data pair.
913 	 */
914 	while (1) {
915 		if (item_info.status == ITEM_OK) {
916 			if (item_info.key_off == BIGPAIR &&
917 			    __big_keydata(hashp, cursorp->pagep, key, val,
918 			    item_info.pgndx))
919 				return (ABNORMAL);
920 
921 			break;
922 		} else if (item_info.status != ITEM_NO_MORE)
923 			return (ABNORMAL);
924 
925 		__put_page(hashp, cursorp->pagep, A_RAW, 0);
926 		cursorp->ndx = cursorp->pgndx = 0;
927 		cursorp->bucket++;
928 		cursorp->pgno = INVALID_PGNO;
929 		cursorp->pagep = NULL;
930 		if (cursorp->bucket > hashp->hdr.max_bucket)
931 			return (ABNORMAL);
932 		__get_item_next(hashp, cursorp, key, val, &item_info);
933 	}
934 
935 	__get_item_done(hashp, cursorp);
936 	return (0);
937 }
938 
939 static int32_t
940 cursor_delete(dbp, cursor, flags)
941 	const DB *dbp;
942 	CURSOR *cursor;
943 	u_int32_t flags;
944 {
945 	/* XXX this is empirically determined, so it might not be completely
946 	   correct, but it seems to work.  At the very least it fixes
947 	   a memory leak */
948 
949 	free(cursor->internal);
950 	free(cursor);
951 
952 	return (0);
953 }
954 
955 static int32_t
956 hash_seq(dbp, key, val, flag)
957 	const DB *dbp;
958 	DBT *key, *val;
959 	u_int32_t flag;
960 {
961 	HTAB *hashp;
962 
963 	/*
964 	 * Seq just uses the default cursor to go sequencing through the
965 	 * database.  Note that the default cursor is the first in the list.
966 	 */
967 
968 	hashp = (HTAB *)dbp->internal;
969 	if (!hashp->seq_cursor)
970 		hashp->seq_cursor = __cursor_creat(dbp);
971 
972 	return (hashp->seq_cursor->get(dbp, hashp->seq_cursor, key, val, flag));
973 }
974 
975 /********************************* UTILITIES ************************/
976 
977 /*
978  * Returns:
979  *	 0 ==> OK
980  *	-1 ==> Error
981  */
982 int32_t
983 __expand_table(hashp)
984 	HTAB *hashp;
985 {
986 	u_int32_t old_bucket, new_bucket;
987 	int32_t spare_ndx;
988 
989 #ifdef HASH_STATISTICS
990 	hash_expansions++;
991 #endif
992 	new_bucket = ++hashp->hdr.max_bucket;
993 	old_bucket = (hashp->hdr.max_bucket & hashp->hdr.low_mask);
994 
995 	/* Get a page for this new bucket */
996 	if (__new_page(hashp, new_bucket, A_BUCKET) != 0)
997 		return (-1);
998 
999 	/*
1000 	 * If the split point is increasing (hdr.max_bucket's log base 2
1001 	 * increases), we need to copy the current contents of the spare
1002 	 * split bucket to the next bucket.
1003 	 */
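	/*
	 * Illustration with small numbers: growing max_bucket from 3 to 4
	 * makes __log2(4 + 1) == 3; if ovfl_point was previously 2, the
	 * spare count is carried forward to spares[3] and ovfl_point
	 * becomes 3.  The splits creating buckets 5, 6 and 7 then leave
	 * ovfl_point unchanged.
	 */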
1004 	spare_ndx = __log2(hashp->hdr.max_bucket + 1);
1005 	if (spare_ndx > hashp->hdr.ovfl_point) {
1006 		hashp->hdr.spares[spare_ndx] = hashp->hdr.spares[hashp->hdr.ovfl_point];
1007 		hashp->hdr.ovfl_point = spare_ndx;
1008 	}
1009 	if (new_bucket > hashp->hdr.high_mask) {
1010 		/* Starting a new doubling */
1011 		hashp->hdr.low_mask = hashp->hdr.high_mask;
1012 		hashp->hdr.high_mask = new_bucket | hashp->hdr.low_mask;
1013 	}
1014 	if (BUCKET_TO_PAGE(new_bucket) > MAX_PAGES(hashp)) {
1015 		fprintf(stderr, dgettext(TEXT_DOMAIN,
1016 		    "hash: Cannot allocate new bucket.  Pages exhausted.\n"));
1017 		return (-1);
1018 	}
1019 	/* Relocate records to the new bucket */
1020 	return (__split_page(hashp, old_bucket, new_bucket));
1021 }
1022 
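/*
 * Standard linear-hashing bucket selection: mask the hash value with
 * high_mask (the mask for the doubling in progress); if that names a
 * bucket beyond max_bucket, that bucket has not been created by a split
 * yet, so re-mask with low_mask.  E.g., with max_bucket = 5, low_mask = 3
 * and high_mask = 7, a hash of 6 gives 6 & 7 = 6 > 5 and is remapped to
 * 6 & 3 = 2.
 */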
1023 u_int32_t
1024 __call_hash(hashp, k, len)
1025 	HTAB *hashp;
1026 	int8_t *k;
1027 	int32_t len;
1028 {
1029 	int32_t n, bucket;
1030 
1031 	n = hashp->hash(k, len);
1032 	bucket = n & hashp->hdr.high_mask;
1033 	if (bucket > hashp->hdr.max_bucket)
1034 		bucket = bucket & hashp->hdr.low_mask;
1035 	return (bucket);
1036 }
1037 
1038 #if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
1039 /*
1040  * Hashp->hdr needs to be byteswapped.
1041  */
1042 static void
1043 swap_header_copy(srcp, destp)
1044 	HASHHDR *srcp, *destp;
1045 {
1046 	int32_t i;
1047 
1048 	P_32_COPY(srcp->magic, destp->magic);
1049 	P_32_COPY(srcp->version, destp->version);
1050 	P_32_COPY(srcp->lorder, destp->lorder);
1051 	P_32_COPY(srcp->bsize, destp->bsize);
1052 	P_32_COPY(srcp->bshift, destp->bshift);
1053 	P_32_COPY(srcp->ovfl_point, destp->ovfl_point);
1054 	P_32_COPY(srcp->last_freed, destp->last_freed);
1055 	P_32_COPY(srcp->max_bucket, destp->max_bucket);
1056 	P_32_COPY(srcp->high_mask, destp->high_mask);
1057 	P_32_COPY(srcp->low_mask, destp->low_mask);
1058 	P_32_COPY(srcp->ffactor, destp->ffactor);
1059 	P_32_COPY(srcp->nkeys, destp->nkeys);
1060 	P_32_COPY(srcp->hdrpages, destp->hdrpages);
1061 	P_32_COPY(srcp->h_charkey, destp->h_charkey);
1062 	for (i = 0; i < NCACHED; i++) {
1063 		P_32_COPY(srcp->spares[i], destp->spares[i]);
1064 		P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]);
1065 	}
1066 }
1067 
1068 static void
1069 swap_header(hashp)
1070 	HTAB *hashp;
1071 {
1072 	HASHHDR *hdrp;
1073 	int32_t i;
1074 
1075 	hdrp = &hashp->hdr;
1076 
1077 	M_32_SWAP(hdrp->magic);
1078 	M_32_SWAP(hdrp->version);
1079 	M_32_SWAP(hdrp->lorder);
1080 	M_32_SWAP(hdrp->bsize);
1081 	M_32_SWAP(hdrp->bshift);
1082 	M_32_SWAP(hdrp->ovfl_point);
1083 	M_32_SWAP(hdrp->last_freed);
1084 	M_32_SWAP(hdrp->max_bucket);
1085 	M_32_SWAP(hdrp->high_mask);
1086 	M_32_SWAP(hdrp->low_mask);
1087 	M_32_SWAP(hdrp->ffactor);
1088 	M_32_SWAP(hdrp->nkeys);
1089 	M_32_SWAP(hdrp->hdrpages);
1090 	M_32_SWAP(hdrp->h_charkey);
1091 	for (i = 0; i < NCACHED; i++) {
1092 		M_32_SWAP(hdrp->spares[i]);
1093 		M_16_SWAP(hdrp->bitmaps[i]);
1094 	}
1095 }
1096 #endif /* DB_BYTE_ORDER == DB_LITTLE_ENDIAN */
1097