/*-
 * Copyright (c) 1990, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Margo Seltzer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)hash.c	8.12 (Berkeley) 11/7/95";
#endif /* LIBC_SCCS and not lint */

#include <sys/param.h>
#include <sys/stat.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef DEBUG
#include <assert.h>
#endif

#include "db-int.h"
#include "hash.h"
#include "page.h"
#include "extern.h"

static int32_t flush_meta __P((HTAB *));
static int32_t hash_access __P((HTAB *, ACTION, const DBT *, DBT *));
static int32_t hash_close __P((DB *));
static int32_t hash_delete __P((const DB *, const DBT *, u_int32_t));
static int32_t hash_fd __P((const DB *));
static int32_t hash_get __P((const DB *, const DBT *, DBT *, u_int32_t));
static int32_t hash_put __P((const DB *, DBT *, const DBT *, u_int32_t));
static int32_t hash_seq __P((const DB *, DBT *, DBT *, u_int32_t));
static int32_t hash_sync __P((const DB *, u_int32_t));
static int32_t hdestroy __P((HTAB *));
static int32_t cursor_get __P((const DB *, CURSOR *, DBT *, DBT *, \
	u_int32_t));
static int32_t cursor_delete __P((const DB *, CURSOR *, u_int32_t));
static HTAB *init_hash __P((HTAB *, const char *, const HASHINFO *));
static int32_t init_htab __P((HTAB *, int32_t));
#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
static void swap_header __P((HTAB *));
static void swap_header_copy __P((HASHHDR *, HASHHDR *));
#endif
static u_int32_t hget_header __P((HTAB *, u_int32_t));
static void hput_header __P((HTAB *));

#define	RETURN_ERROR(ERR, LOC)	{ save_errno = ERR; goto LOC; }

/* Return values */
#define	SUCCESS	 (0)
#define	ERROR	(-1)
#define	ABNORMAL (1)

#ifdef HASH_STATISTICS
u_int32_t hash_accesses, hash_collisions, hash_expansions, hash_overflows,
	hash_bigpages;
#endif

/************************** INTERFACE ROUTINES ***************************/
/* OPEN/CLOSE */

extern DB *
__kdb2_hash_open(file, flags, mode, info, dflags)
	const char *file;
	int flags, mode, dflags;
	const HASHINFO *info;	/* Special directives for create */
{
	struct stat statbuf;
	DB *dbp;
	DBT mpool_key;
	HTAB *hashp;
	int32_t bpages, csize, new_table, save_errno;

	if (!file || (flags & O_ACCMODE) == O_WRONLY) {
		errno = EINVAL;
		return (NULL);
	}
	if (!(hashp = (HTAB *)calloc(1, sizeof(HTAB))))
		return (NULL);
	hashp->fp = -1;
	/*
	 * Even if user wants write only, we need to be able to read
	 * the actual file, so we need to open it read/write.  But, the
	 * field in the hashp structure needs to be accurate so that
	 * we can check accesses.
	 */
	hashp->flags = flags;
	hashp->save_file = hashp->flags & O_RDWR;
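	/*
	 * Note: save_file is nonzero only for O_RDWR opens; flush_meta()
	 * skips writing metadata back when it is zero, and hdestroy()
	 * consults it to decide whether the file should be unlinked on
	 * close.
	 */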

	new_table = 0;
	if (!file || (flags & O_TRUNC) ||
	    (stat(file, &statbuf) && (errno == ENOENT))) {
		if (errno == ENOENT)
			errno = 0;	/* In case someone looks at errno. */
		new_table = 1;
	}
	if (file) {
		if ((hashp->fp = open(file, flags|O_BINARY, mode)) == -1)
			RETURN_ERROR(errno, error0);
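		/* F_SETFD with an argument of 1 sets the close-on-exec flag. */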
		(void)fcntl(hashp->fp, F_SETFD, 1);
	}

	/* Process arguments to set up hash table header. */
	if (new_table) {
		if (!(hashp = init_hash(hashp, file, info)))
			RETURN_ERROR(errno, error1);
	} else {
		/* Table already exists */
		if (info && info->hash)
			hashp->hash = info->hash;
		else
			hashp->hash = __default_hash;

		/* copy metadata from page into header */
		if (hget_header(hashp,
		    (info && info->bsize ? info->bsize : DEF_BUCKET_SIZE)) !=
		    sizeof(HASHHDR))
			RETURN_ERROR(EFTYPE, error1);

		/* Verify file type, versions and hash function */
		if (hashp->hdr.magic != HASHMAGIC)
			RETURN_ERROR(EFTYPE, error1);
#define	OLDHASHVERSION	1
		if (hashp->hdr.version != HASHVERSION &&
		    hashp->hdr.version != OLDHASHVERSION)
			RETURN_ERROR(EFTYPE, error1);
		if (hashp->hash(CHARKEY, sizeof(CHARKEY))
		    != hashp->hdr.h_charkey)
			RETURN_ERROR(EFTYPE, error1);
		/*
		 * Figure out how many segments we need.  Max_Bucket is the
		 * maximum bucket number, so the number of buckets is
		 * max_bucket + 1.
		 */

		/* Read in bitmaps */
		bpages = (hashp->hdr.spares[hashp->hdr.ovfl_point] +
		    (hashp->hdr.bsize << BYTE_SHIFT) - 1) >>
		    (hashp->hdr.bshift + BYTE_SHIFT);
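		/*
		 * The above is a ceiling division: each bitmap page holds
		 * bsize << BYTE_SHIFT bits.  Example (assuming the default
		 * 4096-byte bucket): 32768 bits per bitmap page, so a
		 * spares[ovfl_point] of 40000 rounds up to bpages == 2.
		 */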

		hashp->nmaps = bpages;
		(void)memset(&hashp->mapp[0], 0, bpages * sizeof(u_int32_t *));
	}

	/* start up mpool */
	mpool_key.data = (u_int8_t *)file;
	mpool_key.size = strlen(file);

	if (info && info->cachesize)
		csize = info->cachesize / hashp->hdr.bsize;
	else
		csize = DEF_CACHESIZE / hashp->hdr.bsize;
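	/*
	 * The cache size handed to mpool_open() is a page count, hence
	 * the division of the byte limit by the bucket (page) size above.
	 */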
	hashp->mp = mpool_open(&mpool_key, hashp->fp, hashp->hdr.bsize, csize);

	if (!hashp->mp)
		RETURN_ERROR(errno, error1);
	mpool_filter(hashp->mp, __pgin_routine, __pgout_routine, hashp);

	/*
	 * For a new table, set up the bitmaps.
	 */
	if (new_table &&
	    init_htab(hashp, info && info->nelem ? info->nelem : 1))
		goto error2;

	/* initialize the cursor queue */
	TAILQ_INIT(&hashp->curs_queue);
	hashp->seq_cursor = NULL;
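	/*
	 * seq_cursor is created lazily: hash_seq() allocates it with
	 * __cursor_creat() on the first sequential request.
	 */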

	/* get a chunk of memory for our split buffer */
	hashp->split_buf = (PAGE16 *)malloc(hashp->hdr.bsize);
	if (!hashp->split_buf)
		goto error2;

	hashp->new_file = new_table;

	if (!(dbp = (DB *)malloc(sizeof(DB))))
		goto error2;

	dbp->internal = hashp;
	dbp->close = hash_close;
	dbp->del = hash_delete;
	dbp->fd = hash_fd;
	dbp->get = hash_get;
	dbp->put = hash_put;
	dbp->seq = hash_seq;
	dbp->sync = hash_sync;
	dbp->type = DB_HASH;

#ifdef DEBUG
	(void)fprintf(stderr,
	    "%s\n%s%p\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
	    "init_htab:",
	    "TABLE POINTER ", (void *)hashp,
	    "BUCKET SIZE ", hashp->hdr.bsize,
	    "BUCKET SHIFT ", hashp->hdr.bshift,
	    "FILL FACTOR ", hashp->hdr.ffactor,
	    "MAX BUCKET ", hashp->hdr.max_bucket,
	    "OVFL POINT ", hashp->hdr.ovfl_point,
	    "LAST FREED ", hashp->hdr.last_freed,
	    "HIGH MASK ", hashp->hdr.high_mask,
	    "LOW MASK ", hashp->hdr.low_mask,
	    "NKEYS ", hashp->hdr.nkeys);
#endif
#ifdef HASH_STATISTICS
	hash_overflows = hash_accesses = hash_collisions = hash_expansions = 0;
	hash_bigpages = 0;
#endif
	return (dbp);

error2:
	save_errno = errno;
	hdestroy(hashp);
	errno = save_errno;
	return (NULL);

error1:
	if (hashp != NULL)
		(void)close(hashp->fp);

error0:
	free(hashp);
	errno = save_errno;
	return (NULL);
}

static int32_t
hash_close(dbp)
	DB *dbp;
{
	HTAB *hashp;
	int32_t retval;

	if (!dbp)
		return (ERROR);

	hashp = (HTAB *)dbp->internal;
	retval = hdestroy(hashp);
	free(dbp);
	return (retval);
}

static int32_t
hash_fd(dbp)
	const DB *dbp;
{
	HTAB *hashp;

	if (!dbp)
		return (ERROR);

	hashp = (HTAB *)dbp->internal;
	if (hashp->fp == -1) {
		errno = ENOENT;
		return (-1);
	}
	return (hashp->fp);
}

/************************** LOCAL CREATION ROUTINES **********************/
static HTAB *
init_hash(hashp, file, info)
	HTAB *hashp;
	const char *file;
	const HASHINFO *info;
{
	struct stat statbuf;

	hashp->hdr.nkeys = 0;
	hashp->hdr.lorder = DB_BYTE_ORDER;
	hashp->hdr.bsize = DEF_BUCKET_SIZE;
	hashp->hdr.bshift = DEF_BUCKET_SHIFT;
	hashp->hdr.ffactor = DEF_FFACTOR;
	hashp->hash = __default_hash;
	memset(hashp->hdr.spares, 0, sizeof(hashp->hdr.spares));
	memset(hashp->hdr.bitmaps, 0, sizeof(hashp->hdr.bitmaps));

	/* Fix bucket size to be optimal for file system */
	if (file != NULL) {
		if (stat(file, &statbuf))
			return (NULL);
		hashp->hdr.bsize = statbuf.st_blksize;
		if (hashp->hdr.bsize > MAX_BSIZE)
			hashp->hdr.bsize = MAX_BSIZE;
		hashp->hdr.bshift = __log2(hashp->hdr.bsize);
	}
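	/*
	 * st_blksize is the filesystem's preferred I/O size; using it
	 * (capped at MAX_BSIZE) keeps a bucket aligned with the transfer
	 * size the underlying filesystem prefers.
	 */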
	if (info) {
		if (info->bsize) {
			/* Round pagesize up to power of 2 */
			hashp->hdr.bshift = __log2(info->bsize);
			hashp->hdr.bsize = 1 << hashp->hdr.bshift;
			if (hashp->hdr.bsize > MAX_BSIZE) {
				errno = EINVAL;
				return (NULL);
			}
		}
		if (info->ffactor)
			hashp->hdr.ffactor = info->ffactor;
		if (info->hash)
			hashp->hash = info->hash;
		if (info->lorder) {
			if ((info->lorder != DB_BIG_ENDIAN) &&
			    (info->lorder != DB_LITTLE_ENDIAN)) {
				errno = EINVAL;
				return (NULL);
			}
			hashp->hdr.lorder = info->lorder;
		}
	}
	return (hashp);
}

/*
 * Returns 0 on No Error
 */
static int32_t
init_htab(hashp, nelem)
	HTAB *hashp;
	int32_t nelem;
{
	int32_t l2, nbuckets;

	/*
	 * Divide number of elements by the fill factor and determine a
	 * desired number of buckets.  Allocate space for the next greater
	 * power of two number of buckets.
	 */
	nelem = (nelem - 1) / hashp->hdr.ffactor + 1;

	l2 = __log2(MAX(nelem, 2));
	nbuckets = 1 << l2;

	hashp->hdr.spares[l2] = l2 + 1;
	hashp->hdr.spares[l2 + 1] = l2 + 1;
	hashp->hdr.ovfl_point = l2;
	hashp->hdr.last_freed = 2;

	hashp->hdr.max_bucket = hashp->hdr.low_mask = nbuckets - 1;
	hashp->hdr.high_mask = (nbuckets << 1) - 1;
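	/*
	 * Worked example: with nelem == 1000 and ffactor == 8, the division
	 * above leaves nelem == 125, so l2 == 7, nbuckets == 128,
	 * max_bucket == low_mask == 127 (0x7f), high_mask == 255 (0xff).
	 */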

	/*
	 * The number of header pages is the size of the header divided by
	 * the amount of freespace on header pages (the page size - the
	 * size of 1 integer where the length of the header info on that
	 * page is stored) plus another page if it didn't divide evenly.
	 */
	hashp->hdr.hdrpages =
	    (sizeof(HASHHDR) / (hashp->hdr.bsize - HEADER_OVERHEAD)) +
	    (((sizeof(HASHHDR) % (hashp->hdr.bsize - HEADER_OVERHEAD)) == 0)
	    ? 0 : 1);
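	/*
	 * Whenever the header fits on one page (sizeof(HASHHDR) <=
	 * bsize - HEADER_OVERHEAD, which holds for the default bucket
	 * size), this evaluates to hdrpages == 1.
	 */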

	/* Create pages for these buckets */
	/*
	for (i = 0; i <= hashp->hdr.max_bucket; i++) {
		if (__new_page(hashp, (u_int32_t)i, A_BUCKET) != 0)
			return (-1);
	}
	*/

	/* First bitmap page is at: splitpoint l2 page offset 1 */
	if (__ibitmap(hashp, OADDR_OF(l2, 1), l2 + 1, 0))
		return (-1);

	return (0);
}

/*
 * Functions to get/put hash header.  We access the file directly.
 */
static u_int32_t
hget_header(hashp, page_size)
	HTAB *hashp;
	u_int32_t page_size;
{
	u_int32_t num_copied;
	u_int8_t *hdr_dest;

	num_copied = 0;

	hdr_dest = (u_int8_t *)&hashp->hdr;

	/*
	 * XXX
	 * This should not be printing to stderr on a "normal" error case.
	 */
	lseek(hashp->fp, 0, SEEK_SET);
	num_copied = read(hashp->fp, hdr_dest, sizeof(HASHHDR));
	if (num_copied != sizeof(HASHHDR)) {
		fprintf(stderr, "hash: could not retrieve header");
		return (0);
	}
#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
	swap_header(hashp);
#endif
	return (num_copied);
}

static void
hput_header(hashp)
	HTAB *hashp;
{
	HASHHDR *whdrp;
#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
	HASHHDR whdr;
#endif
	u_int32_t num_copied;

	num_copied = 0;

	whdrp = &hashp->hdr;
#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
	whdrp = &whdr;
	swap_header_copy(&hashp->hdr, whdrp);
#endif

	lseek(hashp->fp, 0, SEEK_SET);
	num_copied = write(hashp->fp, whdrp, sizeof(HASHHDR));
	if (num_copied != sizeof(HASHHDR))
		(void)fprintf(stderr, "hash: could not write hash header");
	return;
}

/********************** DESTROY/CLOSE ROUTINES ************************/

/*
 * Flushes any changes to the file if necessary and destroys the hashp
 * structure, freeing all allocated space.
 */
static int32_t
hdestroy(hashp)
	HTAB *hashp;
{
	int32_t save_errno;

	save_errno = 0;

#ifdef HASH_STATISTICS
	{ int i;
	(void)fprintf(stderr, "hdestroy: accesses %ld collisions %ld\n",
	    hash_accesses, hash_collisions);
	(void)fprintf(stderr,
	    "hdestroy: expansions %ld\n", hash_expansions);
	(void)fprintf(stderr,
	    "hdestroy: overflows %ld\n", hash_overflows);
	(void)fprintf(stderr,
	    "hdestroy: big key/data pages %ld\n", hash_bigpages);
	(void)fprintf(stderr,
	    "keys %ld maxp %d\n", hashp->hdr.nkeys, hashp->hdr.max_bucket);

	for (i = 0; i < NCACHED; i++)
		(void)fprintf(stderr,
		    "spares[%d] = %d\n", i, hashp->hdr.spares[i]);
	}
#endif

	if (flush_meta(hashp) && !save_errno)
		save_errno = errno;

	/* Free the split page */
	if (hashp->split_buf)
		free(hashp->split_buf);

	/* Free the big key and big data returns */
	if (hashp->bigkey_buf)
		free(hashp->bigkey_buf);
	if (hashp->bigdata_buf)
		free(hashp->bigdata_buf);

	/* XXX This should really iterate over the cursor queue, but
	   it's not clear how to do that, and the only cursor a hash
	   table ever creates is the one used by hash_seq().  Passing
	   NULL as the first arg is also a kludge, but I know that
	   it's never used, so I do it.  The intent is to plug the
	   memory leak.  Correctness can come later. */

	if (hashp->seq_cursor)
		hashp->seq_cursor->delete(NULL, hashp->seq_cursor, 0);

	/* shut down mpool */
	mpool_sync(hashp->mp);
	mpool_close(hashp->mp);

	if (hashp->fp != -1)
		(void)close(hashp->fp);

	/*
	 * *** This may cause problems if hashp->fname is set in any case
	 * other than the case that we are generating a temporary file name.
	 * Note that the new version of mpool should support temporary
	 * files within mpool itself.
	 */
	if (hashp->fname && !hashp->save_file) {
#ifdef DEBUG
		fprintf(stderr, "Unlinking file %s.\n", hashp->fname);
#endif
		/* we need to chmod the file to allow it to be deleted... */
		chmod(hashp->fname, 0700);
		unlink(hashp->fname);
	}
	free(hashp);

	if (save_errno) {
		errno = save_errno;
		return (ERROR);
	}
	return (SUCCESS);
}

/*
 * Write modified pages to disk
 *
 * Returns:
 *	 0 == OK
 *	-1 ERROR
 */
static int32_t
hash_sync(dbp, flags)
	const DB *dbp;
	u_int32_t flags;
{
	HTAB *hashp;

	hashp = (HTAB *)dbp->internal;

	/*
	 * XXX
	 * Check success/failure conditions.
	 */
	return (flush_meta(hashp) || mpool_sync(hashp->mp));
}

/*
 * Returns:
 *	 0 == OK
 *	-1 indicates that errno should be set
 */
static int32_t
flush_meta(hashp)
	HTAB *hashp;
{
	int32_t i;

	if (!hashp->save_file)
		return (0);
	hashp->hdr.magic = HASHMAGIC;
	hashp->hdr.version = HASHVERSION;
	hashp->hdr.h_charkey = hashp->hash(CHARKEY, sizeof(CHARKEY));
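	/*
	 * h_charkey is the hash of a fixed key (CHARKEY); hash_open()
	 * compares it against the configured hash function so that a file
	 * built with a different hash function is rejected with EFTYPE.
	 */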

	/* write out metadata */
	hput_header(hashp);

	for (i = 0; i < NCACHED; i++)
		if (hashp->mapp[i]) {
			if (__put_page(hashp,
			    (PAGE16 *)hashp->mapp[i], A_BITMAP, 1))
				return (-1);
			hashp->mapp[i] = NULL;
		}
	return (0);
}

/*******************************SEARCH ROUTINES *****************************/
/*
 * All the access routines return
 *
 * Returns:
 *	 0 on SUCCESS
 *	 1 to indicate an external ERROR (i.e. key not found, etc)
 *	-1 to indicate an internal ERROR (i.e. out of memory, etc)
 */

/* *** make sure this is true! */

static int32_t
hash_get(dbp, key, data, flag)
	const DB *dbp;
	const DBT *key;
	DBT *data;
	u_int32_t flag;
{
	HTAB *hashp;

	hashp = (HTAB *)dbp->internal;
	if (flag) {
		hashp->local_errno = errno = EINVAL;
		return (ERROR);
	}
	return (hash_access(hashp, HASH_GET, key, data));
}

static int32_t
hash_put(dbp, key, data, flag)
	const DB *dbp;
	DBT *key;
	const DBT *data;
	u_int32_t flag;
{
	HTAB *hashp;

	hashp = (HTAB *)dbp->internal;
	if (flag && flag != R_NOOVERWRITE) {
		hashp->local_errno = errno = EINVAL;
		return (ERROR);
	}
	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
		hashp->local_errno = errno = EPERM;
		return (ERROR);
	}
	return (hash_access(hashp, flag == R_NOOVERWRITE ?
	    HASH_PUTNEW : HASH_PUT, key, (DBT *)data));
}

static int32_t
hash_delete(dbp, key, flag)
	const DB *dbp;
	const DBT *key;
	u_int32_t flag;		/* Ignored */
{
	HTAB *hashp;

	hashp = (HTAB *)dbp->internal;
	if (flag) {
		hashp->local_errno = errno = EINVAL;
		return (ERROR);
	}
	if ((hashp->flags & O_ACCMODE) == O_RDONLY) {
		hashp->local_errno = errno = EPERM;
		return (ERROR);
	}

	return (hash_access(hashp, HASH_DELETE, key, NULL));
}

/*
 * Assume that hashp has been set in wrapper routine.
 */
static int32_t
hash_access(hashp, action, key, val)
	HTAB *hashp;
	ACTION action;
	const DBT *key;
	DBT *val;
{
	DBT page_key, page_val;
	CURSOR cursor;
	ITEM_INFO item_info;
	u_int32_t bucket;
	u_int32_t num_items;

#ifdef HASH_STATISTICS
	hash_accesses++;
#endif

	num_items = 0;

	/*
	 * Set up item_info so that we're looking for space to add an item
	 * as we cycle through the pages looking for the key.
	 */
	if (action == HASH_PUT || action == HASH_PUTNEW) {
		if (ISBIG(key->size + val->size, hashp))
			item_info.seek_size = PAIR_OVERHEAD;
		else
			item_info.seek_size = key->size + val->size;
	} else
		item_info.seek_size = 0;
	item_info.seek_found_page = 0;

	bucket = __call_hash(hashp, (int8_t *)key->data, key->size);

	cursor.pagep = NULL;
	__get_item_reset(hashp, &cursor);

	cursor.bucket = bucket;
	while (1) {
		__get_item_next(hashp, &cursor, &page_key, &page_val, &item_info);
		if (item_info.status == ITEM_ERROR)
			return (ABNORMAL);
		if (item_info.status == ITEM_NO_MORE)
			break;
		num_items++;
		if (item_info.key_off == BIGPAIR) {
			/*
			 * !!!
			 * 0 is a valid index.
			 */
			if (__find_bigpair(hashp, &cursor, (int8_t *)key->data,
			    key->size) > 0)
				goto found;
		} else if (key->size == page_key.size &&
		    !memcmp(key->data, page_key.data, key->size))
			goto found;
	}
#ifdef HASH_STATISTICS
	hash_collisions++;
#endif
	__get_item_done(hashp, &cursor);

	/*
	 * At this point, item_info will list either the last page in
	 * the chain, or the last page in the chain plus a pgno for where
	 * to find the first page in the chain with space for the
	 * item we wish to add.
	 */

	/* Not found */
	switch (action) {
	case HASH_PUT:
	case HASH_PUTNEW:
		if (__addel(hashp, &item_info, key, val, num_items, 0))
			return (ERROR);
		break;
	case HASH_GET:
	case HASH_DELETE:
	default:
		return (ABNORMAL);
	}

	if (item_info.caused_expand)
		__expand_table(hashp);
	return (SUCCESS);

found:	__get_item_done(hashp, &cursor);

	switch (action) {
	case HASH_PUTNEW:
		/* mpool_put(hashp->mp, pagep, 0); */
		return (ABNORMAL);
	case HASH_GET:
		if (item_info.key_off == BIGPAIR) {
			if (__big_return(hashp, &item_info, val, 0))
				return (ERROR);
		} else {
			val->data = page_val.data;
			val->size = page_val.size;
		}
		/* *** data may not be available! */
		break;
	case HASH_PUT:
		if (__delpair(hashp, &cursor, &item_info) ||
		    __addel(hashp, &item_info, key, val, UNKNOWN, 0))
			return (ERROR);
		__get_item_done(hashp, &cursor);
		if (item_info.caused_expand)
			__expand_table(hashp);
		break;
	case HASH_DELETE:
		if (__delpair(hashp, &cursor, &item_info))
			return (ERROR);
		break;
	default:
		abort();
	}
	return (SUCCESS);
}

/* ****************** CURSORS ********************************** */
CURSOR *
__cursor_creat(dbp)
	const DB *dbp;
{
	CURSOR *new_curs;
	HTAB *hashp;

	new_curs = (CURSOR *)malloc(sizeof(struct cursor_t));
	if (!new_curs)
		return NULL;
	new_curs->internal =
	    (struct item_info *)malloc(sizeof(struct item_info));
	if (!new_curs->internal) {
		free(new_curs);
		return NULL;
	}
	new_curs->get = cursor_get;
	new_curs->delete = cursor_delete;

	new_curs->bucket = 0;
	new_curs->pgno = INVALID_PGNO;
	new_curs->ndx = 0;
	new_curs->pgndx = 0;
	new_curs->pagep = NULL;

	/* place onto queue of cursors */
	hashp = (HTAB *)dbp->internal;
	TAILQ_INSERT_TAIL(&hashp->curs_queue, new_curs, queue);

	return new_curs;
}

static int32_t
cursor_get(dbp, cursorp, key, val, flags)
	const DB *dbp;
	CURSOR *cursorp;
	DBT *key, *val;
	u_int32_t flags;
{
	HTAB *hashp;
	ITEM_INFO item_info;

	hashp = (HTAB *)dbp->internal;

	if (flags && flags != R_FIRST && flags != R_NEXT) {
		hashp->local_errno = errno = EINVAL;
		return (ERROR);
	}
#ifdef HASH_STATISTICS
	hash_accesses++;
#endif

	item_info.seek_size = 0;

	if (flags == R_FIRST)
		__get_item_first(hashp, cursorp, key, val, &item_info);
	else
		__get_item_next(hashp, cursorp, key, val, &item_info);

	/*
	 * This needs to be changed around.  As is, get_item_next advances
	 * the pointers on the page but this function actually advances
	 * bucket pointers.  This works, since the only other place we
	 * use get_item_next is in hash_access which only deals with one
	 * bucket at a time.  However, there is the problem that certain other
	 * functions (such as find_bigpair and delpair) depend on the
	 * pgndx member of the cursor.  Right now, they are using pgndx - 1
	 * since indices refer to the __next__ item that is to be fetched
	 * from the page.  This is ugly, as you may have noticed, whoever
	 * you are.  The best solution would be to depend on item_infos to
	 * deal with _current_ information, and have the cursors only
	 * deal with _next_ information.  In that scheme, get_item_next
	 * would also advance buckets.  Version 3...
	 */

	/*
	 * Must always enter this loop to do error handling and
	 * check for big key/data pair.
	 */
	while (1) {
		if (item_info.status == ITEM_OK) {
			if (item_info.key_off == BIGPAIR &&
			    __big_keydata(hashp, cursorp->pagep, key, val,
			    item_info.pgndx))
				return (ABNORMAL);

			break;
		} else if (item_info.status != ITEM_NO_MORE)
			return (ABNORMAL);

		__put_page(hashp, cursorp->pagep, A_RAW, 0);
		cursorp->ndx = cursorp->pgndx = 0;
		cursorp->bucket++;
		cursorp->pgno = INVALID_PGNO;
		cursorp->pagep = NULL;
		if (cursorp->bucket > hashp->hdr.max_bucket)
			return (ABNORMAL);
		__get_item_next(hashp, cursorp, key, val, &item_info);
	}

	__get_item_done(hashp, cursorp);
	return (0);
}

static int32_t
cursor_delete(dbp, cursor, flags)
	const DB *dbp;
	CURSOR *cursor;
	u_int32_t flags;
{
	/* XXX this is empirically determined, so it might not be completely
	   correct, but it seems to work.  At the very least it fixes
	   a memory leak */

	free(cursor->internal);
	free(cursor);

	return (0);
}

static int32_t
hash_seq(dbp, key, val, flag)
	const DB *dbp;
	DBT *key, *val;
	u_int32_t flag;
{
	HTAB *hashp;

	/*
	 * Seq just uses the default cursor to go sequencing through the
	 * database.  Note that the default cursor is the first in the list.
	 */

	hashp = (HTAB *)dbp->internal;
	if (!hashp->seq_cursor)
		hashp->seq_cursor = __cursor_creat(dbp);

	return (hashp->seq_cursor->get(dbp, hashp->seq_cursor, key, val, flag));
}

/********************************* UTILITIES ************************/

/*
 * Returns:
 *	 0 ==> OK
 *	-1 ==> Error
 */
int32_t
__expand_table(hashp)
	HTAB *hashp;
{
	u_int32_t old_bucket, new_bucket;
	int32_t spare_ndx;

#ifdef HASH_STATISTICS
	hash_expansions++;
#endif
	new_bucket = ++hashp->hdr.max_bucket;
	old_bucket = (hashp->hdr.max_bucket & hashp->hdr.low_mask);
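	/*
	 * old_bucket is the "buddy" of the new bucket: the bucket whose
	 * records may now hash to new_bucket once the table has grown,
	 * so it is the one that gets split below.
	 */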

	/* Get a page for this new bucket */
	if (__new_page(hashp, new_bucket, A_BUCKET) != 0)
		return (-1);

	/*
	 * If the split point is increasing (hdr.max_bucket's log base 2
	 * increases), we need to copy the current contents of the spare
	 * split bucket to the next bucket.
	 */
	spare_ndx = __log2(hashp->hdr.max_bucket + 1);
	if (spare_ndx > hashp->hdr.ovfl_point) {
		hashp->hdr.spares[spare_ndx] = hashp->hdr.spares[hashp->hdr.ovfl_point];
		hashp->hdr.ovfl_point = spare_ndx;
	}
	if (new_bucket > hashp->hdr.high_mask) {
		/* Starting a new doubling */
		hashp->hdr.low_mask = hashp->hdr.high_mask;
		hashp->hdr.high_mask = new_bucket | hashp->hdr.low_mask;
	}
	if (BUCKET_TO_PAGE(new_bucket) > MAX_PAGES(hashp)) {
		fprintf(stderr, "hash: Cannot allocate new bucket.  Pages exhausted.\n");
		return (-1);
	}
	/* Relocate records to the new bucket */
	return (__split_page(hashp, old_bucket, new_bucket));
}

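/*
 * Linear-hashing address calculation: mask the hash value with high_mask
 * (the mask for the doubling in progress); if that lands past the last
 * bucket that currently exists, fall back to low_mask.  Illustrative
 * example: with max_bucket == 5, low_mask == 3 and high_mask == 7, a hash
 * whose low bits are 6 maps first to bucket 6, which does not exist yet,
 * so it is re-masked down to bucket 2.
 */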
u_int32_t
__call_hash(hashp, k, len)
	HTAB *hashp;
	int8_t *k;
	int32_t len;
{
	u_int32_t n, bucket;

	n = hashp->hash(k, len);
	bucket = n & hashp->hdr.high_mask;
	if (bucket > hashp->hdr.max_bucket)
		bucket = bucket & hashp->hdr.low_mask;
	return (bucket);
}

#if DB_BYTE_ORDER == DB_LITTLE_ENDIAN
/*
 * Hashp->hdr needs to be byteswapped.
 */
static void
swap_header_copy(srcp, destp)
	HASHHDR *srcp, *destp;
{
	int32_t i;

	P_32_COPY(srcp->magic, destp->magic);
	P_32_COPY(srcp->version, destp->version);
	P_32_COPY(srcp->lorder, destp->lorder);
	P_32_COPY(srcp->bsize, destp->bsize);
	P_32_COPY(srcp->bshift, destp->bshift);
	P_32_COPY(srcp->ovfl_point, destp->ovfl_point);
	P_32_COPY(srcp->last_freed, destp->last_freed);
	P_32_COPY(srcp->max_bucket, destp->max_bucket);
	P_32_COPY(srcp->high_mask, destp->high_mask);
	P_32_COPY(srcp->low_mask, destp->low_mask);
	P_32_COPY(srcp->ffactor, destp->ffactor);
	P_32_COPY(srcp->nkeys, destp->nkeys);
	P_32_COPY(srcp->hdrpages, destp->hdrpages);
	P_32_COPY(srcp->h_charkey, destp->h_charkey);
	for (i = 0; i < NCACHED; i++) {
		P_32_COPY(srcp->spares[i], destp->spares[i]);
		P_16_COPY(srcp->bitmaps[i], destp->bitmaps[i]);
	}
}

static void
swap_header(hashp)
	HTAB *hashp;
{
	HASHHDR *hdrp;
	int32_t i;

	hdrp = &hashp->hdr;

	M_32_SWAP(hdrp->magic);
	M_32_SWAP(hdrp->version);
	M_32_SWAP(hdrp->lorder);
	M_32_SWAP(hdrp->bsize);
	M_32_SWAP(hdrp->bshift);
	M_32_SWAP(hdrp->ovfl_point);
	M_32_SWAP(hdrp->last_freed);
	M_32_SWAP(hdrp->max_bucket);
	M_32_SWAP(hdrp->high_mask);
	M_32_SWAP(hdrp->low_mask);
	M_32_SWAP(hdrp->ffactor);
	M_32_SWAP(hdrp->nkeys);
	M_32_SWAP(hdrp->hdrpages);
	M_32_SWAP(hdrp->h_charkey);
	for (i = 0; i < NCACHED; i++) {
		M_32_SWAP(hdrp->spares[i]);
		M_16_SWAP(hdrp->bitmaps[i]);
	}
}
#endif /* DB_BYTE_ORDER == DB_LITTLE_ENDIAN */