xref: /titanic_51/usr/src/cmd/sendmail/db/hash/hash.c (revision b533f56bf95137d3de6666bd923e15ec373ea611)
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1990, 1993, 1994
9  *	Margo Seltzer.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1990, 1993, 1994
13  *	The Regents of the University of California.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Margo Seltzer.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. All advertising materials mentioning features or use of this software
27  *    must display the following acknowledgement:
28  *	This product includes software developed by the University of
29  *	California, Berkeley and its contributors.
30  * 4. Neither the name of the University nor the names of its contributors
31  *    may be used to endorse or promote products derived from this software
32  *    without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  */
46 
47 #include "config.h"
48 
49 #ifndef lint
50 static const char sccsid[] = "@(#)hash.c	10.63 (Sleepycat) 12/11/98";
51 #endif /* not lint */
52 
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
55 
56 #include <errno.h>
57 #include <stdlib.h>
58 #include <string.h>
59 #endif
60 
61 #include "db_int.h"
62 #include "shqueue.h"
63 #include "db_page.h"
64 #include "db_am.h"
65 #include "db_ext.h"
66 #include "hash.h"
67 #include "btree.h"
68 #include "log.h"
69 #include "db_shash.h"
70 #include "lock.h"
71 #include "lock_ext.h"
72 
73 static int  __ham_c_close __P((DBC *));
74 static int  __ham_c_del __P((DBC *, u_int32_t));
75 static int  __ham_c_destroy __P((DBC *));
76 static int  __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t));
77 static int  __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t));
78 static int  __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t));
79 static int  __ham_dup_return __P((DBC *, DBT *, u_int32_t));
80 static int  __ham_expand_table __P((DBC *));
81 static void __ham_init_htab __P((DBC *, u_int32_t, u_int32_t));
82 static int  __ham_lookup __P((DBC *, const DBT *, u_int32_t, db_lockmode_t));
83 static int  __ham_overwrite __P((DBC *, DBT *));
84 
85 /************************** INTERFACE ROUTINES ***************************/
86 /* OPEN/CLOSE */
87 
88 /*
89  * __ham_open --
90  *
91  * PUBLIC: int __ham_open __P((DB *, DB_INFO *));
92  */
93 int
94 __ham_open(dbp, dbinfo)
95 	DB *dbp;
96 	DB_INFO *dbinfo;
97 {
98 	DB_ENV *dbenv;
99 	DBC *dbc;
100 	HASH_CURSOR *hcp;
101 	int file_existed, ret;
102 
103 	dbc = NULL;
104 	dbenv = dbp->dbenv;
105 
106 	/* Set the hash function if specified by the user. */
107 	if (dbinfo != NULL && dbinfo->h_hash != NULL)
108 		dbp->h_hash = dbinfo->h_hash;
109 
110 	/*
111 	 * Initialize the remaining fields of the dbp.  The only function
112 	 * that differs from the default set is __ham_stat().
113 	 */
114 	dbp->internal = NULL;
115 	dbp->am_close = __ham_close;
116 	dbp->del = __ham_delete;
117 	dbp->stat = __ham_stat;
118 
119 	/* Get a cursor we can use for the rest of this function. */
120 	if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
121 		goto out;
122 
123 	hcp = (HASH_CURSOR *)dbc->internal;
124 	GET_META(dbp, hcp, ret);
125 	if (ret != 0)
126 		goto out;
127 
128 	/*
129 	 * If this is a new file, initialize it, and put it back dirty.
130 	 */
131 
132 	/* Initialize the hdr structure */
133 	if (hcp->hdr->magic == DB_HASHMAGIC) {
134 		file_existed = 1;
135 		/* File exists, verify the data in the header. */
136 		if (dbp->h_hash == NULL)
137 			dbp->h_hash =
138 			    hcp->hdr->version < 5 ? __ham_func4 : __ham_func5;
139 		if (dbp->h_hash(CHARKEY, sizeof(CHARKEY)) !=
140 		    hcp->hdr->h_charkey) {
141 			__db_err(dbp->dbenv, "hash: incompatible hash function");
142 			ret = EINVAL;
143 			goto out;
144 		}
145 		if (F_ISSET(hcp->hdr, DB_HASH_DUP))
146 			F_SET(dbp, DB_AM_DUP);
147 	} else {
148 		/*
149 		 * File does not exist, we must initialize the header.  If
150 		 * locking is enabled that means getting a write lock first.
151 		 */
152 		file_existed = 0;
153 		if (F_ISSET(dbp, DB_AM_LOCKING) &&
154 		    ((ret = lock_put(dbenv->lk_info, hcp->hlock)) != 0 ||
155 		    (ret = lock_get(dbenv->lk_info, dbc->locker, 0,
156 		        &dbc->lock_dbt, DB_LOCK_WRITE, &hcp->hlock)) != 0)) {
157 			if (ret < 0)
158 				ret = EAGAIN;
159 			goto out;
160 		}
161 
162 		__ham_init_htab(dbc, dbinfo != NULL ? dbinfo->h_nelem : 0,
163 		    dbinfo != NULL ? dbinfo->h_ffactor : 0);
164 		if (F_ISSET(dbp, DB_AM_DUP))
165 			F_SET(hcp->hdr, DB_HASH_DUP);
166 		if ((ret = __ham_dirty_page(dbp, (PAGE *)hcp->hdr)) != 0)
167 			goto out;
168 	}
169 
170 	/* Release the meta data page */
171 	RELEASE_META(dbp, hcp);
172 	if ((ret  = dbc->c_close(dbc)) != 0)
173 		goto out;
174 
175 	/* Sync the file so that we know that the meta data goes to disk. */
176 	if (!file_existed && (ret = dbp->sync(dbp, 0)) != 0)
177 		goto out;
178 	return (0);
179 
180 out:	(void)__ham_close(dbp);
181 	return (ret);
182 }
183 
184 /*
185  * PUBLIC: int __ham_close __P((DB *));
186  */
187 int
188 __ham_close(dbp)
189 	DB *dbp;
190 {
191 	COMPQUIET(dbp, NULL);
192 	return (0);
193 }
194 
195 /************************** LOCAL CREATION ROUTINES **********************/
196 /*
197  * Returns 0 on No Error
198  */
199 static void
200 __ham_init_htab(dbc, nelem, ffactor)
201 	DBC *dbc;
202 	u_int32_t nelem, ffactor;
203 {
204 	DB *dbp;
205 	HASH_CURSOR *hcp;
206 	int32_t l2, nbuckets;
207 
208 	hcp = (HASH_CURSOR *)dbc->internal;
209 	dbp = dbc->dbp;
210 	memset(hcp->hdr, 0, sizeof(HASHHDR));
211 	hcp->hdr->ffactor = ffactor;
212 	hcp->hdr->pagesize = dbp->pgsize;
213 	ZERO_LSN(hcp->hdr->lsn);
214 	hcp->hdr->magic = DB_HASHMAGIC;
215 	hcp->hdr->version = DB_HASHVERSION;
216 
217 	if (dbp->h_hash == NULL)
218 		dbp->h_hash = hcp->hdr->version < 5 ? __ham_func4 : __ham_func5;
219 	hcp->hdr->h_charkey = dbp->h_hash(CHARKEY, sizeof(CHARKEY));
220 	if (nelem != 0 && hcp->hdr->ffactor != 0) {
221 		nelem = (nelem - 1) / hcp->hdr->ffactor + 1;
222 		l2 = __db_log2(nelem > 2 ? nelem : 2);
223 	} else
224 		l2 = 2;
225 
226 	nbuckets = 1 << l2;
227 
228 	hcp->hdr->ovfl_point = l2;
229 	hcp->hdr->last_freed = PGNO_INVALID;
230 
231 	hcp->hdr->max_bucket = hcp->hdr->high_mask = nbuckets - 1;
232 	hcp->hdr->low_mask = (nbuckets >> 1) - 1;
233 	memcpy(hcp->hdr->uid, dbp->fileid, DB_FILE_ID_LEN);
234 }
235 
236 static int
237 __ham_delete(dbp, txn, key, flags)
238 	DB *dbp;
239 	DB_TXN *txn;
240 	DBT *key;
241 	u_int32_t flags;
242 {
243 	DBC *dbc;
244 	HASH_CURSOR *hcp;
245 	int ret, tret;
246 
247 	DB_PANIC_CHECK(dbp);
248 
249 	if ((ret =
250 	    __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0)
251 		return (ret);
252 
253 	if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
254 		return (ret);
255 
256 	DEBUG_LWRITE(dbc, txn, "ham_delete", key, NULL, flags);
257 
258 	hcp = (HASH_CURSOR *)dbc->internal;
259 	GET_META(dbp, hcp, ret);
260 	if (ret != 0)
261 		goto out;
262 
263 	hcp->stats.hash_deleted++;
264 	if ((ret = __ham_lookup(dbc, key, 0, DB_LOCK_WRITE)) == 0)
265 		if (F_ISSET(hcp, H_OK))
266 			ret = __ham_del_pair(dbc, 1);
267 		else
268 			ret = DB_NOTFOUND;
269 
270 	RELEASE_META(dbp, hcp);
271 out:	if ((tret = dbc->c_close(dbc)) != 0 && ret == 0)
272 		ret = tret;
273 	return (ret);
274 }
275 
276 /* ****************** CURSORS ********************************** */
277 /*
278  * __ham_c_init --
279  *	Initialize the hash-specific portion of a cursor.
280  *
281  * PUBLIC: int __ham_c_init __P((DBC *));
282  */
283 int
284 __ham_c_init(dbc)
285 	DBC *dbc;
286   {
287 	HASH_CURSOR *new_curs;
288 	int ret;
289 
290 	if ((ret = __os_calloc(1, sizeof(struct cursor_t), &new_curs)) != 0)
291 		return (ret);
292 	if ((ret =
293 	    __os_malloc(dbc->dbp->pgsize, NULL, &new_curs->split_buf)) != 0) {
294 		__os_free(new_curs, sizeof(*new_curs));
295 		return (ret);
296 	}
297 
298 	new_curs->dbc = dbc;
299 
300 	dbc->internal = new_curs;
301 	dbc->c_am_close = __ham_c_close;
302 	dbc->c_am_destroy = __ham_c_destroy;
303 	dbc->c_del = __ham_c_del;
304 	dbc->c_get = __ham_c_get;
305 	dbc->c_put = __ham_c_put;
306 
307 	__ham_item_init(new_curs);
308 
309 	return (0);
310 }
311 
312 /*
313  * __ham_c_close --
314  *	Close down the cursor from a single use.
315  */
316 static int
317 __ham_c_close(dbc)
318 	DBC *dbc;
319 {
320 	int ret;
321 
322 	if ((ret = __ham_item_done(dbc, 0)) != 0)
323 		return (ret);
324 
325 	__ham_item_init((HASH_CURSOR *)dbc->internal);
326 	return (0);
327 }
328 
329 /*
330  * __ham_c_destroy --
331  *	Cleanup the access method private part of a cursor.
332  */
333 static int
334 __ham_c_destroy(dbc)
335 	DBC *dbc;
336 {
337 	HASH_CURSOR *hcp;
338 
339 	hcp = (HASH_CURSOR *)dbc->internal;
340 	if (hcp->split_buf != NULL)
341 		__os_free(hcp->split_buf, dbc->dbp->pgsize);
342 	__os_free(hcp, sizeof(HASH_CURSOR));
343 
344 	return (0);
345 }
346 
347 static int
348 __ham_c_del(dbc, flags)
349 	DBC *dbc;
350 	u_int32_t flags;
351 {
352 	DB *dbp;
353 	DBT repldbt;
354 	HASH_CURSOR *hcp;
355 	HASH_CURSOR save_curs;
356 	db_pgno_t ppgno, chg_pgno;
357 	int ret, t_ret;
358 
359 	DEBUG_LWRITE(dbc, dbc->txn, "ham_c_del", NULL, NULL, flags);
360 	dbp = dbc->dbp;
361 	DB_PANIC_CHECK(dbp);
362 	hcp = (HASH_CURSOR *)dbc->internal;
363 
364 	if ((ret = __db_cdelchk(dbc->dbp, flags,
365 	    F_ISSET(dbc->dbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
366 		return (ret);
367 
368 	if (F_ISSET(hcp, H_DELETED))
369 		return (DB_NOTFOUND);
370 
371 	/*
372 	 * If we are in the concurrent DB product and this cursor
373 	 * is not a write cursor, then this request is invalid.
374 	 * If it is a simple write cursor, then we need to upgrade its
375 	 * lock.
376 	 */
377 	if (F_ISSET(dbp, DB_AM_CDB)) {
378 		/* Make sure it's a valid update cursor. */
379 		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
380 			return (EINVAL);
381 
382 		if (F_ISSET(dbc, DBC_RMW) &&
383 		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
384 		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
385 		    &dbc->mylock)) != 0)
386 			return (EAGAIN);
387 	}
388 
389 	GET_META(dbp, hcp, ret);
390 	if (ret != 0)
391 		return (ret);
392 
393 	SAVE_CURSOR(hcp, &save_curs);
394 	hcp->stats.hash_deleted++;
395 
396 	if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0)
397 		goto out;
398 	if (F_ISSET(hcp, H_ISDUP) && hcp->dpgno != PGNO_INVALID) {
399 		/*
400 		 * We are about to remove a duplicate from offpage.
401 		 *
402 		 * There are 4 cases.
403 		 * 1. We will remove an item on a page, but there are more
404 		 *    items on that page.
405 		 * 2. We will remove the last item on a page, but there is a
406 		 *    following page of duplicates.
407 		 * 3. We will remove the last item on a page, this page was the
408 		 *    last page in a duplicate set, but there were dups before
409 		 *    it.
410 		 * 4. We will remove the last item on a page, removing the last
411 		 *    duplicate.
412 		 * In case 1 hcp->dpagep is unchanged.
413 		 * In case 2 hcp->dpagep comes back pointing to the next dup
414 		 *     page.
415 		 * In case 3 hcp->dpagep comes back NULL.
416 		 * In case 4 hcp->dpagep comes back NULL.
417 		 *
418 		 * Case 4 results in deleting the pair off the master page.
419 		 * The normal code for doing this knows how to delete the
420 		 * duplicates, so we will handle this case in the normal code.
421 		 */
422 		ppgno = PREV_PGNO(hcp->dpagep);
423 		if (ppgno == PGNO_INVALID &&
424 		    NEXT_PGNO(hcp->dpagep) == PGNO_INVALID &&
425 		    NUM_ENT(hcp->dpagep) == 1)
426 			goto normal;
427 
428 		/* Remove item from duplicate page. */
429 		chg_pgno = hcp->dpgno;
430 		if ((ret = __db_drem(dbc,
431 		    &hcp->dpagep, hcp->dndx, __ham_del_page)) != 0)
432 			goto out;
433 
434 		if (hcp->dpagep == NULL) {
435 			if (ppgno != PGNO_INVALID) {		/* Case 3 */
436 				hcp->dpgno = ppgno;
437 				if ((ret = __ham_get_cpage(dbc,
438 				    DB_LOCK_READ)) != 0)
439 					goto out;
440 				hcp->dndx = NUM_ENT(hcp->dpagep);
441 				F_SET(hcp, H_DELETED);
442 			} else {				/* Case 4 */
443 				ret = __ham_del_pair(dbc, 1);
444 				hcp->dpgno = PGNO_INVALID;
445 				/*
446 				 * Delpair updated the cursor queue, so we
447 				 * don't have to do that here.
448 				 */
449 				chg_pgno = PGNO_INVALID;
450 			}
451 		} else if (PGNO(hcp->dpagep) != hcp->dpgno) {
452 			hcp->dndx = 0;				/* Case 2 */
453 			hcp->dpgno = PGNO(hcp->dpagep);
454 			if (ppgno == PGNO_INVALID)
455 				memcpy(HOFFDUP_PGNO(P_ENTRY(hcp->pagep,
456 				    H_DATAINDEX(hcp->bndx))),
457 				    &hcp->dpgno, sizeof(db_pgno_t));
458 			/*
459 			 * We need to put the master page here, because
460 			 * although we have a duplicate page, the master
461 			 * page is dirty, and ham_item_done assumes that
462 			 * if you have a duplicate page, it's the only one
463 			 * that can be dirty.
464 			 */
465 			ret = __ham_put_page(dbp, hcp->pagep, 1);
466 			hcp->pagep = NULL;
467 			F_SET(hcp, H_DELETED);
468 		} else						/* Case 1 */
469 			F_SET(hcp, H_DELETED);
470 		if (chg_pgno != PGNO_INVALID)
471 			__ham_c_update(hcp, chg_pgno, 0, 0, 1);
472 	} else if (F_ISSET(hcp, H_ISDUP)) {			/* on page */
473 		if (hcp->dup_off == 0 && DUP_SIZE(hcp->dup_len) ==
474 		    LEN_HDATA(hcp->pagep, hcp->hdr->pagesize, hcp->bndx))
475 			ret = __ham_del_pair(dbc, 1);
476 		else {
477 			repldbt.flags = 0;
478 			F_SET(&repldbt, DB_DBT_PARTIAL);
479 			repldbt.doff = hcp->dup_off;
480 			repldbt.dlen = DUP_SIZE(hcp->dup_len);
481 			repldbt.size = 0;
482 			repldbt.data =
483 			    HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx));
484 			ret = __ham_replpair(dbc, &repldbt, 0);
485 			hcp->dup_tlen -= DUP_SIZE(hcp->dup_len);
486 			F_SET(hcp, H_DELETED);
487 			__ham_c_update(hcp, hcp->pgno,
488 			    DUP_SIZE(hcp->dup_len), 0, 1);
489 		}
490 
491 	} else
492 		/* Not a duplicate */
493 normal:		ret = __ham_del_pair(dbc, 1);
494 
495 out:	if ((t_ret = __ham_item_done(dbc, ret == 0)) != 0 && ret == 0)
496 		ret = t_ret;
497 	RELEASE_META(dbp, hcp);
498 	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
499 	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
500 		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
501 		    DB_LOCK_IWRITE, 0);
502 	return (ret);
503 }
504 
505 static int
506 __ham_c_get(dbc, key, data, flags)
507 	DBC *dbc;
508 	DBT *key;
509 	DBT *data;
510 	u_int32_t flags;
511 {
512 	DB *dbp;
513 	HASH_CURSOR *hcp, save_curs;
514 	db_lockmode_t lock_type;
515 	int get_key, ret, t_ret;
516 
517 	DEBUG_LREAD(dbc, dbc->txn, "ham_c_get",
518 	    flags == DB_SET || flags == DB_SET_RANGE ? key : NULL,
519 	    NULL, flags);
520 
521 	hcp = (HASH_CURSOR *)dbc->internal;
522 	dbp = dbc->dbp;
523 	DB_PANIC_CHECK(dbp);
524 	SAVE_CURSOR(hcp, &save_curs);
525 	if ((ret =
526 	    __db_cgetchk(dbp, key, data, flags, IS_VALID(hcp))) != 0)
527 		return (ret);
528 
529 	/* Clear OR'd in additional bits so we can check for flag equality. */
530 	if (LF_ISSET(DB_RMW)) {
531 		lock_type = DB_LOCK_WRITE;
532 		LF_CLR(DB_RMW);
533 	} else
534 		lock_type = DB_LOCK_READ;
535 
536 	GET_META(dbp, hcp, ret);
537 	if (ret != 0)
538 		return (ret);
539 	hcp->stats.hash_get++;
540 	hcp->seek_size = 0;
541 
542 	ret = 0;
543 	get_key = 1;
544 	switch (flags) {
545 	case DB_PREV:
546 		if (hcp->bucket != BUCKET_INVALID) {
547 			ret = __ham_item_prev(dbc, lock_type);
548 			break;
549 		}
550 		/* FALLTHROUGH */
551 	case DB_LAST:
552 		ret = __ham_item_last(dbc, lock_type);
553 		break;
554 	case DB_FIRST:
555 		ret = __ham_item_first(dbc, lock_type);
556 		break;
557 	case DB_NEXT_DUP:
558 		if (hcp->bucket == BUCKET_INVALID)
559 			ret = EINVAL;
560 		else {
561 			F_SET(hcp, H_DUPONLY);
562 			ret = __ham_item_next(dbc, lock_type);
563 		}
564 		break;
565 	case DB_NEXT:
566 		if (hcp->bucket == BUCKET_INVALID)
567 			hcp->bucket = 0;
568 		ret = __ham_item_next(dbc, lock_type);
569 		break;
570 	case DB_SET:
571 	case DB_SET_RANGE:
572 	case DB_GET_BOTH:
573 		if (F_ISSET(dbc, DBC_CONTINUE)) {
574 			F_SET(hcp, H_DUPONLY);
575 			ret = __ham_item_next(dbc, lock_type);
576 		} else if (F_ISSET(dbc, DBC_KEYSET))
577 			ret = __ham_item(dbc, lock_type);
578 		else
579 			ret = __ham_lookup(dbc, key, 0, lock_type);
580 		get_key = 0;
581 		break;
582 	case DB_CURRENT:
583 		if (F_ISSET(hcp, H_DELETED)) {
584 			ret = DB_KEYEMPTY;
585 			goto out;
586 		}
587 
588 		ret = __ham_item(dbc, lock_type);
589 		break;
590 	}
591 
592 	/*
593 	 * Must always enter this loop to do error handling and
594 	 * check for big key/data pair.
595 	 */
596 	while (1) {
597 		if (ret != 0 && ret != DB_NOTFOUND)
598 			goto out1;
599 		else if (F_ISSET(hcp, H_OK)) {
600 			/* Get the key. */
601 			if (get_key && (ret = __db_ret(dbp, hcp->pagep,
602 			    H_KEYINDEX(hcp->bndx), key, &dbc->rkey.data,
603 			    &dbc->rkey.size)) != 0)
604 				goto out1;
605 
606 			ret = __ham_dup_return(dbc, data, flags);
607 			break;
608 		} else if (!F_ISSET(hcp, H_NOMORE)) {
609 			abort();
610 			break;
611 		}
612 
613 		/*
614 		 * Ran out of entries in a bucket; change buckets.
615 		 */
616 		switch (flags) {
617 			case DB_LAST:
618 			case DB_PREV:
619 				ret = __ham_item_done(dbc, 0);
620 				if (hcp->bucket == 0) {
621 					ret = DB_NOTFOUND;
622 					goto out1;
623 				}
624 				hcp->bucket--;
625 				hcp->bndx = NDX_INVALID;
626 				if (ret == 0)
627 					ret = __ham_item_prev(dbc, lock_type);
628 				break;
629 			case DB_FIRST:
630 			case DB_NEXT:
631 				ret = __ham_item_done(dbc, 0);
632 				hcp->bndx = NDX_INVALID;
633 				hcp->bucket++;
634 				hcp->pgno = PGNO_INVALID;
635 				hcp->pagep = NULL;
636 				if (hcp->bucket > hcp->hdr->max_bucket) {
637 					ret = DB_NOTFOUND;
638 					goto out1;
639 				}
640 				if (ret == 0)
641 					ret = __ham_item_next(dbc, lock_type);
642 				break;
643 			case DB_GET_BOTH:
644 			case DB_NEXT_DUP:
645 			case DB_SET:
646 			case DB_SET_RANGE:
647 				/* Key not found. */
648 				ret = DB_NOTFOUND;
649 				goto out1;
650 		}
651 	}
652 out1:	if ((t_ret = __ham_item_done(dbc, 0)) != 0 && ret == 0)
653 		ret = t_ret;
654 out:	RELEASE_META(dbp, hcp);
655 	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
656 	return (ret);
657 }
658 
659 static int
660 __ham_c_put(dbc, key, data, flags)
661 	DBC *dbc;
662 	DBT *key;
663 	DBT *data;
664 	u_int32_t flags;
665 {
666 	DB *dbp;
667 	DBT tmp_val, *myval;
668 	HASH_CURSOR *hcp, save_curs;
669 	u_int32_t nbytes;
670 	int ret, t_ret;
671 
672 	dbp = dbc->dbp;
673 	DB_PANIC_CHECK(dbp);
674 	DEBUG_LWRITE(dbc, dbc->txn, "ham_c_put",
675 	    flags == DB_KEYFIRST || flags == DB_KEYLAST ? key : NULL,
676 	    data, flags);
677 	hcp = (HASH_CURSOR *)dbc->internal;
678 
679 	if ((ret = __db_cputchk(dbp, key, data, flags,
680 	    F_ISSET(dbp, DB_AM_RDONLY), IS_VALID(hcp))) != 0)
681 		return (ret);
682 
683 	if (F_ISSET(hcp, H_DELETED) &&
684 	    flags != DB_KEYFIRST && flags != DB_KEYLAST)
685 		return (DB_NOTFOUND);
686 
687 	/*
688 	 * If we are in the concurrent DB product and this cursor
689 	 * is not a write cursor, then this request is invalid.
690 	 * If it is a simple write cursor, then we need to upgrade its
691 	 * lock.
692 	 */
693 	if (F_ISSET(dbp, DB_AM_CDB)) {
694 		/* Make sure it's a valid update cursor. */
695 		if (!F_ISSET(dbc, DBC_RMW | DBC_WRITER))
696 			return (EINVAL);
697 
698 		if (F_ISSET(dbc, DBC_RMW) &&
699 		    (ret = lock_get(dbp->dbenv->lk_info, dbc->locker,
700 		    DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE,
701 		    &dbc->mylock)) != 0)
702 			return (EAGAIN);
703 	}
704 
705 	GET_META(dbp, hcp, ret);
706 	if (ret != 0)
707 		return (ret);
708 
709 	SAVE_CURSOR(hcp, &save_curs);
710 	hcp->stats.hash_put++;
711 
712 	switch (flags) {
713 	case DB_KEYLAST:
714 	case DB_KEYFIRST:
715 		nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE :
716 		    HKEYDATA_PSIZE(key->size)) +
717 		    (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE :
718 		    HKEYDATA_PSIZE(data->size));
719 		if ((ret = __ham_lookup(dbc,
720 		    key, nbytes, DB_LOCK_WRITE)) == DB_NOTFOUND) {
721 			ret = 0;
722 			if (hcp->seek_found_page != PGNO_INVALID &&
723 			    hcp->seek_found_page != hcp->pgno) {
724 				if ((ret = __ham_item_done(dbc, 0)) != 0)
725 					goto out;
726 				hcp->pgno = hcp->seek_found_page;
727 				hcp->bndx = NDX_INVALID;
728 			}
729 
730 			if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) {
731 				/*
732 				 * A partial put, but the key does not exist
733 				 * and we are not beginning the write at 0.
734 				 * We must create a data item padded up to doff
735 				 * and then write the new bytes represented by
736 				 * val.
737 				 */
738 				if ((ret = __ham_init_dbt(&tmp_val,
739 				    data->size + data->doff,
740 				    &dbc->rdata.data, &dbc->rdata.size)) == 0) {
741 					memset(tmp_val.data, 0, data->doff);
742 					memcpy((u_int8_t *)tmp_val.data +
743 					    data->doff, data->data, data->size);
744 					myval = &tmp_val;
745 				}
746 			} else
747 				myval = (DBT *)data;
748 
749 			if (ret == 0)
750 				ret = __ham_add_el(dbc, key, myval, H_KEYDATA);
751 			goto done;
752 		}
753 		break;
754 	case DB_BEFORE:
755 	case DB_AFTER:
756 	case DB_CURRENT:
757 		ret = __ham_item(dbc, DB_LOCK_WRITE);
758 		break;
759 	}
760 
761 	if (ret == 0) {
762 		if ((flags == DB_CURRENT && !F_ISSET(hcp, H_ISDUP)) ||
763 		    ((flags == DB_KEYFIRST || flags == DB_KEYLAST) &&
764 		    !F_ISSET(dbp, DB_AM_DUP)))
765 			ret = __ham_overwrite(dbc, data);
766 		else
767 			ret = __ham_add_dup(dbc, data, flags);
768 	}
769 
770 done:	if (ret == 0 && F_ISSET(hcp, H_EXPAND)) {
771 		ret = __ham_expand_table(dbc);
772 		F_CLR(hcp, H_EXPAND);
773 	}
774 
775 	if ((t_ret = __ham_item_done(dbc, ret == 0)) != 0 && ret == 0)
776 		ret = t_ret;
777 
778 out:	RELEASE_META(dbp, hcp);
779 	RESTORE_CURSOR(dbp, hcp, &save_curs, ret);
780 	if (F_ISSET(dbp, DB_AM_CDB) && F_ISSET(dbc, DBC_RMW))
781 		(void)__lock_downgrade(dbp->dbenv->lk_info, dbc->mylock,
782 		    DB_LOCK_IWRITE, 0);
783 	return (ret);
784 }
785 
786 /********************************* UTILITIES ************************/
787 
788 /*
789  * __ham_expand_table --
790  */
791 static int
792 __ham_expand_table(dbc)
793 	DBC *dbc;
794 {
795 	DB *dbp;
796 	HASH_CURSOR *hcp;
797 	DB_LSN new_lsn;
798 	u_int32_t old_bucket, new_bucket, spare_ndx;
799 	int ret;
800 
801 	dbp = dbc->dbp;
802 	hcp = (HASH_CURSOR *)dbc->internal;
803 	ret = 0;
804 	DIRTY_META(dbp, hcp, ret);
805 	if (ret)
806 		return (ret);
807 
808 	/*
809 	 * If the split point is about to increase, make sure that we
810 	 * have enough extra pages.  The calculation here is weird.
811 	 * We'd like to do this after we've upped max_bucket, but it's
812 	 * too late then because we've logged the meta-data split.  What
813 	 * we'll do between then and now is increment max bucket and then
814 	 * see what the log of one greater than that is; here we have to
815 	 * look at the log of max + 2.  VERY NASTY STUFF.
816 	 */
817 	if (__db_log2(hcp->hdr->max_bucket + 2) > hcp->hdr->ovfl_point) {
818 		/*
819 		 * We are about to shift the split point.  Make sure that
820 		 * if the next doubling is going to be big (more than 8
821 		 * pages), we have some extra pages around.
822 		 */
823 		if (hcp->hdr->max_bucket + 1 >= 8 &&
824 		    hcp->hdr->spares[hcp->hdr->ovfl_point] <
825 		    hcp->hdr->spares[hcp->hdr->ovfl_point - 1] +
826 		    hcp->hdr->ovfl_point + 1)
827 			__ham_init_ovflpages(dbc);
828 	}
829 
830 	/* Now we can log the meta-data split. */
831 	if (DB_LOGGING(dbc)) {
832 		if ((ret = __ham_splitmeta_log(dbp->dbenv->lg_info,
833 		    dbc->txn, &new_lsn, 0, dbp->log_fileid,
834 		    hcp->hdr->max_bucket, hcp->hdr->ovfl_point,
835 		    hcp->hdr->spares[hcp->hdr->ovfl_point],
836 		    &hcp->hdr->lsn)) != 0)
837 			return (ret);
838 
839 		hcp->hdr->lsn = new_lsn;
840 	}
841 
842 	hcp->stats.hash_expansions++;
843 	new_bucket = ++hcp->hdr->max_bucket;
844 	old_bucket = (hcp->hdr->max_bucket & hcp->hdr->low_mask);
845 
846 	/*
847 	 * If the split point is increasing, copy the current contents
848 	 * of the spare split bucket to the next bucket.
849 	 */
850 	spare_ndx = __db_log2(hcp->hdr->max_bucket + 1);
851 	if (spare_ndx > hcp->hdr->ovfl_point) {
852 		hcp->hdr->spares[spare_ndx] =
853 		    hcp->hdr->spares[hcp->hdr->ovfl_point];
854 		hcp->hdr->ovfl_point = spare_ndx;
855 	}
856 
857 	if (new_bucket > hcp->hdr->high_mask) {
858 		/* Starting a new doubling */
859 		hcp->hdr->low_mask = hcp->hdr->high_mask;
860 		hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask;
861 	}
862 
863 	if (BUCKET_TO_PAGE(hcp, new_bucket) > MAX_PAGES(hcp)) {
864 		__db_err(dbp->dbenv,
865 		    "hash: Cannot allocate new bucket.  Pages exhausted.");
866 		return (ENOSPC);
867 	}
868 
869 	/* Relocate records to the new bucket */
870 	return (__ham_split_page(dbc, old_bucket, new_bucket));
871 }
872 
873 /*
874  * PUBLIC: u_int32_t __ham_call_hash __P((HASH_CURSOR *, u_int8_t *, int32_t));
875  */
876 u_int32_t
877 __ham_call_hash(hcp, k, len)
878 	HASH_CURSOR *hcp;
879 	u_int8_t *k;
880 	int32_t len;
881 {
882 	u_int32_t n, bucket;
883 
884 	n = (u_int32_t)(hcp->dbc->dbp->h_hash(k, len));
885 
886 	bucket = n & hcp->hdr->high_mask;
887 	if (bucket > hcp->hdr->max_bucket)
888 		bucket = bucket & hcp->hdr->low_mask;
889 	return (bucket);
890 }
891 
892 /*
893  * Check for duplicates, and call __db_ret appropriately.  Release
894  * everything held by the cursor.
895  */
896 static int
897 __ham_dup_return(dbc, val, flags)
898 	DBC *dbc;
899 	DBT *val;
900 	u_int32_t flags;
901 {
902 	DB *dbp;
903 	HASH_CURSOR *hcp;
904 	PAGE *pp;
905 	DBT *myval, tmp_val;
906 	db_indx_t ndx;
907 	db_pgno_t pgno;
908 	u_int32_t off, tlen;
909 	u_int8_t *hk, type;
910 	int cmp, ret;
911 	db_indx_t len;
912 
913 	/* Check for duplicate and return the first one. */
914 	dbp = dbc->dbp;
915 	hcp = (HASH_CURSOR *)dbc->internal;
916 	ndx = H_DATAINDEX(hcp->bndx);
917 	type = HPAGE_TYPE(hcp->pagep, ndx);
918 	pp = hcp->pagep;
919 	myval = val;
920 
921 	/*
922 	 * There are 4 cases:
923 	 * 1. We are not in duplicate, simply call db_ret.
924 	 * 2. We are looking at keys and stumbled onto a duplicate.
925 	 * 3. We are in the middle of a duplicate set. (ISDUP set)
926 	 * 4. This is a duplicate and we need to return a specific item.
927 	 */
928 
929 	/*
930 	 * Here we check for the case where we just stumbled onto a
931 	 * duplicate.  In this case, we do initialization and then
932 	 * let the normal duplicate code handle it.
933 	 */
934 	if (!F_ISSET(hcp, H_ISDUP))
935 		if (type == H_DUPLICATE) {
936 			F_SET(hcp, H_ISDUP);
937 			hcp->dup_tlen = LEN_HDATA(hcp->pagep,
938 			    hcp->hdr->pagesize, hcp->bndx);
939 			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
940 			if (flags == DB_LAST || flags == DB_PREV) {
941 				hcp->dndx = 0;
942 				hcp->dup_off = 0;
943 				do {
944 					memcpy(&len,
945 					    HKEYDATA_DATA(hk) + hcp->dup_off,
946 					    sizeof(db_indx_t));
947 					hcp->dup_off += DUP_SIZE(len);
948 					hcp->dndx++;
949 				} while (hcp->dup_off < hcp->dup_tlen);
950 				hcp->dup_off -= DUP_SIZE(len);
951 				hcp->dndx--;
952 			} else {
953 				memcpy(&len,
954 				    HKEYDATA_DATA(hk), sizeof(db_indx_t));
955 				hcp->dup_off = 0;
956 				hcp->dndx = 0;
957 			}
958 			hcp->dup_len = len;
959 		} else if (type == H_OFFDUP) {
960 			F_SET(hcp, H_ISDUP);
961 			memcpy(&pgno, HOFFDUP_PGNO(P_ENTRY(hcp->pagep, ndx)),
962 			    sizeof(db_pgno_t));
963 			if (flags == DB_LAST || flags == DB_PREV) {
964 				if ((ret = __db_dend(dbc,
965 				    pgno, &hcp->dpagep)) != 0)
966 					return (ret);
967 				hcp->dpgno = PGNO(hcp->dpagep);
968 				hcp->dndx = NUM_ENT(hcp->dpagep) - 1;
969 			} else if ((ret = __ham_next_cpage(dbc,
970 			    pgno, 0, H_ISDUP)) != 0)
971 				return (ret);
972 		}
973 
974 
975 	/*
976 	 * If we are retrieving a specific key/data pair, then we
977 	 * may need to adjust the cursor before returning data.
978 	 */
979 	if (flags == DB_GET_BOTH) {
980 		if (F_ISSET(hcp, H_ISDUP)) {
981 			if (hcp->dpgno != PGNO_INVALID) {
982 				if ((ret = __db_dsearch(dbc, 0, val,
983 				    hcp->dpgno, &hcp->dndx, &hcp->dpagep, &cmp))
984 				    != 0)
985 					return (ret);
986 				if (cmp == 0)
987 					hcp->dpgno = PGNO(hcp->dpagep);
988 			} else {
989 				__ham_dsearch(dbc, val, &off, &cmp);
990 				hcp->dup_off = off;
991 			}
992 		} else {
993 			hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
994 			if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
995 				memcpy(&tlen,
996 				    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
997 				memcpy(&pgno,
998 				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
999 				if ((ret = __db_moff(dbp, val,
1000 				    pgno, tlen, dbp->dup_compare, &cmp)) != 0)
1001 					return (ret);
1002 			} else {
1003 				/*
1004 				 * We do not zero tmp_val since the comparison
1005 				 * routines may only look at data and size.
1006 				 */
1007 				tmp_val.data = HKEYDATA_DATA(hk);
1008 				tmp_val.size = LEN_HDATA(hcp->pagep,
1009 				    dbp->pgsize, hcp->bndx);
1010 				cmp = dbp->dup_compare == NULL ?
1011 				    __bam_defcmp(&tmp_val, val) :
1012 				    dbp->dup_compare(&tmp_val, val);
1013 			}
1014 		}
1015 
1016 		if (cmp != 0)
1017 			return (DB_NOTFOUND);
1018 	}
1019 
1020 	/*
1021 	 * Now, everything is initialized, grab a duplicate if
1022 	 * necessary.
1023 	 */
1024 	if (F_ISSET(hcp, H_ISDUP))
1025 		if (hcp->dpgno != PGNO_INVALID) {
1026 			pp = hcp->dpagep;
1027 			ndx = hcp->dndx;
1028 		} else {
1029 			/*
1030 			 * Copy the DBT in case we are retrieving into user
1031 			 * memory and we need the parameters for it.  If the
1032 			 * user requested a partial, then we need to adjust
1033 			 * the user's parameters to get the partial of the
1034 			 * duplicate which is itself a partial.
1035 			 */
1036 			memcpy(&tmp_val, val, sizeof(*val));
1037 			if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) {
1038 				/*
1039 				 * Take the user's length unless it would go
1040 				 * beyond the end of the duplicate.
1041 				 */
1042 				if (tmp_val.doff + hcp->dup_off > hcp->dup_len)
1043 					tmp_val.dlen = 0;
1044 				else if (tmp_val.dlen + tmp_val.doff >
1045 				    hcp->dup_len)
1046 					tmp_val.dlen =
1047 					    hcp->dup_len - tmp_val.doff;
1048 
1049 				/*
1050 				 * Calculate the new offset.
1051 				 */
1052 				tmp_val.doff += hcp->dup_off;
1053 			} else {
1054 				F_SET(&tmp_val, DB_DBT_PARTIAL);
1055 				tmp_val.dlen = hcp->dup_len;
1056 				tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
1057 			}
1058 			myval = &tmp_val;
1059 		}
1060 
1061 
1062 	/*
1063 	 * Finally, if we had a duplicate, pp, ndx, and myval should be
1064 	 * set appropriately.
1065 	 */
1066 	if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata.data,
1067 	    &dbc->rdata.size)) != 0)
1068 		return (ret);
1069 
1070 	/*
1071 	 * In case we sent a temporary off to db_ret, set the real
1072 	 * return values.
1073 	 */
1074 	val->data = myval->data;
1075 	val->size = myval->size;
1076 
1077 	return (0);
1078 }
1079 
1080 static int
1081 __ham_overwrite(dbc, nval)
1082 	DBC *dbc;
1083 	DBT *nval;
1084 {
1085 	HASH_CURSOR *hcp;
1086 	DBT *myval, tmp_val;
1087 	u_int8_t *hk;
1088 
1089 	hcp = (HASH_CURSOR *)dbc->internal;
1090 	if (F_ISSET(dbc->dbp, DB_AM_DUP))
1091 		return (__ham_add_dup(dbc, nval, DB_KEYLAST));
1092 	else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
1093 		/* Put/overwrite */
1094 		memcpy(&tmp_val, nval, sizeof(*nval));
1095 		F_SET(&tmp_val, DB_DBT_PARTIAL);
1096 		tmp_val.doff = 0;
1097 		hk = H_PAIRDATA(hcp->pagep, hcp->bndx);
1098 		if (HPAGE_PTYPE(hk) == H_OFFPAGE)
1099 			memcpy(&tmp_val.dlen,
1100 			    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1101 		else
1102 			tmp_val.dlen = LEN_HDATA(hcp->pagep,
1103 			    hcp->hdr->pagesize,hcp->bndx);
1104 		myval = &tmp_val;
1105 	} else /* Regular partial put */
1106 		myval = nval;
1107 
1108 	return (__ham_replpair(dbc, myval, 0));
1109 }
1110 
1111 /*
1112  * Given a key and a cursor, sets the cursor to the page/ndx on which
1113  * the key resides.  If the key is found, the cursor H_OK flag is set
1114  * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
1115  * If the key is not found, the H_OK flag is not set.  If the sought
1116  * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
1117  * are set indicating where an add might take place.  If it is 0,
1118  * non of the cursor pointer field are valid.
1119  */
1120 static int
1121 __ham_lookup(dbc, key, sought, mode)
1122 	DBC *dbc;
1123 	const DBT *key;
1124 	u_int32_t sought;
1125 	db_lockmode_t mode;
1126 {
1127 	DB *dbp;
1128 	HASH_CURSOR *hcp;
1129 	db_pgno_t pgno;
1130 	u_int32_t tlen;
1131 	int match, ret, t_ret;
1132 	u_int8_t *hk;
1133 
1134 	dbp = dbc->dbp;
1135 	hcp = (HASH_CURSOR *)dbc->internal;
1136 	/*
1137 	 * Set up cursor so that we're looking for space to add an item
1138 	 * as we cycle through the pages looking for the key.
1139 	 */
1140 	if ((ret = __ham_item_reset(dbc)) != 0)
1141 		return (ret);
1142 	hcp->seek_size = sought;
1143 
1144 	hcp->bucket = __ham_call_hash(hcp, (u_int8_t *)key->data, key->size);
1145 	while (1) {
1146 		if ((ret = __ham_item_next(dbc, mode)) != 0)
1147 			return (ret);
1148 
1149 		if (F_ISSET(hcp, H_NOMORE))
1150 			break;
1151 
1152 		hk = H_PAIRKEY(hcp->pagep, hcp->bndx);
1153 		switch (HPAGE_PTYPE(hk)) {
1154 		case H_OFFPAGE:
1155 			memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
1156 			if (tlen == key->size) {
1157 				memcpy(&pgno,
1158 				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
1159 				if ((ret = __db_moff(dbp,
1160 				    key, pgno, tlen, NULL, &match)) != 0)
1161 					return (ret);
1162 				if (match == 0) {
1163 					F_SET(hcp, H_OK);
1164 					return (0);
1165 				}
1166 			}
1167 			break;
1168 		case H_KEYDATA:
1169 			if (key->size == LEN_HKEY(hcp->pagep,
1170 			    hcp->hdr->pagesize, hcp->bndx) &&
1171 			    memcmp(key->data,
1172 			    HKEYDATA_DATA(hk), key->size) == 0) {
1173 				F_SET(hcp, H_OK);
1174 				return (0);
1175 			}
1176 			break;
1177 		case H_DUPLICATE:
1178 		case H_OFFDUP:
1179 			/*
1180 			 * These are errors because keys are never
1181 			 * duplicated, only data items are.
1182 			 */
1183 			return (__db_pgfmt(dbp, PGNO(hcp->pagep)));
1184 		}
1185 		hcp->stats.hash_collisions++;
1186 	}
1187 
1188 	/*
1189 	 * Item was not found, adjust cursor properly.
1190 	 */
1191 
1192 	if (sought != 0)
1193 		return (ret);
1194 
1195 	if ((t_ret = __ham_item_done(dbc, 0)) != 0 && ret == 0)
1196 		ret = t_ret;
1197 	return (ret);
1198 }
1199 
1200 /*
1201  * Initialize a dbt using some possibly already allocated storage
1202  * for items.
1203  * PUBLIC: int __ham_init_dbt __P((DBT *, u_int32_t, void **, u_int32_t *));
1204  */
1205 int
1206 __ham_init_dbt(dbt, size, bufp, sizep)
1207 	DBT *dbt;
1208 	u_int32_t size;
1209 	void **bufp;
1210 	u_int32_t *sizep;
1211 {
1212 	int ret;
1213 
1214 	memset(dbt, 0, sizeof(*dbt));
1215 	if (*sizep < size) {
1216 		if ((ret = __os_realloc(bufp, size)) != 0) {
1217 			*sizep = 0;
1218 			return (ret);
1219 		}
1220 		*sizep = size;
1221 	}
1222 	dbt->data = *bufp;
1223 	dbt->size = size;
1224 	return (0);
1225 }
1226 
1227 /*
1228  * Adjust the cursor after an insert or delete.  The cursor passed is
1229  * the one that was operated upon; we just need to check any of the
1230  * others.
1231  *
1232  * len indicates the length of the item added/deleted
1233  * add indicates if the item indicated by the cursor has just been
1234  * added (add == 1) or deleted (add == 0).
1235  * dup indicates if the addition occurred into a duplicate set.
1236  *
1237  * PUBLIC: void __ham_c_update
1238  * PUBLIC:    __P((HASH_CURSOR *, db_pgno_t, u_int32_t, int, int));
1239  */
1240 void
1241 __ham_c_update(hcp, chg_pgno, len, add, is_dup)
1242 	HASH_CURSOR *hcp;
1243 	db_pgno_t chg_pgno;
1244 	u_int32_t len;
1245 	int add, is_dup;
1246 {
1247 	DB *dbp;
1248 	DBC *cp;
1249 	HASH_CURSOR *lcp;
1250 	int page_deleted;
1251 
1252 	/*
1253 	 * Regular adds are always at the end of a given page, so we never
1254 	 * have to adjust anyone's cursor after a regular add.
1255 	 */
1256 	if (!is_dup && add)
1257 		return;
1258 
1259 	/*
1260 	 * Determine if a page was deleted.    If this is a regular update
1261 	 * (i.e., not is_dup) then the deleted page's number will be that in
1262 	 * chg_pgno, and the pgno in the cursor will be different.  If this
1263 	 * was an onpage-duplicate, then the same conditions apply.  If this
1264 	 * was an off-page duplicate, then we need to verify if hcp->dpgno
1265 	 * is the same (no delete) or different (delete) than chg_pgno.
1266 	 */
1267 	if (!is_dup || hcp->dpgno == PGNO_INVALID)
1268 		page_deleted =
1269 		    chg_pgno != PGNO_INVALID && chg_pgno != hcp->pgno;
1270 	else
1271 		page_deleted =
1272 		    chg_pgno != PGNO_INVALID && chg_pgno != hcp->dpgno;
1273 
1274 	dbp = hcp->dbc->dbp;
1275 	DB_THREAD_LOCK(dbp);
1276 
1277 	for (cp = TAILQ_FIRST(&dbp->active_queue); cp != NULL;
1278 	    cp = TAILQ_NEXT(cp, links)) {
1279 		if (cp->internal == hcp)
1280 			continue;
1281 
1282 		lcp = (HASH_CURSOR *)cp->internal;
1283 
1284 		if (!is_dup && lcp->pgno != chg_pgno)
1285 			continue;
1286 
1287 		if (is_dup) {
1288 			if (F_ISSET(hcp, H_DELETED) && lcp->pgno != chg_pgno)
1289 				continue;
1290 			if (!F_ISSET(hcp, H_DELETED) && lcp->dpgno != chg_pgno)
1291 				continue;
1292 		}
1293 
1294 		if (page_deleted) {
1295 			if (is_dup) {
1296 				lcp->dpgno = hcp->dpgno;
1297 				lcp->dndx = hcp->dndx;
1298 			} else {
1299 				lcp->pgno = hcp->pgno;
1300 				lcp->bndx = hcp->bndx;
1301 				lcp->bucket = hcp->bucket;
1302 			}
1303 			F_CLR(lcp, H_ISDUP);
1304 			continue;
1305 		}
1306 
1307 		if (!is_dup && lcp->bndx > hcp->bndx)
1308 			lcp->bndx--;
1309 		else if (!is_dup && lcp->bndx == hcp->bndx)
1310 			F_SET(lcp, H_DELETED);
1311 		else if (is_dup && lcp->bndx == hcp->bndx) {
1312 			/* Assign dpgno in case there was page conversion. */
1313 			lcp->dpgno = hcp->dpgno;
1314 			if (add && lcp->dndx >= hcp->dndx )
1315 				lcp->dndx++;
1316 			else if (!add && lcp->dndx > hcp->dndx)
1317 				lcp->dndx--;
1318 			else if (!add && lcp->dndx == hcp->dndx)
1319 				F_SET(lcp, H_DELETED);
1320 
1321 			/* Now adjust on-page information. */
1322 			if (lcp->dpgno == PGNO_INVALID)
1323 				if (add) {
1324 					lcp->dup_tlen += len;
1325 					if (lcp->dndx > hcp->dndx)
1326 						lcp->dup_off += len;
1327 				} else {
1328 					lcp->dup_tlen -= len;
1329 					if (lcp->dndx > hcp->dndx)
1330 						lcp->dup_off -= len;
1331 				}
1332 		}
1333 	}
1334 	DB_THREAD_UNLOCK(dbp);
1335 }
1336 
1337