xref: /titanic_41/usr/src/cmd/sendmail/db/btree/bt_open.c (revision 7c2fbfb345896881c631598ee3852ce9ce33fb07)
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 /*
8  * Copyright (c) 1990, 1993, 1994, 1995, 1996
9  *	Keith Bostic.  All rights reserved.
10  */
11 /*
12  * Copyright (c) 1990, 1993, 1994, 1995
13  *	The Regents of the University of California.  All rights reserved.
14  *
15  * This code is derived from software contributed to Berkeley by
16  * Mike Olson.
17  *
18  * Redistribution and use in source and binary forms, with or without
19  * modification, are permitted provided that the following conditions
20  * are met:
21  * 1. Redistributions of source code must retain the above copyright
22  *    notice, this list of conditions and the following disclaimer.
23  * 2. Redistributions in binary form must reproduce the above copyright
24  *    notice, this list of conditions and the following disclaimer in the
25  *    documentation and/or other materials provided with the distribution.
26  * 3. All advertising materials mentioning features or use of this software
27  *    must display the following acknowledgement:
28  *	This product includes software developed by the University of
29  *	California, Berkeley and its contributors.
30  * 4. Neither the name of the University nor the names of its contributors
31  *    may be used to endorse or promote products derived from this software
32  *    without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
35  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
44  * SUCH DAMAGE.
45  */
46 
47 #include "config.h"
48 
49 #ifndef lint
50 static const char sccsid[] = "@(#)bt_open.c	10.39 (Sleepycat) 11/21/98";
51 #endif /* not lint */
52 
53 #ifndef NO_SYSTEM_INCLUDES
54 #include <sys/types.h>
55 
56 #include <errno.h>
57 #include <limits.h>
58 #include <string.h>
59 #endif
60 
61 #include "db_int.h"
62 #include "db_page.h"
63 #include "btree.h"
64 
65 /*
66  * __bam_open --
67  *	Open a btree.
68  *
69  * PUBLIC: int __bam_open __P((DB *, DB_INFO *));
70  */
71 int
72 __bam_open(dbp, dbinfo)
73 	DB *dbp;
74 	DB_INFO *dbinfo;
75 {
76 	BTREE *t;
77 	int ret;
78 
79 	/* Allocate and initialize the private btree structure. */
80 	if ((ret = __os_calloc(1, sizeof(BTREE), &t)) != 0)
81 		return (ret);
82 	dbp->internal = t;
83 
84 	/*
85 	 * Intention is to make sure all of the user's selections are okay
86 	 * here and then use them without checking.
87 	 */
88 	if (dbinfo == NULL) {
89 		t->bt_minkey = DEFMINKEYPAGE;
90 		t->bt_compare = __bam_defcmp;
91 		t->bt_prefix = __bam_defpfx;
92 	} else {
93 		/* Minimum number of keys per page. */
94 		if (dbinfo->bt_minkey == 0)
95 			t->bt_minkey = DEFMINKEYPAGE;
96 		else {
97 			if (dbinfo->bt_minkey < 2)
98 				goto einval;
99 			t->bt_minkey = dbinfo->bt_minkey;
100 		}
101 
102 		/* Maximum number of keys per page. */
103 		if (dbinfo->bt_maxkey == 0)
104 			t->bt_maxkey = 0;
105 		else {
106 			if (dbinfo->bt_maxkey < 1)
107 				goto einval;
108 			t->bt_maxkey = dbinfo->bt_maxkey;
109 		}
110 
111 		/*
112 		 * If no comparison, use default comparison.  If no comparison
113 		 * and no prefix, use default prefix.  (We can't default the
114 		 * prefix if the user supplies a comparison routine; shortening
115 		 * the keys may break their comparison algorithm.  We don't
116 		 * permit the user to specify a prefix routine if they didn't
117 		 * also specify a comparison routine, they can't know enough
118 		 * about our comparison routine to get it right.)
119 		 */
120 		if ((t->bt_compare = dbinfo->bt_compare) == NULL) {
121 			if (dbinfo->bt_prefix != NULL)
122 				goto einval;
123 			t->bt_compare = __bam_defcmp;
124 			t->bt_prefix = __bam_defpfx;
125 		} else
126 			t->bt_prefix = dbinfo->bt_prefix;
127 	}
128 
129 	/* Initialize the remaining fields/methods of the DB. */
130 	dbp->am_close = __bam_close;
131 	dbp->del = __bam_delete;
132 	dbp->stat = __bam_stat;
133 
134 	/* Start up the tree. */
135 	if ((ret = __bam_read_root(dbp)) != 0)
136 		goto err;
137 
138 	/* Set the overflow page size. */
139 	__bam_setovflsize(dbp);
140 
141 	return (0);
142 
143 einval:	ret = EINVAL;
144 
145 err:	__os_free(t, sizeof(BTREE));
146 	return (ret);
147 }
148 
149 /*
150  * __bam_close --
151  *	Close a btree.
152  *
153  * PUBLIC: int __bam_close __P((DB *));
154  */
155 int
156 __bam_close(dbp)
157 	DB *dbp;
158 {
159 	__os_free(dbp->internal, sizeof(BTREE));
160 	dbp->internal = NULL;
161 
162 	return (0);
163 }
164 
165 /*
166  * __bam_setovflsize --
167  *
168  * PUBLIC: void __bam_setovflsize __P((DB *));
169  */
170 void
171 __bam_setovflsize(dbp)
172 	DB *dbp;
173 {
174 	BTREE *t;
175 
176 	t = dbp->internal;
177 
178 	/*
179 	 * !!!
180 	 * Correction for recno, which doesn't know anything about minimum
181 	 * keys per page.
182 	 */
183 	if (t->bt_minkey == 0)
184 		t->bt_minkey = DEFMINKEYPAGE;
185 
186 	/*
187 	 * The btree data structure requires that at least two key/data pairs
188 	 * can fit on a page, but other than that there's no fixed requirement.
189 	 * Translate the minimum number of items into the bytes a key/data pair
190 	 * can use before being placed on an overflow page.  We calculate for
191 	 * the worst possible alignment by assuming every item requires the
192 	 * maximum alignment for padding.
193 	 *
194 	 * Recno uses the btree bt_ovflsize value -- it's close enough.
195 	 */
196 	t->bt_ovflsize = (dbp->pgsize - P_OVERHEAD) / (t->bt_minkey * P_INDX)
197 	    - (BKEYDATA_PSIZE(0) + ALIGN(1, 4));
198 }
199 
200 /*
201  * __bam_read_root --
202  *	Check (and optionally create) a tree.
203  *
204  * PUBLIC: int __bam_read_root __P((DB *));
205  */
206 int
207 __bam_read_root(dbp)
208 	DB *dbp;
209 {
210 	BTMETA *meta;
211 	BTREE *t;
212 	DBC *dbc;
213 	DB_LOCK metalock, rootlock;
214 	PAGE *root;
215 	db_pgno_t pgno;
216 	int ret, t_ret;
217 
218 	ret = 0;
219 	t = dbp->internal;
220 
221 	/* Get a cursor. */
222 	if ((ret = dbp->cursor(dbp, NULL, &dbc, 0)) != 0)
223 		return (ret);
224 
225 	/* Get, and optionally create the metadata page. */
226 	pgno = PGNO_METADATA;
227 	if ((ret =
228 	    __bam_lget(dbc, 0, PGNO_METADATA, DB_LOCK_WRITE, &metalock)) != 0)
229 		goto err;
230 	if ((ret =
231 	    memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, (PAGE **)&meta)) != 0) {
232 		(void)__BT_LPUT(dbc, metalock);
233 		goto err;
234 	}
235 
236 	/*
237 	 * If the magic number is correct, we're not creating the tree.
238 	 * Correct any fields that may not be right.  Note, all of the
239 	 * local flags were set by db_open(3).
240 	 */
241 	if (meta->magic != 0) {
242 		t->bt_maxkey = meta->maxkey;
243 		t->bt_minkey = meta->minkey;
244 
245 		(void)memp_fput(dbp->mpf, (PAGE *)meta, 0);
246 		(void)__BT_LPUT(dbc, metalock);
247 		goto done;
248 	}
249 
250 	/* Initialize the tree structure metadata information. */
251 	memset(meta, 0, sizeof(BTMETA));
252 	ZERO_LSN(meta->lsn);
253 	meta->pgno = PGNO_METADATA;
254 	meta->magic = DB_BTREEMAGIC;
255 	meta->version = DB_BTREEVERSION;
256 	meta->pagesize = dbp->pgsize;
257 	meta->maxkey = t->bt_maxkey;
258 	meta->minkey = t->bt_minkey;
259 	meta->free = PGNO_INVALID;
260 	if (dbp->type == DB_RECNO)
261 		F_SET(meta, BTM_RECNO);
262 	if (F_ISSET(dbp, DB_AM_DUP))
263 		F_SET(meta, BTM_DUP);
264 	if (F_ISSET(dbp, DB_RE_FIXEDLEN))
265 		F_SET(meta, BTM_FIXEDLEN);
266 	if (F_ISSET(dbp, DB_BT_RECNUM))
267 		F_SET(meta, BTM_RECNUM);
268 	if (F_ISSET(dbp, DB_RE_RENUMBER))
269 		F_SET(meta, BTM_RENUMBER);
270 	memcpy(meta->uid, dbp->fileid, DB_FILE_ID_LEN);
271 
272 	/* Create and initialize a root page. */
273 	pgno = PGNO_ROOT;
274 	if ((ret =
275 	    __bam_lget(dbc, 0, PGNO_ROOT, DB_LOCK_WRITE, &rootlock)) != 0)
276 		goto err;
277 	if ((ret = memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &root)) != 0) {
278 		(void)__BT_LPUT(dbc, rootlock);
279 		goto err;
280 	}
281 	P_INIT(root, dbp->pgsize, PGNO_ROOT, PGNO_INVALID,
282 	    PGNO_INVALID, 1, dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE);
283 	ZERO_LSN(root->lsn);
284 
285 	/* Release the metadata and root pages. */
286 	if ((ret = memp_fput(dbp->mpf, (PAGE *)meta, DB_MPOOL_DIRTY)) != 0)
287 		goto err;
288 	if ((ret = memp_fput(dbp->mpf, root, DB_MPOOL_DIRTY)) != 0)
289 		goto err;
290 
291 	/*
292 	 * Flush the metadata and root pages to disk -- since the user can't
293 	 * transaction protect open, the pages have to exist during recovery.
294 	 *
295 	 * XXX
296 	 * It's not useful to return not-yet-flushed here -- convert it to
297 	 * an error.
298 	 */
299 	if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
300 		ret = EINVAL;
301 
302 	/* Release the locks. */
303 	(void)__BT_LPUT(dbc, metalock);
304 	(void)__BT_LPUT(dbc, rootlock);
305 
306 err:
307 done:	if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
308 		ret = t_ret;
309 	return (ret);
310 }
311