1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 
8 #include "config.h"
9 
10 #ifndef lint
11 static const char sccsid[] = "@(#)db_am.c	10.15 (Sleepycat) 12/30/98";
12 #endif /* not lint */
13 
14 #ifndef NO_SYSTEM_INCLUDES
15 #include <sys/types.h>
16 
17 #include <errno.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #endif
21 
22 #include "db_int.h"
23 #include "shqueue.h"
24 #include "db_page.h"
25 #include "db_shash.h"
26 #include "mp.h"
27 #include "btree.h"
28 #include "hash.h"
29 #include "db_am.h"
30 #include "db_ext.h"
31 
32 static int __db_c_close __P((DBC *));
33 static int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t));
34 static int __db_fd __P((DB *, int *));
35 static int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
36 static int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
37 
38 /*
39  * __db_init_wrapper --
40  *	Wrapper layer to implement generic DB functions.
41  *
42  * PUBLIC: int __db_init_wrapper __P((DB *));
43  */
44 int
__db_init_wrapper(dbp)45 __db_init_wrapper(dbp)
46 	DB *dbp;
47 {
48 	dbp->close = __db_close;
49 	dbp->cursor = __db_cursor;
50 	dbp->del = NULL;		/* !!! Must be set by access method. */
51 	dbp->fd = __db_fd;
52 	dbp->get = __db_get;
53 	dbp->join = __db_join;
54 	dbp->put = __db_put;
55 	dbp->stat = NULL;		/* !!! Must be set by access method. */
56 	dbp->sync = __db_sync;
57 
58 	return (0);
59 }
60 
61 /*
62  * __db_cursor --
63  *	Allocate and return a cursor.
64  */
65 static int
__db_cursor(dbp,txn,dbcp,flags)66 __db_cursor(dbp, txn, dbcp, flags)
67 	DB *dbp;
68 	DB_TXN *txn;
69 	DBC **dbcp;
70 	u_int32_t flags;
71 {
72 	DBC *dbc, *adbc;
73 	int ret;
74 	db_lockmode_t mode;
75 	u_int32_t op;
76 
77 	DB_PANIC_CHECK(dbp);
78 
79 	/* Take one from the free list if it's available. */
80 	DB_THREAD_LOCK(dbp);
81 	if ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
82 		TAILQ_REMOVE(&dbp->free_queue, dbc, links);
83 	else {
84 		DB_THREAD_UNLOCK(dbp);
85 
86 		if ((ret = __os_calloc(1, sizeof(DBC), &dbc)) != 0)
87 			return (ret);
88 
89 		dbc->dbp = dbp;
90 		dbc->c_close = __db_c_close;
91 
92 		/* Set up locking information. */
93 		if (F_ISSET(dbp, DB_AM_LOCKING | DB_AM_CDB)) {
94  			/*
95  			 * If we are not threaded, then there is no need to
96  			 * create new locker ids.  We know that no one else
97  			 * is running concurrently using this DB, so we can
98  			 * take a peek at any cursors on the active queue.
99  			 */
100  			if (!F_ISSET(dbp, DB_AM_THREAD) &&
101  			    (adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
102  				dbc->lid = adbc->lid;
103  			else
104  				if ((ret = lock_id(dbp->dbenv->lk_info,
105  				    &dbc->lid)) != 0)
106  					goto err;
107 
108 			memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
109 			if (F_ISSET(dbp, DB_AM_CDB)) {
110 				dbc->lock_dbt.size = DB_FILE_ID_LEN;
111 				dbc->lock_dbt.data = dbc->lock.fileid;
112 			} else {
113 				dbc->lock_dbt.size = sizeof(dbc->lock);
114 				dbc->lock_dbt.data = &dbc->lock;
115 			}
116 		}
117 
118 		switch (dbp->type) {
119 		case DB_BTREE:
120 		case DB_RECNO:
121 			if ((ret = __bam_c_init(dbc)) != 0)
122 				goto err;
123 			break;
124 		case DB_HASH:
125 			if ((ret = __ham_c_init(dbc)) != 0)
126 				goto err;
127 			break;
128 		default:
129 			ret = EINVAL;
130 			goto err;
131 		}
132 
133 		DB_THREAD_LOCK(dbp);
134 	}
135 
136 	if ((dbc->txn = txn) == NULL)
137 		dbc->locker = dbc->lid;
138 	else
139 		dbc->locker = txn->txnid;
140 
141 	TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links);
142 	DB_THREAD_UNLOCK(dbp);
143 
144 	/*
145 	 * If this is the concurrent DB product, then we do all locking
146 	 * in the interface, which is right here.
147 	 */
148 	if (F_ISSET(dbp, DB_AM_CDB)) {
149 		op = LF_ISSET(DB_OPFLAGS_MASK);
150 		mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE :
151 		    (LF_ISSET(DB_RMW) ? DB_LOCK_IWRITE : DB_LOCK_READ);
152 		if ((ret = lock_get(dbp->dbenv->lk_info, dbc->locker, 0,
153 		    &dbc->lock_dbt, mode, &dbc->mylock)) != 0) {
154 			(void)__db_c_close(dbc);
155 			return (EAGAIN);
156 		}
157 		if (LF_ISSET(DB_RMW))
158 			F_SET(dbc, DBC_RMW);
159 		if (op == DB_WRITELOCK)
160 			F_SET(dbc, DBC_WRITER);
161 	}
162 
163 	*dbcp = dbc;
164 	return (0);
165 
166 err:	__os_free(dbc, sizeof(*dbc));
167 	return (ret);
168 }
169 
170 /*
171  * __db_c_close --
172  *	Close the cursor (recycle for later use).
173  */
174 static int
__db_c_close(dbc)175 __db_c_close(dbc)
176 	DBC *dbc;
177 {
178 	DB *dbp;
179 	int ret, t_ret;
180 
181 	dbp = dbc->dbp;
182 
183 	DB_PANIC_CHECK(dbp);
184 
185 	ret = 0;
186 
187 	/*
188 	 * We cannot release the lock until after we've called the
189 	 * access method specific routine, since btrees may have pending
190 	 * deletes.
191 	 */
192 
193 	/* Remove the cursor from the active queue. */
194 	DB_THREAD_LOCK(dbp);
195 	TAILQ_REMOVE(&dbp->active_queue, dbc, links);
196 	DB_THREAD_UNLOCK(dbp);
197 
198 	/* Call the access specific cursor close routine. */
199 	if ((t_ret = dbc->c_am_close(dbc)) != 0 && ret == 0)
200 		t_ret = ret;
201 
202 	/* Release the lock. */
203 	if (F_ISSET(dbc->dbp, DB_AM_CDB) && dbc->mylock != LOCK_INVALID) {
204 		ret = lock_put(dbc->dbp->dbenv->lk_info, dbc->mylock);
205 		dbc->mylock = LOCK_INVALID;
206 	}
207 
208 	/* Clean up the cursor. */
209 	dbc->flags = 0;
210 
211 #ifdef DEBUG
212 	/*
213 	 * Check for leftover locks, unless we're running with transactions.
214 	 *
215 	 * If we're running tests, display any locks currently held.  It's
216 	 * possible that some applications may hold locks for long periods,
217 	 * e.g., conference room locks, but the DB tests should never close
218 	 * holding locks.
219 	 */
220 	if (F_ISSET(dbp, DB_AM_LOCKING) && dbc->lid == dbc->locker) {
221 		DB_LOCKREQ request;
222 
223 		request.op = DB_LOCK_DUMP;
224 		if ((t_ret = lock_vec(dbp->dbenv->lk_info,
225 		    dbc->locker, 0, &request, 1, NULL)) != 0 && ret == 0)
226 			ret = EAGAIN;
227 	}
228 #endif
229 	/* Move the cursor to the free queue. */
230 	DB_THREAD_LOCK(dbp);
231 	TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links);
232 	DB_THREAD_UNLOCK(dbp);
233 
234 	return (ret);
235 }
236 
#ifdef DEBUG
/*
 * __db_cprint --
 *	Display the current cursor list.
 *
 *	Writes one line to stderr for every cursor on the handle's active
 *	queue: the cursor, DB handle and transaction addresses, the locker
 *	ids, and the names of the cursor flags that are set.  Always
 *	returns 0.
 *
 * PUBLIC: int __db_cprint __P((DB *));
 */
int
__db_cprint(dbp)
	DB *dbp;
{
	static const FN fn[] = {
		{ DBC_RECOVER, 	"recover" },
		{ DBC_RMW, 	"read-modify-write" },
		{ DBC_WRITER, 	"writer" },
		{ 0 },
	};
	DBC *dbc;

	DB_THREAD_LOCK(dbp);
	for (dbc = TAILQ_FIRST(&dbp->active_queue);
	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
		/*
		 * Print addresses with %p: casting a pointer to u_int
		 * truncates it wherever pointers are wider than an int.
		 */
		fprintf(stderr,
		    "%p: dbp: %p txn: %p lid: %lu locker: %lu",
		    (void *)dbc, (void *)dbc->dbp, (void *)dbc->txn,
		    (u_long)dbc->lid, (u_long)dbc->locker);
		__db_prflags(dbc->flags, fn, stderr);
		fprintf(stderr, "\n");
	}
	DB_THREAD_UNLOCK(dbp);

	return (0);
}
#endif /* DEBUG */
270 
271 /*
272  * __db_c_destroy --
273  *	Destroy the cursor.
274  *
275  * PUBLIC: int __db_c_destroy __P((DBC *));
276  */
277 int
__db_c_destroy(dbc)278 __db_c_destroy(dbc)
279 	DBC *dbc;
280 {
281 	DB *dbp;
282 	int ret;
283 
284 	dbp = dbc->dbp;
285 
286 	/* Remove the cursor from the free queue. */
287 	DB_THREAD_LOCK(dbp);
288 	TAILQ_REMOVE(&dbp->free_queue, dbc, links);
289 	DB_THREAD_UNLOCK(dbp);
290 
291 	/* Call the access specific cursor destroy routine. */
292 	ret = dbc->c_am_destroy == NULL ? 0 : dbc->c_am_destroy(dbc);
293 
294 	/* Free up allocated memory. */
295 	if (dbc->rkey.data != NULL)
296 		__os_free(dbc->rkey.data, dbc->rkey.ulen);
297 	if (dbc->rdata.data != NULL)
298 		__os_free(dbc->rdata.data, dbc->rdata.ulen);
299 	__os_free(dbc, sizeof(*dbc));
300 
301 	return (0);
302 }
303 
304 /*
305  * db_fd --
306  *	Return a file descriptor for flock'ing.
307  */
308 static int
__db_fd(dbp,fdp)309 __db_fd(dbp, fdp)
310         DB *dbp;
311 	int *fdp;
312 {
313 	DB_PANIC_CHECK(dbp);
314 
315 	/*
316 	 * XXX
317 	 * Truly spectacular layering violation.
318 	 */
319 	return (__mp_xxx_fd(dbp->mpf, fdp));
320 }
321 
322 /*
323  * __db_get --
324  *	Return a key/data pair.
325  */
326 static int
__db_get(dbp,txn,key,data,flags)327 __db_get(dbp, txn, key, data, flags)
328 	DB *dbp;
329 	DB_TXN *txn;
330 	DBT *key, *data;
331 	u_int32_t flags;
332 {
333 	DBC *dbc;
334 	int ret, t_ret;
335 
336 	DB_PANIC_CHECK(dbp);
337 
338 	if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
339 		return (ret);
340 
341 	if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
342 		return (ret);
343 
344 	DEBUG_LREAD(dbc, txn, "__db_get", key, NULL, flags);
345 
346 	ret = dbc->c_get(dbc, key, data,
347 	    flags == 0 || flags == DB_RMW ? flags | DB_SET : flags);
348 
349 	if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
350 		ret = t_ret;
351 
352 	return (ret);
353 }
354 
355 /*
356  * __db_put --
357  *	Store a key/data pair.
358  */
359 static int
__db_put(dbp,txn,key,data,flags)360 __db_put(dbp, txn, key, data, flags)
361 	DB *dbp;
362 	DB_TXN *txn;
363 	DBT *key, *data;
364 	u_int32_t flags;
365 {
366 	DBC *dbc;
367 	DBT tdata;
368 	int ret, t_ret;
369 
370 	DB_PANIC_CHECK(dbp);
371 
372 	if ((ret = __db_putchk(dbp, key, data,
373 	    flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
374 		return (ret);
375 
376 	if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
377 		return (ret);
378 
379 	DEBUG_LWRITE(dbc, txn, "__db_put", key, data, flags);
380 
381 	if (flags == DB_NOOVERWRITE) {
382 		/*
383 		 * Set DB_DBT_USERMEM, this might be a threaded application and
384 		 * the flags checking will catch us.  We don't want the actual
385 		 * data, so request a partial of length 0.
386 		 */
387 		memset(&tdata, 0, sizeof(tdata));
388 		F_SET(&tdata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
389 		if ((ret = dbc->c_get(dbc, key, &tdata, DB_SET | DB_RMW)) == 0)
390 			ret = DB_KEYEXIST;
391 		else if (ret == DB_NOTFOUND)
392 			ret = 0;
393 	}
394 	if (ret == 0)
395 		ret = dbc->c_put(dbc, key, data, DB_KEYLAST);
396 
397 	if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
398 		ret = t_ret;
399 
400 	return (ret);
401 }
402 
403 /*
404  * __db_sync --
405  *	Flush the database cache.
406  *
407  * PUBLIC: int __db_sync __P((DB *, u_int32_t));
408  */
409 int
__db_sync(dbp,flags)410 __db_sync(dbp, flags)
411 	DB *dbp;
412 	u_int32_t flags;
413 {
414 	int ret;
415 
416 	DB_PANIC_CHECK(dbp);
417 
418 	if ((ret = __db_syncchk(dbp, flags)) != 0)
419 		return (ret);
420 
421 	/* If it wasn't possible to modify the file, we're done. */
422 	if (F_ISSET(dbp, DB_AM_INMEM | DB_AM_RDONLY))
423 		return (0);
424 
425 	/* Flush any dirty pages from the cache to the backing file. */
426 	if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
427 		ret = 0;
428 
429 	return (ret);
430 }
431