1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1998
5 * Sleepycat Software. All rights reserved.
6 */
7
8 #pragma ident "%Z%%M% %I% %E% SMI"
9
10 #include "config.h"
11
12 #ifndef lint
13 static const char sccsid[] = "@(#)db_am.c 10.15 (Sleepycat) 12/30/98";
14 #endif /* not lint */
15
16 #ifndef NO_SYSTEM_INCLUDES
17 #include <sys/types.h>
18
19 #include <errno.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #endif
23
24 #include "db_int.h"
25 #include "shqueue.h"
26 #include "db_page.h"
27 #include "db_shash.h"
28 #include "mp.h"
29 #include "btree.h"
30 #include "hash.h"
31 #include "db_am.h"
32 #include "db_ext.h"
33
/*
 * Forward declarations for the file-local routines: the cursor close method
 * that __db_cursor installs on each new cursor, and the DB handle methods
 * (cursor, fd, get, put) that __db_init_wrapper installs on the handle.
 */
static int __db_c_close __P((DBC *));
static int __db_cursor __P((DB *, DB_TXN *, DBC **, u_int32_t));
static int __db_fd __P((DB *, int *));
static int __db_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
static int __db_put __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t));
39
40 /*
41 * __db_init_wrapper --
42 * Wrapper layer to implement generic DB functions.
43 *
44 * PUBLIC: int __db_init_wrapper __P((DB *));
45 */
46 int
__db_init_wrapper(dbp)47 __db_init_wrapper(dbp)
48 DB *dbp;
49 {
50 dbp->close = __db_close;
51 dbp->cursor = __db_cursor;
52 dbp->del = NULL; /* !!! Must be set by access method. */
53 dbp->fd = __db_fd;
54 dbp->get = __db_get;
55 dbp->join = __db_join;
56 dbp->put = __db_put;
57 dbp->stat = NULL; /* !!! Must be set by access method. */
58 dbp->sync = __db_sync;
59
60 return (0);
61 }
62
63 /*
64 * __db_cursor --
65 * Allocate and return a cursor.
66 */
67 static int
__db_cursor(dbp,txn,dbcp,flags)68 __db_cursor(dbp, txn, dbcp, flags)
69 DB *dbp;
70 DB_TXN *txn;
71 DBC **dbcp;
72 u_int32_t flags;
73 {
74 DBC *dbc, *adbc;
75 int ret;
76 db_lockmode_t mode;
77 u_int32_t op;
78
79 DB_PANIC_CHECK(dbp);
80
81 /* Take one from the free list if it's available. */
82 DB_THREAD_LOCK(dbp);
83 if ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL)
84 TAILQ_REMOVE(&dbp->free_queue, dbc, links);
85 else {
86 DB_THREAD_UNLOCK(dbp);
87
88 if ((ret = __os_calloc(1, sizeof(DBC), &dbc)) != 0)
89 return (ret);
90
91 dbc->dbp = dbp;
92 dbc->c_close = __db_c_close;
93
94 /* Set up locking information. */
95 if (F_ISSET(dbp, DB_AM_LOCKING | DB_AM_CDB)) {
96 /*
97 * If we are not threaded, then there is no need to
98 * create new locker ids. We know that no one else
99 * is running concurrently using this DB, so we can
100 * take a peek at any cursors on the active queue.
101 */
102 if (!F_ISSET(dbp, DB_AM_THREAD) &&
103 (adbc = TAILQ_FIRST(&dbp->active_queue)) != NULL)
104 dbc->lid = adbc->lid;
105 else
106 if ((ret = lock_id(dbp->dbenv->lk_info,
107 &dbc->lid)) != 0)
108 goto err;
109
110 memcpy(dbc->lock.fileid, dbp->fileid, DB_FILE_ID_LEN);
111 if (F_ISSET(dbp, DB_AM_CDB)) {
112 dbc->lock_dbt.size = DB_FILE_ID_LEN;
113 dbc->lock_dbt.data = dbc->lock.fileid;
114 } else {
115 dbc->lock_dbt.size = sizeof(dbc->lock);
116 dbc->lock_dbt.data = &dbc->lock;
117 }
118 }
119
120 switch (dbp->type) {
121 case DB_BTREE:
122 case DB_RECNO:
123 if ((ret = __bam_c_init(dbc)) != 0)
124 goto err;
125 break;
126 case DB_HASH:
127 if ((ret = __ham_c_init(dbc)) != 0)
128 goto err;
129 break;
130 default:
131 ret = EINVAL;
132 goto err;
133 }
134
135 DB_THREAD_LOCK(dbp);
136 }
137
138 if ((dbc->txn = txn) == NULL)
139 dbc->locker = dbc->lid;
140 else
141 dbc->locker = txn->txnid;
142
143 TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links);
144 DB_THREAD_UNLOCK(dbp);
145
146 /*
147 * If this is the concurrent DB product, then we do all locking
148 * in the interface, which is right here.
149 */
150 if (F_ISSET(dbp, DB_AM_CDB)) {
151 op = LF_ISSET(DB_OPFLAGS_MASK);
152 mode = (op == DB_WRITELOCK) ? DB_LOCK_WRITE :
153 (LF_ISSET(DB_RMW) ? DB_LOCK_IWRITE : DB_LOCK_READ);
154 if ((ret = lock_get(dbp->dbenv->lk_info, dbc->locker, 0,
155 &dbc->lock_dbt, mode, &dbc->mylock)) != 0) {
156 (void)__db_c_close(dbc);
157 return (EAGAIN);
158 }
159 if (LF_ISSET(DB_RMW))
160 F_SET(dbc, DBC_RMW);
161 if (op == DB_WRITELOCK)
162 F_SET(dbc, DBC_WRITER);
163 }
164
165 *dbcp = dbc;
166 return (0);
167
168 err: __os_free(dbc, sizeof(*dbc));
169 return (ret);
170 }
171
172 /*
173 * __db_c_close --
174 * Close the cursor (recycle for later use).
175 */
176 static int
__db_c_close(dbc)177 __db_c_close(dbc)
178 DBC *dbc;
179 {
180 DB *dbp;
181 int ret, t_ret;
182
183 dbp = dbc->dbp;
184
185 DB_PANIC_CHECK(dbp);
186
187 ret = 0;
188
189 /*
190 * We cannot release the lock until after we've called the
191 * access method specific routine, since btrees may have pending
192 * deletes.
193 */
194
195 /* Remove the cursor from the active queue. */
196 DB_THREAD_LOCK(dbp);
197 TAILQ_REMOVE(&dbp->active_queue, dbc, links);
198 DB_THREAD_UNLOCK(dbp);
199
200 /* Call the access specific cursor close routine. */
201 if ((t_ret = dbc->c_am_close(dbc)) != 0 && ret == 0)
202 t_ret = ret;
203
204 /* Release the lock. */
205 if (F_ISSET(dbc->dbp, DB_AM_CDB) && dbc->mylock != LOCK_INVALID) {
206 ret = lock_put(dbc->dbp->dbenv->lk_info, dbc->mylock);
207 dbc->mylock = LOCK_INVALID;
208 }
209
210 /* Clean up the cursor. */
211 dbc->flags = 0;
212
213 #ifdef DEBUG
214 /*
215 * Check for leftover locks, unless we're running with transactions.
216 *
217 * If we're running tests, display any locks currently held. It's
218 * possible that some applications may hold locks for long periods,
219 * e.g., conference room locks, but the DB tests should never close
220 * holding locks.
221 */
222 if (F_ISSET(dbp, DB_AM_LOCKING) && dbc->lid == dbc->locker) {
223 DB_LOCKREQ request;
224
225 request.op = DB_LOCK_DUMP;
226 if ((t_ret = lock_vec(dbp->dbenv->lk_info,
227 dbc->locker, 0, &request, 1, NULL)) != 0 && ret == 0)
228 ret = EAGAIN;
229 }
230 #endif
231 /* Move the cursor to the free queue. */
232 DB_THREAD_LOCK(dbp);
233 TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links);
234 DB_THREAD_UNLOCK(dbp);
235
236 return (ret);
237 }
238
#ifdef DEBUG
/*
 * __db_cprint --
 *	Display the current cursor list.
 *
 *	Writes one line to stderr for every cursor on the handle's active
 *	queue: the cursor, DB handle and transaction addresses, the locker
 *	ids, and the names of the cursor flags that are set.  The queue is
 *	walked with the handle's thread mutex held.  Always returns 0.
 *
 * PUBLIC: int __db_cprint __P((DB *));
 */
int
__db_cprint(dbp)
	DB *dbp;
{
	static const FN fn[] = {
		{ DBC_RECOVER,	"recover" },
		{ DBC_RMW,	"read-modify-write" },
		{ 0 },
	};
	DBC *dbc;

	DB_THREAD_LOCK(dbp);
	for (dbc = TAILQ_FIRST(&dbp->active_queue);
	    dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
		/*
		 * Print addresses with %p: casting a pointer to u_int
		 * truncates it wherever pointers are wider than unsigned int.
		 */
		fprintf(stderr,
		    "%p: dbp: %p txn: %p lid: %lu locker: %lu",
		    (void *)dbc, (void *)dbc->dbp, (void *)dbc->txn,
		    (u_long)dbc->lid, (u_long)dbc->locker);
		__db_prflags(dbc->flags, fn, stderr);
		fprintf(stderr, "\n");
	}
	DB_THREAD_UNLOCK(dbp);

	return (0);
}
#endif /* DEBUG */
272
273 /*
274 * __db_c_destroy --
275 * Destroy the cursor.
276 *
277 * PUBLIC: int __db_c_destroy __P((DBC *));
278 */
279 int
__db_c_destroy(dbc)280 __db_c_destroy(dbc)
281 DBC *dbc;
282 {
283 DB *dbp;
284 int ret;
285
286 dbp = dbc->dbp;
287
288 /* Remove the cursor from the free queue. */
289 DB_THREAD_LOCK(dbp);
290 TAILQ_REMOVE(&dbp->free_queue, dbc, links);
291 DB_THREAD_UNLOCK(dbp);
292
293 /* Call the access specific cursor destroy routine. */
294 ret = dbc->c_am_destroy == NULL ? 0 : dbc->c_am_destroy(dbc);
295
296 /* Free up allocated memory. */
297 if (dbc->rkey.data != NULL)
298 __os_free(dbc->rkey.data, dbc->rkey.ulen);
299 if (dbc->rdata.data != NULL)
300 __os_free(dbc->rdata.data, dbc->rdata.ulen);
301 __os_free(dbc, sizeof(*dbc));
302
303 return (0);
304 }
305
306 /*
307 * db_fd --
308 * Return a file descriptor for flock'ing.
309 */
310 static int
__db_fd(dbp,fdp)311 __db_fd(dbp, fdp)
312 DB *dbp;
313 int *fdp;
314 {
315 DB_PANIC_CHECK(dbp);
316
317 /*
318 * XXX
319 * Truly spectacular layering violation.
320 */
321 return (__mp_xxx_fd(dbp->mpf, fdp));
322 }
323
324 /*
325 * __db_get --
326 * Return a key/data pair.
327 */
328 static int
__db_get(dbp,txn,key,data,flags)329 __db_get(dbp, txn, key, data, flags)
330 DB *dbp;
331 DB_TXN *txn;
332 DBT *key, *data;
333 u_int32_t flags;
334 {
335 DBC *dbc;
336 int ret, t_ret;
337
338 DB_PANIC_CHECK(dbp);
339
340 if ((ret = __db_getchk(dbp, key, data, flags)) != 0)
341 return (ret);
342
343 if ((ret = dbp->cursor(dbp, txn, &dbc, 0)) != 0)
344 return (ret);
345
346 DEBUG_LREAD(dbc, txn, "__db_get", key, NULL, flags);
347
348 ret = dbc->c_get(dbc, key, data,
349 flags == 0 || flags == DB_RMW ? flags | DB_SET : flags);
350
351 if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
352 ret = t_ret;
353
354 return (ret);
355 }
356
357 /*
358 * __db_put --
359 * Store a key/data pair.
360 */
361 static int
__db_put(dbp,txn,key,data,flags)362 __db_put(dbp, txn, key, data, flags)
363 DB *dbp;
364 DB_TXN *txn;
365 DBT *key, *data;
366 u_int32_t flags;
367 {
368 DBC *dbc;
369 DBT tdata;
370 int ret, t_ret;
371
372 DB_PANIC_CHECK(dbp);
373
374 if ((ret = __db_putchk(dbp, key, data,
375 flags, F_ISSET(dbp, DB_AM_RDONLY), F_ISSET(dbp, DB_AM_DUP))) != 0)
376 return (ret);
377
378 if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0)
379 return (ret);
380
381 DEBUG_LWRITE(dbc, txn, "__db_put", key, data, flags);
382
383 if (flags == DB_NOOVERWRITE) {
384 /*
385 * Set DB_DBT_USERMEM, this might be a threaded application and
386 * the flags checking will catch us. We don't want the actual
387 * data, so request a partial of length 0.
388 */
389 memset(&tdata, 0, sizeof(tdata));
390 F_SET(&tdata, DB_DBT_USERMEM | DB_DBT_PARTIAL);
391 if ((ret = dbc->c_get(dbc, key, &tdata, DB_SET | DB_RMW)) == 0)
392 ret = DB_KEYEXIST;
393 else if (ret == DB_NOTFOUND)
394 ret = 0;
395 }
396 if (ret == 0)
397 ret = dbc->c_put(dbc, key, data, DB_KEYLAST);
398
399 if ((t_ret = __db_c_close(dbc)) != 0 && ret == 0)
400 ret = t_ret;
401
402 return (ret);
403 }
404
405 /*
406 * __db_sync --
407 * Flush the database cache.
408 *
409 * PUBLIC: int __db_sync __P((DB *, u_int32_t));
410 */
411 int
__db_sync(dbp,flags)412 __db_sync(dbp, flags)
413 DB *dbp;
414 u_int32_t flags;
415 {
416 int ret;
417
418 DB_PANIC_CHECK(dbp);
419
420 if ((ret = __db_syncchk(dbp, flags)) != 0)
421 return (ret);
422
423 /* If it wasn't possible to modify the file, we're done. */
424 if (F_ISSET(dbp, DB_AM_INMEM | DB_AM_RDONLY))
425 return (0);
426
427 /* Flush any dirty pages from the cache to the backing file. */
428 if ((ret = memp_fsync(dbp->mpf)) == DB_INCOMPLETE)
429 ret = 0;
430
431 return (ret);
432 }
433