1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 #include "config.h"
8
9 #ifndef lint
10 static const char sccsid[] = "@(#)log_get.c 10.38 (Sleepycat) 10/3/98";
11 #endif /* not lint */
12
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
15
16 #include <errno.h>
17 #include <string.h>
18 #include <unistd.h>
19 #endif
20
21 #include "db_int.h"
22 #include "shqueue.h"
23 #include "db_page.h"
24 #include "log.h"
25 #include "hash.h"
26 #include "common_ext.h"
27
28 /*
29 * log_get --
30 * Get a log record.
31 */
32 int
log_get(dblp,alsn,dbt,flags)33 log_get(dblp, alsn, dbt, flags)
34 DB_LOG *dblp;
35 DB_LSN *alsn;
36 DBT *dbt;
37 u_int32_t flags;
38 {
39 int ret;
40
41 LOG_PANIC_CHECK(dblp);
42
43 /* Validate arguments. */
44 if (flags != DB_CHECKPOINT && flags != DB_CURRENT &&
45 flags != DB_FIRST && flags != DB_LAST &&
46 flags != DB_NEXT && flags != DB_PREV && flags != DB_SET)
47 return (__db_ferr(dblp->dbenv, "log_get", 1));
48
49 if (F_ISSET(dblp, DB_AM_THREAD)) {
50 if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT)
51 return (__db_ferr(dblp->dbenv, "log_get", 1));
52 if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC))
53 return (__db_ferr(dblp->dbenv, "threaded data", 1));
54 }
55
56 LOCK_LOGREGION(dblp);
57
58 /*
59 * If we get one of the log's header records, repeat the operation.
60 * This assumes that applications don't ever request the log header
61 * records by LSN, but that seems reasonable to me.
62 */
63 ret = __log_get(dblp, alsn, dbt, flags, 0);
64 if (ret == 0 && alsn->offset == 0) {
65 switch (flags) {
66 case DB_FIRST:
67 flags = DB_NEXT;
68 break;
69 case DB_LAST:
70 flags = DB_PREV;
71 break;
72 }
73 ret = __log_get(dblp, alsn, dbt, flags, 0);
74 }
75
76 UNLOCK_LOGREGION(dblp);
77
78 return (ret);
79 }
80
81 /*
82 * __log_get --
83 * Get a log record; internal version.
84 *
85 * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
86 */
87 int
__log_get(dblp,alsn,dbt,flags,silent)88 __log_get(dblp, alsn, dbt, flags, silent)
89 DB_LOG *dblp;
90 DB_LSN *alsn;
91 DBT *dbt;
92 u_int32_t flags;
93 int silent;
94 {
95 DB_LSN nlsn;
96 HDR hdr;
97 LOG *lp;
98 size_t len;
99 ssize_t nr;
100 int cnt, ret;
101 char *np, *tbuf;
102 const char *fail;
103 void *p, *shortp;
104
105 lp = dblp->lp;
106 fail = np = tbuf = NULL;
107
108 nlsn = dblp->c_lsn;
109 switch (flags) {
110 case DB_CHECKPOINT:
111 nlsn = lp->chkpt_lsn;
112 if (IS_ZERO_LSN(nlsn)) {
113 __db_err(dblp->dbenv,
114 "log_get: unable to find checkpoint record: no checkpoint set.");
115 ret = ENOENT;
116 goto err2;
117 }
118 break;
119 case DB_NEXT: /* Next log record. */
120 if (!IS_ZERO_LSN(nlsn)) {
121 /* Increment the cursor by the cursor record size. */
122 nlsn.offset += dblp->c_len;
123 break;
124 }
125 /* FALLTHROUGH */
126 case DB_FIRST: /* Find the first log record. */
127 /* Find the first log file. */
128 if ((ret = __log_find(dblp, 1, &cnt)) != 0)
129 goto err2;
130
131 /*
132 * We may have only entered records in the buffer, and not
133 * yet written a log file. If no log files were found and
134 * there's anything in the buffer, it belongs to file 1.
135 */
136 if (cnt == 0)
137 cnt = 1;
138
139 nlsn.file = cnt;
140 nlsn.offset = 0;
141 break;
142 case DB_CURRENT: /* Current log record. */
143 break;
144 case DB_PREV: /* Previous log record. */
145 if (!IS_ZERO_LSN(nlsn)) {
146 /* If at start-of-file, move to the previous file. */
147 if (nlsn.offset == 0) {
148 if (nlsn.file == 1 ||
149 __log_valid(dblp, nlsn.file - 1, 0) != 0)
150 return (DB_NOTFOUND);
151
152 --nlsn.file;
153 nlsn.offset = dblp->c_off;
154 } else
155 nlsn.offset = dblp->c_off;
156 break;
157 }
158 /* FALLTHROUGH */
159 case DB_LAST: /* Last log record. */
160 nlsn.file = lp->lsn.file;
161 nlsn.offset = lp->lsn.offset - lp->len;
162 break;
163 case DB_SET: /* Set log record. */
164 nlsn = *alsn;
165 break;
166 }
167
168 retry:
169 /* Return 1 if the request is past end-of-file. */
170 if (nlsn.file > lp->lsn.file ||
171 (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
172 return (DB_NOTFOUND);
173
174 /* If we've switched files, discard the current fd. */
175 if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) {
176 (void)__os_close(dblp->c_fd);
177 dblp->c_fd = -1;
178 }
179
180 /* If the entire record is in the in-memory buffer, copy it out. */
181 if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
182 /* Copy the header. */
183 p = lp->buf + (nlsn.offset - lp->w_off);
184 memcpy(&hdr, p, sizeof(HDR));
185
186 /* Copy the record. */
187 len = hdr.len - sizeof(HDR);
188 if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR),
189 len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
190 goto err1;
191 goto cksum;
192 }
193
194 /* Acquire a file descriptor. */
195 if (dblp->c_fd == -1) {
196 if ((ret = __log_name(dblp, nlsn.file,
197 &np, &dblp->c_fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) {
198 fail = np;
199 goto err1;
200 }
201 __os_freestr(np);
202 np = NULL;
203 }
204
205 /* Seek to the header offset and read the header. */
206 if ((ret =
207 __os_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) {
208 fail = "seek";
209 goto err1;
210 }
211 if ((ret = __os_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) {
212 fail = "read";
213 goto err1;
214 }
215 if (nr == sizeof(HDR))
216 shortp = NULL;
217 else {
218 /* If read returns EOF, try the next file. */
219 if (nr == 0) {
220 if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
221 goto corrupt;
222
223 /* Move to the next file. */
224 ++nlsn.file;
225 nlsn.offset = 0;
226 goto retry;
227 }
228
229 /*
230 * If read returns a short count the rest of the record has
231 * to be in the in-memory buffer.
232 */
233 if (lp->b_off < sizeof(HDR) - nr)
234 goto corrupt;
235
236 /* Get the rest of the header from the in-memory buffer. */
237 memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr);
238 shortp = lp->buf + (sizeof(HDR) - nr);
239 }
240
241 /*
242 * Check for buffers of 0's, that's what we usually see during
243 * recovery, although it's certainly not something on which we
244 * can depend.
245 */
246 if (hdr.len <= sizeof(HDR))
247 goto corrupt;
248 len = hdr.len - sizeof(HDR);
249
250 /* If we've already moved to the in-memory buffer, fill from there. */
251 if (shortp != NULL) {
252 if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len)
253 goto corrupt;
254 if ((ret = __db_retcopy(dbt, shortp, len,
255 &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
256 goto err1;
257 goto cksum;
258 }
259
260 /*
261 * Allocate temporary memory to hold the record.
262 *
263 * XXX
264 * We're calling malloc(3) with a region locked. This isn't
265 * a good idea.
266 */
267 if ((ret = __os_malloc(len, NULL, &tbuf)) != 0)
268 goto err1;
269
270 /*
271 * Read the record into the buffer. If read returns a short count,
272 * there was an error or the rest of the record is in the in-memory
273 * buffer. Note, the information may be garbage if we're in recovery,
274 * so don't read past the end of the buffer's memory.
275 */
276 if ((ret = __os_read(dblp->c_fd, tbuf, len, &nr)) != 0) {
277 fail = "read";
278 goto err1;
279 }
280 if (len - nr > sizeof(lp->buf))
281 goto corrupt;
282 if (nr != (ssize_t)len) {
283 if (lp->b_off < len - nr)
284 goto corrupt;
285
286 /* Get the rest of the record from the in-memory buffer. */
287 memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr);
288 }
289
290 /* Copy the record into the user's DBT. */
291 if ((ret = __db_retcopy(dbt, tbuf, len,
292 &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
293 goto err1;
294 __os_free(tbuf, 0);
295 tbuf = NULL;
296
297 cksum: if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
298 if (!silent)
299 __db_err(dblp->dbenv, "log_get: checksum mismatch");
300 goto corrupt;
301 }
302
303 /* Update the cursor and the return lsn. */
304 dblp->c_off = hdr.prev;
305 dblp->c_len = hdr.len;
306 dblp->c_lsn = *alsn = nlsn;
307
308 return (0);
309
310 corrupt:/*
311 * This is the catchall -- for some reason we didn't find enough
312 * information or it wasn't reasonable information, and it wasn't
313 * because a system call failed.
314 */
315 ret = EIO;
316 fail = "read";
317
318 err1: if (!silent)
319 if (fail == NULL)
320 __db_err(dblp->dbenv, "log_get: %s", strerror(ret));
321 else
322 __db_err(dblp->dbenv,
323 "log_get: %s: %s", fail, strerror(ret));
324 err2: if (np != NULL)
325 __os_freestr(np);
326 if (tbuf != NULL)
327 __os_free(tbuf, 0);
328 return (ret);
329 }
330