1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 #include "config.h"
8 
9 #ifndef lint
10 static const char sccsid[] = "@(#)log_get.c	10.38 (Sleepycat) 10/3/98";
11 #endif /* not lint */
12 
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
15 
16 #include <errno.h>
17 #include <string.h>
18 #include <unistd.h>
19 #endif
20 
21 #include "db_int.h"
22 #include "shqueue.h"
23 #include "db_page.h"
24 #include "log.h"
25 #include "hash.h"
26 #include "common_ext.h"
27 
28 /*
29  * log_get --
30  *	Get a log record.
31  */
32 int
log_get(dblp,alsn,dbt,flags)33 log_get(dblp, alsn, dbt, flags)
34 	DB_LOG *dblp;
35 	DB_LSN *alsn;
36 	DBT *dbt;
37 	u_int32_t flags;
38 {
39 	int ret;
40 
41 	LOG_PANIC_CHECK(dblp);
42 
43 	/* Validate arguments. */
44 	if (flags != DB_CHECKPOINT && flags != DB_CURRENT &&
45 	    flags != DB_FIRST && flags != DB_LAST &&
46 	    flags != DB_NEXT && flags != DB_PREV && flags != DB_SET)
47 		return (__db_ferr(dblp->dbenv, "log_get", 1));
48 
49 	if (F_ISSET(dblp, DB_AM_THREAD)) {
50 		if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT)
51 			return (__db_ferr(dblp->dbenv, "log_get", 1));
52 		if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC))
53 			return (__db_ferr(dblp->dbenv, "threaded data", 1));
54 	}
55 
56 	LOCK_LOGREGION(dblp);
57 
58 	/*
59 	 * If we get one of the log's header records, repeat the operation.
60 	 * This assumes that applications don't ever request the log header
61 	 * records by LSN, but that seems reasonable to me.
62 	 */
63 	ret = __log_get(dblp, alsn, dbt, flags, 0);
64 	if (ret == 0 && alsn->offset == 0) {
65 		switch (flags) {
66 		case DB_FIRST:
67 			flags = DB_NEXT;
68 			break;
69 		case DB_LAST:
70 			flags = DB_PREV;
71 			break;
72 		}
73 		ret = __log_get(dblp, alsn, dbt, flags, 0);
74 	}
75 
76 	UNLOCK_LOGREGION(dblp);
77 
78 	return (ret);
79 }
80 
81 /*
82  * __log_get --
83  *	Get a log record; internal version.
84  *
85  * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
86  */
87 int
__log_get(dblp,alsn,dbt,flags,silent)88 __log_get(dblp, alsn, dbt, flags, silent)
89 	DB_LOG *dblp;
90 	DB_LSN *alsn;
91 	DBT *dbt;
92 	u_int32_t flags;
93 	int silent;
94 {
95 	DB_LSN nlsn;
96 	HDR hdr;
97 	LOG *lp;
98 	size_t len;
99 	ssize_t nr;
100 	int cnt, ret;
101 	char *np, *tbuf;
102 	const char *fail;
103 	void *p, *shortp;
104 
105 	lp = dblp->lp;
106 	fail = np = tbuf = NULL;
107 
108 	nlsn = dblp->c_lsn;
109 	switch (flags) {
110 	case DB_CHECKPOINT:
111 		nlsn = lp->chkpt_lsn;
112 		if (IS_ZERO_LSN(nlsn)) {
113 			__db_err(dblp->dbenv,
114 	"log_get: unable to find checkpoint record: no checkpoint set.");
115 			ret = ENOENT;
116 			goto err2;
117 		}
118 		break;
119 	case DB_NEXT:				/* Next log record. */
120 		if (!IS_ZERO_LSN(nlsn)) {
121 			/* Increment the cursor by the cursor record size. */
122 			nlsn.offset += dblp->c_len;
123 			break;
124 		}
125 		/* FALLTHROUGH */
126 	case DB_FIRST:				/* Find the first log record. */
127 		/* Find the first log file. */
128 		if ((ret = __log_find(dblp, 1, &cnt)) != 0)
129 			goto err2;
130 
131 		/*
132 		 * We may have only entered records in the buffer, and not
133 		 * yet written a log file.  If no log files were found and
134 		 * there's anything in the buffer, it belongs to file 1.
135 		 */
136 		if (cnt == 0)
137 			cnt = 1;
138 
139 		nlsn.file = cnt;
140 		nlsn.offset = 0;
141 		break;
142 	case DB_CURRENT:			/* Current log record. */
143 		break;
144 	case DB_PREV:				/* Previous log record. */
145 		if (!IS_ZERO_LSN(nlsn)) {
146 			/* If at start-of-file, move to the previous file. */
147 			if (nlsn.offset == 0) {
148 				if (nlsn.file == 1 ||
149 				    __log_valid(dblp, nlsn.file - 1, 0) != 0)
150 					return (DB_NOTFOUND);
151 
152 				--nlsn.file;
153 				nlsn.offset = dblp->c_off;
154 			} else
155 				nlsn.offset = dblp->c_off;
156 			break;
157 		}
158 		/* FALLTHROUGH */
159 	case DB_LAST:				/* Last log record. */
160 		nlsn.file = lp->lsn.file;
161 		nlsn.offset = lp->lsn.offset - lp->len;
162 		break;
163 	case DB_SET:				/* Set log record. */
164 		nlsn = *alsn;
165 		break;
166 	}
167 
168 retry:
169 	/* Return 1 if the request is past end-of-file. */
170 	if (nlsn.file > lp->lsn.file ||
171 	    (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
172 		return (DB_NOTFOUND);
173 
174 	/* If we've switched files, discard the current fd. */
175 	if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) {
176 		(void)__os_close(dblp->c_fd);
177 		dblp->c_fd = -1;
178 	}
179 
180 	/* If the entire record is in the in-memory buffer, copy it out. */
181 	if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
182 		/* Copy the header. */
183 		p = lp->buf + (nlsn.offset - lp->w_off);
184 		memcpy(&hdr, p, sizeof(HDR));
185 
186 		/* Copy the record. */
187 		len = hdr.len - sizeof(HDR);
188 		if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR),
189 		    len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
190 			goto err1;
191 		goto cksum;
192 	}
193 
194 	/* Acquire a file descriptor. */
195 	if (dblp->c_fd == -1) {
196 		if ((ret = __log_name(dblp, nlsn.file,
197 		    &np, &dblp->c_fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) {
198 			fail = np;
199 			goto err1;
200 		}
201 		__os_freestr(np);
202 		np = NULL;
203 	}
204 
205 	/* Seek to the header offset and read the header. */
206 	if ((ret =
207 	    __os_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) {
208 		fail = "seek";
209 		goto err1;
210 	}
211 	if ((ret = __os_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) {
212 		fail = "read";
213 		goto err1;
214 	}
215 	if (nr == sizeof(HDR))
216 		shortp = NULL;
217 	else {
218 		/* If read returns EOF, try the next file. */
219 		if (nr == 0) {
220 			if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
221 				goto corrupt;
222 
223 			/* Move to the next file. */
224 			++nlsn.file;
225 			nlsn.offset = 0;
226 			goto retry;
227 		}
228 
229 		/*
230 		 * If read returns a short count the rest of the record has
231 		 * to be in the in-memory buffer.
232 		 */
233 		if (lp->b_off < sizeof(HDR) - nr)
234 			goto corrupt;
235 
236 		/* Get the rest of the header from the in-memory buffer. */
237 		memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr);
238 		shortp = lp->buf + (sizeof(HDR) - nr);
239 	}
240 
241 	/*
242 	 * Check for buffers of 0's, that's what we usually see during
243 	 * recovery, although it's certainly not something on which we
244 	 * can depend.
245 	 */
246 	if (hdr.len <= sizeof(HDR))
247 		goto corrupt;
248 	len = hdr.len - sizeof(HDR);
249 
250 	/* If we've already moved to the in-memory buffer, fill from there. */
251 	if (shortp != NULL) {
252 		if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len)
253 			goto corrupt;
254 		if ((ret = __db_retcopy(dbt, shortp, len,
255 		    &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
256 			goto err1;
257 		goto cksum;
258 	}
259 
260 	/*
261 	 * Allocate temporary memory to hold the record.
262 	 *
263 	 * XXX
264 	 * We're calling malloc(3) with a region locked.  This isn't
265 	 * a good idea.
266 	 */
267 	if ((ret = __os_malloc(len, NULL, &tbuf)) != 0)
268 		goto err1;
269 
270 	/*
271 	 * Read the record into the buffer.  If read returns a short count,
272 	 * there was an error or the rest of the record is in the in-memory
273 	 * buffer.  Note, the information may be garbage if we're in recovery,
274 	 * so don't read past the end of the buffer's memory.
275 	 */
276 	if ((ret = __os_read(dblp->c_fd, tbuf, len, &nr)) != 0) {
277 		fail = "read";
278 		goto err1;
279 	}
280 	if (len - nr > sizeof(lp->buf))
281 		goto corrupt;
282 	if (nr != (ssize_t)len) {
283 		if (lp->b_off < len - nr)
284 			goto corrupt;
285 
286 		/* Get the rest of the record from the in-memory buffer. */
287 		memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr);
288 	}
289 
290 	/* Copy the record into the user's DBT. */
291 	if ((ret = __db_retcopy(dbt, tbuf, len,
292 	    &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
293 		goto err1;
294 	__os_free(tbuf, 0);
295 	tbuf = NULL;
296 
297 cksum:	if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
298 		if (!silent)
299 			__db_err(dblp->dbenv, "log_get: checksum mismatch");
300 		goto corrupt;
301 	}
302 
303 	/* Update the cursor and the return lsn. */
304 	dblp->c_off = hdr.prev;
305 	dblp->c_len = hdr.len;
306 	dblp->c_lsn = *alsn = nlsn;
307 
308 	return (0);
309 
310 corrupt:/*
311 	 * This is the catchall -- for some reason we didn't find enough
312 	 * information or it wasn't reasonable information, and it wasn't
313 	 * because a system call failed.
314 	 */
315 	ret = EIO;
316 	fail = "read";
317 
318 err1:	if (!silent)
319 		if (fail == NULL)
320 			__db_err(dblp->dbenv, "log_get: %s", strerror(ret));
321 		else
322 			__db_err(dblp->dbenv,
323 			    "log_get: %s: %s", fail, strerror(ret));
324 err2:	if (np != NULL)
325 		__os_freestr(np);
326 	if (tbuf != NULL)
327 		__os_free(tbuf, 0);
328 	return (ret);
329 }
330