1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996, 1997, 1998 5 * Sleepycat Software. All rights reserved. 6 */ 7 #include "config.h" 8 9 #ifndef lint 10 static const char sccsid[] = "@(#)log_get.c 10.38 (Sleepycat) 10/3/98"; 11 #endif /* not lint */ 12 13 #ifndef NO_SYSTEM_INCLUDES 14 #include <sys/types.h> 15 16 #include <errno.h> 17 #include <string.h> 18 #include <unistd.h> 19 #endif 20 21 #include "db_int.h" 22 #include "shqueue.h" 23 #include "db_page.h" 24 #include "log.h" 25 #include "hash.h" 26 #include "common_ext.h" 27 28 /* 29 * log_get -- 30 * Get a log record. 31 */ 32 int 33 log_get(dblp, alsn, dbt, flags) 34 DB_LOG *dblp; 35 DB_LSN *alsn; 36 DBT *dbt; 37 u_int32_t flags; 38 { 39 int ret; 40 41 LOG_PANIC_CHECK(dblp); 42 43 /* Validate arguments. */ 44 if (flags != DB_CHECKPOINT && flags != DB_CURRENT && 45 flags != DB_FIRST && flags != DB_LAST && 46 flags != DB_NEXT && flags != DB_PREV && flags != DB_SET) 47 return (__db_ferr(dblp->dbenv, "log_get", 1)); 48 49 if (F_ISSET(dblp, DB_AM_THREAD)) { 50 if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT) 51 return (__db_ferr(dblp->dbenv, "log_get", 1)); 52 if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC)) 53 return (__db_ferr(dblp->dbenv, "threaded data", 1)); 54 } 55 56 LOCK_LOGREGION(dblp); 57 58 /* 59 * If we get one of the log's header records, repeat the operation. 60 * This assumes that applications don't ever request the log header 61 * records by LSN, but that seems reasonable to me. 62 */ 63 ret = __log_get(dblp, alsn, dbt, flags, 0); 64 if (ret == 0 && alsn->offset == 0) { 65 switch (flags) { 66 case DB_FIRST: 67 flags = DB_NEXT; 68 break; 69 case DB_LAST: 70 flags = DB_PREV; 71 break; 72 } 73 ret = __log_get(dblp, alsn, dbt, flags, 0); 74 } 75 76 UNLOCK_LOGREGION(dblp); 77 78 return (ret); 79 } 80 81 /* 82 * __log_get -- 83 * Get a log record; internal version. 84 * 85 * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int)); 86 */ 87 int 88 __log_get(dblp, alsn, dbt, flags, silent) 89 DB_LOG *dblp; 90 DB_LSN *alsn; 91 DBT *dbt; 92 u_int32_t flags; 93 int silent; 94 { 95 DB_LSN nlsn; 96 HDR hdr; 97 LOG *lp; 98 size_t len; 99 ssize_t nr; 100 int cnt, ret; 101 char *np, *tbuf; 102 const char *fail; 103 void *p, *shortp; 104 105 lp = dblp->lp; 106 fail = np = tbuf = NULL; 107 108 nlsn = dblp->c_lsn; 109 switch (flags) { 110 case DB_CHECKPOINT: 111 nlsn = lp->chkpt_lsn; 112 if (IS_ZERO_LSN(nlsn)) { 113 __db_err(dblp->dbenv, 114 "log_get: unable to find checkpoint record: no checkpoint set."); 115 ret = ENOENT; 116 goto err2; 117 } 118 break; 119 case DB_NEXT: /* Next log record. */ 120 if (!IS_ZERO_LSN(nlsn)) { 121 /* Increment the cursor by the cursor record size. */ 122 nlsn.offset += dblp->c_len; 123 break; 124 } 125 /* FALLTHROUGH */ 126 case DB_FIRST: /* Find the first log record. */ 127 /* Find the first log file. */ 128 if ((ret = __log_find(dblp, 1, &cnt)) != 0) 129 goto err2; 130 131 /* 132 * We may have only entered records in the buffer, and not 133 * yet written a log file. If no log files were found and 134 * there's anything in the buffer, it belongs to file 1. 135 */ 136 if (cnt == 0) 137 cnt = 1; 138 139 nlsn.file = cnt; 140 nlsn.offset = 0; 141 break; 142 case DB_CURRENT: /* Current log record. */ 143 break; 144 case DB_PREV: /* Previous log record. */ 145 if (!IS_ZERO_LSN(nlsn)) { 146 /* If at start-of-file, move to the previous file. */ 147 if (nlsn.offset == 0) { 148 if (nlsn.file == 1 || 149 __log_valid(dblp, nlsn.file - 1, 0) != 0) 150 return (DB_NOTFOUND); 151 152 --nlsn.file; 153 nlsn.offset = dblp->c_off; 154 } else 155 nlsn.offset = dblp->c_off; 156 break; 157 } 158 /* FALLTHROUGH */ 159 case DB_LAST: /* Last log record. */ 160 nlsn.file = lp->lsn.file; 161 nlsn.offset = lp->lsn.offset - lp->len; 162 break; 163 case DB_SET: /* Set log record. */ 164 nlsn = *alsn; 165 break; 166 } 167 168 retry: 169 /* Return 1 if the request is past end-of-file. */ 170 if (nlsn.file > lp->lsn.file || 171 (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset)) 172 return (DB_NOTFOUND); 173 174 /* If we've switched files, discard the current fd. */ 175 if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) { 176 (void)__os_close(dblp->c_fd); 177 dblp->c_fd = -1; 178 } 179 180 /* If the entire record is in the in-memory buffer, copy it out. */ 181 if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) { 182 /* Copy the header. */ 183 p = lp->buf + (nlsn.offset - lp->w_off); 184 memcpy(&hdr, p, sizeof(HDR)); 185 186 /* Copy the record. */ 187 len = hdr.len - sizeof(HDR); 188 if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR), 189 len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0) 190 goto err1; 191 goto cksum; 192 } 193 194 /* Acquire a file descriptor. */ 195 if (dblp->c_fd == -1) { 196 if ((ret = __log_name(dblp, nlsn.file, 197 &np, &dblp->c_fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) { 198 fail = np; 199 goto err1; 200 } 201 __os_freestr(np); 202 np = NULL; 203 } 204 205 /* Seek to the header offset and read the header. */ 206 if ((ret = 207 __os_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) { 208 fail = "seek"; 209 goto err1; 210 } 211 if ((ret = __os_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) { 212 fail = "read"; 213 goto err1; 214 } 215 if (nr == sizeof(HDR)) 216 shortp = NULL; 217 else { 218 /* If read returns EOF, try the next file. */ 219 if (nr == 0) { 220 if (flags != DB_NEXT || nlsn.file == lp->lsn.file) 221 goto corrupt; 222 223 /* Move to the next file. */ 224 ++nlsn.file; 225 nlsn.offset = 0; 226 goto retry; 227 } 228 229 /* 230 * If read returns a short count the rest of the record has 231 * to be in the in-memory buffer. 232 */ 233 if (lp->b_off < sizeof(HDR) - nr) 234 goto corrupt; 235 236 /* Get the rest of the header from the in-memory buffer. */ 237 memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr); 238 shortp = lp->buf + (sizeof(HDR) - nr); 239 } 240 241 /* 242 * Check for buffers of 0's, that's what we usually see during 243 * recovery, although it's certainly not something on which we 244 * can depend. 245 */ 246 if (hdr.len <= sizeof(HDR)) 247 goto corrupt; 248 len = hdr.len - sizeof(HDR); 249 250 /* If we've already moved to the in-memory buffer, fill from there. */ 251 if (shortp != NULL) { 252 if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len) 253 goto corrupt; 254 if ((ret = __db_retcopy(dbt, shortp, len, 255 &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0) 256 goto err1; 257 goto cksum; 258 } 259 260 /* 261 * Allocate temporary memory to hold the record. 262 * 263 * XXX 264 * We're calling malloc(3) with a region locked. This isn't 265 * a good idea. 266 */ 267 if ((ret = __os_malloc(len, NULL, &tbuf)) != 0) 268 goto err1; 269 270 /* 271 * Read the record into the buffer. If read returns a short count, 272 * there was an error or the rest of the record is in the in-memory 273 * buffer. Note, the information may be garbage if we're in recovery, 274 * so don't read past the end of the buffer's memory. 275 */ 276 if ((ret = __os_read(dblp->c_fd, tbuf, len, &nr)) != 0) { 277 fail = "read"; 278 goto err1; 279 } 280 if (len - nr > sizeof(lp->buf)) 281 goto corrupt; 282 if (nr != (ssize_t)len) { 283 if (lp->b_off < len - nr) 284 goto corrupt; 285 286 /* Get the rest of the record from the in-memory buffer. */ 287 memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr); 288 } 289 290 /* Copy the record into the user's DBT. */ 291 if ((ret = __db_retcopy(dbt, tbuf, len, 292 &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0) 293 goto err1; 294 __os_free(tbuf, 0); 295 tbuf = NULL; 296 297 cksum: if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) { 298 if (!silent) 299 __db_err(dblp->dbenv, "log_get: checksum mismatch"); 300 goto corrupt; 301 } 302 303 /* Update the cursor and the return lsn. */ 304 dblp->c_off = hdr.prev; 305 dblp->c_len = hdr.len; 306 dblp->c_lsn = *alsn = nlsn; 307 308 return (0); 309 310 corrupt:/* 311 * This is the catchall -- for some reason we didn't find enough 312 * information or it wasn't reasonable information, and it wasn't 313 * because a system call failed. 314 */ 315 ret = EIO; 316 fail = "read"; 317 318 err1: if (!silent) 319 if (fail == NULL) 320 __db_err(dblp->dbenv, "log_get: %s", strerror(ret)); 321 else 322 __db_err(dblp->dbenv, 323 "log_get: %s: %s", fail, strerror(ret)); 324 err2: if (np != NULL) 325 __os_freestr(np); 326 if (tbuf != NULL) 327 __os_free(tbuf, 0); 328 return (ret); 329 } 330