1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 #include "config.h"
8
9 #ifndef lint
10 static const char sccsid[] = "@(#)log.c 10.63 (Sleepycat) 10/10/98";
11 #endif /* not lint */
12
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
15
16 #include <errno.h>
17 #include <shqueue.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <unistd.h>
21 #endif
22
23 #include "db_int.h"
24 #include "shqueue.h"
25 #include "log.h"
26 #include "db_dispatch.h"
27 #include "txn.h"
28 #include "txn_auto.h"
29 #include "common_ext.h"
30
31 static int __log_recover __P((DB_LOG *));
32
33 /*
34 * log_open --
35 * Initialize and/or join a log.
36 */
37 int
log_open(path,flags,mode,dbenv,lpp)38 log_open(path, flags, mode, dbenv, lpp)
39 const char *path;
40 u_int32_t flags;
41 int mode;
42 DB_ENV *dbenv;
43 DB_LOG **lpp;
44 {
45 DB_LOG *dblp;
46 LOG *lp;
47 int ret;
48
49 /* Validate arguments. */
50 #ifdef HAVE_SPINLOCKS
51 #define OKFLAGS (DB_CREATE | DB_THREAD)
52 #else
53 #define OKFLAGS (DB_CREATE)
54 #endif
55 if ((ret = __db_fchk(dbenv, "log_open", flags, OKFLAGS)) != 0)
56 return (ret);
57
58 /* Create and initialize the DB_LOG structure. */
59 if ((ret = __os_calloc(1, sizeof(DB_LOG), &dblp)) != 0)
60 return (ret);
61
62 if (path != NULL && (ret = __os_strdup(path, &dblp->dir)) != 0)
63 goto err;
64
65 dblp->dbenv = dbenv;
66 dblp->lfd = -1;
67 ZERO_LSN(dblp->c_lsn);
68 dblp->c_fd = -1;
69
70 /*
71 * The log region isn't fixed size because we store the registered
72 * file names there. Make it fairly large so that we don't have to
73 * grow it.
74 */
75 #define DEF_LOG_SIZE (30 * 1024)
76
77 /* Map in the region. */
78 dblp->reginfo.dbenv = dbenv;
79 dblp->reginfo.appname = DB_APP_LOG;
80 if (path == NULL)
81 dblp->reginfo.path = NULL;
82 else
83 if ((ret = __os_strdup(path, &dblp->reginfo.path)) != 0)
84 goto err;
85 dblp->reginfo.file = DB_DEFAULT_LOG_FILE;
86 dblp->reginfo.mode = mode;
87 dblp->reginfo.size = DEF_LOG_SIZE;
88 dblp->reginfo.dbflags = flags;
89 dblp->reginfo.flags = REGION_SIZEDEF;
90 if ((ret = __db_rattach(&dblp->reginfo)) != 0)
91 goto err;
92
93 /*
94 * The LOG structure is first in the region, the rest of the region
95 * is free space.
96 */
97 dblp->lp = dblp->reginfo.addr;
98 dblp->addr = (u_int8_t *)dblp->lp + sizeof(LOG);
99
100 /* Initialize a created region. */
101 if (F_ISSET(&dblp->reginfo, REGION_CREATED)) {
102 __db_shalloc_init(dblp->addr, DEF_LOG_SIZE - sizeof(LOG));
103
104 /* Initialize the LOG structure. */
105 lp = dblp->lp;
106 lp->persist.lg_max = dbenv == NULL ? 0 : dbenv->lg_max;
107 if (lp->persist.lg_max == 0)
108 lp->persist.lg_max = DEFAULT_MAX;
109 lp->persist.magic = DB_LOGMAGIC;
110 lp->persist.version = DB_LOGVERSION;
111 lp->persist.mode = mode;
112 SH_TAILQ_INIT(&lp->fq);
113
114 /* Initialize LOG LSNs. */
115 lp->lsn.file = 1;
116 lp->lsn.offset = 0;
117 }
118
119 /* Initialize thread information, mutex. */
120 if (LF_ISSET(DB_THREAD)) {
121 F_SET(dblp, DB_AM_THREAD);
122 if ((ret = __db_shalloc(dblp->addr,
123 sizeof(db_mutex_t), MUTEX_ALIGNMENT, &dblp->mutexp)) != 0)
124 goto err;
125 (void)__db_mutex_init(dblp->mutexp, 0);
126 }
127
128 /*
129 * If doing recovery, try and recover any previous log files before
130 * releasing the lock.
131 */
132 if (F_ISSET(&dblp->reginfo, REGION_CREATED) &&
133 (ret = __log_recover(dblp)) != 0)
134 goto err;
135
136 UNLOCK_LOGREGION(dblp);
137 *lpp = dblp;
138 return (0);
139
140 err: if (dblp->reginfo.addr != NULL) {
141 if (dblp->mutexp != NULL)
142 __db_shalloc_free(dblp->addr, dblp->mutexp);
143
144 UNLOCK_LOGREGION(dblp);
145 (void)__db_rdetach(&dblp->reginfo);
146 if (F_ISSET(&dblp->reginfo, REGION_CREATED))
147 (void)log_unlink(path, 1, dbenv);
148 }
149
150 if (dblp->reginfo.path != NULL)
151 __os_freestr(dblp->reginfo.path);
152 if (dblp->dir != NULL)
153 __os_freestr(dblp->dir);
154 __os_free(dblp, sizeof(*dblp));
155 return (ret);
156 }
157
158 /*
159 * __log_panic --
160 * Panic a log.
161 *
162 * PUBLIC: void __log_panic __P((DB_ENV *));
163 */
164 void
__log_panic(dbenv)165 __log_panic(dbenv)
166 DB_ENV *dbenv;
167 {
168 if (dbenv->lg_info != NULL)
169 dbenv->lg_info->lp->rlayout.panic = 1;
170 }
171
172 /*
173 * __log_recover --
174 * Recover a log.
175 */
176 static int
__log_recover(dblp)177 __log_recover(dblp)
178 DB_LOG *dblp;
179 {
180 DBT dbt;
181 DB_LSN lsn;
182 LOG *lp;
183 u_int32_t chk;
184 int cnt, found_checkpoint, ret;
185
186 lp = dblp->lp;
187
188 /*
189 * Find a log file. If none exist, we simply return, leaving
190 * everything initialized to a new log.
191 */
192 if ((ret = __log_find(dblp, 0, &cnt)) != 0)
193 return (ret);
194 if (cnt == 0)
195 return (0);
196
197 /*
198 * We have the last useful log file and we've loaded any persistent
199 * information. Pretend that the log is larger than it can possibly
200 * be, and read the last file, looking for the last checkpoint and
201 * the log's end.
202 */
203 lp->lsn.file = cnt + 1;
204 lp->lsn.offset = 0;
205 lsn.file = cnt;
206 lsn.offset = 0;
207
208 /* Set the cursor. Shouldn't fail, leave error messages on. */
209 memset(&dbt, 0, sizeof(dbt));
210 if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0)
211 return (ret);
212
213 /*
214 * Read to the end of the file, saving checkpoints. This will fail
215 * at some point, so turn off error messages.
216 */
217 found_checkpoint = 0;
218 while (__log_get(dblp, &lsn, &dbt, DB_NEXT, 1) == 0) {
219 if (dbt.size < sizeof(u_int32_t))
220 continue;
221 memcpy(&chk, dbt.data, sizeof(u_int32_t));
222 if (chk == DB_txn_ckp) {
223 lp->chkpt_lsn = lsn;
224 found_checkpoint = 1;
225 }
226 }
227
228 /*
229 * We now know where the end of the log is. Set the first LSN that
230 * we want to return to an application and the LSN of the last known
231 * record on disk.
232 */
233 lp->lsn = lp->s_lsn = lsn;
234 lp->lsn.offset += dblp->c_len;
235
236 /* Set up the current buffer information, too. */
237 lp->len = dblp->c_len;
238 lp->b_off = 0;
239 lp->w_off = lp->lsn.offset;
240
241 /*
242 * It's possible that we didn't find a checkpoint because there wasn't
243 * one in the last log file. Start searching.
244 */
245 while (!found_checkpoint && cnt > 1) {
246 lsn.file = --cnt;
247 lsn.offset = 0;
248
249 /* Set the cursor. Shouldn't fail, leave error messages on. */
250 if ((ret = __log_get(dblp, &lsn, &dbt, DB_SET, 0)) != 0)
251 return (ret);
252
253 /*
254 * Read to the end of the file, saving checkpoints. Shouldn't
255 * fail, leave error messages on.
256 */
257 while (__log_get(dblp, &lsn, &dbt, DB_NEXT, 0) == 0) {
258 if (dbt.size < sizeof(u_int32_t))
259 continue;
260 memcpy(&chk, dbt.data, sizeof(u_int32_t));
261 if (chk == DB_txn_ckp) {
262 lp->chkpt_lsn = lsn;
263 found_checkpoint = 1;
264 }
265 }
266 }
267 /*
268 * Reset the cursor lsn to the beginning of the log, so that an
269 * initial call to DB_NEXT does the right thing.
270 */
271 ZERO_LSN(dblp->c_lsn);
272
273 /* If we never find a checkpoint, that's okay, just 0 it out. */
274 if (!found_checkpoint)
275 ZERO_LSN(lp->chkpt_lsn);
276
277 /*
278 * !!!
279 * The test suite explicitly looks for this string -- don't change
280 * it here unless you also change it there.
281 */
282 __db_err(dblp->dbenv,
283 "Finding last valid log LSN: file: %lu offset %lu",
284 (u_long)lp->lsn.file, (u_long)lp->lsn.offset);
285
286 return (0);
287 }
288
289 /*
290 * __log_find --
291 * Try to find a log file. If find_first is set, valp will contain
292 * the number of the first log file, else it will contain the number of
293 * the last log file.
294 *
295 * PUBLIC: int __log_find __P((DB_LOG *, int, int *));
296 */
297 int
__log_find(dblp,find_first,valp)298 __log_find(dblp, find_first, valp)
299 DB_LOG *dblp;
300 int find_first, *valp;
301 {
302 u_int32_t clv, logval;
303 int cnt, fcnt, ret;
304 const char *dir;
305 char **names, *p, *q;
306
307 *valp = 0;
308
309 /* Find the directory name. */
310 if ((ret = __log_name(dblp, 1, &p, NULL, 0)) != 0)
311 return (ret);
312 if ((q = __db_rpath(p)) == NULL)
313 dir = PATH_DOT;
314 else {
315 *q = '\0';
316 dir = p;
317 }
318
319 /* Get the list of file names. */
320 ret = __os_dirlist(dir, &names, &fcnt);
321 __os_freestr(p);
322 if (ret != 0) {
323 __db_err(dblp->dbenv, "%s: %s", dir, strerror(ret));
324 return (ret);
325 }
326
327 /*
328 * Search for a valid log file name, return a value of 0 on
329 * failure.
330 *
331 * XXX
332 * Assumes that atoi(3) returns a 32-bit number.
333 */
334 for (cnt = fcnt, clv = logval = 0; --cnt >= 0;) {
335 if (strncmp(names[cnt], LFPREFIX, sizeof(LFPREFIX) - 1) != 0)
336 continue;
337
338 clv = atoi(names[cnt] + (sizeof(LFPREFIX) - 1));
339 if (find_first) {
340 if (logval != 0 && clv > logval)
341 continue;
342 } else
343 if (logval != 0 && clv < logval)
344 continue;
345
346 if (__log_valid(dblp, clv, 1) == 0)
347 logval = clv;
348 }
349
350 *valp = logval;
351
352 /* Discard the list. */
353 __os_dirfree(names, fcnt);
354
355 return (0);
356 }
357
358 /*
359 * log_valid --
360 * Validate a log file.
361 *
362 * PUBLIC: int __log_valid __P((DB_LOG *, u_int32_t, int));
363 */
364 int
__log_valid(dblp,number,set_persist)365 __log_valid(dblp, number, set_persist)
366 DB_LOG *dblp;
367 u_int32_t number;
368 int set_persist;
369 {
370 LOGP persist;
371 ssize_t nw;
372 char *fname;
373 int fd, ret;
374
375 /* Try to open the log file. */
376 if ((ret = __log_name(dblp,
377 number, &fname, &fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) {
378 __os_freestr(fname);
379 return (ret);
380 }
381
382 /* Try to read the header. */
383 if ((ret = __os_seek(fd, 0, 0, sizeof(HDR), 0, SEEK_SET)) != 0 ||
384 (ret = __os_read(fd, &persist, sizeof(LOGP), &nw)) != 0 ||
385 nw != sizeof(LOGP)) {
386 if (ret == 0)
387 ret = EIO;
388
389 (void)__os_close(fd);
390
391 __db_err(dblp->dbenv,
392 "Ignoring log file: %s: %s", fname, strerror(ret));
393 goto err;
394 }
395 (void)__os_close(fd);
396
397 /* Validate the header. */
398 if (persist.magic != DB_LOGMAGIC) {
399 __db_err(dblp->dbenv,
400 "Ignoring log file: %s: magic number %lx, not %lx",
401 fname, (u_long)persist.magic, (u_long)DB_LOGMAGIC);
402 ret = EINVAL;
403 goto err;
404 }
405 if (persist.version < DB_LOGOLDVER || persist.version > DB_LOGVERSION) {
406 __db_err(dblp->dbenv,
407 "Ignoring log file: %s: unsupported log version %lu",
408 fname, (u_long)persist.version);
409 ret = EINVAL;
410 goto err;
411 }
412
413 /*
414 * If we're going to use this log file, set the region's persistent
415 * information based on the headers.
416 */
417 if (set_persist) {
418 dblp->lp->persist.lg_max = persist.lg_max;
419 dblp->lp->persist.mode = persist.mode;
420 }
421 ret = 0;
422
423 err: __os_freestr(fname);
424 return (ret);
425 }
426
427 /*
428 * log_close --
429 * Close a log.
430 */
431 int
log_close(dblp)432 log_close(dblp)
433 DB_LOG *dblp;
434 {
435 u_int32_t i;
436 int ret, t_ret;
437
438 LOG_PANIC_CHECK(dblp);
439
440 /* We may have opened files as part of XA; if so, close them. */
441 __log_close_files(dblp);
442
443 /* Discard the per-thread pointer. */
444 if (dblp->mutexp != NULL) {
445 LOCK_LOGREGION(dblp);
446 __db_shalloc_free(dblp->addr, dblp->mutexp);
447 UNLOCK_LOGREGION(dblp);
448 }
449
450 /* Close the region. */
451 ret = __db_rdetach(&dblp->reginfo);
452
453 /* Close open files, release allocated memory. */
454 if (dblp->lfd != -1 && (t_ret = __os_close(dblp->lfd)) != 0 && ret == 0)
455 ret = t_ret;
456 if (dblp->c_dbt.data != NULL)
457 __os_free(dblp->c_dbt.data, dblp->c_dbt.ulen);
458 if (dblp->c_fd != -1 &&
459 (t_ret = __os_close(dblp->c_fd)) != 0 && ret == 0)
460 ret = t_ret;
461 if (dblp->dbentry != NULL) {
462 for (i = 0; i < dblp->dbentry_cnt; i++)
463 if (dblp->dbentry[i].name != NULL)
464 __os_freestr(dblp->dbentry[i].name);
465 __os_free(dblp->dbentry,
466 (dblp->dbentry_cnt * sizeof(DB_ENTRY)));
467 }
468
469 if (dblp->dir != NULL)
470 __os_freestr(dblp->dir);
471
472 if (dblp->reginfo.path != NULL)
473 __os_freestr(dblp->reginfo.path);
474 __os_free(dblp, sizeof(*dblp));
475
476 return (ret);
477 }
478
479 /*
480 * log_unlink --
481 * Exit a log.
482 */
483 int
log_unlink(path,force,dbenv)484 log_unlink(path, force, dbenv)
485 const char *path;
486 int force;
487 DB_ENV *dbenv;
488 {
489 REGINFO reginfo;
490 int ret;
491
492 memset(®info, 0, sizeof(reginfo));
493 reginfo.dbenv = dbenv;
494 reginfo.appname = DB_APP_LOG;
495 if (path != NULL && (ret = __os_strdup(path, ®info.path)) != 0)
496 return (ret);
497 reginfo.file = DB_DEFAULT_LOG_FILE;
498 ret = __db_runlink(®info, force);
499 if (reginfo.path != NULL)
500 __os_freestr(reginfo.path);
501 return (ret);
502 }
503
504 /*
505 * log_stat --
506 * Return LOG statistics.
507 */
508 int
log_stat(dblp,gspp,db_malloc)509 log_stat(dblp, gspp, db_malloc)
510 DB_LOG *dblp;
511 DB_LOG_STAT **gspp;
512 void *(*db_malloc) __P((size_t));
513 {
514 LOG *lp;
515 int ret;
516
517 *gspp = NULL;
518 lp = dblp->lp;
519
520 LOG_PANIC_CHECK(dblp);
521
522 if ((ret = __os_malloc(sizeof(**gspp), db_malloc, gspp)) != 0)
523 return (ret);
524
525 /* Copy out the global statistics. */
526 LOCK_LOGREGION(dblp);
527 **gspp = lp->stat;
528
529 (*gspp)->st_magic = lp->persist.magic;
530 (*gspp)->st_version = lp->persist.version;
531 (*gspp)->st_mode = lp->persist.mode;
532 (*gspp)->st_lg_max = lp->persist.lg_max;
533
534 (*gspp)->st_region_nowait = lp->rlayout.lock.mutex_set_nowait;
535 (*gspp)->st_region_wait = lp->rlayout.lock.mutex_set_wait;
536
537 (*gspp)->st_cur_file = lp->lsn.file;
538 (*gspp)->st_cur_offset = lp->lsn.offset;
539
540 (*gspp)->st_refcnt = lp->rlayout.refcnt;
541 (*gspp)->st_regsize = lp->rlayout.size;
542
543 UNLOCK_LOGREGION(dblp);
544
545 return (0);
546 }
547