1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1995, 1996
9 * The President and Fellows of Harvard University. All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40 #include "config.h"
41
42 #ifndef lint
43 static const char sccsid[] = "@(#)log_rec.c 10.26 (Sleepycat) 10/21/98";
44 #endif /* not lint */
45
46 #ifndef NO_SYSTEM_INCLUDES
47 #include <sys/types.h>
48
49 #include <errno.h>
50 #include <string.h>
51 #endif
52
53 #include "db_int.h"
54 #include "shqueue.h"
55 #include "log.h"
56 #include "db_dispatch.h"
57 #include "common_ext.h"
58
/*
 * Forward declarations for the file-local helpers defined later in this
 * file; the __P() wrapper keeps the prototypes usable with K&R compilers.
 */
static int __log_do_open __P((DB_LOG *,
    u_int8_t *, char *, DBTYPE, u_int32_t));
static int __log_lid_to_fname __P((DB_LOG *, u_int32_t, FNAME **));
static int __log_open_file __P((DB_LOG *, __log_register_args *));
63
64 /*
65 * PUBLIC: int __log_register_recover
66 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *));
67 */
68 int
__log_register_recover(logp,dbtp,lsnp,redo,info)69 __log_register_recover(logp, dbtp, lsnp, redo, info)
70 DB_LOG *logp;
71 DBT *dbtp;
72 DB_LSN *lsnp;
73 int redo;
74 void *info;
75 {
76 DB_ENTRY *dbe;
77 __log_register_args *argp;
78 int ret;
79
80 #ifdef DEBUG_RECOVER
81 __log_register_print(logp, dbtp, lsnp, redo, info);
82 #endif
83 COMPQUIET(info, NULL);
84 COMPQUIET(lsnp, NULL);
85
86 F_SET(logp, DBC_RECOVER);
87
88 if ((ret = __log_register_read(dbtp->data, &argp)) != 0)
89 goto out;
90
91 if ((argp->opcode == LOG_CHECKPOINT && redo == TXN_OPENFILES) ||
92 (argp->opcode == LOG_OPEN &&
93 (redo == TXN_REDO || redo == TXN_OPENFILES ||
94 redo == TXN_FORWARD_ROLL)) ||
95 (argp->opcode == LOG_CLOSE &&
96 (redo == TXN_UNDO || redo == TXN_BACKWARD_ROLL))) {
97 /*
98 * If we are redoing an open or undoing a close, then we need
99 * to open a file.
100 */
101 ret = __log_open_file(logp, argp);
102 if (ret == ENOENT) {
103 if (redo == TXN_OPENFILES)
104 __db_err(logp->dbenv, "warning: %s: %s",
105 argp->name.data, strerror(ENOENT));
106 ret = 0;
107 }
108 } else if (argp->opcode != LOG_CHECKPOINT &&
109 argp->opcode != LOG_CLOSE) {
110 /*
111 * If we are undoing an open, then we need to close the file.
112 * Note that we do *not* close the file if we are redoing a
113 * close, because we do not log the reference counts on log
114 * files and we may have had the file open multiple times,
115 * and therefore, this close should just dec a reference
116 * count. However, since we only do one open during a
117 * checkpoint, this will inadvertently close the file.
118 *
119 * If the file is deleted, then we can just ignore this close.
120 * Otherwise, we should usually have a valid dbp we should
121 * close or whose reference count should be decremented.
122 * However, if we shut down without closing a file, we
123 * may, in fact, not have the file open, and that's OK.
124 */
125 LOCK_LOGTHREAD(logp);
126 if (argp->id < logp->dbentry_cnt) {
127 dbe = &logp->dbentry[argp->id];
128 if (dbe->dbp != NULL && --dbe->refcount == 0) {
129 ret = dbe->dbp->close(dbe->dbp, 0);
130 if (dbe->name != NULL) {
131 __os_freestr(dbe->name);
132 dbe->name = NULL;
133 }
134 (void)__log_rem_logid(logp, argp->id);
135 }
136 }
137 UNLOCK_LOGTHREAD(logp);
138 } else if (argp->opcode == LOG_CHECKPOINT && redo == TXN_UNDO &&
139 (argp->id >= logp->dbentry_cnt ||
140 (!logp->dbentry[argp->id].deleted &&
141 logp->dbentry[argp->id].dbp == NULL))) {
142 /*
143 * It's a checkpoint and we are rolling backward. It
144 * is possible that the system was shut down and thus
145 * ended with a stable checkpoint; this file was never
146 * closed and has therefore not been reopened yet. If
147 * so, we need to try to open it.
148 */
149 ret = __log_open_file(logp, argp);
150 if (ret == ENOENT) {
151 __db_err(logp->dbenv, "warning: %s: %s",
152 argp->name.data, strerror(ENOENT));
153 ret = 0;
154 }
155 }
156
157 out: F_CLR(logp, DBC_RECOVER);
158 if (argp != NULL)
159 __os_free(argp, 0);
160 return (ret);
161 }
162
163 /* Hand coded routines. */
164
165 /*
166 * Called during log_register recovery. Make sure that we have an
167 * entry in the dbentry table for this ndx.
168 * Returns 0 on success, non-zero on error.
169 */
170 static int
__log_open_file(lp,argp)171 __log_open_file(lp, argp)
172 DB_LOG *lp;
173 __log_register_args *argp;
174 {
175 DB_ENTRY *dbe;
176
177 if (argp->name.size == 0)
178 return(0);
179
180 /*
181 * Because of reference counting, we cannot automatically close files
182 * during recovery, so when we're opening, we have to check that the
183 * name we are opening is what we expect. If it's not, then we close
184 * the old file and open the new one.
185 */
186 LOCK_LOGTHREAD(lp);
187 if (argp->id < lp->dbentry_cnt)
188 dbe = &lp->dbentry[argp->id];
189 else
190 dbe = NULL;
191
192 if (dbe != NULL && (dbe->deleted == 1 || dbe->dbp != NULL) &&
193 dbe->name != NULL && argp->name.data != NULL &&
194 strncmp(argp->name.data, dbe->name, argp->name.size) == 0) {
195
196 dbe->refcount++;
197 UNLOCK_LOGTHREAD(lp);
198 return (0);
199 }
200 UNLOCK_LOGTHREAD(lp);
201
202 if (dbe != NULL && dbe->dbp != NULL) {
203 (void)dbe->dbp->close(dbe->dbp, 0);
204 if (dbe->name != NULL)
205 __os_freestr(dbe->name);
206 dbe->name = NULL;
207 (void)__log_rem_logid(lp, argp->id);
208 }
209
210
211 return (__log_do_open(lp,
212 argp->uid.data, argp->name.data, argp->ftype, argp->id));
213 }
214
215 /*
216 * __log_do_open --
217 * Open files referenced in the log. This is the part of the open that
218 * is not protected by the thread mutex.
219 */
220
221 static int
__log_do_open(lp,uid,name,ftype,ndx)222 __log_do_open(lp, uid, name, ftype, ndx)
223 DB_LOG *lp;
224 u_int8_t *uid;
225 char *name;
226 DBTYPE ftype;
227 u_int32_t ndx;
228 {
229 DB *dbp;
230 int ret;
231
232 dbp = NULL;
233 if ((ret = db_open(name, ftype, 0, 0, lp->dbenv, NULL, &dbp)) == 0) {
234 /*
235 * Verify that we are opening the same file that we were
236 * referring to when we wrote this log record.
237 */
238 if (memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) {
239 (void)dbp->close(dbp, 0);
240 dbp = NULL;
241 ret = ENOENT;
242 }
243 }
244
245 if (ret == 0 || ret == ENOENT)
246 (void)__log_add_logid(lp, dbp, name, ndx);
247
248 return (ret);
249 }
250
251 /*
252 * __log_add_logid --
253 * Adds a DB entry to the log's DB entry table.
254 *
255 * PUBLIC: int __log_add_logid __P((DB_LOG *, DB *, const char *, u_int32_t));
256 */
257 int
__log_add_logid(logp,dbp,name,ndx)258 __log_add_logid(logp, dbp, name, ndx)
259 DB_LOG *logp;
260 DB *dbp;
261 const char *name;
262 u_int32_t ndx;
263 {
264 u_int32_t i;
265 int ret;
266
267 ret = 0;
268
269 LOCK_LOGTHREAD(logp);
270
271 /*
272 * Check if we need to grow the table. Note, ndx is 0-based (the
273 * index into the DB entry table) an dbentry_cnt is 1-based, the
274 * number of available slots.
275 */
276 if (logp->dbentry_cnt <= ndx) {
277 if ((ret = __os_realloc(&logp->dbentry,
278 (ndx + DB_GROW_SIZE) * sizeof(DB_ENTRY))) != 0)
279 goto err;
280
281 /* Initialize the new entries. */
282 for (i = logp->dbentry_cnt; i < ndx + DB_GROW_SIZE; i++) {
283 logp->dbentry[i].dbp = NULL;
284 logp->dbentry[i].deleted = 0;
285 logp->dbentry[i].name = NULL;
286 }
287
288 logp->dbentry_cnt = i;
289 }
290
291 /* Make space for the name and copy it in. */
292 if (name != NULL) {
293 if ((ret = __os_malloc(strlen(name) + 1,
294 NULL, &logp->dbentry[ndx].name)) != 0)
295 goto err;
296 strcpy(logp->dbentry[ndx].name, name);
297 }
298
299 if (logp->dbentry[ndx].deleted == 0 && logp->dbentry[ndx].dbp == NULL) {
300 logp->dbentry[ndx].dbp = dbp;
301 logp->dbentry[ndx].refcount = 1;
302 logp->dbentry[ndx].deleted = dbp == NULL;
303 } else
304 logp->dbentry[ndx].refcount++;
305
306
307 err: UNLOCK_LOGTHREAD(logp);
308 return (ret);
309 }
310
311
312 /*
313 * __db_fileid_to_db --
314 * Return the DB corresponding to the specified fileid.
315 *
316 * PUBLIC: int __db_fileid_to_db __P((DB_LOG *, DB **, u_int32_t));
317 */
318 int
__db_fileid_to_db(logp,dbpp,ndx)319 __db_fileid_to_db(logp, dbpp, ndx)
320 DB_LOG *logp;
321 DB **dbpp;
322 u_int32_t ndx;
323 {
324 int ret;
325 char *name;
326 FNAME *fname;
327
328 ret = 0;
329 LOCK_LOGTHREAD(logp);
330
331 /*
332 * Under XA, a process different than the one issuing DB
333 * operations may abort a transaction. In this case,
334 * recovery routines are run by a process that does not
335 * necessarily have the file open. In this case, we must
336 * open the file explicitly.
337 */
338 if (ndx >= logp->dbentry_cnt ||
339 (!logp->dbentry[ndx].deleted && logp->dbentry[ndx].dbp == NULL)) {
340 if (__log_lid_to_fname(logp, ndx, &fname) != 0) {
341 /* Couldn't find entry; this is a fatal error. */
342 ret = EINVAL;
343 goto err;
344 }
345 name = R_ADDR(logp, fname->name_off);
346 /*
347 * __log_do_open is called without protection of the
348 * log thread lock.
349 */
350 UNLOCK_LOGTHREAD(logp);
351 /*
352 * At this point, we are not holding the thread lock, so
353 * exit directly instead of going through the exit code
354 * at the bottom. If the __log_do_open succeeded, then
355 * we don't need to do any of the remaining error checking
356 * at the end of this routine.
357 */
358 if ((ret = __log_do_open(logp,
359 fname->ufid, name, fname->s_type, ndx)) != 0)
360 return (ret);
361 *dbpp = logp->dbentry[ndx].dbp;
362 return (0);
363 }
364
365 /*
366 * Return DB_DELETED if the file has been deleted
367 * (it's not an error).
368 */
369 if (logp->dbentry[ndx].deleted) {
370 ret = DB_DELETED;
371 goto err;
372 }
373
374 /*
375 * Otherwise return 0, but if we don't have a corresponding DB,
376 * it's an error.
377 */
378 if ((*dbpp = logp->dbentry[ndx].dbp) == NULL)
379 ret = ENOENT;
380
381 err: UNLOCK_LOGTHREAD(logp);
382 return (ret);
383 }
384
385 /*
386 * Close files that were opened by the recovery daemon.
387 *
388 * PUBLIC: void __log_close_files __P((DB_LOG *));
389 */
390 void
__log_close_files(logp)391 __log_close_files(logp)
392 DB_LOG *logp;
393 {
394 u_int32_t i;
395
396 LOCK_LOGTHREAD(logp);
397 for (i = 0; i < logp->dbentry_cnt; i++)
398 if (logp->dbentry[i].dbp) {
399 logp->dbentry[i].dbp->close(logp->dbentry[i].dbp, 0);
400 logp->dbentry[i].dbp = NULL;
401 logp->dbentry[i].deleted = 0;
402 }
403 F_CLR(logp, DBC_RECOVER);
404 UNLOCK_LOGTHREAD(logp);
405 }
406
407 /*
408 * PUBLIC: void __log_rem_logid __P((DB_LOG *, u_int32_t));
409 */
410 void
__log_rem_logid(logp,ndx)411 __log_rem_logid(logp, ndx)
412 DB_LOG *logp;
413 u_int32_t ndx;
414 {
415 LOCK_LOGTHREAD(logp);
416 if (--logp->dbentry[ndx].refcount == 0) {
417 logp->dbentry[ndx].dbp = NULL;
418 logp->dbentry[ndx].deleted = 0;
419 }
420 UNLOCK_LOGTHREAD(logp);
421 }
422
423 /*
424 * __log_lid_to_fname --
425 * Traverse the shared-memory region looking for the entry that
426 * matches the passed log fileid. Returns 0 on success; -1 on error.
427 */
428 static int
__log_lid_to_fname(dblp,lid,fnamep)429 __log_lid_to_fname(dblp, lid, fnamep)
430 DB_LOG *dblp;
431 u_int32_t lid;
432 FNAME **fnamep;
433 {
434 FNAME *fnp;
435
436 for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
437 fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
438 if (fnp->ref == 0) /* Entry not in use. */
439 continue;
440 if (fnp->id == lid) {
441 *fnamep = fnp;
442 return (0);
443 }
444 }
445 return (-1);
446 }
447