1 /*- 2 * See the file LICENSE for redistribution information. 3 * 4 * Copyright (c) 1996, 1997, 1998 5 * Sleepycat Software. All rights reserved. 6 */ 7 /* 8 * Copyright (c) 1995, 1996 9 * The President and Fellows of Harvard University. All rights reserved. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40 #include "config.h" 41 42 #ifndef lint 43 static const char sccsid[] = "@(#)log_rec.c 10.26 (Sleepycat) 10/21/98"; 44 #endif /* not lint */ 45 46 #ifndef NO_SYSTEM_INCLUDES 47 #include <sys/types.h> 48 49 #include <errno.h> 50 #include <string.h> 51 #endif 52 53 #include "db_int.h" 54 #include "shqueue.h" 55 #include "log.h" 56 #include "db_dispatch.h" 57 #include "common_ext.h" 58 59 static int __log_do_open __P((DB_LOG *, 60 u_int8_t *, char *, DBTYPE, u_int32_t)); 61 static int __log_lid_to_fname __P((DB_LOG *, u_int32_t, FNAME **)); 62 static int __log_open_file __P((DB_LOG *, __log_register_args *)); 63 64 /* 65 * PUBLIC: int __log_register_recover 66 * PUBLIC: __P((DB_LOG *, DBT *, DB_LSN *, int, void *)); 67 */ 68 int 69 __log_register_recover(logp, dbtp, lsnp, redo, info) 70 DB_LOG *logp; 71 DBT *dbtp; 72 DB_LSN *lsnp; 73 int redo; 74 void *info; 75 { 76 DB_ENTRY *dbe; 77 __log_register_args *argp; 78 int ret; 79 80 #ifdef DEBUG_RECOVER 81 __log_register_print(logp, dbtp, lsnp, redo, info); 82 #endif 83 COMPQUIET(info, NULL); 84 COMPQUIET(lsnp, NULL); 85 86 F_SET(logp, DBC_RECOVER); 87 88 if ((ret = __log_register_read(dbtp->data, &argp)) != 0) 89 goto out; 90 91 if ((argp->opcode == LOG_CHECKPOINT && redo == TXN_OPENFILES) || 92 (argp->opcode == LOG_OPEN && 93 (redo == TXN_REDO || redo == TXN_OPENFILES || 94 redo == TXN_FORWARD_ROLL)) || 95 (argp->opcode == LOG_CLOSE && 96 (redo == TXN_UNDO || redo == TXN_BACKWARD_ROLL))) { 97 /* 98 * If we are redoing an open or undoing a close, then we need 99 * to open a file. 100 */ 101 ret = __log_open_file(logp, argp); 102 if (ret == ENOENT) { 103 if (redo == TXN_OPENFILES) 104 __db_err(logp->dbenv, "warning: %s: %s", 105 argp->name.data, strerror(ENOENT)); 106 ret = 0; 107 } 108 } else if (argp->opcode != LOG_CHECKPOINT && 109 argp->opcode != LOG_CLOSE) { 110 /* 111 * If we are undoing an open, then we need to close the file. 112 * Note that we do *not* close the file if we are redoing a 113 * close, because we do not log the reference counts on log 114 * files and we may have had the file open multiple times, 115 * and therefore, this close should just dec a reference 116 * count. However, since we only do one open during a 117 * checkpoint, this will inadvertently close the file. 118 * 119 * If the file is deleted, then we can just ignore this close. 120 * Otherwise, we should usually have a valid dbp we should 121 * close or whose reference count should be decremented. 122 * However, if we shut down without closing a file, we 123 * may, in fact, not have the file open, and that's OK. 124 */ 125 LOCK_LOGTHREAD(logp); 126 if (argp->id < logp->dbentry_cnt) { 127 dbe = &logp->dbentry[argp->id]; 128 if (dbe->dbp != NULL && --dbe->refcount == 0) { 129 ret = dbe->dbp->close(dbe->dbp, 0); 130 if (dbe->name != NULL) { 131 __os_freestr(dbe->name); 132 dbe->name = NULL; 133 } 134 (void)__log_rem_logid(logp, argp->id); 135 } 136 } 137 UNLOCK_LOGTHREAD(logp); 138 } else if (argp->opcode == LOG_CHECKPOINT && redo == TXN_UNDO && 139 (argp->id >= logp->dbentry_cnt || 140 (!logp->dbentry[argp->id].deleted && 141 logp->dbentry[argp->id].dbp == NULL))) { 142 /* 143 * It's a checkpoint and we are rolling backward. It 144 * is possible that the system was shut down and thus 145 * ended with a stable checkpoint; this file was never 146 * closed and has therefore not been reopened yet. If 147 * so, we need to try to open it. 148 */ 149 ret = __log_open_file(logp, argp); 150 if (ret == ENOENT) { 151 __db_err(logp->dbenv, "warning: %s: %s", 152 argp->name.data, strerror(ENOENT)); 153 ret = 0; 154 } 155 } 156 157 out: F_CLR(logp, DBC_RECOVER); 158 if (argp != NULL) 159 __os_free(argp, 0); 160 return (ret); 161 } 162 163 /* Hand coded routines. */ 164 165 /* 166 * Called during log_register recovery. Make sure that we have an 167 * entry in the dbentry table for this ndx. 168 * Returns 0 on success, non-zero on error. 169 */ 170 static int 171 __log_open_file(lp, argp) 172 DB_LOG *lp; 173 __log_register_args *argp; 174 { 175 DB_ENTRY *dbe; 176 177 if (argp->name.size == 0) 178 return(0); 179 180 /* 181 * Because of reference counting, we cannot automatically close files 182 * during recovery, so when we're opening, we have to check that the 183 * name we are opening is what we expect. If it's not, then we close 184 * the old file and open the new one. 185 */ 186 LOCK_LOGTHREAD(lp); 187 if (argp->id < lp->dbentry_cnt) 188 dbe = &lp->dbentry[argp->id]; 189 else 190 dbe = NULL; 191 192 if (dbe != NULL && (dbe->deleted == 1 || dbe->dbp != NULL) && 193 dbe->name != NULL && argp->name.data != NULL && 194 strncmp(argp->name.data, dbe->name, argp->name.size) == 0) { 195 196 dbe->refcount++; 197 UNLOCK_LOGTHREAD(lp); 198 return (0); 199 } 200 UNLOCK_LOGTHREAD(lp); 201 202 if (dbe != NULL && dbe->dbp != NULL) { 203 (void)dbe->dbp->close(dbe->dbp, 0); 204 if (dbe->name != NULL) 205 __os_freestr(dbe->name); 206 dbe->name = NULL; 207 (void)__log_rem_logid(lp, argp->id); 208 } 209 210 211 return (__log_do_open(lp, 212 argp->uid.data, argp->name.data, argp->ftype, argp->id)); 213 } 214 215 /* 216 * __log_do_open -- 217 * Open files referenced in the log. This is the part of the open that 218 * is not protected by the thread mutex. 219 */ 220 221 static int 222 __log_do_open(lp, uid, name, ftype, ndx) 223 DB_LOG *lp; 224 u_int8_t *uid; 225 char *name; 226 DBTYPE ftype; 227 u_int32_t ndx; 228 { 229 DB *dbp; 230 int ret; 231 232 dbp = NULL; 233 if ((ret = db_open(name, ftype, 0, 0, lp->dbenv, NULL, &dbp)) == 0) { 234 /* 235 * Verify that we are opening the same file that we were 236 * referring to when we wrote this log record. 237 */ 238 if (memcmp(uid, dbp->fileid, DB_FILE_ID_LEN) != 0) { 239 (void)dbp->close(dbp, 0); 240 dbp = NULL; 241 ret = ENOENT; 242 } 243 } 244 245 if (ret == 0 || ret == ENOENT) 246 (void)__log_add_logid(lp, dbp, name, ndx); 247 248 return (ret); 249 } 250 251 /* 252 * __log_add_logid -- 253 * Adds a DB entry to the log's DB entry table. 254 * 255 * PUBLIC: int __log_add_logid __P((DB_LOG *, DB *, const char *, u_int32_t)); 256 */ 257 int 258 __log_add_logid(logp, dbp, name, ndx) 259 DB_LOG *logp; 260 DB *dbp; 261 const char *name; 262 u_int32_t ndx; 263 { 264 u_int32_t i; 265 int ret; 266 267 ret = 0; 268 269 LOCK_LOGTHREAD(logp); 270 271 /* 272 * Check if we need to grow the table. Note, ndx is 0-based (the 273 * index into the DB entry table) an dbentry_cnt is 1-based, the 274 * number of available slots. 275 */ 276 if (logp->dbentry_cnt <= ndx) { 277 if ((ret = __os_realloc(&logp->dbentry, 278 (ndx + DB_GROW_SIZE) * sizeof(DB_ENTRY))) != 0) 279 goto err; 280 281 /* Initialize the new entries. */ 282 for (i = logp->dbentry_cnt; i < ndx + DB_GROW_SIZE; i++) { 283 logp->dbentry[i].dbp = NULL; 284 logp->dbentry[i].deleted = 0; 285 logp->dbentry[i].name = NULL; 286 } 287 288 logp->dbentry_cnt = i; 289 } 290 291 /* Make space for the name and copy it in. */ 292 if (name != NULL) { 293 if ((ret = __os_malloc(strlen(name) + 1, 294 NULL, &logp->dbentry[ndx].name)) != 0) 295 goto err; 296 strcpy(logp->dbentry[ndx].name, name); 297 } 298 299 if (logp->dbentry[ndx].deleted == 0 && logp->dbentry[ndx].dbp == NULL) { 300 logp->dbentry[ndx].dbp = dbp; 301 logp->dbentry[ndx].refcount = 1; 302 logp->dbentry[ndx].deleted = dbp == NULL; 303 } else 304 logp->dbentry[ndx].refcount++; 305 306 307 err: UNLOCK_LOGTHREAD(logp); 308 return (ret); 309 } 310 311 312 /* 313 * __db_fileid_to_db -- 314 * Return the DB corresponding to the specified fileid. 315 * 316 * PUBLIC: int __db_fileid_to_db __P((DB_LOG *, DB **, u_int32_t)); 317 */ 318 int 319 __db_fileid_to_db(logp, dbpp, ndx) 320 DB_LOG *logp; 321 DB **dbpp; 322 u_int32_t ndx; 323 { 324 int ret; 325 char *name; 326 FNAME *fname; 327 328 ret = 0; 329 LOCK_LOGTHREAD(logp); 330 331 /* 332 * Under XA, a process different than the one issuing DB 333 * operations may abort a transaction. In this case, 334 * recovery routines are run by a process that does not 335 * necessarily have the file open. In this case, we must 336 * open the file explicitly. 337 */ 338 if (ndx >= logp->dbentry_cnt || 339 (!logp->dbentry[ndx].deleted && logp->dbentry[ndx].dbp == NULL)) { 340 if (__log_lid_to_fname(logp, ndx, &fname) != 0) { 341 /* Couldn't find entry; this is a fatal error. */ 342 ret = EINVAL; 343 goto err; 344 } 345 name = R_ADDR(logp, fname->name_off); 346 /* 347 * __log_do_open is called without protection of the 348 * log thread lock. 349 */ 350 UNLOCK_LOGTHREAD(logp); 351 /* 352 * At this point, we are not holding the thread lock, so 353 * exit directly instead of going through the exit code 354 * at the bottom. If the __log_do_open succeeded, then 355 * we don't need to do any of the remaining error checking 356 * at the end of this routine. 357 */ 358 if ((ret = __log_do_open(logp, 359 fname->ufid, name, fname->s_type, ndx)) != 0) 360 return (ret); 361 *dbpp = logp->dbentry[ndx].dbp; 362 return (0); 363 } 364 365 /* 366 * Return DB_DELETED if the file has been deleted 367 * (it's not an error). 368 */ 369 if (logp->dbentry[ndx].deleted) { 370 ret = DB_DELETED; 371 goto err; 372 } 373 374 /* 375 * Otherwise return 0, but if we don't have a corresponding DB, 376 * it's an error. 377 */ 378 if ((*dbpp = logp->dbentry[ndx].dbp) == NULL) 379 ret = ENOENT; 380 381 err: UNLOCK_LOGTHREAD(logp); 382 return (ret); 383 } 384 385 /* 386 * Close files that were opened by the recovery daemon. 387 * 388 * PUBLIC: void __log_close_files __P((DB_LOG *)); 389 */ 390 void 391 __log_close_files(logp) 392 DB_LOG *logp; 393 { 394 u_int32_t i; 395 396 LOCK_LOGTHREAD(logp); 397 for (i = 0; i < logp->dbentry_cnt; i++) 398 if (logp->dbentry[i].dbp) { 399 logp->dbentry[i].dbp->close(logp->dbentry[i].dbp, 0); 400 logp->dbentry[i].dbp = NULL; 401 logp->dbentry[i].deleted = 0; 402 } 403 F_CLR(logp, DBC_RECOVER); 404 UNLOCK_LOGTHREAD(logp); 405 } 406 407 /* 408 * PUBLIC: void __log_rem_logid __P((DB_LOG *, u_int32_t)); 409 */ 410 void 411 __log_rem_logid(logp, ndx) 412 DB_LOG *logp; 413 u_int32_t ndx; 414 { 415 LOCK_LOGTHREAD(logp); 416 if (--logp->dbentry[ndx].refcount == 0) { 417 logp->dbentry[ndx].dbp = NULL; 418 logp->dbentry[ndx].deleted = 0; 419 } 420 UNLOCK_LOGTHREAD(logp); 421 } 422 423 /* 424 * __log_lid_to_fname -- 425 * Traverse the shared-memory region looking for the entry that 426 * matches the passed log fileid. Returns 0 on success; -1 on error. 427 */ 428 static int 429 __log_lid_to_fname(dblp, lid, fnamep) 430 DB_LOG *dblp; 431 u_int32_t lid; 432 FNAME **fnamep; 433 { 434 FNAME *fnp; 435 436 for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname); 437 fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { 438 if (fnp->ref == 0) /* Entry not in use. */ 439 continue; 440 if (fnp->id == lid) { 441 *fnamep = fnp; 442 return (0); 443 } 444 } 445 return (-1); 446 } 447