1*7c478bd9Sstevel@tonic-gate /*- 2*7c478bd9Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998 5*7c478bd9Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*7c478bd9Sstevel@tonic-gate */ 7*7c478bd9Sstevel@tonic-gate #include "config.h" 8*7c478bd9Sstevel@tonic-gate 9*7c478bd9Sstevel@tonic-gate #ifndef lint 10*7c478bd9Sstevel@tonic-gate static const char sccsid[] = "@(#)log_put.c 10.44 (Sleepycat) 11/3/98"; 11*7c478bd9Sstevel@tonic-gate #endif /* not lint */ 12*7c478bd9Sstevel@tonic-gate 13*7c478bd9Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 14*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 15*7c478bd9Sstevel@tonic-gate 16*7c478bd9Sstevel@tonic-gate #include <errno.h> 17*7c478bd9Sstevel@tonic-gate #include <stdio.h> 18*7c478bd9Sstevel@tonic-gate #include <string.h> 19*7c478bd9Sstevel@tonic-gate #include <time.h> 20*7c478bd9Sstevel@tonic-gate #include <unistd.h> 21*7c478bd9Sstevel@tonic-gate #endif 22*7c478bd9Sstevel@tonic-gate 23*7c478bd9Sstevel@tonic-gate #include "db_int.h" 24*7c478bd9Sstevel@tonic-gate #include "shqueue.h" 25*7c478bd9Sstevel@tonic-gate #include "db_page.h" 26*7c478bd9Sstevel@tonic-gate #include "log.h" 27*7c478bd9Sstevel@tonic-gate #include "hash.h" 28*7c478bd9Sstevel@tonic-gate #include "clib_ext.h" 29*7c478bd9Sstevel@tonic-gate #include "common_ext.h" 30*7c478bd9Sstevel@tonic-gate 31*7c478bd9Sstevel@tonic-gate static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t)); 32*7c478bd9Sstevel@tonic-gate static int __log_flush __P((DB_LOG *, const DB_LSN *)); 33*7c478bd9Sstevel@tonic-gate static int __log_newfd __P((DB_LOG *)); 34*7c478bd9Sstevel@tonic-gate static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); 35*7c478bd9Sstevel@tonic-gate static int __log_write __P((DB_LOG *, void *, u_int32_t)); 36*7c478bd9Sstevel@tonic-gate 37*7c478bd9Sstevel@tonic-gate /* 38*7c478bd9Sstevel@tonic-gate * log_put -- 39*7c478bd9Sstevel@tonic-gate * Write a log record. 40*7c478bd9Sstevel@tonic-gate */ 41*7c478bd9Sstevel@tonic-gate int 42*7c478bd9Sstevel@tonic-gate log_put(dblp, lsn, dbt, flags) 43*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 44*7c478bd9Sstevel@tonic-gate DB_LSN *lsn; 45*7c478bd9Sstevel@tonic-gate const DBT *dbt; 46*7c478bd9Sstevel@tonic-gate u_int32_t flags; 47*7c478bd9Sstevel@tonic-gate { 48*7c478bd9Sstevel@tonic-gate int ret; 49*7c478bd9Sstevel@tonic-gate 50*7c478bd9Sstevel@tonic-gate LOG_PANIC_CHECK(dblp); 51*7c478bd9Sstevel@tonic-gate 52*7c478bd9Sstevel@tonic-gate /* Validate arguments. */ 53*7c478bd9Sstevel@tonic-gate if (flags != 0 && flags != DB_CHECKPOINT && 54*7c478bd9Sstevel@tonic-gate flags != DB_CURLSN && flags != DB_FLUSH) 55*7c478bd9Sstevel@tonic-gate return (__db_ferr(dblp->dbenv, "log_put", 0)); 56*7c478bd9Sstevel@tonic-gate 57*7c478bd9Sstevel@tonic-gate LOCK_LOGREGION(dblp); 58*7c478bd9Sstevel@tonic-gate ret = __log_put(dblp, lsn, dbt, flags); 59*7c478bd9Sstevel@tonic-gate UNLOCK_LOGREGION(dblp); 60*7c478bd9Sstevel@tonic-gate return (ret); 61*7c478bd9Sstevel@tonic-gate } 62*7c478bd9Sstevel@tonic-gate 63*7c478bd9Sstevel@tonic-gate /* 64*7c478bd9Sstevel@tonic-gate * __log_put -- 65*7c478bd9Sstevel@tonic-gate * Write a log record; internal version. 66*7c478bd9Sstevel@tonic-gate * 67*7c478bd9Sstevel@tonic-gate * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t)); 68*7c478bd9Sstevel@tonic-gate */ 69*7c478bd9Sstevel@tonic-gate int 70*7c478bd9Sstevel@tonic-gate __log_put(dblp, lsn, dbt, flags) 71*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 72*7c478bd9Sstevel@tonic-gate DB_LSN *lsn; 73*7c478bd9Sstevel@tonic-gate const DBT *dbt; 74*7c478bd9Sstevel@tonic-gate u_int32_t flags; 75*7c478bd9Sstevel@tonic-gate { 76*7c478bd9Sstevel@tonic-gate DBT fid_dbt, t; 77*7c478bd9Sstevel@tonic-gate DB_LSN r_unused; 78*7c478bd9Sstevel@tonic-gate FNAME *fnp; 79*7c478bd9Sstevel@tonic-gate LOG *lp; 80*7c478bd9Sstevel@tonic-gate u_int32_t lastoff; 81*7c478bd9Sstevel@tonic-gate int ret; 82*7c478bd9Sstevel@tonic-gate 83*7c478bd9Sstevel@tonic-gate lp = dblp->lp; 84*7c478bd9Sstevel@tonic-gate 85*7c478bd9Sstevel@tonic-gate /* 86*7c478bd9Sstevel@tonic-gate * If the application just wants to know where we are, fill in 87*7c478bd9Sstevel@tonic-gate * the information. Currently used by the transaction manager 88*7c478bd9Sstevel@tonic-gate * to avoid writing TXN_begin records. 89*7c478bd9Sstevel@tonic-gate */ 90*7c478bd9Sstevel@tonic-gate if (flags == DB_CURLSN) { 91*7c478bd9Sstevel@tonic-gate lsn->file = lp->lsn.file; 92*7c478bd9Sstevel@tonic-gate lsn->offset = lp->lsn.offset; 93*7c478bd9Sstevel@tonic-gate return (0); 94*7c478bd9Sstevel@tonic-gate } 95*7c478bd9Sstevel@tonic-gate 96*7c478bd9Sstevel@tonic-gate /* If this information won't fit in the file, swap files. */ 97*7c478bd9Sstevel@tonic-gate if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) { 98*7c478bd9Sstevel@tonic-gate if (sizeof(HDR) + 99*7c478bd9Sstevel@tonic-gate sizeof(LOGP) + dbt->size > lp->persist.lg_max) { 100*7c478bd9Sstevel@tonic-gate __db_err(dblp->dbenv, 101*7c478bd9Sstevel@tonic-gate "log_put: record larger than maximum file size"); 102*7c478bd9Sstevel@tonic-gate return (EINVAL); 103*7c478bd9Sstevel@tonic-gate } 104*7c478bd9Sstevel@tonic-gate 105*7c478bd9Sstevel@tonic-gate /* Flush the log. */ 106*7c478bd9Sstevel@tonic-gate if ((ret = __log_flush(dblp, NULL)) != 0) 107*7c478bd9Sstevel@tonic-gate return (ret); 108*7c478bd9Sstevel@tonic-gate 109*7c478bd9Sstevel@tonic-gate /* 110*7c478bd9Sstevel@tonic-gate * Save the last known offset from the previous file, we'll 111*7c478bd9Sstevel@tonic-gate * need it to initialize the persistent header information. 112*7c478bd9Sstevel@tonic-gate */ 113*7c478bd9Sstevel@tonic-gate lastoff = lp->lsn.offset; 114*7c478bd9Sstevel@tonic-gate 115*7c478bd9Sstevel@tonic-gate /* Point the current LSN to the new file. */ 116*7c478bd9Sstevel@tonic-gate ++lp->lsn.file; 117*7c478bd9Sstevel@tonic-gate lp->lsn.offset = 0; 118*7c478bd9Sstevel@tonic-gate 119*7c478bd9Sstevel@tonic-gate /* Reset the file write offset. */ 120*7c478bd9Sstevel@tonic-gate lp->w_off = 0; 121*7c478bd9Sstevel@tonic-gate } else 122*7c478bd9Sstevel@tonic-gate lastoff = 0; 123*7c478bd9Sstevel@tonic-gate 124*7c478bd9Sstevel@tonic-gate /* Initialize the LSN information returned to the user. */ 125*7c478bd9Sstevel@tonic-gate lsn->file = lp->lsn.file; 126*7c478bd9Sstevel@tonic-gate lsn->offset = lp->lsn.offset; 127*7c478bd9Sstevel@tonic-gate 128*7c478bd9Sstevel@tonic-gate /* 129*7c478bd9Sstevel@tonic-gate * Insert persistent information as the first record in every file. 130*7c478bd9Sstevel@tonic-gate * Note that the previous length is wrong for the very first record 131*7c478bd9Sstevel@tonic-gate * of the log, but that's okay, we check for it during retrieval. 132*7c478bd9Sstevel@tonic-gate */ 133*7c478bd9Sstevel@tonic-gate if (lp->lsn.offset == 0) { 134*7c478bd9Sstevel@tonic-gate t.data = &lp->persist; 135*7c478bd9Sstevel@tonic-gate t.size = sizeof(LOGP); 136*7c478bd9Sstevel@tonic-gate if ((ret = __log_putr(dblp, lsn, 137*7c478bd9Sstevel@tonic-gate &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0) 138*7c478bd9Sstevel@tonic-gate return (ret); 139*7c478bd9Sstevel@tonic-gate 140*7c478bd9Sstevel@tonic-gate /* Update the LSN information returned to the user. */ 141*7c478bd9Sstevel@tonic-gate lsn->file = lp->lsn.file; 142*7c478bd9Sstevel@tonic-gate lsn->offset = lp->lsn.offset; 143*7c478bd9Sstevel@tonic-gate } 144*7c478bd9Sstevel@tonic-gate 145*7c478bd9Sstevel@tonic-gate /* Write the application's log record. */ 146*7c478bd9Sstevel@tonic-gate if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0) 147*7c478bd9Sstevel@tonic-gate return (ret); 148*7c478bd9Sstevel@tonic-gate 149*7c478bd9Sstevel@tonic-gate /* 150*7c478bd9Sstevel@tonic-gate * On a checkpoint, we: 151*7c478bd9Sstevel@tonic-gate * Put out the checkpoint record (above). 152*7c478bd9Sstevel@tonic-gate * Save the LSN of the checkpoint in the shared region. 153*7c478bd9Sstevel@tonic-gate * Append the set of file name information into the log. 154*7c478bd9Sstevel@tonic-gate */ 155*7c478bd9Sstevel@tonic-gate if (flags == DB_CHECKPOINT) { 156*7c478bd9Sstevel@tonic-gate lp->chkpt_lsn = *lsn; 157*7c478bd9Sstevel@tonic-gate 158*7c478bd9Sstevel@tonic-gate for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname); 159*7c478bd9Sstevel@tonic-gate fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) { 160*7c478bd9Sstevel@tonic-gate if (fnp->ref == 0) /* Entry not in use. */ 161*7c478bd9Sstevel@tonic-gate continue; 162*7c478bd9Sstevel@tonic-gate memset(&t, 0, sizeof(t)); 163*7c478bd9Sstevel@tonic-gate t.data = R_ADDR(dblp, fnp->name_off); 164*7c478bd9Sstevel@tonic-gate t.size = strlen(t.data) + 1; 165*7c478bd9Sstevel@tonic-gate memset(&fid_dbt, 0, sizeof(fid_dbt)); 166*7c478bd9Sstevel@tonic-gate fid_dbt.data = fnp->ufid; 167*7c478bd9Sstevel@tonic-gate fid_dbt.size = DB_FILE_ID_LEN; 168*7c478bd9Sstevel@tonic-gate if ((ret = __log_register_log(dblp, NULL, &r_unused, 0, 169*7c478bd9Sstevel@tonic-gate LOG_CHECKPOINT, &t, &fid_dbt, fnp->id, fnp->s_type)) 170*7c478bd9Sstevel@tonic-gate != 0) 171*7c478bd9Sstevel@tonic-gate return (ret); 172*7c478bd9Sstevel@tonic-gate } 173*7c478bd9Sstevel@tonic-gate } 174*7c478bd9Sstevel@tonic-gate 175*7c478bd9Sstevel@tonic-gate /* 176*7c478bd9Sstevel@tonic-gate * On a checkpoint or when flush is requested, we: 177*7c478bd9Sstevel@tonic-gate * Flush the current buffer contents to disk. 178*7c478bd9Sstevel@tonic-gate * Sync the log to disk. 179*7c478bd9Sstevel@tonic-gate */ 180*7c478bd9Sstevel@tonic-gate if (flags == DB_FLUSH || flags == DB_CHECKPOINT) 181*7c478bd9Sstevel@tonic-gate if ((ret = __log_flush(dblp, NULL)) != 0) 182*7c478bd9Sstevel@tonic-gate return (ret); 183*7c478bd9Sstevel@tonic-gate 184*7c478bd9Sstevel@tonic-gate /* 185*7c478bd9Sstevel@tonic-gate * On a checkpoint, we: 186*7c478bd9Sstevel@tonic-gate * Save the time the checkpoint was written. 187*7c478bd9Sstevel@tonic-gate * Reset the bytes written since the last checkpoint. 188*7c478bd9Sstevel@tonic-gate */ 189*7c478bd9Sstevel@tonic-gate if (flags == DB_CHECKPOINT) { 190*7c478bd9Sstevel@tonic-gate (void)time(&lp->chkpt); 191*7c478bd9Sstevel@tonic-gate lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0; 192*7c478bd9Sstevel@tonic-gate } 193*7c478bd9Sstevel@tonic-gate return (0); 194*7c478bd9Sstevel@tonic-gate } 195*7c478bd9Sstevel@tonic-gate 196*7c478bd9Sstevel@tonic-gate /* 197*7c478bd9Sstevel@tonic-gate * __log_putr -- 198*7c478bd9Sstevel@tonic-gate * Actually put a record into the log. 199*7c478bd9Sstevel@tonic-gate */ 200*7c478bd9Sstevel@tonic-gate static int 201*7c478bd9Sstevel@tonic-gate __log_putr(dblp, lsn, dbt, prev) 202*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 203*7c478bd9Sstevel@tonic-gate DB_LSN *lsn; 204*7c478bd9Sstevel@tonic-gate const DBT *dbt; 205*7c478bd9Sstevel@tonic-gate u_int32_t prev; 206*7c478bd9Sstevel@tonic-gate { 207*7c478bd9Sstevel@tonic-gate HDR hdr; 208*7c478bd9Sstevel@tonic-gate LOG *lp; 209*7c478bd9Sstevel@tonic-gate int ret; 210*7c478bd9Sstevel@tonic-gate 211*7c478bd9Sstevel@tonic-gate lp = dblp->lp; 212*7c478bd9Sstevel@tonic-gate 213*7c478bd9Sstevel@tonic-gate /* 214*7c478bd9Sstevel@tonic-gate * Initialize the header. If we just switched files, lsn.offset will 215*7c478bd9Sstevel@tonic-gate * be 0, and what we really want is the offset of the previous record 216*7c478bd9Sstevel@tonic-gate * in the previous file. Fortunately, prev holds the value we want. 217*7c478bd9Sstevel@tonic-gate */ 218*7c478bd9Sstevel@tonic-gate hdr.prev = prev; 219*7c478bd9Sstevel@tonic-gate hdr.len = sizeof(HDR) + dbt->size; 220*7c478bd9Sstevel@tonic-gate hdr.cksum = __ham_func4(dbt->data, dbt->size); 221*7c478bd9Sstevel@tonic-gate 222*7c478bd9Sstevel@tonic-gate if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0) 223*7c478bd9Sstevel@tonic-gate return (ret); 224*7c478bd9Sstevel@tonic-gate lp->len = sizeof(HDR); 225*7c478bd9Sstevel@tonic-gate lp->lsn.offset += sizeof(HDR); 226*7c478bd9Sstevel@tonic-gate 227*7c478bd9Sstevel@tonic-gate if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0) 228*7c478bd9Sstevel@tonic-gate return (ret); 229*7c478bd9Sstevel@tonic-gate lp->len += dbt->size; 230*7c478bd9Sstevel@tonic-gate lp->lsn.offset += dbt->size; 231*7c478bd9Sstevel@tonic-gate return (0); 232*7c478bd9Sstevel@tonic-gate } 233*7c478bd9Sstevel@tonic-gate 234*7c478bd9Sstevel@tonic-gate /* 235*7c478bd9Sstevel@tonic-gate * log_flush -- 236*7c478bd9Sstevel@tonic-gate * Write all records less than or equal to the specified LSN. 237*7c478bd9Sstevel@tonic-gate */ 238*7c478bd9Sstevel@tonic-gate int 239*7c478bd9Sstevel@tonic-gate log_flush(dblp, lsn) 240*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 241*7c478bd9Sstevel@tonic-gate const DB_LSN *lsn; 242*7c478bd9Sstevel@tonic-gate { 243*7c478bd9Sstevel@tonic-gate int ret; 244*7c478bd9Sstevel@tonic-gate 245*7c478bd9Sstevel@tonic-gate LOG_PANIC_CHECK(dblp); 246*7c478bd9Sstevel@tonic-gate 247*7c478bd9Sstevel@tonic-gate LOCK_LOGREGION(dblp); 248*7c478bd9Sstevel@tonic-gate ret = __log_flush(dblp, lsn); 249*7c478bd9Sstevel@tonic-gate UNLOCK_LOGREGION(dblp); 250*7c478bd9Sstevel@tonic-gate return (ret); 251*7c478bd9Sstevel@tonic-gate } 252*7c478bd9Sstevel@tonic-gate 253*7c478bd9Sstevel@tonic-gate /* 254*7c478bd9Sstevel@tonic-gate * __log_flush -- 255*7c478bd9Sstevel@tonic-gate * Write all records less than or equal to the specified LSN; internal 256*7c478bd9Sstevel@tonic-gate * version. 257*7c478bd9Sstevel@tonic-gate */ 258*7c478bd9Sstevel@tonic-gate static int 259*7c478bd9Sstevel@tonic-gate __log_flush(dblp, lsn) 260*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 261*7c478bd9Sstevel@tonic-gate const DB_LSN *lsn; 262*7c478bd9Sstevel@tonic-gate { 263*7c478bd9Sstevel@tonic-gate DB_LSN t_lsn; 264*7c478bd9Sstevel@tonic-gate LOG *lp; 265*7c478bd9Sstevel@tonic-gate int current, ret; 266*7c478bd9Sstevel@tonic-gate 267*7c478bd9Sstevel@tonic-gate ret = 0; 268*7c478bd9Sstevel@tonic-gate lp = dblp->lp; 269*7c478bd9Sstevel@tonic-gate 270*7c478bd9Sstevel@tonic-gate /* 271*7c478bd9Sstevel@tonic-gate * If no LSN specified, flush the entire log by setting the flush LSN 272*7c478bd9Sstevel@tonic-gate * to the last LSN written in the log. Otherwise, check that the LSN 273*7c478bd9Sstevel@tonic-gate * isn't a non-existent record for the log. 274*7c478bd9Sstevel@tonic-gate */ 275*7c478bd9Sstevel@tonic-gate if (lsn == NULL) { 276*7c478bd9Sstevel@tonic-gate t_lsn.file = lp->lsn.file; 277*7c478bd9Sstevel@tonic-gate t_lsn.offset = lp->lsn.offset - lp->len; 278*7c478bd9Sstevel@tonic-gate lsn = &t_lsn; 279*7c478bd9Sstevel@tonic-gate } else 280*7c478bd9Sstevel@tonic-gate if (lsn->file > lp->lsn.file || 281*7c478bd9Sstevel@tonic-gate (lsn->file == lp->lsn.file && 282*7c478bd9Sstevel@tonic-gate lsn->offset > lp->lsn.offset - lp->len)) { 283*7c478bd9Sstevel@tonic-gate __db_err(dblp->dbenv, 284*7c478bd9Sstevel@tonic-gate "log_flush: LSN past current end-of-log"); 285*7c478bd9Sstevel@tonic-gate return (EINVAL); 286*7c478bd9Sstevel@tonic-gate } 287*7c478bd9Sstevel@tonic-gate 288*7c478bd9Sstevel@tonic-gate /* 289*7c478bd9Sstevel@tonic-gate * If the LSN is less than the last-sync'd LSN, we're done. Note, 290*7c478bd9Sstevel@tonic-gate * the last-sync LSN saved in s_lsn is the LSN of the first byte 291*7c478bd9Sstevel@tonic-gate * we absolutely know has been written to disk, so the test is <=. 292*7c478bd9Sstevel@tonic-gate */ 293*7c478bd9Sstevel@tonic-gate if (lsn->file < lp->s_lsn.file || 294*7c478bd9Sstevel@tonic-gate (lsn->file == lp->s_lsn.file && lsn->offset <= lp->s_lsn.offset)) 295*7c478bd9Sstevel@tonic-gate return (0); 296*7c478bd9Sstevel@tonic-gate 297*7c478bd9Sstevel@tonic-gate /* 298*7c478bd9Sstevel@tonic-gate * We may need to write the current buffer. We have to write the 299*7c478bd9Sstevel@tonic-gate * current buffer if the flush LSN is greater than or equal to the 300*7c478bd9Sstevel@tonic-gate * buffer's starting LSN. 301*7c478bd9Sstevel@tonic-gate */ 302*7c478bd9Sstevel@tonic-gate current = 0; 303*7c478bd9Sstevel@tonic-gate if (lp->b_off != 0 && log_compare(lsn, &lp->f_lsn) >= 0) { 304*7c478bd9Sstevel@tonic-gate if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0) 305*7c478bd9Sstevel@tonic-gate return (ret); 306*7c478bd9Sstevel@tonic-gate 307*7c478bd9Sstevel@tonic-gate lp->b_off = 0; 308*7c478bd9Sstevel@tonic-gate current = 1; 309*7c478bd9Sstevel@tonic-gate } 310*7c478bd9Sstevel@tonic-gate 311*7c478bd9Sstevel@tonic-gate /* 312*7c478bd9Sstevel@tonic-gate * It's possible that this thread may never have written to this log 313*7c478bd9Sstevel@tonic-gate * file. Acquire a file descriptor if we don't already have one. 314*7c478bd9Sstevel@tonic-gate */ 315*7c478bd9Sstevel@tonic-gate if (dblp->lfname != dblp->lp->lsn.file) 316*7c478bd9Sstevel@tonic-gate if ((ret = __log_newfd(dblp)) != 0) 317*7c478bd9Sstevel@tonic-gate return (ret); 318*7c478bd9Sstevel@tonic-gate 319*7c478bd9Sstevel@tonic-gate /* Sync all writes to disk. */ 320*7c478bd9Sstevel@tonic-gate if ((ret = __os_fsync(dblp->lfd)) != 0) { 321*7c478bd9Sstevel@tonic-gate __db_panic(dblp->dbenv, ret); 322*7c478bd9Sstevel@tonic-gate return (ret); 323*7c478bd9Sstevel@tonic-gate } 324*7c478bd9Sstevel@tonic-gate ++lp->stat.st_scount; 325*7c478bd9Sstevel@tonic-gate 326*7c478bd9Sstevel@tonic-gate /* 327*7c478bd9Sstevel@tonic-gate * Set the last-synced LSN, using the LSN of the current buffer. If 328*7c478bd9Sstevel@tonic-gate * the current buffer was flushed, we know the LSN of the first byte 329*7c478bd9Sstevel@tonic-gate * of the buffer is on disk, otherwise, we only know that the LSN of 330*7c478bd9Sstevel@tonic-gate * the record before the one beginning the current buffer is on disk. 331*7c478bd9Sstevel@tonic-gate * 332*7c478bd9Sstevel@tonic-gate * XXX 333*7c478bd9Sstevel@tonic-gate * Check to make sure that the saved lsn isn't 0 before we go making 334*7c478bd9Sstevel@tonic-gate * this change. If DB_CHECKPOINT was called before we actually wrote 335*7c478bd9Sstevel@tonic-gate * something, you can end up here without ever having written anything 336*7c478bd9Sstevel@tonic-gate * to a log file, and decrementing either s_lsn.file or s_lsn.offset 337*7c478bd9Sstevel@tonic-gate * will cause much sadness later on. 338*7c478bd9Sstevel@tonic-gate */ 339*7c478bd9Sstevel@tonic-gate lp->s_lsn = lp->f_lsn; 340*7c478bd9Sstevel@tonic-gate if (!current && lp->s_lsn.file != 0) 341*7c478bd9Sstevel@tonic-gate if (lp->s_lsn.offset == 0) { 342*7c478bd9Sstevel@tonic-gate --lp->s_lsn.file; 343*7c478bd9Sstevel@tonic-gate lp->s_lsn.offset = lp->persist.lg_max; 344*7c478bd9Sstevel@tonic-gate } else 345*7c478bd9Sstevel@tonic-gate --lp->s_lsn.offset; 346*7c478bd9Sstevel@tonic-gate 347*7c478bd9Sstevel@tonic-gate return (0); 348*7c478bd9Sstevel@tonic-gate } 349*7c478bd9Sstevel@tonic-gate 350*7c478bd9Sstevel@tonic-gate /* 351*7c478bd9Sstevel@tonic-gate * __log_fill -- 352*7c478bd9Sstevel@tonic-gate * Write information into the log. 353*7c478bd9Sstevel@tonic-gate */ 354*7c478bd9Sstevel@tonic-gate static int 355*7c478bd9Sstevel@tonic-gate __log_fill(dblp, lsn, addr, len) 356*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 357*7c478bd9Sstevel@tonic-gate DB_LSN *lsn; 358*7c478bd9Sstevel@tonic-gate void *addr; 359*7c478bd9Sstevel@tonic-gate u_int32_t len; 360*7c478bd9Sstevel@tonic-gate { 361*7c478bd9Sstevel@tonic-gate LOG *lp; 362*7c478bd9Sstevel@tonic-gate u_int32_t nrec; 363*7c478bd9Sstevel@tonic-gate size_t nw, remain; 364*7c478bd9Sstevel@tonic-gate int ret; 365*7c478bd9Sstevel@tonic-gate 366*7c478bd9Sstevel@tonic-gate /* Copy out the data. */ 367*7c478bd9Sstevel@tonic-gate for (lp = dblp->lp; len > 0;) { 368*7c478bd9Sstevel@tonic-gate /* 369*7c478bd9Sstevel@tonic-gate * If we're beginning a new buffer, note the user LSN to which 370*7c478bd9Sstevel@tonic-gate * the first byte of the buffer belongs. We have to know this 371*7c478bd9Sstevel@tonic-gate * when flushing the buffer so that we know if the in-memory 372*7c478bd9Sstevel@tonic-gate * buffer needs to be flushed. 373*7c478bd9Sstevel@tonic-gate */ 374*7c478bd9Sstevel@tonic-gate if (lp->b_off == 0) 375*7c478bd9Sstevel@tonic-gate lp->f_lsn = *lsn; 376*7c478bd9Sstevel@tonic-gate 377*7c478bd9Sstevel@tonic-gate /* 378*7c478bd9Sstevel@tonic-gate * If we're on a buffer boundary and the data is big enough, 379*7c478bd9Sstevel@tonic-gate * copy as many records as we can directly from the data. 380*7c478bd9Sstevel@tonic-gate */ 381*7c478bd9Sstevel@tonic-gate if (lp->b_off == 0 && len >= sizeof(lp->buf)) { 382*7c478bd9Sstevel@tonic-gate nrec = len / sizeof(lp->buf); 383*7c478bd9Sstevel@tonic-gate if ((ret = __log_write(dblp, 384*7c478bd9Sstevel@tonic-gate addr, nrec * sizeof(lp->buf))) != 0) 385*7c478bd9Sstevel@tonic-gate return (ret); 386*7c478bd9Sstevel@tonic-gate addr = (u_int8_t *)addr + nrec * sizeof(lp->buf); 387*7c478bd9Sstevel@tonic-gate len -= nrec * sizeof(lp->buf); 388*7c478bd9Sstevel@tonic-gate continue; 389*7c478bd9Sstevel@tonic-gate } 390*7c478bd9Sstevel@tonic-gate 391*7c478bd9Sstevel@tonic-gate /* Figure out how many bytes we can copy this time. */ 392*7c478bd9Sstevel@tonic-gate remain = sizeof(lp->buf) - lp->b_off; 393*7c478bd9Sstevel@tonic-gate nw = remain > len ? len : remain; 394*7c478bd9Sstevel@tonic-gate memcpy(lp->buf + lp->b_off, addr, nw); 395*7c478bd9Sstevel@tonic-gate addr = (u_int8_t *)addr + nw; 396*7c478bd9Sstevel@tonic-gate len -= nw; 397*7c478bd9Sstevel@tonic-gate lp->b_off += nw; 398*7c478bd9Sstevel@tonic-gate 399*7c478bd9Sstevel@tonic-gate /* If we fill the buffer, flush it. */ 400*7c478bd9Sstevel@tonic-gate if (lp->b_off == sizeof(lp->buf)) { 401*7c478bd9Sstevel@tonic-gate if ((ret = 402*7c478bd9Sstevel@tonic-gate __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0) 403*7c478bd9Sstevel@tonic-gate return (ret); 404*7c478bd9Sstevel@tonic-gate lp->b_off = 0; 405*7c478bd9Sstevel@tonic-gate } 406*7c478bd9Sstevel@tonic-gate } 407*7c478bd9Sstevel@tonic-gate return (0); 408*7c478bd9Sstevel@tonic-gate } 409*7c478bd9Sstevel@tonic-gate 410*7c478bd9Sstevel@tonic-gate /* 411*7c478bd9Sstevel@tonic-gate * __log_write -- 412*7c478bd9Sstevel@tonic-gate * Write the log buffer to disk. 413*7c478bd9Sstevel@tonic-gate */ 414*7c478bd9Sstevel@tonic-gate static int 415*7c478bd9Sstevel@tonic-gate __log_write(dblp, addr, len) 416*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 417*7c478bd9Sstevel@tonic-gate void *addr; 418*7c478bd9Sstevel@tonic-gate u_int32_t len; 419*7c478bd9Sstevel@tonic-gate { 420*7c478bd9Sstevel@tonic-gate LOG *lp; 421*7c478bd9Sstevel@tonic-gate ssize_t nw; 422*7c478bd9Sstevel@tonic-gate int ret; 423*7c478bd9Sstevel@tonic-gate 424*7c478bd9Sstevel@tonic-gate /* 425*7c478bd9Sstevel@tonic-gate * If we haven't opened the log file yet or the current one 426*7c478bd9Sstevel@tonic-gate * has changed, acquire a new log file. 427*7c478bd9Sstevel@tonic-gate */ 428*7c478bd9Sstevel@tonic-gate lp = dblp->lp; 429*7c478bd9Sstevel@tonic-gate if (dblp->lfd == -1 || dblp->lfname != lp->lsn.file) 430*7c478bd9Sstevel@tonic-gate if ((ret = __log_newfd(dblp)) != 0) 431*7c478bd9Sstevel@tonic-gate return (ret); 432*7c478bd9Sstevel@tonic-gate 433*7c478bd9Sstevel@tonic-gate /* 434*7c478bd9Sstevel@tonic-gate * Seek to the offset in the file (someone may have written it 435*7c478bd9Sstevel@tonic-gate * since we last did). 436*7c478bd9Sstevel@tonic-gate */ 437*7c478bd9Sstevel@tonic-gate if ((ret = __os_seek(dblp->lfd, 0, 0, lp->w_off, 0, SEEK_SET)) != 0 || 438*7c478bd9Sstevel@tonic-gate (ret = __os_write(dblp->lfd, addr, len, &nw)) != 0) { 439*7c478bd9Sstevel@tonic-gate __db_panic(dblp->dbenv, ret); 440*7c478bd9Sstevel@tonic-gate return (ret); 441*7c478bd9Sstevel@tonic-gate } 442*7c478bd9Sstevel@tonic-gate if (nw != (int32_t)len) 443*7c478bd9Sstevel@tonic-gate return (EIO); 444*7c478bd9Sstevel@tonic-gate 445*7c478bd9Sstevel@tonic-gate /* Reset the buffer offset and update the seek offset. */ 446*7c478bd9Sstevel@tonic-gate lp->w_off += len; 447*7c478bd9Sstevel@tonic-gate 448*7c478bd9Sstevel@tonic-gate /* Update written statistics. */ 449*7c478bd9Sstevel@tonic-gate if ((lp->stat.st_w_bytes += len) >= MEGABYTE) { 450*7c478bd9Sstevel@tonic-gate lp->stat.st_w_bytes -= MEGABYTE; 451*7c478bd9Sstevel@tonic-gate ++lp->stat.st_w_mbytes; 452*7c478bd9Sstevel@tonic-gate } 453*7c478bd9Sstevel@tonic-gate if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) { 454*7c478bd9Sstevel@tonic-gate lp->stat.st_wc_bytes -= MEGABYTE; 455*7c478bd9Sstevel@tonic-gate ++lp->stat.st_wc_mbytes; 456*7c478bd9Sstevel@tonic-gate } 457*7c478bd9Sstevel@tonic-gate ++lp->stat.st_wcount; 458*7c478bd9Sstevel@tonic-gate 459*7c478bd9Sstevel@tonic-gate return (0); 460*7c478bd9Sstevel@tonic-gate } 461*7c478bd9Sstevel@tonic-gate 462*7c478bd9Sstevel@tonic-gate /* 463*7c478bd9Sstevel@tonic-gate * log_file -- 464*7c478bd9Sstevel@tonic-gate * Map a DB_LSN to a file name. 465*7c478bd9Sstevel@tonic-gate */ 466*7c478bd9Sstevel@tonic-gate int 467*7c478bd9Sstevel@tonic-gate log_file(dblp, lsn, namep, len) 468*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 469*7c478bd9Sstevel@tonic-gate const DB_LSN *lsn; 470*7c478bd9Sstevel@tonic-gate char *namep; 471*7c478bd9Sstevel@tonic-gate size_t len; 472*7c478bd9Sstevel@tonic-gate { 473*7c478bd9Sstevel@tonic-gate int ret; 474*7c478bd9Sstevel@tonic-gate char *name; 475*7c478bd9Sstevel@tonic-gate 476*7c478bd9Sstevel@tonic-gate LOG_PANIC_CHECK(dblp); 477*7c478bd9Sstevel@tonic-gate 478*7c478bd9Sstevel@tonic-gate LOCK_LOGREGION(dblp); 479*7c478bd9Sstevel@tonic-gate ret = __log_name(dblp, lsn->file, &name, NULL, 0); 480*7c478bd9Sstevel@tonic-gate UNLOCK_LOGREGION(dblp); 481*7c478bd9Sstevel@tonic-gate if (ret != 0) 482*7c478bd9Sstevel@tonic-gate return (ret); 483*7c478bd9Sstevel@tonic-gate 484*7c478bd9Sstevel@tonic-gate /* Check to make sure there's enough room and copy the name. */ 485*7c478bd9Sstevel@tonic-gate if (len < strlen(name) + 1) { 486*7c478bd9Sstevel@tonic-gate *namep = '\0'; 487*7c478bd9Sstevel@tonic-gate return (ENOMEM); 488*7c478bd9Sstevel@tonic-gate } 489*7c478bd9Sstevel@tonic-gate (void)strcpy(namep, name); 490*7c478bd9Sstevel@tonic-gate __os_freestr(name); 491*7c478bd9Sstevel@tonic-gate 492*7c478bd9Sstevel@tonic-gate return (0); 493*7c478bd9Sstevel@tonic-gate } 494*7c478bd9Sstevel@tonic-gate 495*7c478bd9Sstevel@tonic-gate /* 496*7c478bd9Sstevel@tonic-gate * __log_newfd -- 497*7c478bd9Sstevel@tonic-gate * Acquire a file descriptor for the current log file. 498*7c478bd9Sstevel@tonic-gate */ 499*7c478bd9Sstevel@tonic-gate static int 500*7c478bd9Sstevel@tonic-gate __log_newfd(dblp) 501*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 502*7c478bd9Sstevel@tonic-gate { 503*7c478bd9Sstevel@tonic-gate int ret; 504*7c478bd9Sstevel@tonic-gate char *name; 505*7c478bd9Sstevel@tonic-gate 506*7c478bd9Sstevel@tonic-gate /* Close any previous file descriptor. */ 507*7c478bd9Sstevel@tonic-gate if (dblp->lfd != -1) { 508*7c478bd9Sstevel@tonic-gate (void)__os_close(dblp->lfd); 509*7c478bd9Sstevel@tonic-gate dblp->lfd = -1; 510*7c478bd9Sstevel@tonic-gate } 511*7c478bd9Sstevel@tonic-gate 512*7c478bd9Sstevel@tonic-gate /* Get the path of the new file and open it. */ 513*7c478bd9Sstevel@tonic-gate dblp->lfname = dblp->lp->lsn.file; 514*7c478bd9Sstevel@tonic-gate if ((ret = __log_name(dblp, 515*7c478bd9Sstevel@tonic-gate dblp->lfname, &name, &dblp->lfd, DB_CREATE | DB_SEQUENTIAL)) != 0) 516*7c478bd9Sstevel@tonic-gate __db_err(dblp->dbenv, "log_put: %s: %s", name, strerror(ret)); 517*7c478bd9Sstevel@tonic-gate 518*7c478bd9Sstevel@tonic-gate __os_freestr(name); 519*7c478bd9Sstevel@tonic-gate return (ret); 520*7c478bd9Sstevel@tonic-gate } 521*7c478bd9Sstevel@tonic-gate 522*7c478bd9Sstevel@tonic-gate /* 523*7c478bd9Sstevel@tonic-gate * __log_name -- 524*7c478bd9Sstevel@tonic-gate * Return the log name for a particular file, and optionally open it. 525*7c478bd9Sstevel@tonic-gate * 526*7c478bd9Sstevel@tonic-gate * PUBLIC: int __log_name __P((DB_LOG *, u_int32_t, char **, int *, u_int32_t)); 527*7c478bd9Sstevel@tonic-gate */ 528*7c478bd9Sstevel@tonic-gate int 529*7c478bd9Sstevel@tonic-gate __log_name(dblp, filenumber, namep, fdp, flags) 530*7c478bd9Sstevel@tonic-gate DB_LOG *dblp; 531*7c478bd9Sstevel@tonic-gate u_int32_t filenumber, flags; 532*7c478bd9Sstevel@tonic-gate char **namep; 533*7c478bd9Sstevel@tonic-gate int *fdp; 534*7c478bd9Sstevel@tonic-gate { 535*7c478bd9Sstevel@tonic-gate int ret; 536*7c478bd9Sstevel@tonic-gate char *oname; 537*7c478bd9Sstevel@tonic-gate char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20]; 538*7c478bd9Sstevel@tonic-gate 539*7c478bd9Sstevel@tonic-gate /* 540*7c478bd9Sstevel@tonic-gate * !!! 541*7c478bd9Sstevel@tonic-gate * The semantics of this routine are bizarre. 542*7c478bd9Sstevel@tonic-gate * 543*7c478bd9Sstevel@tonic-gate * The reason for all of this is that we need a place where we can 544*7c478bd9Sstevel@tonic-gate * intercept requests for log files, and, if appropriate, check for 545*7c478bd9Sstevel@tonic-gate * both the old-style and new-style log file names. The trick is 546*7c478bd9Sstevel@tonic-gate * that all callers of this routine that are opening the log file 547*7c478bd9Sstevel@tonic-gate * read-only want to use an old-style file name if they can't find 548*7c478bd9Sstevel@tonic-gate * a match using a new-style name. The only down-side is that some 549*7c478bd9Sstevel@tonic-gate * callers may check for the old-style when they really don't need 550*7c478bd9Sstevel@tonic-gate * to, but that shouldn't mess up anything, and we only check for 551*7c478bd9Sstevel@tonic-gate * the old-style name when we've already failed to find a new-style 552*7c478bd9Sstevel@tonic-gate * one. 553*7c478bd9Sstevel@tonic-gate * 554*7c478bd9Sstevel@tonic-gate * Create a new-style file name, and if we're not going to open the 555*7c478bd9Sstevel@tonic-gate * file, return regardless. 556*7c478bd9Sstevel@tonic-gate */ 557*7c478bd9Sstevel@tonic-gate (void)snprintf(new, sizeof(new), LFNAME, filenumber); 558*7c478bd9Sstevel@tonic-gate if ((ret = __db_appname(dblp->dbenv, 559*7c478bd9Sstevel@tonic-gate DB_APP_LOG, dblp->dir, new, 0, NULL, namep)) != 0 || fdp == NULL) 560*7c478bd9Sstevel@tonic-gate return (ret); 561*7c478bd9Sstevel@tonic-gate 562*7c478bd9Sstevel@tonic-gate /* Open the new-style file -- if we succeed, we're done. */ 563*7c478bd9Sstevel@tonic-gate if ((ret = __db_open(*namep, 564*7c478bd9Sstevel@tonic-gate flags, flags, dblp->lp->persist.mode, fdp)) == 0) 565*7c478bd9Sstevel@tonic-gate return (0); 566*7c478bd9Sstevel@tonic-gate 567*7c478bd9Sstevel@tonic-gate /* 568*7c478bd9Sstevel@tonic-gate * The open failed... if the DB_RDONLY flag isn't set, we're done, 569*7c478bd9Sstevel@tonic-gate * the caller isn't interested in old-style files. 570*7c478bd9Sstevel@tonic-gate */ 571*7c478bd9Sstevel@tonic-gate if (!LF_ISSET(DB_RDONLY)) 572*7c478bd9Sstevel@tonic-gate return (ret); 573*7c478bd9Sstevel@tonic-gate 574*7c478bd9Sstevel@tonic-gate /* Create an old-style file name. */ 575*7c478bd9Sstevel@tonic-gate (void)snprintf(old, sizeof(old), LFNAME_V1, filenumber); 576*7c478bd9Sstevel@tonic-gate if ((ret = __db_appname(dblp->dbenv, 577*7c478bd9Sstevel@tonic-gate DB_APP_LOG, dblp->dir, old, 0, NULL, &oname)) != 0) 578*7c478bd9Sstevel@tonic-gate goto err; 579*7c478bd9Sstevel@tonic-gate 580*7c478bd9Sstevel@tonic-gate /* 581*7c478bd9Sstevel@tonic-gate * Open the old-style file -- if we succeed, we're done. Free the 582*7c478bd9Sstevel@tonic-gate * space allocated for the new-style name and return the old-style 583*7c478bd9Sstevel@tonic-gate * name to the caller. 584*7c478bd9Sstevel@tonic-gate */ 585*7c478bd9Sstevel@tonic-gate if ((ret = __db_open(oname, 586*7c478bd9Sstevel@tonic-gate flags, flags, dblp->lp->persist.mode, fdp)) == 0) { 587*7c478bd9Sstevel@tonic-gate __os_freestr(*namep); 588*7c478bd9Sstevel@tonic-gate *namep = oname; 589*7c478bd9Sstevel@tonic-gate return (0); 590*7c478bd9Sstevel@tonic-gate } 591*7c478bd9Sstevel@tonic-gate 592*7c478bd9Sstevel@tonic-gate /* 593*7c478bd9Sstevel@tonic-gate * Couldn't find either style of name -- return the new-style name 594*7c478bd9Sstevel@tonic-gate * for the caller's error message. If it's an old-style name that's 595*7c478bd9Sstevel@tonic-gate * actually missing we're going to confuse the user with the error 596*7c478bd9Sstevel@tonic-gate * message, but that implies that not only were we looking for an 597*7c478bd9Sstevel@tonic-gate * old-style name, but we expected it to exist and we weren't just 598*7c478bd9Sstevel@tonic-gate * looking for any log file. That's not a likely error. 599*7c478bd9Sstevel@tonic-gate */ 600*7c478bd9Sstevel@tonic-gate err: __os_freestr(oname); 601*7c478bd9Sstevel@tonic-gate return (ret); 602*7c478bd9Sstevel@tonic-gate } 603