xref: /titanic_41/usr/src/cmd/sendmail/db/log/log_put.c (revision 4d0e50075058332ce0cd62bc2669a8a4dea45da0)
1 /*-
2  * See the file LICENSE for redistribution information.
3  *
4  * Copyright (c) 1996, 1997, 1998
5  *	Sleepycat Software.  All rights reserved.
6  */
7 #include "config.h"
8 
9 #ifndef lint
10 static const char sccsid[] = "@(#)log_put.c	10.44 (Sleepycat) 11/3/98";
11 #endif /* not lint */
12 
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
15 
16 #include <errno.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <time.h>
20 #include <unistd.h>
21 #endif
22 
23 #include "db_int.h"
24 #include "shqueue.h"
25 #include "db_page.h"
26 #include "log.h"
27 #include "hash.h"
28 #include "clib_ext.h"
29 #include "common_ext.h"
30 
31 static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
32 static int __log_flush __P((DB_LOG *, const DB_LSN *));
33 static int __log_newfd __P((DB_LOG *));
34 static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
35 static int __log_write __P((DB_LOG *, void *, u_int32_t));
36 
37 /*
38  * log_put --
39  *	Write a log record.
40  */
41 int
42 log_put(dblp, lsn, dbt, flags)
43 	DB_LOG *dblp;
44 	DB_LSN *lsn;
45 	const DBT *dbt;
46 	u_int32_t flags;
47 {
48 	int ret;
49 
50 	LOG_PANIC_CHECK(dblp);
51 
52 	/* Validate arguments. */
53 	if (flags != 0 && flags != DB_CHECKPOINT &&
54 	    flags != DB_CURLSN && flags != DB_FLUSH)
55 		return (__db_ferr(dblp->dbenv, "log_put", 0));
56 
57 	LOCK_LOGREGION(dblp);
58 	ret = __log_put(dblp, lsn, dbt, flags);
59 	UNLOCK_LOGREGION(dblp);
60 	return (ret);
61 }
62 
63 /*
64  * __log_put --
65  *	Write a log record; internal version.
66  *
67  * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
68  */
69 int
70 __log_put(dblp, lsn, dbt, flags)
71 	DB_LOG *dblp;
72 	DB_LSN *lsn;
73 	const DBT *dbt;
74 	u_int32_t flags;
75 {
76 	DBT fid_dbt, t;
77 	DB_LSN r_unused;
78 	FNAME *fnp;
79 	LOG *lp;
80 	u_int32_t lastoff;
81 	int ret;
82 
83 	lp = dblp->lp;
84 
85 	/*
86 	 * If the application just wants to know where we are, fill in
87 	 * the information.  Currently used by the transaction manager
88 	 * to avoid writing TXN_begin records.
89 	 */
90 	if (flags == DB_CURLSN) {
91 		lsn->file = lp->lsn.file;
92 		lsn->offset = lp->lsn.offset;
93 		return (0);
94 	}
95 
96 	/* If this information won't fit in the file, swap files. */
97 	if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
98 		if (sizeof(HDR) +
99 		    sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
100 			__db_err(dblp->dbenv,
101 			    "log_put: record larger than maximum file size");
102 			return (EINVAL);
103 		}
104 
105 		/* Flush the log. */
106 		if ((ret = __log_flush(dblp, NULL)) != 0)
107 			return (ret);
108 
109 		/*
110 		 * Save the last known offset from the previous file, we'll
111 		 * need it to initialize the persistent header information.
112 		 */
113 		lastoff = lp->lsn.offset;
114 
115 		/* Point the current LSN to the new file. */
116 		++lp->lsn.file;
117 		lp->lsn.offset = 0;
118 
119 		/* Reset the file write offset. */
120 		lp->w_off = 0;
121 	} else
122 		lastoff = 0;
123 
124 	/* Initialize the LSN information returned to the user. */
125 	lsn->file = lp->lsn.file;
126 	lsn->offset = lp->lsn.offset;
127 
128 	/*
129 	 * Insert persistent information as the first record in every file.
130 	 * Note that the previous length is wrong for the very first record
131 	 * of the log, but that's okay, we check for it during retrieval.
132 	 */
133 	if (lp->lsn.offset == 0) {
134 		t.data = &lp->persist;
135 		t.size = sizeof(LOGP);
136 		if ((ret = __log_putr(dblp, lsn,
137 		    &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
138 			return (ret);
139 
140 		/* Update the LSN information returned to the user. */
141 		lsn->file = lp->lsn.file;
142 		lsn->offset = lp->lsn.offset;
143 	}
144 
145 	/* Write the application's log record. */
146 	if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0)
147 		return (ret);
148 
149 	/*
150 	 * On a checkpoint, we:
151 	 *	Put out the checkpoint record (above).
152 	 *	Save the LSN of the checkpoint in the shared region.
153 	 *	Append the set of file name information into the log.
154 	 */
155 	if (flags == DB_CHECKPOINT) {
156 		lp->chkpt_lsn = *lsn;
157 
158 		for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
159 		    fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
160 			if (fnp->ref == 0)	/* Entry not in use. */
161 				continue;
162 			memset(&t, 0, sizeof(t));
163 			t.data = R_ADDR(dblp, fnp->name_off);
164 			t.size = strlen(t.data) + 1;
165 			memset(&fid_dbt, 0, sizeof(fid_dbt));
166 			fid_dbt.data = fnp->ufid;
167 			fid_dbt.size = DB_FILE_ID_LEN;
168 			if ((ret = __log_register_log(dblp, NULL, &r_unused, 0,
169 			    LOG_CHECKPOINT, &t, &fid_dbt, fnp->id, fnp->s_type))
170 			    != 0)
171 				return (ret);
172 		}
173 	}
174 
175 	/*
176 	 * On a checkpoint or when flush is requested, we:
177 	 *	Flush the current buffer contents to disk.
178 	 *	Sync the log to disk.
179 	 */
180 	if (flags == DB_FLUSH || flags == DB_CHECKPOINT)
181 		if ((ret = __log_flush(dblp, NULL)) != 0)
182 			return (ret);
183 
184 	/*
185 	 * On a checkpoint, we:
186 	 *	Save the time the checkpoint was written.
187 	 *	Reset the bytes written since the last checkpoint.
188 	 */
189 	if (flags == DB_CHECKPOINT) {
190 		(void)time(&lp->chkpt);
191 		lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
192 	}
193 	return (0);
194 }
195 
196 /*
197  * __log_putr --
198  *	Actually put a record into the log.
199  */
200 static int
201 __log_putr(dblp, lsn, dbt, prev)
202 	DB_LOG *dblp;
203 	DB_LSN *lsn;
204 	const DBT *dbt;
205 	u_int32_t prev;
206 {
207 	HDR hdr;
208 	LOG *lp;
209 	int ret;
210 
211 	lp = dblp->lp;
212 
213 	/*
214 	 * Initialize the header.  If we just switched files, lsn.offset will
215 	 * be 0, and what we really want is the offset of the previous record
216 	 * in the previous file.  Fortunately, prev holds the value we want.
217 	 */
218 	hdr.prev = prev;
219 	hdr.len = sizeof(HDR) + dbt->size;
220 	hdr.cksum = __ham_func4(dbt->data, dbt->size);
221 
222 	if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0)
223 		return (ret);
224 	lp->len = sizeof(HDR);
225 	lp->lsn.offset += sizeof(HDR);
226 
227 	if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
228 		return (ret);
229 	lp->len += dbt->size;
230 	lp->lsn.offset += dbt->size;
231 	return (0);
232 }
233 
234 /*
235  * log_flush --
236  *	Write all records less than or equal to the specified LSN.
237  */
238 int
239 log_flush(dblp, lsn)
240 	DB_LOG *dblp;
241 	const DB_LSN *lsn;
242 {
243 	int ret;
244 
245 	LOG_PANIC_CHECK(dblp);
246 
247 	LOCK_LOGREGION(dblp);
248 	ret = __log_flush(dblp, lsn);
249 	UNLOCK_LOGREGION(dblp);
250 	return (ret);
251 }
252 
253 /*
254  * __log_flush --
255  *	Write all records less than or equal to the specified LSN; internal
256  *	version.
257  */
258 static int
259 __log_flush(dblp, lsn)
260 	DB_LOG *dblp;
261 	const DB_LSN *lsn;
262 {
263 	DB_LSN t_lsn;
264 	LOG *lp;
265 	int current, ret;
266 
267 	ret = 0;
268 	lp = dblp->lp;
269 
270 	/*
271 	 * If no LSN specified, flush the entire log by setting the flush LSN
272 	 * to the last LSN written in the log.  Otherwise, check that the LSN
273 	 * isn't a non-existent record for the log.
274 	 */
275 	if (lsn == NULL) {
276 		t_lsn.file = lp->lsn.file;
277 		t_lsn.offset = lp->lsn.offset - lp->len;
278 		lsn = &t_lsn;
279 	} else
280 		if (lsn->file > lp->lsn.file ||
281 		    (lsn->file == lp->lsn.file &&
282 		    lsn->offset > lp->lsn.offset - lp->len)) {
283 			__db_err(dblp->dbenv,
284 			    "log_flush: LSN past current end-of-log");
285 			return (EINVAL);
286 		}
287 
288 	/*
289 	 * If the LSN is less than the last-sync'd LSN, we're done.  Note,
290 	 * the last-sync LSN saved in s_lsn is the LSN of the first byte
291 	 * we absolutely know has been written to disk, so the test is <=.
292 	 */
293 	if (lsn->file < lp->s_lsn.file ||
294 	    (lsn->file == lp->s_lsn.file && lsn->offset <= lp->s_lsn.offset))
295 		return (0);
296 
297 	/*
298 	 * We may need to write the current buffer.  We have to write the
299 	 * current buffer if the flush LSN is greater than or equal to the
300 	 * buffer's starting LSN.
301 	 */
302 	current = 0;
303 	if (lp->b_off != 0 && log_compare(lsn, &lp->f_lsn) >= 0) {
304 		if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
305 			return (ret);
306 
307 		lp->b_off = 0;
308 		current = 1;
309 	}
310 
311 	/*
312 	 * It's possible that this thread may never have written to this log
313 	 * file.  Acquire a file descriptor if we don't already have one.
314 	 */
315 	if (dblp->lfname != dblp->lp->lsn.file)
316 		if ((ret = __log_newfd(dblp)) != 0)
317 			return (ret);
318 
319 	/* Sync all writes to disk. */
320 	if ((ret = __os_fsync(dblp->lfd)) != 0) {
321 		__db_panic(dblp->dbenv, ret);
322 		return (ret);
323 	}
324 	++lp->stat.st_scount;
325 
326 	/*
327 	 * Set the last-synced LSN, using the LSN of the current buffer.  If
328 	 * the current buffer was flushed, we know the LSN of the first byte
329 	 * of the buffer is on disk, otherwise, we only know that the LSN of
330 	 * the record before the one beginning the current buffer is on disk.
331 	 *
332 	 * XXX
333 	 * Check to make sure that the saved lsn isn't 0 before we go making
334 	 * this change.  If DB_CHECKPOINT was called before we actually wrote
335 	 * something, you can end up here without ever having written anything
336 	 * to a log file, and decrementing either s_lsn.file or s_lsn.offset
337 	 * will cause much sadness later on.
338 	 */
339 	lp->s_lsn = lp->f_lsn;
340 	if (!current && lp->s_lsn.file != 0)
341 		if (lp->s_lsn.offset == 0) {
342 			--lp->s_lsn.file;
343 			lp->s_lsn.offset = lp->persist.lg_max;
344 		} else
345 			--lp->s_lsn.offset;
346 
347 	return (0);
348 }
349 
350 /*
351  * __log_fill --
352  *	Write information into the log.
353  */
354 static int
355 __log_fill(dblp, lsn, addr, len)
356 	DB_LOG *dblp;
357 	DB_LSN *lsn;
358 	void *addr;
359 	u_int32_t len;
360 {
361 	LOG *lp;
362 	u_int32_t nrec;
363 	size_t nw, remain;
364 	int ret;
365 
366 	/* Copy out the data. */
367 	for (lp = dblp->lp; len > 0;) {
368 		/*
369 		 * If we're beginning a new buffer, note the user LSN to which
370 		 * the first byte of the buffer belongs.  We have to know this
371 		 * when flushing the buffer so that we know if the in-memory
372 		 * buffer needs to be flushed.
373 		 */
374 		if (lp->b_off == 0)
375 			lp->f_lsn = *lsn;
376 
377 		/*
378 		 * If we're on a buffer boundary and the data is big enough,
379 		 * copy as many records as we can directly from the data.
380 		 */
381 		if (lp->b_off == 0 && len >= sizeof(lp->buf)) {
382 			nrec = len / sizeof(lp->buf);
383 			if ((ret = __log_write(dblp,
384 			    addr, nrec * sizeof(lp->buf))) != 0)
385 				return (ret);
386 			addr = (u_int8_t *)addr + nrec * sizeof(lp->buf);
387 			len -= nrec * sizeof(lp->buf);
388 			continue;
389 		}
390 
391 		/* Figure out how many bytes we can copy this time. */
392 		remain = sizeof(lp->buf) - lp->b_off;
393 		nw = remain > len ? len : remain;
394 		memcpy(lp->buf + lp->b_off, addr, nw);
395 		addr = (u_int8_t *)addr + nw;
396 		len -= nw;
397 		lp->b_off += nw;
398 
399 		/* If we fill the buffer, flush it. */
400 		if (lp->b_off == sizeof(lp->buf)) {
401 			if ((ret =
402 			    __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0)
403 				return (ret);
404 			lp->b_off = 0;
405 		}
406 	}
407 	return (0);
408 }
409 
410 /*
411  * __log_write --
412  *	Write the log buffer to disk.
413  */
414 static int
415 __log_write(dblp, addr, len)
416 	DB_LOG *dblp;
417 	void *addr;
418 	u_int32_t len;
419 {
420 	LOG *lp;
421 	ssize_t nw;
422 	int ret;
423 
424 	/*
425 	 * If we haven't opened the log file yet or the current one
426 	 * has changed, acquire a new log file.
427 	 */
428 	lp = dblp->lp;
429 	if (dblp->lfd == -1 || dblp->lfname != lp->lsn.file)
430 		if ((ret = __log_newfd(dblp)) != 0)
431 			return (ret);
432 
433 	/*
434 	 * Seek to the offset in the file (someone may have written it
435 	 * since we last did).
436 	 */
437 	if ((ret = __os_seek(dblp->lfd, 0, 0, lp->w_off, 0, SEEK_SET)) != 0 ||
438 	    (ret = __os_write(dblp->lfd, addr, len, &nw)) != 0) {
439 		__db_panic(dblp->dbenv, ret);
440 		return (ret);
441 	}
442 	if (nw != (int32_t)len)
443 		return (EIO);
444 
445 	/* Reset the buffer offset and update the seek offset. */
446 	lp->w_off += len;
447 
448 	/* Update written statistics. */
449 	if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
450 		lp->stat.st_w_bytes -= MEGABYTE;
451 		++lp->stat.st_w_mbytes;
452 	}
453 	if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) {
454 		lp->stat.st_wc_bytes -= MEGABYTE;
455 		++lp->stat.st_wc_mbytes;
456 	}
457 	++lp->stat.st_wcount;
458 
459 	return (0);
460 }
461 
462 /*
463  * log_file --
464  *	Map a DB_LSN to a file name.
465  */
466 int
467 log_file(dblp, lsn, namep, len)
468 	DB_LOG *dblp;
469 	const DB_LSN *lsn;
470 	char *namep;
471 	size_t len;
472 {
473 	int ret;
474 	char *name;
475 
476 	LOG_PANIC_CHECK(dblp);
477 
478 	LOCK_LOGREGION(dblp);
479 	ret = __log_name(dblp, lsn->file, &name, NULL, 0);
480 	UNLOCK_LOGREGION(dblp);
481 	if (ret != 0)
482 		return (ret);
483 
484 	/* Check to make sure there's enough room and copy the name. */
485 	if (len < strlen(name) + 1) {
486 		*namep = '\0';
487 		return (ENOMEM);
488 	}
489 	(void)strcpy(namep, name);
490 	__os_freestr(name);
491 
492 	return (0);
493 }
494 
495 /*
496  * __log_newfd --
497  *	Acquire a file descriptor for the current log file.
498  */
499 static int
500 __log_newfd(dblp)
501 	DB_LOG *dblp;
502 {
503 	int ret;
504 	char *name;
505 
506 	/* Close any previous file descriptor. */
507 	if (dblp->lfd != -1) {
508 		(void)__os_close(dblp->lfd);
509 		dblp->lfd = -1;
510 	}
511 
512 	/* Get the path of the new file and open it. */
513 	dblp->lfname = dblp->lp->lsn.file;
514 	if ((ret = __log_name(dblp,
515 	    dblp->lfname, &name, &dblp->lfd, DB_CREATE | DB_SEQUENTIAL)) != 0)
516 		__db_err(dblp->dbenv, "log_put: %s: %s", name, strerror(ret));
517 
518 	__os_freestr(name);
519 	return (ret);
520 }
521 
522 /*
523  * __log_name --
524  *	Return the log name for a particular file, and optionally open it.
525  *
526  * PUBLIC: int __log_name __P((DB_LOG *, u_int32_t, char **, int *, u_int32_t));
527  */
528 int
529 __log_name(dblp, filenumber, namep, fdp, flags)
530 	DB_LOG *dblp;
531 	u_int32_t filenumber, flags;
532 	char **namep;
533 	int *fdp;
534 {
535 	int ret;
536 	char *oname;
537 	char old[sizeof(LFPREFIX) + 5 + 20], new[sizeof(LFPREFIX) + 10 + 20];
538 
539 	/*
540 	 * !!!
541 	 * The semantics of this routine are bizarre.
542 	 *
543 	 * The reason for all of this is that we need a place where we can
544 	 * intercept requests for log files, and, if appropriate, check for
545 	 * both the old-style and new-style log file names.  The trick is
546 	 * that all callers of this routine that are opening the log file
547 	 * read-only want to use an old-style file name if they can't find
548 	 * a match using a new-style name.  The only down-side is that some
549 	 * callers may check for the old-style when they really don't need
550 	 * to, but that shouldn't mess up anything, and we only check for
551 	 * the old-style name when we've already failed to find a new-style
552 	 * one.
553 	 *
554 	 * Create a new-style file name, and if we're not going to open the
555 	 * file, return regardless.
556 	 */
557 	(void)snprintf(new, sizeof(new), LFNAME, filenumber);
558 	if ((ret = __db_appname(dblp->dbenv,
559 	    DB_APP_LOG, dblp->dir, new, 0, NULL, namep)) != 0 || fdp == NULL)
560 		return (ret);
561 
562 	/* Open the new-style file -- if we succeed, we're done. */
563 	if ((ret = __db_open(*namep,
564 	    flags, flags, dblp->lp->persist.mode, fdp)) == 0)
565 		return (0);
566 
567 	/*
568 	 * The open failed... if the DB_RDONLY flag isn't set, we're done,
569 	 * the caller isn't interested in old-style files.
570 	 */
571 	if (!LF_ISSET(DB_RDONLY))
572 		return (ret);
573 
574 	/* Create an old-style file name. */
575 	(void)snprintf(old, sizeof(old), LFNAME_V1, filenumber);
576 	if ((ret = __db_appname(dblp->dbenv,
577 	    DB_APP_LOG, dblp->dir, old, 0, NULL, &oname)) != 0)
578 		goto err;
579 
580 	/*
581 	 * Open the old-style file -- if we succeed, we're done.  Free the
582 	 * space allocated for the new-style name and return the old-style
583 	 * name to the caller.
584 	 */
585 	if ((ret = __db_open(oname,
586 	    flags, flags, dblp->lp->persist.mode, fdp)) == 0) {
587 		__os_freestr(*namep);
588 		*namep = oname;
589 		return (0);
590 	}
591 
592 	/*
593 	 * Couldn't find either style of name -- return the new-style name
594 	 * for the caller's error message.  If it's an old-style name that's
595 	 * actually missing we're going to confuse the user with the error
596 	 * message, but that implies that not only were we looking for an
597 	 * old-style name, but we expected it to exist and we weren't just
598 	 * looking for any log file.  That's not a likely error.
599 	 */
600 err:	__os_freestr(oname);
601 	return (ret);
602 }
603