xref: /titanic_41/usr/src/stand/lib/fs/ufs/lufsboot.c (revision 4a634bb80136cc001d14ab96addd9915105e5223)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*4a634bb8Sga159272  * Common Development and Distribution License (the "License").
6*4a634bb8Sga159272  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*4a634bb8Sga159272  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #include <sys/param.h>
297c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
307c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
317c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
327c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
337c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
347c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
357c478bd9Sstevel@tonic-gate #include <sys/promif.h>
367c478bd9Sstevel@tonic-gate #include <sys/machparam.h>
377c478bd9Sstevel@tonic-gate 
387c478bd9Sstevel@tonic-gate #include <sys/stat.h>
397c478bd9Sstevel@tonic-gate #include <sys/bootdebug.h>
407c478bd9Sstevel@tonic-gate #include <sys/salib.h>
417c478bd9Sstevel@tonic-gate #include <sys/saio.h>
427c478bd9Sstevel@tonic-gate #include <sys/filep.h>
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate /*
467c478bd9Sstevel@tonic-gate  * Big theory statement on how ufsboot makes use of the log
477c478bd9Sstevel@tonic-gate  * in case the filesystem wasn't shut down cleanly.
487c478bd9Sstevel@tonic-gate  *
497c478bd9Sstevel@tonic-gate  * The structure of the ufs on-disk log looks like this:
507c478bd9Sstevel@tonic-gate  *
517c478bd9Sstevel@tonic-gate  * +-----------------+
527c478bd9Sstevel@tonic-gate  * | SUPERBLOCK      |
537c478bd9Sstevel@tonic-gate  * | ...             |
547c478bd9Sstevel@tonic-gate  * | fs_logbno       +--> +-----------------------+
557c478bd9Sstevel@tonic-gate  * | ...             |    | EXTENT BLOCK          |
567c478bd9Sstevel@tonic-gate  * +-----------------+    |   ...                 |
577c478bd9Sstevel@tonic-gate  *                        |   nextents            |
587c478bd9Sstevel@tonic-gate  * +----------------------+   extents[0].pbno     |
597c478bd9Sstevel@tonic-gate  * |                      | { extents[1].pbno }   +------------+
607c478bd9Sstevel@tonic-gate  * |                      |   ...                 +--> ...     |
617c478bd9Sstevel@tonic-gate  * |                      +-----------------------+            |
627c478bd9Sstevel@tonic-gate  * v                                                           |
637c478bd9Sstevel@tonic-gate  * +-----------------------------+      \                      |
647c478bd9Sstevel@tonic-gate  * | ON-DISK LOG HEADER          |      |                      |
657c478bd9Sstevel@tonic-gate  * | ...                         |      |                      |
667c478bd9Sstevel@tonic-gate  * | od_head_lof                 +--+   |                      |
677c478bd9Sstevel@tonic-gate  * | ...                         |  |   |                      |
687c478bd9Sstevel@tonic-gate  * +-----------------------------+ <|---|- od_bol_lof          |
697c478bd9Sstevel@tonic-gate  * | sector (may contain deltas) |  |   |  (logical offset)    |
707c478bd9Sstevel@tonic-gate  * |   +-------------------------+  |   |                      |
717c478bd9Sstevel@tonic-gate  * |   | trailer (some ident#)   |  |    > extents[0].nbno     |
727c478bd9Sstevel@tonic-gate  * +---+-------------------------+  |   |  blocks ("sectors")  |
737c478bd9Sstevel@tonic-gate  * .                             .  |   |                      |
747c478bd9Sstevel@tonic-gate  * .                             .  |   |                      |
757c478bd9Sstevel@tonic-gate  * +-----------------------------+<-+   |                      |
767c478bd9Sstevel@tonic-gate  * | delta1 delta2       delta3  |      |                      |
777c478bd9Sstevel@tonic-gate  * | d +-------------------------+      |                      |
787c478bd9Sstevel@tonic-gate  * | e | ident#: od_head_ident   |      |                      |
797c478bd9Sstevel@tonic-gate  * +---+-------------------------+      /                      |
807c478bd9Sstevel@tonic-gate  *                                                             |
817c478bd9Sstevel@tonic-gate  * +-----------------------------+ <---------------------------+
827c478bd9Sstevel@tonic-gate  * | lta4    delta5 delta6    de |
837c478bd9Sstevel@tonic-gate  * | l +-------------------------+
847c478bd9Sstevel@tonic-gate  * | t | ident#: od_head_ident+1 |
857c478bd9Sstevel@tonic-gate  * +---+-------------------------+
867c478bd9Sstevel@tonic-gate  * .                             .
877c478bd9Sstevel@tonic-gate  * +-----------------------------+
887c478bd9Sstevel@tonic-gate  * | sector (may contain deltas) |
897c478bd9Sstevel@tonic-gate  * |          +------------------+
907c478bd9Sstevel@tonic-gate  * |          | trailer (ident#) |
917c478bd9Sstevel@tonic-gate  * +----------+------------------+ <-- od_eol_lof (logical offset)
927c478bd9Sstevel@tonic-gate  *
937c478bd9Sstevel@tonic-gate  * The ufs on-disk log has the following properties:
947c478bd9Sstevel@tonic-gate  *
957c478bd9Sstevel@tonic-gate  * 1. The log is made up from at least one extent. "fs_logbno" in
967c478bd9Sstevel@tonic-gate  *    the superblock points to where this is found.
977c478bd9Sstevel@tonic-gate  * 2. Extents describe the logical layout.
987c478bd9Sstevel@tonic-gate  *      - Logical offset 0 is the on-disk log header. It's also
997c478bd9Sstevel@tonic-gate  *        at the beginning of the first physical block.
1007c478bd9Sstevel@tonic-gate  *      - If there's more than one extent, the equation holds:
1017c478bd9Sstevel@tonic-gate  *             extent[i+1].lbno == extent[i].lbno + extent[i].nbno
1027c478bd9Sstevel@tonic-gate  *        i.e. logical offsets form a contiguous sequence. Yet on disk,
1037c478bd9Sstevel@tonic-gate  *        two logically-adjacent offsets may be located in two
1047c478bd9Sstevel@tonic-gate  *        physically disjoint extents, so logical offsets need to be
1057c478bd9Sstevel@tonic-gate  *        translated into physical disk block addresses for access.
1067c478bd9Sstevel@tonic-gate  *      - Various fields in the on-disk log header structure refer
1077c478bd9Sstevel@tonic-gate  *        to such logical log offsets.
 * 3. The actual logical logspace begins after the log header, at
 *    the logical offset indicated by "od_bol_lof". Every 512 bytes
 *    (a "sector" in terms of ufs logging) end with a sector trailer
 *    which contains a sequence number, the sector ident.
1127c478bd9Sstevel@tonic-gate  * 4. Deltas are packed tight in the remaining space, i.e. a delta
1137c478bd9Sstevel@tonic-gate  *    may be part of more than one sector. Reads from the logspace
1147c478bd9Sstevel@tonic-gate  *    must be split at sector boundaries, since the trailer is never
1157c478bd9Sstevel@tonic-gate  *    part of a delta. Delta sizes vary.
1167c478bd9Sstevel@tonic-gate  * 5. The field "od_head_lof" points to the start of the dirty part
1177c478bd9Sstevel@tonic-gate  *    of the log, i.e. to the first delta header. Likewise, "od_head_ident"
1187c478bd9Sstevel@tonic-gate  *    is the sequence number where the valid part of the log starts; if
1197c478bd9Sstevel@tonic-gate  *    the sector pointed to by "od_head_lof" has a sector ident different
1207c478bd9Sstevel@tonic-gate  *    from "od_head_ident", the log is empty.
 * 6. The valid part of the log extends for as many sectors as their ident
 *    numbers form a contiguous sequence. When reaching the logical end of
 *    the log, "od_eol_lof", logical offsets wrap around to "od_bol_lof",
 *    i.e. the log forms a circular buffer.
1257c478bd9Sstevel@tonic-gate  *
1267c478bd9Sstevel@tonic-gate  * For the strategy how to handle accessing the log, item 4. is the
1277c478bd9Sstevel@tonic-gate  * most important one - its consequence is that the log can only be
1287c478bd9Sstevel@tonic-gate  * read in one direction - forward, starting at the head.
1297c478bd9Sstevel@tonic-gate  *
 * The task of identifying whether a given metadata block is
 * actually in the log therefore requires reading the entire
 * log. Doing so is memory-efficient but kills speed if re-done
 * at every metadata read (64MB log size vs. 512 byte metadata
 * block size: about 128K times as much I/O, possibly only to find
 * out that this block was not in the log ...).
1367c478bd9Sstevel@tonic-gate  *
1377c478bd9Sstevel@tonic-gate  * First thought to speed this up is to let ufsboot roll the log.
1387c478bd9Sstevel@tonic-gate  * But this is not possible because:
1397c478bd9Sstevel@tonic-gate  * - ufsboot currently does not implement any write functionality,
1407c478bd9Sstevel@tonic-gate  *   the boot-time ufs implementation is read-only.
1417c478bd9Sstevel@tonic-gate  * - firmware write interfaces may or may not be available, in any
1427c478bd9Sstevel@tonic-gate  *   case, they're rarely used and untested for such a purpose.
1437c478bd9Sstevel@tonic-gate  * - that would duplicate a lot of code, since at the moment only
1447c478bd9Sstevel@tonic-gate  *   kernel ufs logging implements log rolling.
1457c478bd9Sstevel@tonic-gate  * - the boot environment cannot be considered high-performance;
1467c478bd9Sstevel@tonic-gate  *   rolling the log there would be slow.
1477c478bd9Sstevel@tonic-gate  * - boot device and root device could well be different, creating
1487c478bd9Sstevel@tonic-gate  *   inconsistencies e.g. with a mirrored root if the log is rolled.
1497c478bd9Sstevel@tonic-gate  *
1507c478bd9Sstevel@tonic-gate  * Therefore, caching the log structural information (boot-relevant
1517c478bd9Sstevel@tonic-gate  * deltas and their logical log offset) is required for fast access
1527c478bd9Sstevel@tonic-gate  * to the data in the log. This code builds a logmap for that purpose.
1537c478bd9Sstevel@tonic-gate  *
1547c478bd9Sstevel@tonic-gate  * As a simple optimization, if we find the log is empty, we will not
1557c478bd9Sstevel@tonic-gate  * use it - log reader support for ufsboot has no noticeable overhead
1567c478bd9Sstevel@tonic-gate  * for clean logs, or for root filesystems that aren't logging.
1577c478bd9Sstevel@tonic-gate  */
1587c478bd9Sstevel@tonic-gate 
/*
 * Logmap hash geometry: LB_HASHSIZE (1 << LB_HASHSHIFT) buckets, selected
 * by shifting the master offset right by LB_HASHSHIFT bits and masking
 * the result to the table size.
 */
#define	LB_HASHSHIFT		13
#define	LB_HASHSIZE		(1 << LB_HASHSHIFT)
#define	LB_HASHFUNC(mof)	(((mof) >> LB_HASHSHIFT) & (LB_HASHSIZE - 1))

/*
 * Size limits, in bytes, for the buffer the logmap is carved from.
 */
#define	LOGBUF_MAXSIZE	(8*1024*1024)
#define	LOGBUF_MINSIZE	(256*1024)

/*
 * Log state codes.
 */
#define	LOG_IS_EMPTY	0
#define	LOG_IS_OK	1
#define	LOG_IS_ERRORED	2
1697c478bd9Sstevel@tonic-gate 
1707c478bd9Sstevel@tonic-gate /*
1717c478bd9Sstevel@tonic-gate  * We build a hashed logmap of those while scanning the log.
1727c478bd9Sstevel@tonic-gate  * sizeof(lb_map_t) is 40 on 64bit, 32 on 32bit; the max sized
1737c478bd9Sstevel@tonic-gate  * resalloc'ed buffer can accomodate around ~500k of those;
1747c478bd9Sstevel@tonic-gate  * this is approximately the maximum amount of deltas we'll
1757c478bd9Sstevel@tonic-gate  * see if a 64MB ufs log is completely filled. We'll make no
1767c478bd9Sstevel@tonic-gate  * attempt to free and reallocate the resalloc'ed buffer if
1777c478bd9Sstevel@tonic-gate  * we overflow, as conservative sizing should make that an
1787c478bd9Sstevel@tonic-gate  * impossibility. A future enhancement may allocate memory
1797c478bd9Sstevel@tonic-gate  * here as needed - once the boot time memory allocator
1807c478bd9Sstevel@tonic-gate  * supports that.
1817c478bd9Sstevel@tonic-gate  */
1827c478bd9Sstevel@tonic-gate typedef struct lb_mapentry {
1837c478bd9Sstevel@tonic-gate 	struct lb_mapentry	*l_next;	/* hash chaining */
1847c478bd9Sstevel@tonic-gate 	struct lb_mapentry	*l_prev;	/* hash chaining */
1857c478bd9Sstevel@tonic-gate 	int64_t		l_mof;		/* disk addr this delta is against */
1867c478bd9Sstevel@tonic-gate 	int16_t		l_nb;		/* size of delta */
1877c478bd9Sstevel@tonic-gate 	int16_t		l_flags;
1887c478bd9Sstevel@tonic-gate 	int32_t		l_lof;		/* log offset for delta header */
1897c478bd9Sstevel@tonic-gate 	int32_t		l_tid;		/* transaction this delta is part of */
1907c478bd9Sstevel@tonic-gate 	delta_t		l_typ;		/* see <sys/fs/ufs_trans.h> */
1917c478bd9Sstevel@tonic-gate } lb_me_t;
1927c478bd9Sstevel@tonic-gate 
1937c478bd9Sstevel@tonic-gate #define	LB_ISCANCELLED	1
1947c478bd9Sstevel@tonic-gate 
/*
 * inslist(lh, l): append entry "l" to the circular, doubly-linked list
 * whose head pointer is "*lh". An empty list (NULL head) becomes a
 * single-element ring with "l" as its head; otherwise "l" is linked in
 * just before the head (i.e. at the tail) and the head is unchanged.
 *
 * Both list macros expand to a bare if/else statement, so do not use
 * them as the unbraced body of an enclosing "if" that has its own "else".
 * Arguments are evaluated more than once.
 */
#define	inslist(lh, l)	if ((*(lh))) {				\
				(*(lh))->l_prev->l_next = (l);	\
				(l)->l_next = (*(lh));		\
				(l)->l_prev = (*(lh))->l_prev;	\
				(*(lh))->l_prev = (l);		\
			} else {				\
				(l)->l_next = (l);		\
				(l)->l_prev = (l);		\
				(*(lh)) = (l);			\
			}

/*
 * remlist(lh, l): unlink entry "l" from the circular list headed by
 * "*lh". Removing the only element sets the head to NULL (after a
 * consistency check that only emits a debug message); removing the
 * current head advances the head to the next element.
 */
#define	remlist(lh, l)	\
	if ((l)->l_next == (l)) {			\
		if (*(lh) != (l) || (l)->l_prev != (l))	\
			dprintf("Logmap hash inconsistency.\n");	\
		*(lh) = (lb_me_t *)NULL;		\
	} else {					\
		if (*(lh) == (l))			\
			*(lh) = (l)->l_next;		\
		(l)->l_prev->l_next = (l)->l_next;	\
		(l)->l_next->l_prev = (l)->l_prev;	\
	}

/*
 * Allocate one logmap entry from the logbuffer.
 */
#define	lufs_alloc_me()	\
	(lb_me_t *)lufs_alloc_from_logbuf(sizeof (lb_me_t))
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate extern int		boothowto;
2227c478bd9Sstevel@tonic-gate static int		ufs_is_lufs = 0;
2237c478bd9Sstevel@tonic-gate static fileid_t		*logfp = (fileid_t *)NULL;
2247c478bd9Sstevel@tonic-gate static extent_block_t	*eb = (extent_block_t *)NULL;
2257c478bd9Sstevel@tonic-gate static ml_odunit_t	odi;
2267c478bd9Sstevel@tonic-gate 
2277c478bd9Sstevel@tonic-gate static char		logbuffer_min[LOGBUF_MINSIZE];
2287c478bd9Sstevel@tonic-gate static caddr_t		logbuffer = (caddr_t)NULL;
2297c478bd9Sstevel@tonic-gate static caddr_t		elogbuffer = (caddr_t)NULL;
2307c478bd9Sstevel@tonic-gate static caddr_t		logbuf_curptr;
2317c478bd9Sstevel@tonic-gate static lb_me_t		**loghash = (lb_me_t **)NULL;
2327c478bd9Sstevel@tonic-gate static lb_me_t		*lfreelist;
2337c478bd9Sstevel@tonic-gate 
2347c478bd9Sstevel@tonic-gate static uint32_t		curtid;
2357c478bd9Sstevel@tonic-gate 
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate int	lufs_support = 1;
2387c478bd9Sstevel@tonic-gate 
2397c478bd9Sstevel@tonic-gate void	lufs_boot_init(fileid_t *);
2407c478bd9Sstevel@tonic-gate void	lufs_closeall(void);
2417c478bd9Sstevel@tonic-gate void	lufs_merge_deltas(fileid_t *);
2427c478bd9Sstevel@tonic-gate 
2437c478bd9Sstevel@tonic-gate static	int	lufs_logscan(void);
2447c478bd9Sstevel@tonic-gate 
2457c478bd9Sstevel@tonic-gate extern	int	diskread(fileid_t *filep);
2467c478bd9Sstevel@tonic-gate extern	caddr_t	resalloc(enum RESOURCES, size_t, caddr_t, int);
2477c478bd9Sstevel@tonic-gate 
248*4a634bb8Sga159272 #if defined(__sparcv9)
2497c478bd9Sstevel@tonic-gate #define	LOGBUF_BASEADDR	((caddr_t)(SYSBASE - LOGBUF_MAXSIZE))
2507c478bd9Sstevel@tonic-gate #endif
2517c478bd9Sstevel@tonic-gate 
2527c478bd9Sstevel@tonic-gate static int
lufs_alloc_logbuf(void)2537c478bd9Sstevel@tonic-gate lufs_alloc_logbuf(void)
2547c478bd9Sstevel@tonic-gate {
2557c478bd9Sstevel@tonic-gate 	/*
2567c478bd9Sstevel@tonic-gate 	 * Allocate memory for caching the log. Since the logbuffer can
2577c478bd9Sstevel@tonic-gate 	 * potentially exceed the boot scratch memory limit, we use resalloc
2587c478bd9Sstevel@tonic-gate 	 * directly, passing the allocation to the low-level boot-time
2597c478bd9Sstevel@tonic-gate 	 * backend allocator. The chosen VA range is the top end of
2607c478bd9Sstevel@tonic-gate 	 * the kernel's segmap segment, so we're not interfering
2617c478bd9Sstevel@tonic-gate 	 * with the kernel because segmap is created at a time when
2627c478bd9Sstevel@tonic-gate 	 * the 2nd-stage boot has already been unloaded and this VA
2637c478bd9Sstevel@tonic-gate 	 * range was given back.
2647c478bd9Sstevel@tonic-gate 	 *
2657c478bd9Sstevel@tonic-gate 	 * On sparc platforms, the kernel cannot recover the memory
2667c478bd9Sstevel@tonic-gate 	 * obtained from resalloc because the page structs are allocated
2677c478bd9Sstevel@tonic-gate 	 * before the call to BOP_QUIESCE. To avoid leaking this
2687c478bd9Sstevel@tonic-gate 	 * memory, the logbuffer is allocated from a small bss array
2697c478bd9Sstevel@tonic-gate 	 * that should hold the logmap except in the most extreme cases.
2707c478bd9Sstevel@tonic-gate 	 * If the bss array is too small, the logbuffer is extended
2717c478bd9Sstevel@tonic-gate 	 * from resalloc 1 page at a time.
2727c478bd9Sstevel@tonic-gate 	 */
2737c478bd9Sstevel@tonic-gate 
2747c478bd9Sstevel@tonic-gate 	logbuffer = logbuffer_min;
2757c478bd9Sstevel@tonic-gate 	elogbuffer = logbuffer+LOGBUF_MINSIZE;
2767c478bd9Sstevel@tonic-gate 	logbuf_curptr = logbuffer;
2777c478bd9Sstevel@tonic-gate 	lfreelist = (lb_me_t *)NULL;
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	if (logbuffer == (caddr_t)NULL)
2807c478bd9Sstevel@tonic-gate 		return (0);
2817c478bd9Sstevel@tonic-gate 
2827c478bd9Sstevel@tonic-gate 	dprintf("Buffer for boot loader logging support: 0x%p, size 0x%x\n",
2837c478bd9Sstevel@tonic-gate 	    logbuffer, elogbuffer-logbuffer);
2847c478bd9Sstevel@tonic-gate 
2857c478bd9Sstevel@tonic-gate 	return (1);
2867c478bd9Sstevel@tonic-gate }
2877c478bd9Sstevel@tonic-gate 
2887c478bd9Sstevel@tonic-gate static void
lufs_free_logbuf()2897c478bd9Sstevel@tonic-gate lufs_free_logbuf()
2907c478bd9Sstevel@tonic-gate {
2917c478bd9Sstevel@tonic-gate 	/*
2927c478bd9Sstevel@tonic-gate 	 * Solaris/x86 has no prom_free() routine at this time.
2937c478bd9Sstevel@tonic-gate 	 * Reclaiming the VA range below KERNEL_TEXT on Solaris/x86
2947c478bd9Sstevel@tonic-gate 	 * is done by the kernel startup itself, in hat_unload_prom()
2957c478bd9Sstevel@tonic-gate 	 * after the bootloader has been quiesced.
2967c478bd9Sstevel@tonic-gate 	 *
2977c478bd9Sstevel@tonic-gate 	 * Solaris on sparc has a prom_free() routine that will update
2987c478bd9Sstevel@tonic-gate 	 *   the memlist properties to reflect the freeing of the
2997c478bd9Sstevel@tonic-gate 	 *   logbuffer. However, the sparc kernel cannot recover
3007c478bd9Sstevel@tonic-gate 	 *   the memory freed after the call to BOP_QUIESCE as the
3017c478bd9Sstevel@tonic-gate 	 *   page struct have already been allocated. We call
3027c478bd9Sstevel@tonic-gate 	 *   prom_free anyway so that the kernel can reclaim this
3037c478bd9Sstevel@tonic-gate 	 *   memory in the future.
3047c478bd9Sstevel@tonic-gate 	 */
3057c478bd9Sstevel@tonic-gate 	if (logbuffer == LOGBUF_BASEADDR)
3067c478bd9Sstevel@tonic-gate 		prom_free(logbuffer, elogbuffer-logbuffer);
3077c478bd9Sstevel@tonic-gate 	logbuffer = (caddr_t)NULL;
3087c478bd9Sstevel@tonic-gate }
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate static caddr_t
lufs_alloc_from_logbuf(size_t sz)3117c478bd9Sstevel@tonic-gate lufs_alloc_from_logbuf(size_t sz)
3127c478bd9Sstevel@tonic-gate {
3137c478bd9Sstevel@tonic-gate 	caddr_t tmpaddr;
3147c478bd9Sstevel@tonic-gate 	lb_me_t	*l;
3157c478bd9Sstevel@tonic-gate 
3167c478bd9Sstevel@tonic-gate 	/*
3177c478bd9Sstevel@tonic-gate 	 * Satisfy lb_me_t allocations from the freelist
3187c478bd9Sstevel@tonic-gate 	 * first if possible.
3197c478bd9Sstevel@tonic-gate 	 */
3207c478bd9Sstevel@tonic-gate 	if ((sz == sizeof (lb_me_t)) && lfreelist) {
3217c478bd9Sstevel@tonic-gate 		l = lfreelist;
3227c478bd9Sstevel@tonic-gate 		lfreelist = lfreelist->l_next;
3237c478bd9Sstevel@tonic-gate 		return ((caddr_t)l);
3247c478bd9Sstevel@tonic-gate 	}
3257c478bd9Sstevel@tonic-gate 	if (elogbuffer < logbuf_curptr + sz) {
3267c478bd9Sstevel@tonic-gate 		caddr_t np;
3277c478bd9Sstevel@tonic-gate 		size_t nsz;
3287c478bd9Sstevel@tonic-gate 
3297c478bd9Sstevel@tonic-gate 		/*
3307c478bd9Sstevel@tonic-gate 		 * Out of space in current chunk - try to add another.
3317c478bd9Sstevel@tonic-gate 		 */
3327c478bd9Sstevel@tonic-gate 		if (logbuffer == logbuffer_min) {
3337c478bd9Sstevel@tonic-gate 			np = LOGBUF_BASEADDR;
3347c478bd9Sstevel@tonic-gate 		} else {
3357c478bd9Sstevel@tonic-gate 			np = elogbuffer;
3367c478bd9Sstevel@tonic-gate 		}
3377c478bd9Sstevel@tonic-gate 		nsz = roundup(sz, PAGESIZE);
3387c478bd9Sstevel@tonic-gate 		if (np + nsz > LOGBUF_BASEADDR + LOGBUF_MAXSIZE) {
3397c478bd9Sstevel@tonic-gate 			return ((caddr_t)NULL);
3407c478bd9Sstevel@tonic-gate 		}
3417c478bd9Sstevel@tonic-gate 
3427c478bd9Sstevel@tonic-gate 		np = resalloc(RES_CHILDVIRT, nsz, np, 0UL);
3437c478bd9Sstevel@tonic-gate 		if (np == (caddr_t)NULL) {
3447c478bd9Sstevel@tonic-gate 			return ((caddr_t)NULL);
3457c478bd9Sstevel@tonic-gate 		}
3467c478bd9Sstevel@tonic-gate 		if (logbuffer == logbuffer_min)
3477c478bd9Sstevel@tonic-gate 			logbuffer = LOGBUF_BASEADDR;
3487c478bd9Sstevel@tonic-gate 		logbuf_curptr = np;
3497c478bd9Sstevel@tonic-gate 		elogbuffer = logbuf_curptr + nsz;
3507c478bd9Sstevel@tonic-gate 	}
3517c478bd9Sstevel@tonic-gate 
3527c478bd9Sstevel@tonic-gate 	tmpaddr = logbuf_curptr;
3537c478bd9Sstevel@tonic-gate 	logbuf_curptr += sz;
3547c478bd9Sstevel@tonic-gate 	bzero(tmpaddr, sz);
3557c478bd9Sstevel@tonic-gate 	return (tmpaddr);
3567c478bd9Sstevel@tonic-gate }
3577c478bd9Sstevel@tonic-gate 
3587c478bd9Sstevel@tonic-gate static int32_t
lufs_read_log(int32_t addr,caddr_t va,int nb)3597c478bd9Sstevel@tonic-gate lufs_read_log(int32_t addr, caddr_t va, int nb)
3607c478bd9Sstevel@tonic-gate {
3617c478bd9Sstevel@tonic-gate 	int		i, fastpath = 0;
3627c478bd9Sstevel@tonic-gate 	daddr_t		pblk, lblk;
3637c478bd9Sstevel@tonic-gate 	sect_trailer_t	*st;
3647c478bd9Sstevel@tonic-gate 	uint32_t	ident;
3657c478bd9Sstevel@tonic-gate 
3667c478bd9Sstevel@tonic-gate 	/*
3677c478bd9Sstevel@tonic-gate 	 * Fast path for skipping the read if no target buffer
3687c478bd9Sstevel@tonic-gate 	 * is specified. Don't do this for the initial scan.
3697c478bd9Sstevel@tonic-gate 	 */
3707c478bd9Sstevel@tonic-gate 	if (ufs_is_lufs && (va == (caddr_t)NULL))
3717c478bd9Sstevel@tonic-gate 		fastpath = 1;
3727c478bd9Sstevel@tonic-gate 
3737c478bd9Sstevel@tonic-gate 	while (nb) {
3747c478bd9Sstevel@tonic-gate 		/* log wraparound check */
3757c478bd9Sstevel@tonic-gate 		if (addr == odi.od_eol_lof)
3767c478bd9Sstevel@tonic-gate 			addr = odi.od_bol_lof;
3777c478bd9Sstevel@tonic-gate 		if (fastpath)
3787c478bd9Sstevel@tonic-gate 			goto read_done;
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 		/*
3817c478bd9Sstevel@tonic-gate 		 * Translate logically-contiguous log offsets into physical
3827c478bd9Sstevel@tonic-gate 		 * block numbers. For a log consisting of a single extent:
3837c478bd9Sstevel@tonic-gate 		 *	pbno = btodb(addr) - extents[0].lbno;
3847c478bd9Sstevel@tonic-gate 		 * Otherwise, search for the extent which contains addr.
3857c478bd9Sstevel@tonic-gate 		 */
3867c478bd9Sstevel@tonic-gate 		pblk = 0;
3877c478bd9Sstevel@tonic-gate 		lblk = btodb(addr);
3887c478bd9Sstevel@tonic-gate 		for (i = 0; i < eb->nextents; i++) {
3897c478bd9Sstevel@tonic-gate 			if (lblk >= eb->extents[i].lbno &&
3907c478bd9Sstevel@tonic-gate 			    lblk < eb->extents[i].lbno +
3917c478bd9Sstevel@tonic-gate 			    eb->extents[i].nbno) {
3927c478bd9Sstevel@tonic-gate 				pblk = lblk - eb->extents[i].lbno +
3937c478bd9Sstevel@tonic-gate 				    eb->extents[i].pbno;
3947c478bd9Sstevel@tonic-gate 				break;
3957c478bd9Sstevel@tonic-gate 			}
3967c478bd9Sstevel@tonic-gate 		}
3977c478bd9Sstevel@tonic-gate 
3987c478bd9Sstevel@tonic-gate 		if (pblk == 0) {
3997c478bd9Sstevel@tonic-gate 			/*
4007c478bd9Sstevel@tonic-gate 			 * block #0 can never be in a log extent since this
4017c478bd9Sstevel@tonic-gate 			 * block always contains the primary superblock copy.
4027c478bd9Sstevel@tonic-gate 			 */
4037c478bd9Sstevel@tonic-gate 			dprintf("No log extent found for log offset 0x%llx.\n",
4047c478bd9Sstevel@tonic-gate 			    addr);
4057c478bd9Sstevel@tonic-gate 			return (0);
4067c478bd9Sstevel@tonic-gate 		}
4077c478bd9Sstevel@tonic-gate 
4087c478bd9Sstevel@tonic-gate 		/*
4097c478bd9Sstevel@tonic-gate 		 * Check whether the block we want is cached from the last
4107c478bd9Sstevel@tonic-gate 		 * read. If not, read it in now.
4117c478bd9Sstevel@tonic-gate 		 */
4127c478bd9Sstevel@tonic-gate 		if (logfp->fi_blocknum != pblk) {
4137c478bd9Sstevel@tonic-gate 			logfp->fi_blocknum = pblk;
4147c478bd9Sstevel@tonic-gate 			logfp->fi_memp = logfp->fi_buf;
4157c478bd9Sstevel@tonic-gate 			logfp->fi_count = DEV_BSIZE;
4167c478bd9Sstevel@tonic-gate 			logfp->fi_offset = 0;
4177c478bd9Sstevel@tonic-gate 			if (diskread(logfp)) {
4187c478bd9Sstevel@tonic-gate 				dprintf("I/O error reading the ufs log" \
4197c478bd9Sstevel@tonic-gate 				    " at block 0x%x.\n",
4207c478bd9Sstevel@tonic-gate 				    logfp->fi_blocknum);
4217c478bd9Sstevel@tonic-gate 				return (0);
4227c478bd9Sstevel@tonic-gate 			}
4237c478bd9Sstevel@tonic-gate 			/*
4247c478bd9Sstevel@tonic-gate 			 * Log structure verification. The block which we just
4257c478bd9Sstevel@tonic-gate 			 * read has an ident number that must match its offset
4267c478bd9Sstevel@tonic-gate 			 * in blocks from the head of the log. Since the log
4277c478bd9Sstevel@tonic-gate 			 * can wrap around, we have to check for that to get the
4287c478bd9Sstevel@tonic-gate 			 * ident right. Out-of-sequence idents can happen after
4297c478bd9Sstevel@tonic-gate 			 * power failures, panics during a partial transaction,
4307c478bd9Sstevel@tonic-gate 			 * media errors, ... - in any case, they mark the end of
4317c478bd9Sstevel@tonic-gate 			 * the valid part of the log.
4327c478bd9Sstevel@tonic-gate 			 */
4337c478bd9Sstevel@tonic-gate 			st = (sect_trailer_t *)(logfp->fi_memp +
4347c478bd9Sstevel@tonic-gate 			    LDL_USABLE_BSIZE);
4357c478bd9Sstevel@tonic-gate 			/* od_head_ident is where the sequence starts */
4367c478bd9Sstevel@tonic-gate 			ident = odi.od_head_ident;
4377c478bd9Sstevel@tonic-gate 			if (lblk >= lbtodb(odi.od_head_lof)) {
4387c478bd9Sstevel@tonic-gate 				/* no wraparound */
4397c478bd9Sstevel@tonic-gate 				ident += (lblk - lbtodb(odi.od_head_lof));
4407c478bd9Sstevel@tonic-gate 			} else {
4417c478bd9Sstevel@tonic-gate 				/* log wrapped around the end */
4427c478bd9Sstevel@tonic-gate 				ident += (lbtodb(odi.od_eol_lof) -
4437c478bd9Sstevel@tonic-gate 				    lbtodb(odi.od_head_lof));
4447c478bd9Sstevel@tonic-gate 				ident += (lblk - lbtodb(odi.od_bol_lof));
4457c478bd9Sstevel@tonic-gate 			}
4467c478bd9Sstevel@tonic-gate 
4477c478bd9Sstevel@tonic-gate 			if (ident != st->st_ident)
4487c478bd9Sstevel@tonic-gate 				return (0);
4497c478bd9Sstevel@tonic-gate 		}
4507c478bd9Sstevel@tonic-gate read_done:
4517c478bd9Sstevel@tonic-gate 		/*
4527c478bd9Sstevel@tonic-gate 		 * Copy the delta contents to the destination buffer if
4537c478bd9Sstevel@tonic-gate 		 * one was specified. Otherwise, just skip the contents.
4547c478bd9Sstevel@tonic-gate 		 */
4557c478bd9Sstevel@tonic-gate 		i = MIN(NB_LEFT_IN_SECTOR(addr), nb);
4567c478bd9Sstevel@tonic-gate 		if (va != NULL) {
4577c478bd9Sstevel@tonic-gate 			bcopy(logfp->fi_buf + (addr - ldbtob(lbtodb(addr))),
4587c478bd9Sstevel@tonic-gate 			    va, i);
4597c478bd9Sstevel@tonic-gate 			va += i;
4607c478bd9Sstevel@tonic-gate 		}
4617c478bd9Sstevel@tonic-gate 		nb -= i;
4627c478bd9Sstevel@tonic-gate 		addr += i;
4637c478bd9Sstevel@tonic-gate 		/*
4647c478bd9Sstevel@tonic-gate 		 * Skip sector trailer if necessary.
4657c478bd9Sstevel@tonic-gate 		 */
4667c478bd9Sstevel@tonic-gate 		if (NB_LEFT_IN_SECTOR(addr) == 0)
4677c478bd9Sstevel@tonic-gate 			addr += sizeof (sect_trailer_t);
4687c478bd9Sstevel@tonic-gate 	}
4697c478bd9Sstevel@tonic-gate 	return (addr);
4707c478bd9Sstevel@tonic-gate }
4717c478bd9Sstevel@tonic-gate 
4727c478bd9Sstevel@tonic-gate void
lufs_boot_init(fileid_t * filep)4737c478bd9Sstevel@tonic-gate lufs_boot_init(fileid_t *filep)
4747c478bd9Sstevel@tonic-gate {
4757c478bd9Sstevel@tonic-gate 	struct fs *sb = (struct fs *)filep->fi_memp;
4767c478bd9Sstevel@tonic-gate 	int err = 0;
4777c478bd9Sstevel@tonic-gate 
4787c478bd9Sstevel@tonic-gate 	/*
4797c478bd9Sstevel@tonic-gate 	 * boot_ufs_mountroot() should have called us with a
4807c478bd9Sstevel@tonic-gate 	 * filep pointing to the superblock. Verify that this
4817c478bd9Sstevel@tonic-gate 	 * is so first.
4827c478bd9Sstevel@tonic-gate 	 * Then check whether this filesystem has a dirty log.
4837c478bd9Sstevel@tonic-gate 	 * Also return if lufs support was disabled on request.
4847c478bd9Sstevel@tonic-gate 	 */
4857c478bd9Sstevel@tonic-gate 	if (!lufs_support ||
4867c478bd9Sstevel@tonic-gate 	    sb != (struct fs *)&filep->fi_devp->un_fs.di_fs ||
4877c478bd9Sstevel@tonic-gate 	    sb->fs_clean != FSLOG || sb->fs_logbno == NULL) {
4887c478bd9Sstevel@tonic-gate 		return;
4897c478bd9Sstevel@tonic-gate 	}
4907c478bd9Sstevel@tonic-gate 
4917c478bd9Sstevel@tonic-gate 	if (boothowto & RB_VERBOSE)
4927c478bd9Sstevel@tonic-gate 		printf("The boot filesystem is logging.\n");
4937c478bd9Sstevel@tonic-gate 
4947c478bd9Sstevel@tonic-gate 	/*
4957c478bd9Sstevel@tonic-gate 	 * The filesystem is logging, there is a log area
4967c478bd9Sstevel@tonic-gate 	 * allocated for it. Check the log state and determine
4977c478bd9Sstevel@tonic-gate 	 * whether it'll be possible to use this log.
4987c478bd9Sstevel@tonic-gate 	 */
4997c478bd9Sstevel@tonic-gate 
5007c478bd9Sstevel@tonic-gate 	/*
5017c478bd9Sstevel@tonic-gate 	 * Allocate a private fileid_t for use when reading
5027c478bd9Sstevel@tonic-gate 	 * from the log.
5037c478bd9Sstevel@tonic-gate 	 */
5047c478bd9Sstevel@tonic-gate 	eb = (extent_block_t *)bkmem_zalloc(sb->fs_bsize);
5057c478bd9Sstevel@tonic-gate 	logfp = (fileid_t *)bkmem_zalloc(sizeof (fileid_t));
5067c478bd9Sstevel@tonic-gate 	logfp->fi_memp = logfp->fi_buf;
5077c478bd9Sstevel@tonic-gate 	logfp->fi_devp = filep->fi_devp;
5087c478bd9Sstevel@tonic-gate 
5097c478bd9Sstevel@tonic-gate 	/*
5107c478bd9Sstevel@tonic-gate 	 * Read the extent block and verify that what we
5117c478bd9Sstevel@tonic-gate 	 * find there are actually lufs extents.
5127c478bd9Sstevel@tonic-gate 	 * Make it simple: the extent block including all
5137c478bd9Sstevel@tonic-gate 	 * extents cannot be larger than a filesystem block.
5147c478bd9Sstevel@tonic-gate 	 * So read a whole filesystem block, to make sure
5157c478bd9Sstevel@tonic-gate 	 * we have read all extents in the same operation.
5167c478bd9Sstevel@tonic-gate 	 */
5177c478bd9Sstevel@tonic-gate 	logfp->fi_blocknum = sb->fs_logbno;
5187c478bd9Sstevel@tonic-gate 	logfp->fi_count = sb->fs_bsize;
5197c478bd9Sstevel@tonic-gate 	logfp->fi_memp = (caddr_t)eb;
5207c478bd9Sstevel@tonic-gate 	logfp->fi_offset = 0;
5217c478bd9Sstevel@tonic-gate 	if (diskread(logfp) || eb->type != LUFS_EXTENTS) {
5227c478bd9Sstevel@tonic-gate 		dprintf("Failed to read log extent block.\n");
5237c478bd9Sstevel@tonic-gate 		err = LOG_IS_ERRORED;
5247c478bd9Sstevel@tonic-gate 		goto out;
5257c478bd9Sstevel@tonic-gate 	}
5267c478bd9Sstevel@tonic-gate 
5277c478bd9Sstevel@tonic-gate 	/*
5287c478bd9Sstevel@tonic-gate 	 * Read the on disk log header. If that fails,
5297c478bd9Sstevel@tonic-gate 	 * try the backup copy on the adjacent block.
5307c478bd9Sstevel@tonic-gate 	 */
5317c478bd9Sstevel@tonic-gate 	logfp->fi_blocknum = eb->extents[0].pbno;
5327c478bd9Sstevel@tonic-gate 	logfp->fi_count = sizeof (ml_odunit_t);
5337c478bd9Sstevel@tonic-gate 	logfp->fi_memp = (caddr_t)&odi;
5347c478bd9Sstevel@tonic-gate 	logfp->fi_offset = 0;
5357c478bd9Sstevel@tonic-gate 	if (diskread(logfp)) {
5367c478bd9Sstevel@tonic-gate 		logfp->fi_blocknum = eb->extents[0].pbno + 1;
5377c478bd9Sstevel@tonic-gate 		logfp->fi_count = sizeof (ml_odunit_t);
5387c478bd9Sstevel@tonic-gate 		logfp->fi_memp = (caddr_t)&odi;
5397c478bd9Sstevel@tonic-gate 		logfp->fi_offset = 0;
5407c478bd9Sstevel@tonic-gate 		if (diskread(logfp)) {
5417c478bd9Sstevel@tonic-gate 			dprintf("Failed to read on-disk log header.\n");
5427c478bd9Sstevel@tonic-gate 			err = LOG_IS_ERRORED;
5437c478bd9Sstevel@tonic-gate 			goto out;
5447c478bd9Sstevel@tonic-gate 		}
5457c478bd9Sstevel@tonic-gate 	}
5467c478bd9Sstevel@tonic-gate 
5477c478bd9Sstevel@tonic-gate 	/*
5487c478bd9Sstevel@tonic-gate 	 * Verify that we understand this log, and
5497c478bd9Sstevel@tonic-gate 	 * that the log isn't bad or empty.
5507c478bd9Sstevel@tonic-gate 	 */
5517c478bd9Sstevel@tonic-gate 	if (odi.od_version != LUFS_VERSION_LATEST) {
5527c478bd9Sstevel@tonic-gate 		dprintf("On-disk log format v%d != supported format v%d.\n",
5537c478bd9Sstevel@tonic-gate 		    odi.od_version, LUFS_VERSION_LATEST);
5547c478bd9Sstevel@tonic-gate 		err = LOG_IS_ERRORED;
5557c478bd9Sstevel@tonic-gate 	} else if (odi.od_badlog) {
5567c478bd9Sstevel@tonic-gate 		dprintf("On-disk log is marked bad.\n");
5577c478bd9Sstevel@tonic-gate 		err = LOG_IS_ERRORED;
5587c478bd9Sstevel@tonic-gate 	} else if (odi.od_chksum != odi.od_head_ident + odi.od_tail_ident) {
5597c478bd9Sstevel@tonic-gate 		dprintf("On-disk log checksum %d != ident sum %d.\n",
5607c478bd9Sstevel@tonic-gate 		    odi.od_chksum, odi.od_head_ident + odi.od_tail_ident);
5617c478bd9Sstevel@tonic-gate 		err = LOG_IS_ERRORED;
5627c478bd9Sstevel@tonic-gate 	} else {
5637c478bd9Sstevel@tonic-gate 		/*
5647c478bd9Sstevel@tonic-gate 		 * All consistency checks ok. Scan the log, build the
5657c478bd9Sstevel@tonic-gate 		 * log hash. If this succeeds we'll be using the log
5667c478bd9Sstevel@tonic-gate 		 * when reading from this filesystem.
5677c478bd9Sstevel@tonic-gate 		 */
5687c478bd9Sstevel@tonic-gate 		err = lufs_logscan();
5697c478bd9Sstevel@tonic-gate 	}
5707c478bd9Sstevel@tonic-gate out:
5717c478bd9Sstevel@tonic-gate 	ufs_is_lufs = 1;
5727c478bd9Sstevel@tonic-gate 	switch (err) {
5737c478bd9Sstevel@tonic-gate 	case LOG_IS_EMPTY:
5747c478bd9Sstevel@tonic-gate 		if (boothowto & RB_VERBOSE)
5757c478bd9Sstevel@tonic-gate 			printf("The ufs log is empty and will not be used.\n");
5767c478bd9Sstevel@tonic-gate 		lufs_closeall();
5777c478bd9Sstevel@tonic-gate 		break;
5787c478bd9Sstevel@tonic-gate 	case LOG_IS_OK:
5797c478bd9Sstevel@tonic-gate 		if (boothowto & RB_VERBOSE)
5807c478bd9Sstevel@tonic-gate 			printf("Using the ufs log.\n");
5817c478bd9Sstevel@tonic-gate 		break;
5827c478bd9Sstevel@tonic-gate 	case LOG_IS_ERRORED:
5837c478bd9Sstevel@tonic-gate 		if (boothowto & RB_VERBOSE)
5847c478bd9Sstevel@tonic-gate 			printf("Couldn't build log hash. Can't use ufs log.\n");
5857c478bd9Sstevel@tonic-gate 		lufs_closeall();
5867c478bd9Sstevel@tonic-gate 		break;
5877c478bd9Sstevel@tonic-gate 	default:
5887c478bd9Sstevel@tonic-gate 		dprintf("Invalid error %d while scanning the ufs log.\n", err);
5897c478bd9Sstevel@tonic-gate 		break;
5907c478bd9Sstevel@tonic-gate 	}
5917c478bd9Sstevel@tonic-gate }
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate static int
lufs_logscan_read(int32_t * addr,struct delta * d)5947c478bd9Sstevel@tonic-gate lufs_logscan_read(int32_t *addr, struct delta *d)
5957c478bd9Sstevel@tonic-gate {
5967c478bd9Sstevel@tonic-gate 	*addr = lufs_read_log(*addr, (caddr_t)d, sizeof (struct delta));
5977c478bd9Sstevel@tonic-gate 
5987c478bd9Sstevel@tonic-gate 	if (*addr == 0 ||
5997c478bd9Sstevel@tonic-gate 	    d->d_typ < DT_NONE || d->d_typ > DT_MAX ||
6007c478bd9Sstevel@tonic-gate 	    d->d_nb >= odi.od_logsize)
6017c478bd9Sstevel@tonic-gate 		return (0);
6027c478bd9Sstevel@tonic-gate 
6037c478bd9Sstevel@tonic-gate 	return (1);
6047c478bd9Sstevel@tonic-gate }
6057c478bd9Sstevel@tonic-gate 
6067c478bd9Sstevel@tonic-gate static int
lufs_logscan_skip(int32_t * addr,struct delta * d)6077c478bd9Sstevel@tonic-gate lufs_logscan_skip(int32_t *addr, struct delta *d)
6087c478bd9Sstevel@tonic-gate {
6097c478bd9Sstevel@tonic-gate 	switch (d->d_typ) {
6107c478bd9Sstevel@tonic-gate 	case DT_COMMIT:
6117c478bd9Sstevel@tonic-gate 		/*
6127c478bd9Sstevel@tonic-gate 		 * A DT_COMMIT delta has no size as such, but will
6137c478bd9Sstevel@tonic-gate 		 * always "fill up" the sector that contains it.
6147c478bd9Sstevel@tonic-gate 		 * The next delta header is found at the beginning
6157c478bd9Sstevel@tonic-gate 		 * of the next 512-Bytes sector, adjust "addr" to
6167c478bd9Sstevel@tonic-gate 		 * reflect that.
6177c478bd9Sstevel@tonic-gate 		 */
6187c478bd9Sstevel@tonic-gate 		*addr += ((*addr & (DEV_BSIZE - 1))) ?
6197c478bd9Sstevel@tonic-gate 		    NB_LEFT_IN_SECTOR(*addr) +
6207c478bd9Sstevel@tonic-gate 		    sizeof (sect_trailer_t) : 0;
6217c478bd9Sstevel@tonic-gate 		return (1);
6227c478bd9Sstevel@tonic-gate 	case DT_CANCEL:
6237c478bd9Sstevel@tonic-gate 	case DT_ABZERO:
6247c478bd9Sstevel@tonic-gate 		/*
6257c478bd9Sstevel@tonic-gate 		 * These types of deltas occupy no space in the log
6267c478bd9Sstevel@tonic-gate 		 */
6277c478bd9Sstevel@tonic-gate 		return (1);
6287c478bd9Sstevel@tonic-gate 	default:
6297c478bd9Sstevel@tonic-gate 		/*
6307c478bd9Sstevel@tonic-gate 		 * Skip over the delta contents.
6317c478bd9Sstevel@tonic-gate 		 */
6327c478bd9Sstevel@tonic-gate 		*addr = lufs_read_log(*addr, NULL, d->d_nb);
6337c478bd9Sstevel@tonic-gate 	}
6347c478bd9Sstevel@tonic-gate 
6357c478bd9Sstevel@tonic-gate 	return (*addr != NULL);
6367c478bd9Sstevel@tonic-gate }
6377c478bd9Sstevel@tonic-gate 
6387c478bd9Sstevel@tonic-gate static void
lufs_logscan_freecancel(void)6397c478bd9Sstevel@tonic-gate lufs_logscan_freecancel(void)
6407c478bd9Sstevel@tonic-gate {
6417c478bd9Sstevel@tonic-gate 	lb_me_t		**lh, *l, *lnext;
6427c478bd9Sstevel@tonic-gate 	int		i;
6437c478bd9Sstevel@tonic-gate 
6447c478bd9Sstevel@tonic-gate 	/*
6457c478bd9Sstevel@tonic-gate 	 * Walk the entire log hash and put cancelled entries
6467c478bd9Sstevel@tonic-gate 	 * onto the freelist. Corner cases:
6477c478bd9Sstevel@tonic-gate 	 * a) empty hash chain (*lh == NULL)
6487c478bd9Sstevel@tonic-gate 	 * b) only one entry in chain, and that is cancelled.
6497c478bd9Sstevel@tonic-gate 	 *    If for every cancelled delta another one would've
6507c478bd9Sstevel@tonic-gate 	 *    been added, this situation couldn't occur, but a
6517c478bd9Sstevel@tonic-gate 	 *    DT_CANCEL delta can lead to this as it is never
6527c478bd9Sstevel@tonic-gate 	 *    added.
6537c478bd9Sstevel@tonic-gate 	 */
6547c478bd9Sstevel@tonic-gate 	for (i = 0; i < LB_HASHSIZE; i++) {
6557c478bd9Sstevel@tonic-gate 		lh = &loghash[i];
6567c478bd9Sstevel@tonic-gate 		l = *lh;
6577c478bd9Sstevel@tonic-gate 		do {
6587c478bd9Sstevel@tonic-gate 			if (*lh == (lb_me_t *)NULL)
6597c478bd9Sstevel@tonic-gate 				break;
6607c478bd9Sstevel@tonic-gate 			lnext = l->l_next;
6617c478bd9Sstevel@tonic-gate 			if (l->l_flags & LB_ISCANCELLED) {
6627c478bd9Sstevel@tonic-gate 				remlist(lh, l);
6637c478bd9Sstevel@tonic-gate 				bzero((caddr_t)l, sizeof (lb_me_t));
6647c478bd9Sstevel@tonic-gate 				l->l_next = lfreelist;
6657c478bd9Sstevel@tonic-gate 				lfreelist = l;
6667c478bd9Sstevel@tonic-gate 				/*
6677c478bd9Sstevel@tonic-gate 				 * Just removed the hash head. In order not
6687c478bd9Sstevel@tonic-gate 				 * to terminate the while loop, respin chain
6697c478bd9Sstevel@tonic-gate 				 * walk for this hash chain.
6707c478bd9Sstevel@tonic-gate 				 */
6717c478bd9Sstevel@tonic-gate 				if (lnext == *lh) {
6727c478bd9Sstevel@tonic-gate 					i--;
6737c478bd9Sstevel@tonic-gate 					break;
6747c478bd9Sstevel@tonic-gate 				}
6757c478bd9Sstevel@tonic-gate 			}
6767c478bd9Sstevel@tonic-gate 			l = lnext;
6777c478bd9Sstevel@tonic-gate 		} while (l != *lh);
6787c478bd9Sstevel@tonic-gate 	}
6797c478bd9Sstevel@tonic-gate }
6807c478bd9Sstevel@tonic-gate 
6817c478bd9Sstevel@tonic-gate static int
lufs_logscan_addmap(int32_t * addr,struct delta * d)6827c478bd9Sstevel@tonic-gate lufs_logscan_addmap(int32_t *addr, struct delta *d)
6837c478bd9Sstevel@tonic-gate {
6847c478bd9Sstevel@tonic-gate 	lb_me_t		**lh, *l;
6857c478bd9Sstevel@tonic-gate 
6867c478bd9Sstevel@tonic-gate 	switch (d->d_typ) {
6877c478bd9Sstevel@tonic-gate 	case DT_COMMIT:
6887c478bd9Sstevel@tonic-gate 		/*
6897c478bd9Sstevel@tonic-gate 		 * Handling DT_COMMIT deltas is special. We need to:
6907c478bd9Sstevel@tonic-gate 		 * 1. increase the transaction ID
6917c478bd9Sstevel@tonic-gate 		 * 2. remove cancelled entries.
6927c478bd9Sstevel@tonic-gate 		 */
6937c478bd9Sstevel@tonic-gate 		lufs_logscan_freecancel();
6947c478bd9Sstevel@tonic-gate 		curtid++;
6957c478bd9Sstevel@tonic-gate 		break;
6967c478bd9Sstevel@tonic-gate 	case DT_INODE:
6977c478bd9Sstevel@tonic-gate 		/*
6987c478bd9Sstevel@tonic-gate 		 * Deltas against parts of on-disk inodes are
6997c478bd9Sstevel@tonic-gate 		 * assumed to be timestamps. Ignore those.
7007c478bd9Sstevel@tonic-gate 		 */
7017c478bd9Sstevel@tonic-gate 		if (d->d_nb != sizeof (struct dinode))
7027c478bd9Sstevel@tonic-gate 			break;
7037c478bd9Sstevel@tonic-gate 		/* FALLTHROUGH */
7047c478bd9Sstevel@tonic-gate 	case DT_CANCEL:
7057c478bd9Sstevel@tonic-gate 	case DT_ABZERO:
7067c478bd9Sstevel@tonic-gate 	case DT_AB:
7077c478bd9Sstevel@tonic-gate 	case DT_DIR:
7087c478bd9Sstevel@tonic-gate 	case DT_FBI:
7097c478bd9Sstevel@tonic-gate 		/*
7107c478bd9Sstevel@tonic-gate 		 * These types of deltas contain and/or modify structural
7117c478bd9Sstevel@tonic-gate 		 * information that is needed for booting the system:
7127c478bd9Sstevel@tonic-gate 		 * - where to find a file (DT_DIR, DT_FBI)
7137c478bd9Sstevel@tonic-gate 		 * - the file itself (DT_INODE)
7147c478bd9Sstevel@tonic-gate 		 * - data blocks associated with a file (DT_AB, DT_ABZERO)
7157c478bd9Sstevel@tonic-gate 		 *
7167c478bd9Sstevel@tonic-gate 		 * Building the hash chains becomes complicated because there
7177c478bd9Sstevel@tonic-gate 		 * may exist an older (== previously added) entry that overlaps
7187c478bd9Sstevel@tonic-gate 		 * with the one we want to add.
7197c478bd9Sstevel@tonic-gate 		 * Four cases must be distinguished:
7207c478bd9Sstevel@tonic-gate 		 * 1. The new delta is an exact match for an existing one,
7217c478bd9Sstevel@tonic-gate 		 *    or is a superset of an existing one, and both
7227c478bd9Sstevel@tonic-gate 		 *    belong to the same transaction.
7237c478bd9Sstevel@tonic-gate 		 *    The new delta completely supersedes the old one, so
7247c478bd9Sstevel@tonic-gate 		 *    remove that and reuse the structure for the new.
7257c478bd9Sstevel@tonic-gate 		 *    Then add the new delta to the head of the hashchain.
7267c478bd9Sstevel@tonic-gate 		 * 2. The new delta is an exact match for an existing one,
7277c478bd9Sstevel@tonic-gate 		 *    or is a superset of an existing one, but the two
7287c478bd9Sstevel@tonic-gate 		 *    belong to different transactions (i.e. the old one is
7297c478bd9Sstevel@tonic-gate 		 *    committed).
7307c478bd9Sstevel@tonic-gate 		 *    The existing one is marked to be cancelled when the
7317c478bd9Sstevel@tonic-gate 		 *    next DT_COMMIT record is found, and the hash chain
7327c478bd9Sstevel@tonic-gate 		 *    walk is continued as there may be more existing entries
7337c478bd9Sstevel@tonic-gate 		 *    found which overlap the new delta (happens if that is
7347c478bd9Sstevel@tonic-gate 		 *    a superset of those in the log).
7357c478bd9Sstevel@tonic-gate 		 *    Once no more overlaps are found, goto 4.
7367c478bd9Sstevel@tonic-gate 		 * 3. An existing entry completely covers the new one.
7377c478bd9Sstevel@tonic-gate 		 *    The new delta is then added directly before this
7387c478bd9Sstevel@tonic-gate 		 *    existing one.
7397c478bd9Sstevel@tonic-gate 		 * 4. No (more) overlaps with existing entries are found.
7407c478bd9Sstevel@tonic-gate 		 *    Unless this is a DT_CANCEL delta, whose only purpose
7417c478bd9Sstevel@tonic-gate 		 *    is already handled by marking overlapping entries for
7427c478bd9Sstevel@tonic-gate 		 *    cancellation, add the new delta at the hash chain head.
7437c478bd9Sstevel@tonic-gate 		 *
7447c478bd9Sstevel@tonic-gate 		 * This strategy makes sure that the hash chains are properly
7457c478bd9Sstevel@tonic-gate 		 * ordered. lufs_merge_deltas() walks the hash chain backward,
7467c478bd9Sstevel@tonic-gate 		 * which then ensures that delta merging is done in the same
7477c478bd9Sstevel@tonic-gate 		 * order as those deltas occur in the log - remember, the
7487c478bd9Sstevel@tonic-gate 		 * log can only be read in one direction.
7497c478bd9Sstevel@tonic-gate 		 *
7507c478bd9Sstevel@tonic-gate 		 */
7517c478bd9Sstevel@tonic-gate 		lh = &loghash[LB_HASHFUNC(d->d_mof)];
7527c478bd9Sstevel@tonic-gate 		l = *lh;
7537c478bd9Sstevel@tonic-gate 		do {
7547c478bd9Sstevel@tonic-gate 			if (l == (lb_me_t *)NULL)
7557c478bd9Sstevel@tonic-gate 				break;
7567c478bd9Sstevel@tonic-gate 			/*
7577c478bd9Sstevel@tonic-gate 			 * This covers the first two cases above.
7587c478bd9Sstevel@tonic-gate 			 * If this is a perfect match from the same transaction,
7597c478bd9Sstevel@tonic-gate 			 * and it isn't already cancelled, we simply replace it
7607c478bd9Sstevel@tonic-gate 			 * with its newer incarnation.
7617c478bd9Sstevel@tonic-gate 			 * Otherwise, mark it for cancellation. Handling of
7627c478bd9Sstevel@tonic-gate 			 * DT_COMMIT is going to remove it, then.
7637c478bd9Sstevel@tonic-gate 			 */
7647c478bd9Sstevel@tonic-gate 			if (WITHIN(l->l_mof, l->l_nb, d->d_mof, d->d_nb)) {
7657c478bd9Sstevel@tonic-gate 				if (!(l->l_flags & LB_ISCANCELLED)) {
7667c478bd9Sstevel@tonic-gate 					if (l->l_tid == curtid &&
7677c478bd9Sstevel@tonic-gate 					    d->d_typ != DT_CANCEL) {
7687c478bd9Sstevel@tonic-gate 						remlist(lh, l);
7697c478bd9Sstevel@tonic-gate 						l->l_mof = d->d_mof;
7707c478bd9Sstevel@tonic-gate 						l->l_lof = *addr;
7717c478bd9Sstevel@tonic-gate 						l->l_nb = d->d_nb;
7727c478bd9Sstevel@tonic-gate 						l->l_typ = d->d_typ;
7737c478bd9Sstevel@tonic-gate 						l->l_flags = 0;
7747c478bd9Sstevel@tonic-gate 						l->l_tid = curtid;
7757c478bd9Sstevel@tonic-gate 						inslist(lh, l);
7767c478bd9Sstevel@tonic-gate 						return (1);
7777c478bd9Sstevel@tonic-gate 					} else {
7787c478bd9Sstevel@tonic-gate 						/*
7797c478bd9Sstevel@tonic-gate 						 * 2nd case - cancel only.
7807c478bd9Sstevel@tonic-gate 						 */
7817c478bd9Sstevel@tonic-gate 						l->l_flags |= LB_ISCANCELLED;
7827c478bd9Sstevel@tonic-gate 					}
7837c478bd9Sstevel@tonic-gate 				}
7847c478bd9Sstevel@tonic-gate 			} else if (WITHIN(d->d_mof, d->d_nb,
7857c478bd9Sstevel@tonic-gate 			    l->l_mof, l->l_nb)) {
7867c478bd9Sstevel@tonic-gate 				/*
7877c478bd9Sstevel@tonic-gate 				 * This is the third case above.
7887c478bd9Sstevel@tonic-gate 				 * With deltas DT_ABZERO/DT_AB and DT_FBI/DT_DIR
7897c478bd9Sstevel@tonic-gate 				 * this may happen - an existing previous delta
7907c478bd9Sstevel@tonic-gate 				 * is larger than the current one we're planning
7917c478bd9Sstevel@tonic-gate 				 * to add - DT_ABZERO deltas are supersets of
7927c478bd9Sstevel@tonic-gate 				 * DT_AB deltas, and likewise DT_FBI/DT_DIR.
7937c478bd9Sstevel@tonic-gate 				 * In order to do merging correctly, such deltas
7947c478bd9Sstevel@tonic-gate 				 * put up a barrier for new ones that overlap,
7957c478bd9Sstevel@tonic-gate 				 * and we have to add the new delta immediately
7967c478bd9Sstevel@tonic-gate 				 * before (!) the existing one.
7977c478bd9Sstevel@tonic-gate 				 */
7987c478bd9Sstevel@tonic-gate 				lb_me_t *newl;
7997c478bd9Sstevel@tonic-gate 				newl = lufs_alloc_me();
8007c478bd9Sstevel@tonic-gate 				if (newl == (lb_me_t *)NULL) {
8017c478bd9Sstevel@tonic-gate 					/*
8027c478bd9Sstevel@tonic-gate 					 * No memory. Throw away everything
8037c478bd9Sstevel@tonic-gate 					 * and try booting without logging
8047c478bd9Sstevel@tonic-gate 					 * support.
8057c478bd9Sstevel@tonic-gate 					 */
8067c478bd9Sstevel@tonic-gate 					curtid = 0;
8077c478bd9Sstevel@tonic-gate 					return (0);
8087c478bd9Sstevel@tonic-gate 				}
8097c478bd9Sstevel@tonic-gate 				newl->l_mof = d->d_mof;
8107c478bd9Sstevel@tonic-gate 				newl->l_lof = *addr;	/* "payload" address */
8117c478bd9Sstevel@tonic-gate 				newl->l_nb = d->d_nb;
8127c478bd9Sstevel@tonic-gate 				newl->l_typ = d->d_typ;
8137c478bd9Sstevel@tonic-gate 				newl->l_tid = curtid;
8147c478bd9Sstevel@tonic-gate 				newl->l_prev = l->l_prev;
8157c478bd9Sstevel@tonic-gate 				newl->l_next = l;
8167c478bd9Sstevel@tonic-gate 				l->l_prev->l_next = newl;
8177c478bd9Sstevel@tonic-gate 				l->l_prev = newl;
8187c478bd9Sstevel@tonic-gate 				if (*lh == l)
8197c478bd9Sstevel@tonic-gate 					*lh = newl;
8207c478bd9Sstevel@tonic-gate 				return (1);
8217c478bd9Sstevel@tonic-gate 			}
8227c478bd9Sstevel@tonic-gate 			l = l->l_next;
8237c478bd9Sstevel@tonic-gate 		} while (l != *lh);
8247c478bd9Sstevel@tonic-gate 
8257c478bd9Sstevel@tonic-gate 		/*
8267c478bd9Sstevel@tonic-gate 		 * This is case 4., add a new delta at the head of the chain.
8277c478bd9Sstevel@tonic-gate 		 *
8287c478bd9Sstevel@tonic-gate 		 * If the new delta is a DT_CANCEL entry, we handled it by
8297c478bd9Sstevel@tonic-gate 		 * marking everything it covered for cancellation. We can
8307c478bd9Sstevel@tonic-gate 		 * get by without actually adding the delta itself to the
8317c478bd9Sstevel@tonic-gate 		 * hash, as it'd need to be removed by the commit code anyway.
8327c478bd9Sstevel@tonic-gate 		 */
8337c478bd9Sstevel@tonic-gate 		if (d->d_typ == DT_CANCEL)
8347c478bd9Sstevel@tonic-gate 			break;
8357c478bd9Sstevel@tonic-gate 
8367c478bd9Sstevel@tonic-gate 		l = lufs_alloc_me();
8377c478bd9Sstevel@tonic-gate 		if (l == (lb_me_t *)NULL) {
8387c478bd9Sstevel@tonic-gate 			/*
8397c478bd9Sstevel@tonic-gate 			 * No memory. Throw away everything
8407c478bd9Sstevel@tonic-gate 			 * and try booting without logging
8417c478bd9Sstevel@tonic-gate 			 * support.
8427c478bd9Sstevel@tonic-gate 			 */
8437c478bd9Sstevel@tonic-gate 			curtid = 0;
8447c478bd9Sstevel@tonic-gate 			return (0);
8457c478bd9Sstevel@tonic-gate 		}
8467c478bd9Sstevel@tonic-gate 		l->l_mof = d->d_mof;
8477c478bd9Sstevel@tonic-gate 		l->l_lof = *addr;	/* this is the "payload" address */
8487c478bd9Sstevel@tonic-gate 		l->l_nb = d->d_nb;
8497c478bd9Sstevel@tonic-gate 		l->l_typ = d->d_typ;
8507c478bd9Sstevel@tonic-gate 		l->l_tid = curtid;
8517c478bd9Sstevel@tonic-gate 		inslist(lh, l);
8527c478bd9Sstevel@tonic-gate 		break;
8537c478bd9Sstevel@tonic-gate 	default:
8547c478bd9Sstevel@tonic-gate 		break;
8557c478bd9Sstevel@tonic-gate 	}
8567c478bd9Sstevel@tonic-gate 	return (1);
8577c478bd9Sstevel@tonic-gate }
8587c478bd9Sstevel@tonic-gate 
8597c478bd9Sstevel@tonic-gate static int
lufs_logscan_prescan(void)8607c478bd9Sstevel@tonic-gate lufs_logscan_prescan(void)
8617c478bd9Sstevel@tonic-gate {
8627c478bd9Sstevel@tonic-gate 	/*
8637c478bd9Sstevel@tonic-gate 	 * Simulate a full log by setting the tail to be one sector
8647c478bd9Sstevel@tonic-gate 	 * behind the head. This will make the logscan read all
8657c478bd9Sstevel@tonic-gate 	 * of the log until an out-of-sequence sector ident is
8667c478bd9Sstevel@tonic-gate 	 * found.
8677c478bd9Sstevel@tonic-gate 	 */
8687c478bd9Sstevel@tonic-gate 	odi.od_tail_lof = dbtob(btodb(odi.od_head_lof)) - DEV_BSIZE;
8697c478bd9Sstevel@tonic-gate 	if (odi.od_tail_lof < odi.od_bol_lof)
8707c478bd9Sstevel@tonic-gate 		odi.od_tail_lof = odi.od_eol_lof - DEV_BSIZE;
8717c478bd9Sstevel@tonic-gate 	if (odi.od_tail_lof >= odi.od_eol_lof)
8727c478bd9Sstevel@tonic-gate 		odi.od_tail_lof = odi.od_bol_lof;
8737c478bd9Sstevel@tonic-gate 
8747c478bd9Sstevel@tonic-gate 	/*
8757c478bd9Sstevel@tonic-gate 	 * While sector trailers maintain TID values, od_head_tid
8767c478bd9Sstevel@tonic-gate 	 * is not being updated by the kernel ufs logging support
8777c478bd9Sstevel@tonic-gate 	 * at this time. We therefore count transactions ourselves
8787c478bd9Sstevel@tonic-gate 	 * starting at zero - as does the kernel ufs logscan code.
8797c478bd9Sstevel@tonic-gate 	 */
8807c478bd9Sstevel@tonic-gate 	curtid = 0;
8817c478bd9Sstevel@tonic-gate 
8827c478bd9Sstevel@tonic-gate 	if (!lufs_alloc_logbuf()) {
8837c478bd9Sstevel@tonic-gate 		dprintf("Failed to allocate log buffer.\n");
8847c478bd9Sstevel@tonic-gate 		return (0);
8857c478bd9Sstevel@tonic-gate 	}
8867c478bd9Sstevel@tonic-gate 
8877c478bd9Sstevel@tonic-gate 	loghash = (lb_me_t **)lufs_alloc_from_logbuf(
8887c478bd9Sstevel@tonic-gate 	    LB_HASHSIZE * sizeof (lb_me_t *));
8897c478bd9Sstevel@tonic-gate 	if (loghash == (lb_me_t **)NULL) {
8907c478bd9Sstevel@tonic-gate 		dprintf("Can't allocate loghash[] array.");
8917c478bd9Sstevel@tonic-gate 		return (0);
8927c478bd9Sstevel@tonic-gate 	}
8937c478bd9Sstevel@tonic-gate 	return (1);
8947c478bd9Sstevel@tonic-gate }
8957c478bd9Sstevel@tonic-gate 
8967c478bd9Sstevel@tonic-gate /*
8977c478bd9Sstevel@tonic-gate  * This function must remove all uncommitted entries (l->l_tid == curtid)
8987c478bd9Sstevel@tonic-gate  * from the log hash. Doing this, we implicitly delete pending cancellations
8997c478bd9Sstevel@tonic-gate  * as well.
9007c478bd9Sstevel@tonic-gate  * It uses the same hash walk algorithm as lufs_logscan_freecancel(). Only
9017c478bd9Sstevel@tonic-gate  * the check for entries that need to be removed is different.
9027c478bd9Sstevel@tonic-gate  */
9037c478bd9Sstevel@tonic-gate static void
lufs_logscan_postscan(void)9047c478bd9Sstevel@tonic-gate lufs_logscan_postscan(void)
9057c478bd9Sstevel@tonic-gate {
9067c478bd9Sstevel@tonic-gate 	lb_me_t	**lh, *l, *lnext;
9077c478bd9Sstevel@tonic-gate 	int	i;
9087c478bd9Sstevel@tonic-gate 
9097c478bd9Sstevel@tonic-gate 	for (i = 0; i < LB_HASHSIZE; i++) {
9107c478bd9Sstevel@tonic-gate 		lh = &loghash[i];
9117c478bd9Sstevel@tonic-gate 		l = *lh;
9127c478bd9Sstevel@tonic-gate 		do {
9137c478bd9Sstevel@tonic-gate 			if (l == (lb_me_t *)NULL)
9147c478bd9Sstevel@tonic-gate 				break;
9157c478bd9Sstevel@tonic-gate 			lnext = l->l_next;
9167c478bd9Sstevel@tonic-gate 			if (l->l_tid == curtid) {
9177c478bd9Sstevel@tonic-gate 				remlist(lh, l);
9187c478bd9Sstevel@tonic-gate 				bzero((caddr_t)l, sizeof (lb_me_t));
9197c478bd9Sstevel@tonic-gate 				l->l_next = lfreelist;
9207c478bd9Sstevel@tonic-gate 				lfreelist = l;
9217c478bd9Sstevel@tonic-gate 				if (*lh == (lb_me_t *)NULL)
9227c478bd9Sstevel@tonic-gate 					break;
9237c478bd9Sstevel@tonic-gate 				/*
9247c478bd9Sstevel@tonic-gate 				 * Just removed the hash head. In order not
9257c478bd9Sstevel@tonic-gate 				 * to terminate the while loop, respin chain
9267c478bd9Sstevel@tonic-gate 				 * walk for this hash chain.
9277c478bd9Sstevel@tonic-gate 				 */
9287c478bd9Sstevel@tonic-gate 				if (lnext == *lh) {
9297c478bd9Sstevel@tonic-gate 					i--;
9307c478bd9Sstevel@tonic-gate 					break;
9317c478bd9Sstevel@tonic-gate 				}
9327c478bd9Sstevel@tonic-gate 			} else {
9337c478bd9Sstevel@tonic-gate 				l->l_flags &= ~(LB_ISCANCELLED);
9347c478bd9Sstevel@tonic-gate 			}
9357c478bd9Sstevel@tonic-gate 			l = lnext;
9367c478bd9Sstevel@tonic-gate 		} while (l != *lh);
9377c478bd9Sstevel@tonic-gate 	}
9387c478bd9Sstevel@tonic-gate }
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate /*
9417c478bd9Sstevel@tonic-gate  * This function builds the log hash. It performs the same sequence
9427c478bd9Sstevel@tonic-gate  * of actions at logscan as the kernel ufs logging support:
9437c478bd9Sstevel@tonic-gate  * - Prepare the log for scanning by simulating a full log.
9447c478bd9Sstevel@tonic-gate  * - As long as sectors read from the log have contiguous idents, do:
9457c478bd9Sstevel@tonic-gate  *	read the delta header
9467c478bd9Sstevel@tonic-gate  *	add the delta to the logmap
9477c478bd9Sstevel@tonic-gate  *	skip over the contents to the start of the next delta header
9487c478bd9Sstevel@tonic-gate  * - After terminating the scan, remove uncommitted entries.
9497c478bd9Sstevel@tonic-gate  *
9507c478bd9Sstevel@tonic-gate  * This function cannot fail except if mapping the logbuffer area
9517c478bd9Sstevel@tonic-gate  * during lufs_logscan_prescan() fails. If there is a structural
9527c478bd9Sstevel@tonic-gate  * integrity problem and the on-disk log cannot be read, we'll
9537c478bd9Sstevel@tonic-gate  * treat this as the same situation as an uncommitted transaction
9547c478bd9Sstevel@tonic-gate  * at the end of the log (or, corner case of that, an empty log
9557c478bd9Sstevel@tonic-gate  * with no committed transactions in it at all).
9567c478bd9Sstevel@tonic-gate  *
9577c478bd9Sstevel@tonic-gate  */
9587c478bd9Sstevel@tonic-gate static int
lufs_logscan(void)9597c478bd9Sstevel@tonic-gate lufs_logscan(void)
9607c478bd9Sstevel@tonic-gate {
9617c478bd9Sstevel@tonic-gate 	int32_t		addr;
9627c478bd9Sstevel@tonic-gate 	struct delta	d;
9637c478bd9Sstevel@tonic-gate 
9647c478bd9Sstevel@tonic-gate 	if (!lufs_logscan_prescan())
9657c478bd9Sstevel@tonic-gate 		return (LOG_IS_ERRORED);
9667c478bd9Sstevel@tonic-gate 
9677c478bd9Sstevel@tonic-gate 	addr = odi.od_head_lof;
9687c478bd9Sstevel@tonic-gate 
9697c478bd9Sstevel@tonic-gate 	/*
9707c478bd9Sstevel@tonic-gate 	 * Note that addr == od_tail_lof means a completely filled
9717c478bd9Sstevel@tonic-gate 	 * log. This almost never happens, so the common exit path
9727c478bd9Sstevel@tonic-gate 	 * from this loop is via one of the 'break's.
9737c478bd9Sstevel@tonic-gate 	 */
9747c478bd9Sstevel@tonic-gate 	while (addr != odi.od_tail_lof) {
9757c478bd9Sstevel@tonic-gate 		if (!lufs_logscan_read(&addr, &d))
9767c478bd9Sstevel@tonic-gate 			break;
9777c478bd9Sstevel@tonic-gate 		if (!lufs_logscan_addmap(&addr, &d))
9787c478bd9Sstevel@tonic-gate 			return (LOG_IS_ERRORED);
9797c478bd9Sstevel@tonic-gate 		if (!lufs_logscan_skip(&addr, &d))
9807c478bd9Sstevel@tonic-gate 			break;
9817c478bd9Sstevel@tonic-gate 	}
9827c478bd9Sstevel@tonic-gate 
9837c478bd9Sstevel@tonic-gate 	lufs_logscan_postscan();
9847c478bd9Sstevel@tonic-gate 	/*
9857c478bd9Sstevel@tonic-gate 	 * Check whether the log contains data, and if so whether
9867c478bd9Sstevel@tonic-gate 	 * it contains committed data.
9877c478bd9Sstevel@tonic-gate 	 */
9887c478bd9Sstevel@tonic-gate 	if (addr == odi.od_head_lof || curtid == 0) {
9897c478bd9Sstevel@tonic-gate 		return (LOG_IS_EMPTY);
9907c478bd9Sstevel@tonic-gate 	}
9917c478bd9Sstevel@tonic-gate 	return (LOG_IS_OK);
9927c478bd9Sstevel@tonic-gate }
9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate /*
9957c478bd9Sstevel@tonic-gate  * A metadata block was read from disk. Check whether the logmap
9967c478bd9Sstevel@tonic-gate  * has a delta against this byte range, and if so read it in, since
9977c478bd9Sstevel@tonic-gate  * the data in the log is more recent than what was read from other
9987c478bd9Sstevel@tonic-gate  * places on the disk.
9997c478bd9Sstevel@tonic-gate  */
10007c478bd9Sstevel@tonic-gate void
lufs_merge_deltas(fileid_t * fp)10017c478bd9Sstevel@tonic-gate lufs_merge_deltas(fileid_t *fp)
10027c478bd9Sstevel@tonic-gate {
10037c478bd9Sstevel@tonic-gate 	int		nb;
10047c478bd9Sstevel@tonic-gate 	int64_t		bof;
10057c478bd9Sstevel@tonic-gate 	lb_me_t		**lh, *l;
10067c478bd9Sstevel@tonic-gate 	int32_t		skip;
10077c478bd9Sstevel@tonic-gate 
10087c478bd9Sstevel@tonic-gate 	/*
10097c478bd9Sstevel@tonic-gate 	 * No logmap: Empty log. Nothing to do here.
10107c478bd9Sstevel@tonic-gate 	 */
10117c478bd9Sstevel@tonic-gate 	if (!ufs_is_lufs || logbuffer == (caddr_t)NULL)
10127c478bd9Sstevel@tonic-gate 		return;
10137c478bd9Sstevel@tonic-gate 
10147c478bd9Sstevel@tonic-gate 	bof = ldbtob(fp->fi_blocknum);
10157c478bd9Sstevel@tonic-gate 	nb = fp->fi_count;
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate 	/*
10187c478bd9Sstevel@tonic-gate 	 * Search the log hash.
10197c478bd9Sstevel@tonic-gate 	 * Merge deltas if an overlap is found.
10207c478bd9Sstevel@tonic-gate 	 */
10217c478bd9Sstevel@tonic-gate 
10227c478bd9Sstevel@tonic-gate 	lh = &loghash[LB_HASHFUNC(bof)];
10237c478bd9Sstevel@tonic-gate 
10247c478bd9Sstevel@tonic-gate 	if (*lh == (lb_me_t *)NULL)
10257c478bd9Sstevel@tonic-gate 		return;
10267c478bd9Sstevel@tonic-gate 
10277c478bd9Sstevel@tonic-gate 	l = *lh;
10287c478bd9Sstevel@tonic-gate 
10297c478bd9Sstevel@tonic-gate 	do {
10307c478bd9Sstevel@tonic-gate 		l = l->l_prev;
10317c478bd9Sstevel@tonic-gate 		if (OVERLAP(l->l_mof, l->l_nb, bof, nb)) {
10327c478bd9Sstevel@tonic-gate 			/*
10337c478bd9Sstevel@tonic-gate 			 * Found a delta in the log hash which overlaps
10347c478bd9Sstevel@tonic-gate 			 * with the current metadata block. Read the
10357c478bd9Sstevel@tonic-gate 			 * actual delta payload from the on-disk log
10367c478bd9Sstevel@tonic-gate 			 * directly into the file buffer.
10377c478bd9Sstevel@tonic-gate 			 */
10387c478bd9Sstevel@tonic-gate 			if (l->l_typ != DT_ABZERO) {
10397c478bd9Sstevel@tonic-gate 				/*
10407c478bd9Sstevel@tonic-gate 				 * We have to actually read this part of the
10417c478bd9Sstevel@tonic-gate 				 * log as it could contain a sector trailer, or
10427c478bd9Sstevel@tonic-gate 				 * wrap around the end of the log.
10437c478bd9Sstevel@tonic-gate 				 * If it did, the second offset generation would
10447c478bd9Sstevel@tonic-gate 				 * be incorrect if we'd started at l->l_lof.
10457c478bd9Sstevel@tonic-gate 				 */
10467c478bd9Sstevel@tonic-gate 				if (!(skip = lufs_read_log(l->l_lof, NULL,
10477c478bd9Sstevel@tonic-gate 				    MAX(bof - l->l_mof, 0))))
10487c478bd9Sstevel@tonic-gate 					dprintf("scan/merge error, pre-skip\n");
10497c478bd9Sstevel@tonic-gate 				if (!(skip = lufs_read_log(skip,
10507c478bd9Sstevel@tonic-gate 				    fp->fi_memp + MAX(l->l_mof - bof, 0),
10517c478bd9Sstevel@tonic-gate 				    MIN(l->l_mof + l->l_nb, bof + nb) -
10527c478bd9Sstevel@tonic-gate 				    MAX(l->l_mof, bof))))
10537c478bd9Sstevel@tonic-gate 					dprintf("scan/merge error, merge\n");
10547c478bd9Sstevel@tonic-gate 			} else {
10557c478bd9Sstevel@tonic-gate 				/*
10567c478bd9Sstevel@tonic-gate 				 * DT_ABZERO requires no disk access, just
10577c478bd9Sstevel@tonic-gate 				 * clear the byte range which overlaps with
10587c478bd9Sstevel@tonic-gate 				 * the delta.
10597c478bd9Sstevel@tonic-gate 				 */
10607c478bd9Sstevel@tonic-gate 				bzero(fp->fi_memp + MAX(l->l_mof - bof, 0),
10617c478bd9Sstevel@tonic-gate 				    MIN(l->l_mof + l->l_nb, bof + nb) -
10627c478bd9Sstevel@tonic-gate 				    MAX(l->l_mof, bof));
10637c478bd9Sstevel@tonic-gate 			}
10647c478bd9Sstevel@tonic-gate 		}
10657c478bd9Sstevel@tonic-gate 	} while (l->l_prev != (*lh)->l_prev);
10667c478bd9Sstevel@tonic-gate 
10677c478bd9Sstevel@tonic-gate 	printf("*\b");
10687c478bd9Sstevel@tonic-gate }
10697c478bd9Sstevel@tonic-gate 
10707c478bd9Sstevel@tonic-gate void
lufs_closeall(void)10717c478bd9Sstevel@tonic-gate lufs_closeall(void)
10727c478bd9Sstevel@tonic-gate {
10737c478bd9Sstevel@tonic-gate 	if (ufs_is_lufs) {
10747c478bd9Sstevel@tonic-gate 		bkmem_free((char *)eb, logfp->fi_devp->un_fs.di_fs.fs_bsize);
10757c478bd9Sstevel@tonic-gate 		bkmem_free((char *)logfp, sizeof (fileid_t));
10767c478bd9Sstevel@tonic-gate 		eb = (extent_block_t *)NULL;
10777c478bd9Sstevel@tonic-gate 		bzero((caddr_t)&odi, sizeof (ml_odunit_t));
10787c478bd9Sstevel@tonic-gate 		logfp = (fileid_t *)NULL;
10797c478bd9Sstevel@tonic-gate 		lufs_free_logbuf();
10807c478bd9Sstevel@tonic-gate 		ufs_is_lufs = 0;
10817c478bd9Sstevel@tonic-gate 	}
10827c478bd9Sstevel@tonic-gate }
1083