/*-
 * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
 * Copyright 2015 Toomas Soome <tsoome@me.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>

/*
 * Simple hashed block cache
 */

#include <sys/stdint.h>

#include <stand.h>
#include <string.h>
#include <strings.h>

#include "bootstrap.h"

/*
 * Debug tracing.  Uncomment the define below to make DEBUG() print its
 * message, prefixed with the name of the calling function and followed by
 * a newline.  When BCACHE_DEBUG is not defined, DEBUG() expands to nothing
 * and its arguments are not evaluated.
 */
/* #define BCACHE_DEBUG */

#ifdef BCACHE_DEBUG
# define DEBUG(fmt, ...)	printf("%s: " fmt "\n" , __func__ , ## __VA_ARGS__)
#else
# define DEBUG(fmt, ...)
#endif

514a5d661aSToomas Soome struct bcachectl
524a5d661aSToomas Soome {
534a5d661aSToomas Soome     daddr_t	bc_blkno;
544a5d661aSToomas Soome     int		bc_count;
554a5d661aSToomas Soome };
564a5d661aSToomas Soome 
574a5d661aSToomas Soome /*
584a5d661aSToomas Soome  * bcache per device node. cache is allocated on device first open and freed
594a5d661aSToomas Soome  * on last close, to save memory. The issue there is the size; biosdisk
604a5d661aSToomas Soome  * supports up to 31 (0x1f) devices. Classic setup would use single disk
614a5d661aSToomas Soome  * to boot from, but this has changed with zfs.
624a5d661aSToomas Soome  */
634a5d661aSToomas Soome struct bcache {
644a5d661aSToomas Soome     struct bcachectl	*bcache_ctl;
654a5d661aSToomas Soome     caddr_t		bcache_data;
66f2e61c99SToomas Soome     size_t		bcache_nblks;
674a5d661aSToomas Soome     size_t		ra;
684a5d661aSToomas Soome };
694a5d661aSToomas Soome 
704a5d661aSToomas Soome static u_int bcache_total_nblks;	/* set by bcache_init */
714a5d661aSToomas Soome static u_int bcache_blksize;		/* set by bcache_init */
724a5d661aSToomas Soome static u_int bcache_numdev;		/* set by bcache_add_dev */
734a5d661aSToomas Soome /* statistics */
744a5d661aSToomas Soome static u_int bcache_units;	/* number of devices with cache */
754a5d661aSToomas Soome static u_int bcache_unit_nblks;	/* nblocks per unit */
764a5d661aSToomas Soome static u_int bcache_hits;
774a5d661aSToomas Soome static u_int bcache_misses;
784a5d661aSToomas Soome static u_int bcache_ops;
794a5d661aSToomas Soome static u_int bcache_bypasses;
804a5d661aSToomas Soome static u_int bcache_bcount;
814a5d661aSToomas Soome static u_int bcache_rablks;
824a5d661aSToomas Soome 
/*
 * BHASH maps a block number to its cache slot by masking with
 * (bcache_nblks - 1); this only works when bcache_nblks is a power of 2.
 *
 * BCACHE_LOOKUP evaluates to non-zero when the block is NOT in the cache
 * (a miss) and to zero on a hit.
 */
#define	BHASH(bc, blkno)	((blkno) & ((bc)->bcache_nblks - 1))
#define	BCACHE_LOOKUP(bc, blkno)	\
	((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
#define	BCACHE_READAHEAD	256	/* upper bound for the ra hint */
#define	BCACHE_MINREADAHEAD	32	/* lower bound for the ra hint */
#define	BCACHE_MARKER		0xdeadbeef	/* guard word after cache data */

904a5d661aSToomas Soome static void	bcache_invalidate(struct bcache *bc, daddr_t blkno);
914a5d661aSToomas Soome static void	bcache_insert(struct bcache *bc, daddr_t blkno);
924a5d661aSToomas Soome static void	bcache_free_instance(struct bcache *bc);
934a5d661aSToomas Soome 
944a5d661aSToomas Soome /*
954a5d661aSToomas Soome  * Initialise the cache for (nblks) of (bsize).
964a5d661aSToomas Soome  */
974a5d661aSToomas Soome void
bcache_init(size_t nblks,size_t bsize)98f2e61c99SToomas Soome bcache_init(size_t nblks, size_t bsize)
994a5d661aSToomas Soome {
1004a5d661aSToomas Soome     /* set up control data */
1014a5d661aSToomas Soome     bcache_total_nblks = nblks;
1024a5d661aSToomas Soome     bcache_blksize = bsize;
1034a5d661aSToomas Soome }
1044a5d661aSToomas Soome 
1054a5d661aSToomas Soome /*
1064a5d661aSToomas Soome  * add number of devices to bcache. we have to divide cache space
1074a5d661aSToomas Soome  * between the devices, so bcache_add_dev() can be used to set up the
1084a5d661aSToomas Soome  * number. The issue is, we need to get the number before actual allocations.
1094a5d661aSToomas Soome  * bcache_add_dev() is supposed to be called from device init() call, so the
1104a5d661aSToomas Soome  * assumption is, devsw dv_init is called for plain devices first, and
1114a5d661aSToomas Soome  * for zfs, last.
1124a5d661aSToomas Soome  */
1134a5d661aSToomas Soome void
bcache_add_dev(int devices)1144a5d661aSToomas Soome bcache_add_dev(int devices)
1154a5d661aSToomas Soome {
1164a5d661aSToomas Soome     bcache_numdev += devices;
1174a5d661aSToomas Soome }
1184a5d661aSToomas Soome 
1194a5d661aSToomas Soome void *
bcache_allocate(void)1204a5d661aSToomas Soome bcache_allocate(void)
1214a5d661aSToomas Soome {
1224a5d661aSToomas Soome     u_int i;
1234a5d661aSToomas Soome     struct bcache *bc = malloc(sizeof (struct bcache));
1244a5d661aSToomas Soome     int disks = bcache_numdev;
125f2e61c99SToomas Soome     uint32_t *marker;
1264a5d661aSToomas Soome 
1274a5d661aSToomas Soome     if (disks == 0)
1284a5d661aSToomas Soome 	disks = 1;	/* safe guard */
1294a5d661aSToomas Soome 
1304a5d661aSToomas Soome     if (bc == NULL) {
1314a5d661aSToomas Soome 	errno = ENOMEM;
1324a5d661aSToomas Soome 	return (bc);
1334a5d661aSToomas Soome     }
1344a5d661aSToomas Soome 
1354a5d661aSToomas Soome     /*
1364a5d661aSToomas Soome      * the bcache block count must be power of 2 for hash function
1374a5d661aSToomas Soome      */
1384a5d661aSToomas Soome     i = fls(disks) - 1;		/* highbit - 1 */
1394a5d661aSToomas Soome     if (disks > (1 << i))	/* next power of 2 */
1404a5d661aSToomas Soome 	i++;
1414a5d661aSToomas Soome 
1424a5d661aSToomas Soome     bc->bcache_nblks = bcache_total_nblks >> i;
1434a5d661aSToomas Soome     bcache_unit_nblks = bc->bcache_nblks;
144f2e61c99SToomas Soome     bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
145f2e61c99SToomas Soome 	sizeof (uint32_t));
1464a5d661aSToomas Soome     if (bc->bcache_data == NULL) {
1474a5d661aSToomas Soome 	/* dont error out yet. fall back to 32 blocks and try again */
1484a5d661aSToomas Soome 	bc->bcache_nblks = 32;
149f2e61c99SToomas Soome 	bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
150f2e61c99SToomas Soome 	    sizeof (uint32_t));
1514a5d661aSToomas Soome     }
1524a5d661aSToomas Soome 
1534a5d661aSToomas Soome     bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
1544a5d661aSToomas Soome 
1554a5d661aSToomas Soome     if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
1564a5d661aSToomas Soome 	bcache_free_instance(bc);
1574a5d661aSToomas Soome 	errno = ENOMEM;
1584a5d661aSToomas Soome 	return (NULL);
1594a5d661aSToomas Soome     }
160f2e61c99SToomas Soome     /* Insert cache end marker. */
161f2e61c99SToomas Soome     marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
162f2e61c99SToomas Soome     *marker = BCACHE_MARKER;
1634a5d661aSToomas Soome 
1644a5d661aSToomas Soome     /* Flush the cache */
1654a5d661aSToomas Soome     for (i = 0; i < bc->bcache_nblks; i++) {
1664a5d661aSToomas Soome 	bc->bcache_ctl[i].bc_count = -1;
1674a5d661aSToomas Soome 	bc->bcache_ctl[i].bc_blkno = -1;
1684a5d661aSToomas Soome     }
1694a5d661aSToomas Soome     bcache_units++;
1704a5d661aSToomas Soome     bc->ra = BCACHE_READAHEAD;	/* optimistic read ahead */
1714a5d661aSToomas Soome     return (bc);
1724a5d661aSToomas Soome }
1734a5d661aSToomas Soome 
1744a5d661aSToomas Soome void
bcache_free(void * cache)1754a5d661aSToomas Soome bcache_free(void *cache)
1764a5d661aSToomas Soome {
1774a5d661aSToomas Soome     struct bcache *bc = cache;
1784a5d661aSToomas Soome 
1794a5d661aSToomas Soome     if (bc == NULL)
1804a5d661aSToomas Soome 	return;
1814a5d661aSToomas Soome 
1824a5d661aSToomas Soome     bcache_free_instance(bc);
1834a5d661aSToomas Soome     bcache_units--;
1844a5d661aSToomas Soome }
1854a5d661aSToomas Soome 
1864a5d661aSToomas Soome /*
1874a5d661aSToomas Soome  * Handle a write request; write directly to the disk, and populate the
1884a5d661aSToomas Soome  * cache with the new values.
1894a5d661aSToomas Soome  */
1904a5d661aSToomas Soome static int
write_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)191976852c7SToomas Soome write_strategy(void *devdata, int rw, daddr_t blk, size_t size,
192976852c7SToomas Soome     char *buf, size_t *rsize)
1934a5d661aSToomas Soome {
1944a5d661aSToomas Soome     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
1954a5d661aSToomas Soome     struct bcache		*bc = dd->dv_cache;
1964a5d661aSToomas Soome     daddr_t			i, nblk;
1974a5d661aSToomas Soome 
1984a5d661aSToomas Soome     nblk = size / bcache_blksize;
1994a5d661aSToomas Soome 
2004a5d661aSToomas Soome     /* Invalidate the blocks being written */
2014a5d661aSToomas Soome     for (i = 0; i < nblk; i++) {
2024a5d661aSToomas Soome 	bcache_invalidate(bc, blk + i);
2034a5d661aSToomas Soome     }
2044a5d661aSToomas Soome 
2054a5d661aSToomas Soome     /* Write the blocks */
206976852c7SToomas Soome     return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
2074a5d661aSToomas Soome }
2084a5d661aSToomas Soome 
2094a5d661aSToomas Soome /*
2104a5d661aSToomas Soome  * Handle a read request; fill in parts of the request that can
2114a5d661aSToomas Soome  * be satisfied by the cache, use the supplied strategy routine to do
2124a5d661aSToomas Soome  * device I/O and then use the I/O results to populate the cache.
2134a5d661aSToomas Soome  */
2144a5d661aSToomas Soome static int
read_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)215976852c7SToomas Soome read_strategy(void *devdata, int rw, daddr_t blk, size_t size,
216976852c7SToomas Soome     char *buf, size_t *rsize)
2174a5d661aSToomas Soome {
2184a5d661aSToomas Soome     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
2194a5d661aSToomas Soome     struct bcache		*bc = dd->dv_cache;
2204a5d661aSToomas Soome     size_t			i, nblk, p_size, r_size, complete, ra;
2214a5d661aSToomas Soome     int				result;
2224a5d661aSToomas Soome     daddr_t			p_blk;
2234a5d661aSToomas Soome     caddr_t			p_buf;
224f2e61c99SToomas Soome     uint32_t			*marker;
2254a5d661aSToomas Soome 
2264a5d661aSToomas Soome     if (bc == NULL) {
2274a5d661aSToomas Soome 	errno = ENODEV;
2284a5d661aSToomas Soome 	return (-1);
2294a5d661aSToomas Soome     }
230f2e61c99SToomas Soome     marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
2314a5d661aSToomas Soome 
2324a5d661aSToomas Soome     if (rsize != NULL)
2334a5d661aSToomas Soome 	*rsize = 0;
2344a5d661aSToomas Soome 
2354a5d661aSToomas Soome     nblk = size / bcache_blksize;
236976852c7SToomas Soome     if (nblk == 0 && size != 0)
2374a5d661aSToomas Soome 	nblk++;
2384a5d661aSToomas Soome     result = 0;
2394a5d661aSToomas Soome     complete = 1;
2404a5d661aSToomas Soome 
2414a5d661aSToomas Soome     /* Satisfy any cache hits up front, break on first miss */
2424a5d661aSToomas Soome     for (i = 0; i < nblk; i++) {
2434a5d661aSToomas Soome 	if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) {
2444a5d661aSToomas Soome 	    bcache_misses += (nblk - i);
2454a5d661aSToomas Soome 	    complete = 0;
2464a5d661aSToomas Soome 	    if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD)
2474a5d661aSToomas Soome 		bc->ra >>= 1;	/* reduce read ahead */
2484a5d661aSToomas Soome 	    break;
2494a5d661aSToomas Soome 	} else {
2504a5d661aSToomas Soome 	    bcache_hits++;
2514a5d661aSToomas Soome 	}
2524a5d661aSToomas Soome     }
2534a5d661aSToomas Soome 
2544a5d661aSToomas Soome    if (complete) {	/* whole set was in cache, return it */
2554a5d661aSToomas Soome 	if (bc->ra < BCACHE_READAHEAD)
2564a5d661aSToomas Soome 		bc->ra <<= 1;	/* increase read ahead */
257976852c7SToomas Soome 	bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)), buf, size);
2584a5d661aSToomas Soome 	goto done;
2594a5d661aSToomas Soome    }
2604a5d661aSToomas Soome 
2614a5d661aSToomas Soome     /*
2624a5d661aSToomas Soome      * Fill in any misses. From check we have i pointing to first missing
2634a5d661aSToomas Soome      * block, read in all remaining blocks + readahead.
2644a5d661aSToomas Soome      * We have space at least for nblk - i before bcache wraps.
2654a5d661aSToomas Soome      */
2664a5d661aSToomas Soome     p_blk = blk + i;
2674a5d661aSToomas Soome     p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk));
2684a5d661aSToomas Soome     r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */
2694a5d661aSToomas Soome 
2704a5d661aSToomas Soome     p_size = MIN(r_size, nblk - i);	/* read at least those blocks */
2714a5d661aSToomas Soome 
272f2e61c99SToomas Soome     /*
273f2e61c99SToomas Soome      * The read ahead size setup.
274f2e61c99SToomas Soome      * While the read ahead can save us IO, it also can complicate things:
275f2e61c99SToomas Soome      * 1. We do not want to read ahead by wrapping around the
276f2e61c99SToomas Soome      *	bcache end - this would complicate the cache management.
277f2e61c99SToomas Soome      * 2. We are using bc->ra as dynamic hint for read ahead size,
278f2e61c99SToomas Soome      *	detected cache hits will increase the read-ahead block count, and
279f2e61c99SToomas Soome      *	misses will decrease, see the code above.
280f2e61c99SToomas Soome      * 3. The bcache is sized by 512B blocks, however, the underlying device
281f2e61c99SToomas Soome      *	may have a larger sector size, and we should perform the IO by
282f2e61c99SToomas Soome      *	taking into account these larger sector sizes. We could solve this by
283f2e61c99SToomas Soome      *	passing the sector size to bcache_allocate(), or by using ioctl(), but
284f2e61c99SToomas Soome      *	in this version we are using the constant, 16 blocks, and are rounding
285f2e61c99SToomas Soome      *	read ahead block count down to multiple of 16.
286f2e61c99SToomas Soome      *	Using the constant has two reasons, we are not entirely sure if the
287f2e61c99SToomas Soome      *	BIOS disk interface is providing the correct value for sector size.
288f2e61c99SToomas Soome      *	And secondly, this way we get the most conservative setup for the ra.
289f2e61c99SToomas Soome      *
290f2e61c99SToomas Soome      * The selection of multiple of 16 blocks (8KB) is quite arbitrary, however,
291f2e61c99SToomas Soome      * we want to cover CDs (2K) and 4K disks.
292f2e61c99SToomas Soome      * bcache_allocate() will always fall back to a minimum of 32 blocks.
293f2e61c99SToomas Soome      * Our choice of 16 read ahead blocks will always fit inside the bcache.
294f2e61c99SToomas Soome      */
295f2e61c99SToomas Soome 
296*a5a5c3b7SToomas Soome     if ((rw & F_NORA) == F_NORA)
297*a5a5c3b7SToomas Soome 	ra = 0;
298*a5a5c3b7SToomas Soome     else
2994a5d661aSToomas Soome 	ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
300*a5a5c3b7SToomas Soome 
301f2e61c99SToomas Soome     if (ra != 0 && ra != bc->bcache_nblks) { /* do we have RA space? */
302f2e61c99SToomas Soome 	ra = MIN(bc->ra, ra - 1);
303f2e61c99SToomas Soome 	ra = rounddown(ra, 16);		/* multiple of 16 blocks */
3044a5d661aSToomas Soome 	p_size += ra;
3054a5d661aSToomas Soome     }
3064a5d661aSToomas Soome 
3074a5d661aSToomas Soome     /* invalidate bcache */
3084a5d661aSToomas Soome     for (i = 0; i < p_size; i++) {
3094a5d661aSToomas Soome 	bcache_invalidate(bc, p_blk + i);
3104a5d661aSToomas Soome     }
3114a5d661aSToomas Soome 
3124a5d661aSToomas Soome     r_size = 0;
3134a5d661aSToomas Soome     /*
3144a5d661aSToomas Soome      * with read-ahead, it may happen we are attempting to read past
3154a5d661aSToomas Soome      * disk end, as bcache has no information about disk size.
3164a5d661aSToomas Soome      * in such case we should get partial read if some blocks can be
3174a5d661aSToomas Soome      * read or error, if no blocks can be read.
3184a5d661aSToomas Soome      * in either case we should return the data in bcache and only
3194a5d661aSToomas Soome      * return error if there is no data.
3204a5d661aSToomas Soome      */
321*a5a5c3b7SToomas Soome     rw &= F_MASK;
322976852c7SToomas Soome     result = dd->dv_strategy(dd->dv_devdata, rw, p_blk,
3234a5d661aSToomas Soome 	p_size * bcache_blksize, p_buf, &r_size);
3244a5d661aSToomas Soome 
3254a5d661aSToomas Soome     r_size /= bcache_blksize;
3264a5d661aSToomas Soome     for (i = 0; i < r_size; i++)
3274a5d661aSToomas Soome 	bcache_insert(bc, p_blk + i);
3284a5d661aSToomas Soome 
3294a5d661aSToomas Soome     /* update ra statistics */
3304a5d661aSToomas Soome     if (r_size != 0) {
3314a5d661aSToomas Soome 	if (r_size < p_size)
3324a5d661aSToomas Soome 	    bcache_rablks += (p_size - r_size);
3334a5d661aSToomas Soome 	else
3344a5d661aSToomas Soome 	    bcache_rablks += ra;
3354a5d661aSToomas Soome     }
3364a5d661aSToomas Soome 
3374a5d661aSToomas Soome     /* check how much data can we copy */
3384a5d661aSToomas Soome     for (i = 0; i < nblk; i++) {
3394a5d661aSToomas Soome 	if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i)))
3404a5d661aSToomas Soome 	    break;
3414a5d661aSToomas Soome     }
3424a5d661aSToomas Soome 
3434a5d661aSToomas Soome     if (size > i * bcache_blksize)
3444a5d661aSToomas Soome 	size = i * bcache_blksize;
3454a5d661aSToomas Soome 
3464a5d661aSToomas Soome     if (size != 0) {
347976852c7SToomas Soome 	bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)), buf, size);
3484a5d661aSToomas Soome 	result = 0;
3494a5d661aSToomas Soome     }
3504a5d661aSToomas Soome 
351f2e61c99SToomas Soome     if (*marker != BCACHE_MARKER) {
352f2e61c99SToomas Soome 	printf("BUG: bcache corruption detected: nblks: %zu p_blk: %lu, "
353f2e61c99SToomas Soome 	    "p_size: %zu, ra: %zu\n", bc->bcache_nblks,
354f2e61c99SToomas Soome 	    (long unsigned)BHASH(bc, p_blk), p_size, ra);
355f2e61c99SToomas Soome     }
356f2e61c99SToomas Soome 
3574a5d661aSToomas Soome  done:
3584a5d661aSToomas Soome     if ((result == 0) && (rsize != NULL))
3594a5d661aSToomas Soome 	*rsize = size;
3604a5d661aSToomas Soome     return(result);
3614a5d661aSToomas Soome }
3624a5d661aSToomas Soome 
3634a5d661aSToomas Soome /*
3644a5d661aSToomas Soome  * Requests larger than 1/2 cache size will be bypassed and go
3654a5d661aSToomas Soome  * directly to the disk.  XXX tune this.
3664a5d661aSToomas Soome  */
3674a5d661aSToomas Soome int
bcache_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)368976852c7SToomas Soome bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size,
369976852c7SToomas Soome     char *buf, size_t *rsize)
3704a5d661aSToomas Soome {
3714a5d661aSToomas Soome     struct bcache_devdata	*dd = (struct bcache_devdata *)devdata;
3724a5d661aSToomas Soome     struct bcache		*bc = dd->dv_cache;
3734a5d661aSToomas Soome     u_int bcache_nblks = 0;
3744a5d661aSToomas Soome     int nblk, cblk, ret;
3754a5d661aSToomas Soome     size_t csize, isize, total;
3764a5d661aSToomas Soome 
3774a5d661aSToomas Soome     bcache_ops++;
3784a5d661aSToomas Soome 
3794a5d661aSToomas Soome     if (bc != NULL)
3804a5d661aSToomas Soome 	bcache_nblks = bc->bcache_nblks;
3814a5d661aSToomas Soome 
3824a5d661aSToomas Soome     /* bypass large requests, or when the cache is inactive */
3834a5d661aSToomas Soome     if (bc == NULL ||
384976852c7SToomas Soome 	((size * 2 / bcache_blksize) > bcache_nblks)) {
385f2e61c99SToomas Soome 	DEBUG("bypass %zu from %qu", size / bcache_blksize, blk);
3864a5d661aSToomas Soome 	bcache_bypasses++;
387*a5a5c3b7SToomas Soome 	rw &= F_MASK;
388976852c7SToomas Soome 	return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
3894a5d661aSToomas Soome     }
3904a5d661aSToomas Soome 
391*a5a5c3b7SToomas Soome     switch (rw & F_MASK) {
3924a5d661aSToomas Soome     case F_READ:
3934a5d661aSToomas Soome 	nblk = size / bcache_blksize;
394976852c7SToomas Soome 	if (size != 0 && nblk == 0)
3954a5d661aSToomas Soome 	    nblk++;	/* read at least one block */
3964a5d661aSToomas Soome 
3974a5d661aSToomas Soome 	ret = 0;
3984a5d661aSToomas Soome 	total = 0;
3994a5d661aSToomas Soome 	while(size) {
4004a5d661aSToomas Soome 	    cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */
4014a5d661aSToomas Soome 	    cblk = MIN(cblk, nblk);
4024a5d661aSToomas Soome 
4034a5d661aSToomas Soome 	    if (size <= bcache_blksize)
4044a5d661aSToomas Soome 		csize = size;
405976852c7SToomas Soome 	    else
4064a5d661aSToomas Soome 		csize = cblk * bcache_blksize;
4074a5d661aSToomas Soome 
408976852c7SToomas Soome 	    ret = read_strategy(devdata, rw, blk, csize, buf+total, &isize);
4094a5d661aSToomas Soome 
4104a5d661aSToomas Soome 	    /*
4114a5d661aSToomas Soome 	     * we may have error from read ahead, if we have read some data
4124a5d661aSToomas Soome 	     * return partial read.
4134a5d661aSToomas Soome 	     */
4144a5d661aSToomas Soome 	    if (ret != 0 || isize == 0) {
4154a5d661aSToomas Soome 		if (total != 0)
4164a5d661aSToomas Soome 		    ret = 0;
4174a5d661aSToomas Soome 		break;
4184a5d661aSToomas Soome 	    }
419976852c7SToomas Soome 	    blk += isize / bcache_blksize;
4204a5d661aSToomas Soome 	    total += isize;
4214a5d661aSToomas Soome 	    size -= isize;
4224a5d661aSToomas Soome 	    nblk = size / bcache_blksize;
4234a5d661aSToomas Soome 	}
4244a5d661aSToomas Soome 
4254a5d661aSToomas Soome 	if (rsize)
4264a5d661aSToomas Soome 	    *rsize = total;
4274a5d661aSToomas Soome 
4284a5d661aSToomas Soome 	return (ret);
4294a5d661aSToomas Soome     case F_WRITE:
430*a5a5c3b7SToomas Soome 	return write_strategy(devdata, F_WRITE, blk, size, buf, rsize);
4314a5d661aSToomas Soome     }
4324a5d661aSToomas Soome     return -1;
4334a5d661aSToomas Soome }
4344a5d661aSToomas Soome 
4354a5d661aSToomas Soome /*
4364a5d661aSToomas Soome  * Free allocated bcache instance
4374a5d661aSToomas Soome  */
4384a5d661aSToomas Soome static void
bcache_free_instance(struct bcache * bc)4394a5d661aSToomas Soome bcache_free_instance(struct bcache *bc)
4404a5d661aSToomas Soome {
4414a5d661aSToomas Soome     if (bc != NULL) {
4424a5d661aSToomas Soome 	if (bc->bcache_ctl)
4434a5d661aSToomas Soome 	    free(bc->bcache_ctl);
4444a5d661aSToomas Soome 	if (bc->bcache_data)
4454a5d661aSToomas Soome 	    free(bc->bcache_data);
4464a5d661aSToomas Soome 	free(bc);
4474a5d661aSToomas Soome     }
4484a5d661aSToomas Soome }
4494a5d661aSToomas Soome 
4504a5d661aSToomas Soome /*
4514a5d661aSToomas Soome  * Insert a block into the cache.
4524a5d661aSToomas Soome  */
4534a5d661aSToomas Soome static void
bcache_insert(struct bcache * bc,daddr_t blkno)4544a5d661aSToomas Soome bcache_insert(struct bcache *bc, daddr_t blkno)
4554a5d661aSToomas Soome {
4564a5d661aSToomas Soome     u_int	cand;
4574a5d661aSToomas Soome 
4584a5d661aSToomas Soome     cand = BHASH(bc, blkno);
4594a5d661aSToomas Soome 
4604a5d661aSToomas Soome     DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount);
4614a5d661aSToomas Soome     bc->bcache_ctl[cand].bc_blkno = blkno;
4624a5d661aSToomas Soome     bc->bcache_ctl[cand].bc_count = bcache_bcount++;
4634a5d661aSToomas Soome }
4644a5d661aSToomas Soome 
4654a5d661aSToomas Soome /*
4664a5d661aSToomas Soome  * Invalidate a block from the cache.
4674a5d661aSToomas Soome  */
4684a5d661aSToomas Soome static void
bcache_invalidate(struct bcache * bc,daddr_t blkno)4694a5d661aSToomas Soome bcache_invalidate(struct bcache *bc, daddr_t blkno)
4704a5d661aSToomas Soome {
4714a5d661aSToomas Soome     u_int	i;
4724a5d661aSToomas Soome 
4734a5d661aSToomas Soome     i = BHASH(bc, blkno);
4744a5d661aSToomas Soome     if (bc->bcache_ctl[i].bc_blkno == blkno) {
4754a5d661aSToomas Soome 	bc->bcache_ctl[i].bc_count = -1;
4764a5d661aSToomas Soome 	bc->bcache_ctl[i].bc_blkno = -1;
4774a5d661aSToomas Soome 	DEBUG("invalidate blk %llu", blkno);
4784a5d661aSToomas Soome     }
4794a5d661aSToomas Soome }
4804a5d661aSToomas Soome 
4814a5d661aSToomas Soome #ifndef BOOT2
4824a5d661aSToomas Soome COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache);
4834a5d661aSToomas Soome 
4844a5d661aSToomas Soome static int
command_bcache(int argc,char * argv[]__attribute ((unused)))4854a5d661aSToomas Soome command_bcache(int argc, char *argv[] __attribute((unused)))
4864a5d661aSToomas Soome {
4874a5d661aSToomas Soome     if (argc != 1) {
4884a5d661aSToomas Soome 	command_errmsg = "wrong number of arguments";
4894a5d661aSToomas Soome 	return(CMD_ERROR);
4904a5d661aSToomas Soome     }
4914a5d661aSToomas Soome 
4924a5d661aSToomas Soome     printf("\ncache blocks: %d\n", bcache_total_nblks);
4934a5d661aSToomas Soome     printf("cache blocksz: %d\n", bcache_blksize);
4944a5d661aSToomas Soome     printf("cache readahead: %d\n", bcache_rablks);
4954a5d661aSToomas Soome     printf("unit cache blocks: %d\n", bcache_unit_nblks);
4964a5d661aSToomas Soome     printf("cached units: %d\n", bcache_units);
4974a5d661aSToomas Soome     printf("%d ops  %d bypasses  %d hits  %d misses\n", bcache_ops,
4984a5d661aSToomas Soome 	bcache_bypasses, bcache_hits, bcache_misses);
4994a5d661aSToomas Soome     return(CMD_OK);
5004a5d661aSToomas Soome }
5014a5d661aSToomas Soome #endif
502