14a5d661aSToomas Soome /*-
24a5d661aSToomas Soome * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
34a5d661aSToomas Soome * Copyright 2015 Toomas Soome <tsoome@me.com>
44a5d661aSToomas Soome * All rights reserved.
54a5d661aSToomas Soome *
64a5d661aSToomas Soome * Redistribution and use in source and binary forms, with or without
74a5d661aSToomas Soome * modification, are permitted provided that the following conditions
84a5d661aSToomas Soome * are met:
94a5d661aSToomas Soome * 1. Redistributions of source code must retain the above copyright
104a5d661aSToomas Soome * notice, this list of conditions and the following disclaimer.
114a5d661aSToomas Soome * 2. Redistributions in binary form must reproduce the above copyright
124a5d661aSToomas Soome * notice, this list of conditions and the following disclaimer in the
134a5d661aSToomas Soome * documentation and/or other materials provided with the distribution.
144a5d661aSToomas Soome *
154a5d661aSToomas Soome * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
164a5d661aSToomas Soome * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
174a5d661aSToomas Soome * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
184a5d661aSToomas Soome * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
194a5d661aSToomas Soome * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
204a5d661aSToomas Soome * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
214a5d661aSToomas Soome * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
224a5d661aSToomas Soome * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
234a5d661aSToomas Soome * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
244a5d661aSToomas Soome * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
254a5d661aSToomas Soome * SUCH DAMAGE.
264a5d661aSToomas Soome */
274a5d661aSToomas Soome
284a5d661aSToomas Soome #include <sys/cdefs.h>
294a5d661aSToomas Soome #include <sys/param.h>
304a5d661aSToomas Soome
314a5d661aSToomas Soome /*
324a5d661aSToomas Soome * Simple hashed block cache
334a5d661aSToomas Soome */
344a5d661aSToomas Soome
354a5d661aSToomas Soome #include <sys/stdint.h>
364a5d661aSToomas Soome
374a5d661aSToomas Soome #include <stand.h>
384a5d661aSToomas Soome #include <string.h>
394a5d661aSToomas Soome #include <strings.h>
404a5d661aSToomas Soome
414a5d661aSToomas Soome #include "bootstrap.h"
424a5d661aSToomas Soome
/*
 * Debug tracing.  With BCACHE_DEBUG defined, DEBUG() prints its message
 * prefixed with the name of the calling function and followed by a newline;
 * otherwise DEBUG() expands to nothing and its arguments are not evaluated.
 */
/* #define BCACHE_DEBUG */

#if defined(BCACHE_DEBUG)
#define	DEBUG(fmt, args...)	printf("%s: " fmt "\n", __func__, ## args)
#else
#define	DEBUG(fmt, args...)
#endif
504a5d661aSToomas Soome
514a5d661aSToomas Soome struct bcachectl
524a5d661aSToomas Soome {
534a5d661aSToomas Soome daddr_t bc_blkno;
544a5d661aSToomas Soome int bc_count;
554a5d661aSToomas Soome };
564a5d661aSToomas Soome
574a5d661aSToomas Soome /*
584a5d661aSToomas Soome * bcache per device node. cache is allocated on device first open and freed
594a5d661aSToomas Soome * on last close, to save memory. The issue there is the size; biosdisk
604a5d661aSToomas Soome * supports up to 31 (0x1f) devices. Classic setup would use single disk
614a5d661aSToomas Soome * to boot from, but this has changed with zfs.
624a5d661aSToomas Soome */
634a5d661aSToomas Soome struct bcache {
644a5d661aSToomas Soome struct bcachectl *bcache_ctl;
654a5d661aSToomas Soome caddr_t bcache_data;
66f2e61c99SToomas Soome size_t bcache_nblks;
674a5d661aSToomas Soome size_t ra;
684a5d661aSToomas Soome };
694a5d661aSToomas Soome
704a5d661aSToomas Soome static u_int bcache_total_nblks; /* set by bcache_init */
714a5d661aSToomas Soome static u_int bcache_blksize; /* set by bcache_init */
724a5d661aSToomas Soome static u_int bcache_numdev; /* set by bcache_add_dev */
734a5d661aSToomas Soome /* statistics */
744a5d661aSToomas Soome static u_int bcache_units; /* number of devices with cache */
754a5d661aSToomas Soome static u_int bcache_unit_nblks; /* nblocks per unit */
764a5d661aSToomas Soome static u_int bcache_hits;
774a5d661aSToomas Soome static u_int bcache_misses;
784a5d661aSToomas Soome static u_int bcache_ops;
794a5d661aSToomas Soome static u_int bcache_bypasses;
804a5d661aSToomas Soome static u_int bcache_bcount;
814a5d661aSToomas Soome static u_int bcache_rablks;
824a5d661aSToomas Soome
834a5d661aSToomas Soome #define BHASH(bc, blkno) ((blkno) & ((bc)->bcache_nblks - 1))
844a5d661aSToomas Soome #define BCACHE_LOOKUP(bc, blkno) \
854a5d661aSToomas Soome ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
864a5d661aSToomas Soome #define BCACHE_READAHEAD 256
874a5d661aSToomas Soome #define BCACHE_MINREADAHEAD 32
88f2e61c99SToomas Soome #define BCACHE_MARKER 0xdeadbeef
894a5d661aSToomas Soome
904a5d661aSToomas Soome static void bcache_invalidate(struct bcache *bc, daddr_t blkno);
914a5d661aSToomas Soome static void bcache_insert(struct bcache *bc, daddr_t blkno);
924a5d661aSToomas Soome static void bcache_free_instance(struct bcache *bc);
934a5d661aSToomas Soome
944a5d661aSToomas Soome /*
954a5d661aSToomas Soome * Initialise the cache for (nblks) of (bsize).
964a5d661aSToomas Soome */
974a5d661aSToomas Soome void
bcache_init(size_t nblks,size_t bsize)98f2e61c99SToomas Soome bcache_init(size_t nblks, size_t bsize)
994a5d661aSToomas Soome {
1004a5d661aSToomas Soome /* set up control data */
1014a5d661aSToomas Soome bcache_total_nblks = nblks;
1024a5d661aSToomas Soome bcache_blksize = bsize;
1034a5d661aSToomas Soome }
1044a5d661aSToomas Soome
1054a5d661aSToomas Soome /*
1064a5d661aSToomas Soome * add number of devices to bcache. we have to divide cache space
1074a5d661aSToomas Soome * between the devices, so bcache_add_dev() can be used to set up the
1084a5d661aSToomas Soome * number. The issue is, we need to get the number before actual allocations.
1094a5d661aSToomas Soome * bcache_add_dev() is supposed to be called from device init() call, so the
1104a5d661aSToomas Soome * assumption is, devsw dv_init is called for plain devices first, and
1114a5d661aSToomas Soome * for zfs, last.
1124a5d661aSToomas Soome */
1134a5d661aSToomas Soome void
bcache_add_dev(int devices)1144a5d661aSToomas Soome bcache_add_dev(int devices)
1154a5d661aSToomas Soome {
1164a5d661aSToomas Soome bcache_numdev += devices;
1174a5d661aSToomas Soome }
1184a5d661aSToomas Soome
1194a5d661aSToomas Soome void *
bcache_allocate(void)1204a5d661aSToomas Soome bcache_allocate(void)
1214a5d661aSToomas Soome {
1224a5d661aSToomas Soome u_int i;
1234a5d661aSToomas Soome struct bcache *bc = malloc(sizeof (struct bcache));
1244a5d661aSToomas Soome int disks = bcache_numdev;
125f2e61c99SToomas Soome uint32_t *marker;
1264a5d661aSToomas Soome
1274a5d661aSToomas Soome if (disks == 0)
1284a5d661aSToomas Soome disks = 1; /* safe guard */
1294a5d661aSToomas Soome
1304a5d661aSToomas Soome if (bc == NULL) {
1314a5d661aSToomas Soome errno = ENOMEM;
1324a5d661aSToomas Soome return (bc);
1334a5d661aSToomas Soome }
1344a5d661aSToomas Soome
1354a5d661aSToomas Soome /*
1364a5d661aSToomas Soome * the bcache block count must be power of 2 for hash function
1374a5d661aSToomas Soome */
1384a5d661aSToomas Soome i = fls(disks) - 1; /* highbit - 1 */
1394a5d661aSToomas Soome if (disks > (1 << i)) /* next power of 2 */
1404a5d661aSToomas Soome i++;
1414a5d661aSToomas Soome
1424a5d661aSToomas Soome bc->bcache_nblks = bcache_total_nblks >> i;
1434a5d661aSToomas Soome bcache_unit_nblks = bc->bcache_nblks;
144f2e61c99SToomas Soome bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
145f2e61c99SToomas Soome sizeof (uint32_t));
1464a5d661aSToomas Soome if (bc->bcache_data == NULL) {
1474a5d661aSToomas Soome /* dont error out yet. fall back to 32 blocks and try again */
1484a5d661aSToomas Soome bc->bcache_nblks = 32;
149f2e61c99SToomas Soome bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
150f2e61c99SToomas Soome sizeof (uint32_t));
1514a5d661aSToomas Soome }
1524a5d661aSToomas Soome
1534a5d661aSToomas Soome bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
1544a5d661aSToomas Soome
1554a5d661aSToomas Soome if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
1564a5d661aSToomas Soome bcache_free_instance(bc);
1574a5d661aSToomas Soome errno = ENOMEM;
1584a5d661aSToomas Soome return (NULL);
1594a5d661aSToomas Soome }
160f2e61c99SToomas Soome /* Insert cache end marker. */
161f2e61c99SToomas Soome marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
162f2e61c99SToomas Soome *marker = BCACHE_MARKER;
1634a5d661aSToomas Soome
1644a5d661aSToomas Soome /* Flush the cache */
1654a5d661aSToomas Soome for (i = 0; i < bc->bcache_nblks; i++) {
1664a5d661aSToomas Soome bc->bcache_ctl[i].bc_count = -1;
1674a5d661aSToomas Soome bc->bcache_ctl[i].bc_blkno = -1;
1684a5d661aSToomas Soome }
1694a5d661aSToomas Soome bcache_units++;
1704a5d661aSToomas Soome bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */
1714a5d661aSToomas Soome return (bc);
1724a5d661aSToomas Soome }
1734a5d661aSToomas Soome
1744a5d661aSToomas Soome void
bcache_free(void * cache)1754a5d661aSToomas Soome bcache_free(void *cache)
1764a5d661aSToomas Soome {
1774a5d661aSToomas Soome struct bcache *bc = cache;
1784a5d661aSToomas Soome
1794a5d661aSToomas Soome if (bc == NULL)
1804a5d661aSToomas Soome return;
1814a5d661aSToomas Soome
1824a5d661aSToomas Soome bcache_free_instance(bc);
1834a5d661aSToomas Soome bcache_units--;
1844a5d661aSToomas Soome }
1854a5d661aSToomas Soome
1864a5d661aSToomas Soome /*
1874a5d661aSToomas Soome * Handle a write request; write directly to the disk, and populate the
1884a5d661aSToomas Soome * cache with the new values.
1894a5d661aSToomas Soome */
1904a5d661aSToomas Soome static int
write_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)191976852c7SToomas Soome write_strategy(void *devdata, int rw, daddr_t blk, size_t size,
192976852c7SToomas Soome char *buf, size_t *rsize)
1934a5d661aSToomas Soome {
1944a5d661aSToomas Soome struct bcache_devdata *dd = (struct bcache_devdata *)devdata;
1954a5d661aSToomas Soome struct bcache *bc = dd->dv_cache;
1964a5d661aSToomas Soome daddr_t i, nblk;
1974a5d661aSToomas Soome
1984a5d661aSToomas Soome nblk = size / bcache_blksize;
1994a5d661aSToomas Soome
2004a5d661aSToomas Soome /* Invalidate the blocks being written */
2014a5d661aSToomas Soome for (i = 0; i < nblk; i++) {
2024a5d661aSToomas Soome bcache_invalidate(bc, blk + i);
2034a5d661aSToomas Soome }
2044a5d661aSToomas Soome
2054a5d661aSToomas Soome /* Write the blocks */
206976852c7SToomas Soome return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
2074a5d661aSToomas Soome }
2084a5d661aSToomas Soome
2094a5d661aSToomas Soome /*
2104a5d661aSToomas Soome * Handle a read request; fill in parts of the request that can
2114a5d661aSToomas Soome * be satisfied by the cache, use the supplied strategy routine to do
2124a5d661aSToomas Soome * device I/O and then use the I/O results to populate the cache.
2134a5d661aSToomas Soome */
2144a5d661aSToomas Soome static int
read_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)215976852c7SToomas Soome read_strategy(void *devdata, int rw, daddr_t blk, size_t size,
216976852c7SToomas Soome char *buf, size_t *rsize)
2174a5d661aSToomas Soome {
2184a5d661aSToomas Soome struct bcache_devdata *dd = (struct bcache_devdata *)devdata;
2194a5d661aSToomas Soome struct bcache *bc = dd->dv_cache;
2204a5d661aSToomas Soome size_t i, nblk, p_size, r_size, complete, ra;
2214a5d661aSToomas Soome int result;
2224a5d661aSToomas Soome daddr_t p_blk;
2234a5d661aSToomas Soome caddr_t p_buf;
224f2e61c99SToomas Soome uint32_t *marker;
2254a5d661aSToomas Soome
2264a5d661aSToomas Soome if (bc == NULL) {
2274a5d661aSToomas Soome errno = ENODEV;
2284a5d661aSToomas Soome return (-1);
2294a5d661aSToomas Soome }
230f2e61c99SToomas Soome marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
2314a5d661aSToomas Soome
2324a5d661aSToomas Soome if (rsize != NULL)
2334a5d661aSToomas Soome *rsize = 0;
2344a5d661aSToomas Soome
2354a5d661aSToomas Soome nblk = size / bcache_blksize;
236976852c7SToomas Soome if (nblk == 0 && size != 0)
2374a5d661aSToomas Soome nblk++;
2384a5d661aSToomas Soome result = 0;
2394a5d661aSToomas Soome complete = 1;
2404a5d661aSToomas Soome
2414a5d661aSToomas Soome /* Satisfy any cache hits up front, break on first miss */
2424a5d661aSToomas Soome for (i = 0; i < nblk; i++) {
2434a5d661aSToomas Soome if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) {
2444a5d661aSToomas Soome bcache_misses += (nblk - i);
2454a5d661aSToomas Soome complete = 0;
2464a5d661aSToomas Soome if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD)
2474a5d661aSToomas Soome bc->ra >>= 1; /* reduce read ahead */
2484a5d661aSToomas Soome break;
2494a5d661aSToomas Soome } else {
2504a5d661aSToomas Soome bcache_hits++;
2514a5d661aSToomas Soome }
2524a5d661aSToomas Soome }
2534a5d661aSToomas Soome
2544a5d661aSToomas Soome if (complete) { /* whole set was in cache, return it */
2554a5d661aSToomas Soome if (bc->ra < BCACHE_READAHEAD)
2564a5d661aSToomas Soome bc->ra <<= 1; /* increase read ahead */
257976852c7SToomas Soome bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)), buf, size);
2584a5d661aSToomas Soome goto done;
2594a5d661aSToomas Soome }
2604a5d661aSToomas Soome
2614a5d661aSToomas Soome /*
2624a5d661aSToomas Soome * Fill in any misses. From check we have i pointing to first missing
2634a5d661aSToomas Soome * block, read in all remaining blocks + readahead.
2644a5d661aSToomas Soome * We have space at least for nblk - i before bcache wraps.
2654a5d661aSToomas Soome */
2664a5d661aSToomas Soome p_blk = blk + i;
2674a5d661aSToomas Soome p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk));
2684a5d661aSToomas Soome r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */
2694a5d661aSToomas Soome
2704a5d661aSToomas Soome p_size = MIN(r_size, nblk - i); /* read at least those blocks */
2714a5d661aSToomas Soome
272f2e61c99SToomas Soome /*
273f2e61c99SToomas Soome * The read ahead size setup.
274f2e61c99SToomas Soome * While the read ahead can save us IO, it also can complicate things:
275f2e61c99SToomas Soome * 1. We do not want to read ahead by wrapping around the
276f2e61c99SToomas Soome * bcache end - this would complicate the cache management.
277f2e61c99SToomas Soome * 2. We are using bc->ra as dynamic hint for read ahead size,
278f2e61c99SToomas Soome * detected cache hits will increase the read-ahead block count, and
279f2e61c99SToomas Soome * misses will decrease, see the code above.
280f2e61c99SToomas Soome * 3. The bcache is sized by 512B blocks, however, the underlying device
281f2e61c99SToomas Soome * may have a larger sector size, and we should perform the IO by
282f2e61c99SToomas Soome * taking into account these larger sector sizes. We could solve this by
283f2e61c99SToomas Soome * passing the sector size to bcache_allocate(), or by using ioctl(), but
284f2e61c99SToomas Soome * in this version we are using the constant, 16 blocks, and are rounding
285f2e61c99SToomas Soome * read ahead block count down to multiple of 16.
286f2e61c99SToomas Soome * Using the constant has two reasons, we are not entirely sure if the
287f2e61c99SToomas Soome * BIOS disk interface is providing the correct value for sector size.
288f2e61c99SToomas Soome * And secondly, this way we get the most conservative setup for the ra.
289f2e61c99SToomas Soome *
290f2e61c99SToomas Soome * The selection of multiple of 16 blocks (8KB) is quite arbitrary, however,
291f2e61c99SToomas Soome * we want to cover CDs (2K) and 4K disks.
292f2e61c99SToomas Soome * bcache_allocate() will always fall back to a minimum of 32 blocks.
293f2e61c99SToomas Soome * Our choice of 16 read ahead blocks will always fit inside the bcache.
294f2e61c99SToomas Soome */
295f2e61c99SToomas Soome
296*a5a5c3b7SToomas Soome if ((rw & F_NORA) == F_NORA)
297*a5a5c3b7SToomas Soome ra = 0;
298*a5a5c3b7SToomas Soome else
2994a5d661aSToomas Soome ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
300*a5a5c3b7SToomas Soome
301f2e61c99SToomas Soome if (ra != 0 && ra != bc->bcache_nblks) { /* do we have RA space? */
302f2e61c99SToomas Soome ra = MIN(bc->ra, ra - 1);
303f2e61c99SToomas Soome ra = rounddown(ra, 16); /* multiple of 16 blocks */
3044a5d661aSToomas Soome p_size += ra;
3054a5d661aSToomas Soome }
3064a5d661aSToomas Soome
3074a5d661aSToomas Soome /* invalidate bcache */
3084a5d661aSToomas Soome for (i = 0; i < p_size; i++) {
3094a5d661aSToomas Soome bcache_invalidate(bc, p_blk + i);
3104a5d661aSToomas Soome }
3114a5d661aSToomas Soome
3124a5d661aSToomas Soome r_size = 0;
3134a5d661aSToomas Soome /*
3144a5d661aSToomas Soome * with read-ahead, it may happen we are attempting to read past
3154a5d661aSToomas Soome * disk end, as bcache has no information about disk size.
3164a5d661aSToomas Soome * in such case we should get partial read if some blocks can be
3174a5d661aSToomas Soome * read or error, if no blocks can be read.
3184a5d661aSToomas Soome * in either case we should return the data in bcache and only
3194a5d661aSToomas Soome * return error if there is no data.
3204a5d661aSToomas Soome */
321*a5a5c3b7SToomas Soome rw &= F_MASK;
322976852c7SToomas Soome result = dd->dv_strategy(dd->dv_devdata, rw, p_blk,
3234a5d661aSToomas Soome p_size * bcache_blksize, p_buf, &r_size);
3244a5d661aSToomas Soome
3254a5d661aSToomas Soome r_size /= bcache_blksize;
3264a5d661aSToomas Soome for (i = 0; i < r_size; i++)
3274a5d661aSToomas Soome bcache_insert(bc, p_blk + i);
3284a5d661aSToomas Soome
3294a5d661aSToomas Soome /* update ra statistics */
3304a5d661aSToomas Soome if (r_size != 0) {
3314a5d661aSToomas Soome if (r_size < p_size)
3324a5d661aSToomas Soome bcache_rablks += (p_size - r_size);
3334a5d661aSToomas Soome else
3344a5d661aSToomas Soome bcache_rablks += ra;
3354a5d661aSToomas Soome }
3364a5d661aSToomas Soome
3374a5d661aSToomas Soome /* check how much data can we copy */
3384a5d661aSToomas Soome for (i = 0; i < nblk; i++) {
3394a5d661aSToomas Soome if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i)))
3404a5d661aSToomas Soome break;
3414a5d661aSToomas Soome }
3424a5d661aSToomas Soome
3434a5d661aSToomas Soome if (size > i * bcache_blksize)
3444a5d661aSToomas Soome size = i * bcache_blksize;
3454a5d661aSToomas Soome
3464a5d661aSToomas Soome if (size != 0) {
347976852c7SToomas Soome bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)), buf, size);
3484a5d661aSToomas Soome result = 0;
3494a5d661aSToomas Soome }
3504a5d661aSToomas Soome
351f2e61c99SToomas Soome if (*marker != BCACHE_MARKER) {
352f2e61c99SToomas Soome printf("BUG: bcache corruption detected: nblks: %zu p_blk: %lu, "
353f2e61c99SToomas Soome "p_size: %zu, ra: %zu\n", bc->bcache_nblks,
354f2e61c99SToomas Soome (long unsigned)BHASH(bc, p_blk), p_size, ra);
355f2e61c99SToomas Soome }
356f2e61c99SToomas Soome
3574a5d661aSToomas Soome done:
3584a5d661aSToomas Soome if ((result == 0) && (rsize != NULL))
3594a5d661aSToomas Soome *rsize = size;
3604a5d661aSToomas Soome return(result);
3614a5d661aSToomas Soome }
3624a5d661aSToomas Soome
3634a5d661aSToomas Soome /*
3644a5d661aSToomas Soome * Requests larger than 1/2 cache size will be bypassed and go
3654a5d661aSToomas Soome * directly to the disk. XXX tune this.
3664a5d661aSToomas Soome */
3674a5d661aSToomas Soome int
bcache_strategy(void * devdata,int rw,daddr_t blk,size_t size,char * buf,size_t * rsize)368976852c7SToomas Soome bcache_strategy(void *devdata, int rw, daddr_t blk, size_t size,
369976852c7SToomas Soome char *buf, size_t *rsize)
3704a5d661aSToomas Soome {
3714a5d661aSToomas Soome struct bcache_devdata *dd = (struct bcache_devdata *)devdata;
3724a5d661aSToomas Soome struct bcache *bc = dd->dv_cache;
3734a5d661aSToomas Soome u_int bcache_nblks = 0;
3744a5d661aSToomas Soome int nblk, cblk, ret;
3754a5d661aSToomas Soome size_t csize, isize, total;
3764a5d661aSToomas Soome
3774a5d661aSToomas Soome bcache_ops++;
3784a5d661aSToomas Soome
3794a5d661aSToomas Soome if (bc != NULL)
3804a5d661aSToomas Soome bcache_nblks = bc->bcache_nblks;
3814a5d661aSToomas Soome
3824a5d661aSToomas Soome /* bypass large requests, or when the cache is inactive */
3834a5d661aSToomas Soome if (bc == NULL ||
384976852c7SToomas Soome ((size * 2 / bcache_blksize) > bcache_nblks)) {
385f2e61c99SToomas Soome DEBUG("bypass %zu from %qu", size / bcache_blksize, blk);
3864a5d661aSToomas Soome bcache_bypasses++;
387*a5a5c3b7SToomas Soome rw &= F_MASK;
388976852c7SToomas Soome return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
3894a5d661aSToomas Soome }
3904a5d661aSToomas Soome
391*a5a5c3b7SToomas Soome switch (rw & F_MASK) {
3924a5d661aSToomas Soome case F_READ:
3934a5d661aSToomas Soome nblk = size / bcache_blksize;
394976852c7SToomas Soome if (size != 0 && nblk == 0)
3954a5d661aSToomas Soome nblk++; /* read at least one block */
3964a5d661aSToomas Soome
3974a5d661aSToomas Soome ret = 0;
3984a5d661aSToomas Soome total = 0;
3994a5d661aSToomas Soome while(size) {
4004a5d661aSToomas Soome cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */
4014a5d661aSToomas Soome cblk = MIN(cblk, nblk);
4024a5d661aSToomas Soome
4034a5d661aSToomas Soome if (size <= bcache_blksize)
4044a5d661aSToomas Soome csize = size;
405976852c7SToomas Soome else
4064a5d661aSToomas Soome csize = cblk * bcache_blksize;
4074a5d661aSToomas Soome
408976852c7SToomas Soome ret = read_strategy(devdata, rw, blk, csize, buf+total, &isize);
4094a5d661aSToomas Soome
4104a5d661aSToomas Soome /*
4114a5d661aSToomas Soome * we may have error from read ahead, if we have read some data
4124a5d661aSToomas Soome * return partial read.
4134a5d661aSToomas Soome */
4144a5d661aSToomas Soome if (ret != 0 || isize == 0) {
4154a5d661aSToomas Soome if (total != 0)
4164a5d661aSToomas Soome ret = 0;
4174a5d661aSToomas Soome break;
4184a5d661aSToomas Soome }
419976852c7SToomas Soome blk += isize / bcache_blksize;
4204a5d661aSToomas Soome total += isize;
4214a5d661aSToomas Soome size -= isize;
4224a5d661aSToomas Soome nblk = size / bcache_blksize;
4234a5d661aSToomas Soome }
4244a5d661aSToomas Soome
4254a5d661aSToomas Soome if (rsize)
4264a5d661aSToomas Soome *rsize = total;
4274a5d661aSToomas Soome
4284a5d661aSToomas Soome return (ret);
4294a5d661aSToomas Soome case F_WRITE:
430*a5a5c3b7SToomas Soome return write_strategy(devdata, F_WRITE, blk, size, buf, rsize);
4314a5d661aSToomas Soome }
4324a5d661aSToomas Soome return -1;
4334a5d661aSToomas Soome }
4344a5d661aSToomas Soome
4354a5d661aSToomas Soome /*
4364a5d661aSToomas Soome * Free allocated bcache instance
4374a5d661aSToomas Soome */
4384a5d661aSToomas Soome static void
bcache_free_instance(struct bcache * bc)4394a5d661aSToomas Soome bcache_free_instance(struct bcache *bc)
4404a5d661aSToomas Soome {
4414a5d661aSToomas Soome if (bc != NULL) {
4424a5d661aSToomas Soome if (bc->bcache_ctl)
4434a5d661aSToomas Soome free(bc->bcache_ctl);
4444a5d661aSToomas Soome if (bc->bcache_data)
4454a5d661aSToomas Soome free(bc->bcache_data);
4464a5d661aSToomas Soome free(bc);
4474a5d661aSToomas Soome }
4484a5d661aSToomas Soome }
4494a5d661aSToomas Soome
4504a5d661aSToomas Soome /*
4514a5d661aSToomas Soome * Insert a block into the cache.
4524a5d661aSToomas Soome */
4534a5d661aSToomas Soome static void
bcache_insert(struct bcache * bc,daddr_t blkno)4544a5d661aSToomas Soome bcache_insert(struct bcache *bc, daddr_t blkno)
4554a5d661aSToomas Soome {
4564a5d661aSToomas Soome u_int cand;
4574a5d661aSToomas Soome
4584a5d661aSToomas Soome cand = BHASH(bc, blkno);
4594a5d661aSToomas Soome
4604a5d661aSToomas Soome DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount);
4614a5d661aSToomas Soome bc->bcache_ctl[cand].bc_blkno = blkno;
4624a5d661aSToomas Soome bc->bcache_ctl[cand].bc_count = bcache_bcount++;
4634a5d661aSToomas Soome }
4644a5d661aSToomas Soome
4654a5d661aSToomas Soome /*
4664a5d661aSToomas Soome * Invalidate a block from the cache.
4674a5d661aSToomas Soome */
4684a5d661aSToomas Soome static void
bcache_invalidate(struct bcache * bc,daddr_t blkno)4694a5d661aSToomas Soome bcache_invalidate(struct bcache *bc, daddr_t blkno)
4704a5d661aSToomas Soome {
4714a5d661aSToomas Soome u_int i;
4724a5d661aSToomas Soome
4734a5d661aSToomas Soome i = BHASH(bc, blkno);
4744a5d661aSToomas Soome if (bc->bcache_ctl[i].bc_blkno == blkno) {
4754a5d661aSToomas Soome bc->bcache_ctl[i].bc_count = -1;
4764a5d661aSToomas Soome bc->bcache_ctl[i].bc_blkno = -1;
4774a5d661aSToomas Soome DEBUG("invalidate blk %llu", blkno);
4784a5d661aSToomas Soome }
4794a5d661aSToomas Soome }
4804a5d661aSToomas Soome
4814a5d661aSToomas Soome #ifndef BOOT2
4824a5d661aSToomas Soome COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache);
4834a5d661aSToomas Soome
4844a5d661aSToomas Soome static int
command_bcache(int argc,char * argv[]__attribute ((unused)))4854a5d661aSToomas Soome command_bcache(int argc, char *argv[] __attribute((unused)))
4864a5d661aSToomas Soome {
4874a5d661aSToomas Soome if (argc != 1) {
4884a5d661aSToomas Soome command_errmsg = "wrong number of arguments";
4894a5d661aSToomas Soome return(CMD_ERROR);
4904a5d661aSToomas Soome }
4914a5d661aSToomas Soome
4924a5d661aSToomas Soome printf("\ncache blocks: %d\n", bcache_total_nblks);
4934a5d661aSToomas Soome printf("cache blocksz: %d\n", bcache_blksize);
4944a5d661aSToomas Soome printf("cache readahead: %d\n", bcache_rablks);
4954a5d661aSToomas Soome printf("unit cache blocks: %d\n", bcache_unit_nblks);
4964a5d661aSToomas Soome printf("cached units: %d\n", bcache_units);
4974a5d661aSToomas Soome printf("%d ops %d bypasses %d hits %d misses\n", bcache_ops,
4984a5d661aSToomas Soome bcache_bypasses, bcache_hits, bcache_misses);
4994a5d661aSToomas Soome return(CMD_OK);
5004a5d661aSToomas Soome }
5014a5d661aSToomas Soome #endif
502