xref: /freebsd/lib/libc/db/mpool/mpool.c (revision dc36d6f9bb1753f3808552f3afd30eda9a7b206a)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1990, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
3158f0484fSRodney W. Grimes 
32d201fe46SDaniel Eischen #include "namespace.h"
3358f0484fSRodney W. Grimes #include <sys/param.h>
34f1e396bcSPaul Traina #include <sys/queue.h>
3558f0484fSRodney W. Grimes #include <sys/stat.h>
3658f0484fSRodney W. Grimes 
3758f0484fSRodney W. Grimes #include <errno.h>
3858f0484fSRodney W. Grimes #include <stdio.h>
3958f0484fSRodney W. Grimes #include <stdlib.h>
4058f0484fSRodney W. Grimes #include <string.h>
4158f0484fSRodney W. Grimes #include <unistd.h>
42d201fe46SDaniel Eischen #include "un-namespace.h"
4358f0484fSRodney W. Grimes 
4458f0484fSRodney W. Grimes #include <db.h>
45f1e396bcSPaul Traina 
4658f0484fSRodney W. Grimes #define	__MPOOLINTERFACE_PRIVATE
47f1e396bcSPaul Traina #include <mpool.h>
4858f0484fSRodney W. Grimes 
49c05ac53bSDavid E. O'Brien static BKT *mpool_bkt(MPOOL *);
50c05ac53bSDavid E. O'Brien static BKT *mpool_look(MPOOL *, pgno_t);
51c05ac53bSDavid E. O'Brien static int  mpool_write(MPOOL *, BKT *);
5258f0484fSRodney W. Grimes 
5358f0484fSRodney W. Grimes /*
54f1e396bcSPaul Traina  * mpool_open --
55f1e396bcSPaul Traina  *	Initialize a memory pool.
5658f0484fSRodney W. Grimes  */
570ac22237SXin LI /* ARGSUSED */
5858f0484fSRodney W. Grimes MPOOL *
mpool_open(void * key,int fd,pgno_t pagesize,pgno_t maxcache)590ac22237SXin LI mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache)
6058f0484fSRodney W. Grimes {
6158f0484fSRodney W. Grimes 	struct stat sb;
6258f0484fSRodney W. Grimes 	MPOOL *mp;
6358f0484fSRodney W. Grimes 	int entry;
6458f0484fSRodney W. Grimes 
65f1e396bcSPaul Traina 	/*
66f1e396bcSPaul Traina 	 * Get information about the file.
67f1e396bcSPaul Traina 	 *
68f1e396bcSPaul Traina 	 * XXX
69f1e396bcSPaul Traina 	 * We don't currently handle pipes, although we should.
70f1e396bcSPaul Traina 	 */
71d201fe46SDaniel Eischen 	if (_fstat(fd, &sb))
7258f0484fSRodney W. Grimes 		return (NULL);
7358f0484fSRodney W. Grimes 	if (!S_ISREG(sb.st_mode)) {
7458f0484fSRodney W. Grimes 		errno = ESPIPE;
7558f0484fSRodney W. Grimes 		return (NULL);
7658f0484fSRodney W. Grimes 	}
7758f0484fSRodney W. Grimes 
78f1e396bcSPaul Traina 	/* Allocate and initialize the MPOOL cookie. */
79f1e396bcSPaul Traina 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
8058f0484fSRodney W. Grimes 		return (NULL);
81fabacd3aSPoul-Henning Kamp 	TAILQ_INIT(&mp->lqh);
8258f0484fSRodney W. Grimes 	for (entry = 0; entry < HASHSIZE; ++entry)
83fabacd3aSPoul-Henning Kamp 		TAILQ_INIT(&mp->hqh[entry]);
8458f0484fSRodney W. Grimes 	mp->maxcache = maxcache;
8558f0484fSRodney W. Grimes 	mp->npages = sb.st_size / pagesize;
86f1e396bcSPaul Traina 	mp->pagesize = pagesize;
8758f0484fSRodney W. Grimes 	mp->fd = fd;
8858f0484fSRodney W. Grimes 	return (mp);
8958f0484fSRodney W. Grimes }
9058f0484fSRodney W. Grimes 
9158f0484fSRodney W. Grimes /*
92f1e396bcSPaul Traina  * mpool_filter --
93f1e396bcSPaul Traina  *	Initialize input/output filters.
9458f0484fSRodney W. Grimes  */
9558f0484fSRodney W. Grimes void
mpool_filter(MPOOL * mp,void (* pgin)(void *,pgno_t,void *),void (* pgout)(void *,pgno_t,void *),void * pgcookie)960ac22237SXin LI mpool_filter(MPOOL *mp, void (*pgin) (void *, pgno_t, void *),
970ac22237SXin LI     void (*pgout) (void *, pgno_t, void *), void *pgcookie)
9858f0484fSRodney W. Grimes {
9958f0484fSRodney W. Grimes 	mp->pgin = pgin;
10058f0484fSRodney W. Grimes 	mp->pgout = pgout;
10158f0484fSRodney W. Grimes 	mp->pgcookie = pgcookie;
10258f0484fSRodney W. Grimes }
10358f0484fSRodney W. Grimes 
10458f0484fSRodney W. Grimes /*
105f1e396bcSPaul Traina  * mpool_new --
106f1e396bcSPaul Traina  *	Get a new page of memory.
10758f0484fSRodney W. Grimes  */
10858f0484fSRodney W. Grimes void *
mpool_new(MPOOL * mp,pgno_t * pgnoaddr,u_int flags)1099fc74a87SXin LI mpool_new(MPOOL *mp, pgno_t *pgnoaddr, u_int flags)
11058f0484fSRodney W. Grimes {
111f1e396bcSPaul Traina 	struct _hqh *head;
112f1e396bcSPaul Traina 	BKT *bp;
11358f0484fSRodney W. Grimes 
114f1e396bcSPaul Traina 	if (mp->npages == MAX_PAGE_NUMBER) {
115f1e396bcSPaul Traina 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
116f1e396bcSPaul Traina 		abort();
117f1e396bcSPaul Traina 	}
11858f0484fSRodney W. Grimes #ifdef STATISTICS
11958f0484fSRodney W. Grimes 	++mp->pagenew;
12058f0484fSRodney W. Grimes #endif
12158f0484fSRodney W. Grimes 	/*
122f1e396bcSPaul Traina 	 * Get a BKT from the cache.  Assign a new page number, attach
123f1e396bcSPaul Traina 	 * it to the head of the hash chain, the tail of the lru chain,
124f1e396bcSPaul Traina 	 * and return.
12558f0484fSRodney W. Grimes 	 */
126f1e396bcSPaul Traina 	if ((bp = mpool_bkt(mp)) == NULL)
12758f0484fSRodney W. Grimes 		return (NULL);
1289fc74a87SXin LI 	if (flags == MPOOL_PAGE_REQUEST) {
1299fc74a87SXin LI 		mp->npages++;
1309fc74a87SXin LI 		bp->pgno = *pgnoaddr;
1319fc74a87SXin LI 	} else
1329fc74a87SXin LI 		bp->pgno = *pgnoaddr = mp->npages++;
1339fc74a87SXin LI 
1349fc74a87SXin LI 	bp->flags = MPOOL_PINNED | MPOOL_INUSE;
135f1e396bcSPaul Traina 
136f1e396bcSPaul Traina 	head = &mp->hqh[HASHKEY(bp->pgno)];
137fabacd3aSPoul-Henning Kamp 	TAILQ_INSERT_HEAD(head, bp, hq);
138fabacd3aSPoul-Henning Kamp 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
139f1e396bcSPaul Traina 	return (bp->page);
14058f0484fSRodney W. Grimes }
14158f0484fSRodney W. Grimes 
1429fc74a87SXin LI int
mpool_delete(MPOOL * mp,void * page)1439fc74a87SXin LI mpool_delete(MPOOL *mp, void *page)
1449fc74a87SXin LI {
1459fc74a87SXin LI 	struct _hqh *head;
1469fc74a87SXin LI 	BKT *bp;
1479fc74a87SXin LI 
1489fc74a87SXin LI 	bp = (BKT *)((char *)page - sizeof(BKT));
1499fc74a87SXin LI 
1509fc74a87SXin LI #ifdef DEBUG
1519fc74a87SXin LI 	if (!(bp->flags & MPOOL_PINNED)) {
1529fc74a87SXin LI 		(void)fprintf(stderr,
1539fc74a87SXin LI 		    "mpool_delete: page %d not pinned\n", bp->pgno);
1549fc74a87SXin LI 		abort();
1559fc74a87SXin LI 	}
1569fc74a87SXin LI #endif
1579fc74a87SXin LI 
1589fc74a87SXin LI 	/* Remove from the hash and lru queues. */
1599fc74a87SXin LI 	head = &mp->hqh[HASHKEY(bp->pgno)];
1609fc74a87SXin LI 	TAILQ_REMOVE(head, bp, hq);
1619fc74a87SXin LI 	TAILQ_REMOVE(&mp->lqh, bp, q);
1629fc74a87SXin LI 
1639fc74a87SXin LI 	free(bp);
1649fc74a87SXin LI 	mp->curcache--;
1659fc74a87SXin LI 	return (RET_SUCCESS);
1669fc74a87SXin LI }
1679fc74a87SXin LI 
16858f0484fSRodney W. Grimes /*
169f1e396bcSPaul Traina  * mpool_get
170f1e396bcSPaul Traina  *	Get a page.
17158f0484fSRodney W. Grimes  */
1720ac22237SXin LI /* ARGSUSED */
17358f0484fSRodney W. Grimes void *
mpool_get(MPOOL * mp,pgno_t pgno,u_int flags)1740ac22237SXin LI mpool_get(MPOOL *mp, pgno_t pgno,
1750ac22237SXin LI     u_int flags)		/* XXX not used? */
17658f0484fSRodney W. Grimes {
177f1e396bcSPaul Traina 	struct _hqh *head;
178f1e396bcSPaul Traina 	BKT *bp;
17958f0484fSRodney W. Grimes 	off_t off;
18058f0484fSRodney W. Grimes 	int nr;
18158f0484fSRodney W. Grimes 
182f1e396bcSPaul Traina #ifdef STATISTICS
183f1e396bcSPaul Traina 	++mp->pageget;
184f1e396bcSPaul Traina #endif
18558f0484fSRodney W. Grimes 
186f1e396bcSPaul Traina 	/* Check for a page that is cached. */
187f1e396bcSPaul Traina 	if ((bp = mpool_look(mp, pgno)) != NULL) {
188f1e396bcSPaul Traina #ifdef DEBUG
1899fc74a87SXin LI 		if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
190f1e396bcSPaul Traina 			(void)fprintf(stderr,
191f1e396bcSPaul Traina 			    "mpool_get: page %d already pinned\n", bp->pgno);
192f1e396bcSPaul Traina 			abort();
193f1e396bcSPaul Traina 		}
194f1e396bcSPaul Traina #endif
195f1e396bcSPaul Traina 		/*
196f1e396bcSPaul Traina 		 * Move the page to the head of the hash chain and the tail
197f1e396bcSPaul Traina 		 * of the lru chain.
198f1e396bcSPaul Traina 		 */
199f1e396bcSPaul Traina 		head = &mp->hqh[HASHKEY(bp->pgno)];
200fabacd3aSPoul-Henning Kamp 		TAILQ_REMOVE(head, bp, hq);
201fabacd3aSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, bp, hq);
202fabacd3aSPoul-Henning Kamp 		TAILQ_REMOVE(&mp->lqh, bp, q);
203fabacd3aSPoul-Henning Kamp 		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
204f1e396bcSPaul Traina 
205f1e396bcSPaul Traina 		/* Return a pinned page. */
206f1e396bcSPaul Traina 		bp->flags |= MPOOL_PINNED;
207f1e396bcSPaul Traina 		return (bp->page);
208f1e396bcSPaul Traina 	}
209f1e396bcSPaul Traina 
210f1e396bcSPaul Traina 	/* Get a page from the cache. */
211f1e396bcSPaul Traina 	if ((bp = mpool_bkt(mp)) == NULL)
212f1e396bcSPaul Traina 		return (NULL);
213f1e396bcSPaul Traina 
214f1e396bcSPaul Traina 	/* Read in the contents. */
2159fc74a87SXin LI 	off = mp->pagesize * pgno;
216e8ee08baSXin LI 	if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != (ssize_t)mp->pagesize) {
2179fc74a87SXin LI 		switch (nr) {
2189fc74a87SXin LI 		case -1:
2199fc74a87SXin LI 			/* errno is set for us by pread(). */
2209fc74a87SXin LI 			free(bp);
2219fc74a87SXin LI 			mp->curcache--;
2229fc74a87SXin LI 			return (NULL);
2239fc74a87SXin LI 		case 0:
2249fc74a87SXin LI 			/*
2259fc74a87SXin LI 			 * A zero-length read means you need to create a
2269fc74a87SXin LI 			 * new page.
2279fc74a87SXin LI 			 */
2289fc74a87SXin LI 			memset(bp->page, 0, mp->pagesize);
2299fc74a87SXin LI 			break;
2309fc74a87SXin LI 		default:
2319fc74a87SXin LI 			/* A partial read is definitely bad. */
2329fc74a87SXin LI 			free(bp);
2339fc74a87SXin LI 			mp->curcache--;
2349fc74a87SXin LI 			errno = EINVAL;
2359fc74a87SXin LI 			return (NULL);
2369fc74a87SXin LI 		}
2379fc74a87SXin LI 	}
23858f0484fSRodney W. Grimes #ifdef STATISTICS
23958f0484fSRodney W. Grimes 	++mp->pageread;
24058f0484fSRodney W. Grimes #endif
24158f0484fSRodney W. Grimes 
242f1e396bcSPaul Traina 	/* Set the page number, pin the page. */
243f1e396bcSPaul Traina 	bp->pgno = pgno;
2449fc74a87SXin LI 	if (!(flags & MPOOL_IGNOREPIN))
245f1e396bcSPaul Traina 		bp->flags = MPOOL_PINNED;
2469fc74a87SXin LI 	bp->flags |= MPOOL_INUSE;
247f1e396bcSPaul Traina 
248f1e396bcSPaul Traina 	/*
249f1e396bcSPaul Traina 	 * Add the page to the head of the hash chain and the tail
250f1e396bcSPaul Traina 	 * of the lru chain.
251f1e396bcSPaul Traina 	 */
252f1e396bcSPaul Traina 	head = &mp->hqh[HASHKEY(bp->pgno)];
253fabacd3aSPoul-Henning Kamp 	TAILQ_INSERT_HEAD(head, bp, hq);
254fabacd3aSPoul-Henning Kamp 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
255f1e396bcSPaul Traina 
256f1e396bcSPaul Traina 	/* Run through the user's filter. */
257f1e396bcSPaul Traina 	if (mp->pgin != NULL)
258f1e396bcSPaul Traina 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
259f1e396bcSPaul Traina 
260f1e396bcSPaul Traina 	return (bp->page);
26158f0484fSRodney W. Grimes }
26258f0484fSRodney W. Grimes 
26358f0484fSRodney W. Grimes /*
264f1e396bcSPaul Traina  * mpool_put
265f1e396bcSPaul Traina  *	Return a page.
26658f0484fSRodney W. Grimes  */
2670ac22237SXin LI /* ARGSUSED */
26858f0484fSRodney W. Grimes int
mpool_put(MPOOL * mp,void * page,u_int flags)2690ac22237SXin LI mpool_put(MPOOL *mp, void *page, u_int flags)
27058f0484fSRodney W. Grimes {
271f1e396bcSPaul Traina 	BKT *bp;
27258f0484fSRodney W. Grimes 
27358f0484fSRodney W. Grimes #ifdef STATISTICS
27458f0484fSRodney W. Grimes 	++mp->pageput;
27558f0484fSRodney W. Grimes #endif
276f1e396bcSPaul Traina 	bp = (BKT *)((char *)page - sizeof(BKT));
27758f0484fSRodney W. Grimes #ifdef DEBUG
278f1e396bcSPaul Traina 	if (!(bp->flags & MPOOL_PINNED)) {
279f1e396bcSPaul Traina 		(void)fprintf(stderr,
280f1e396bcSPaul Traina 		    "mpool_put: page %d not pinned\n", bp->pgno);
281f1e396bcSPaul Traina 		abort();
28258f0484fSRodney W. Grimes 	}
28358f0484fSRodney W. Grimes #endif
284f1e396bcSPaul Traina 	bp->flags &= ~MPOOL_PINNED;
2859fc74a87SXin LI 	if (flags & MPOOL_DIRTY)
286f1e396bcSPaul Traina 		bp->flags |= flags & MPOOL_DIRTY;
28758f0484fSRodney W. Grimes 	return (RET_SUCCESS);
28858f0484fSRodney W. Grimes }
28958f0484fSRodney W. Grimes 
29058f0484fSRodney W. Grimes /*
291f1e396bcSPaul Traina  * mpool_close
292f1e396bcSPaul Traina  *	Close the buffer pool.
29358f0484fSRodney W. Grimes  */
29458f0484fSRodney W. Grimes int
mpool_close(MPOOL * mp)2950ac22237SXin LI mpool_close(MPOOL *mp)
29658f0484fSRodney W. Grimes {
297f1e396bcSPaul Traina 	BKT *bp;
29858f0484fSRodney W. Grimes 
29958f0484fSRodney W. Grimes 	/* Free up any space allocated to the lru pages. */
300429d4912SBrian Feldman 	while (!TAILQ_EMPTY(&mp->lqh)) {
301429d4912SBrian Feldman 		bp = TAILQ_FIRST(&mp->lqh);
302fabacd3aSPoul-Henning Kamp 		TAILQ_REMOVE(&mp->lqh, bp, q);
303f1e396bcSPaul Traina 		free(bp);
30458f0484fSRodney W. Grimes 	}
305f1e396bcSPaul Traina 
306f1e396bcSPaul Traina 	/* Free the MPOOL cookie. */
30758f0484fSRodney W. Grimes 	free(mp);
30858f0484fSRodney W. Grimes 	return (RET_SUCCESS);
30958f0484fSRodney W. Grimes }
31058f0484fSRodney W. Grimes 
31158f0484fSRodney W. Grimes /*
312f1e396bcSPaul Traina  * mpool_sync
313f1e396bcSPaul Traina  *	Sync the pool to disk.
31458f0484fSRodney W. Grimes  */
31558f0484fSRodney W. Grimes int
mpool_sync(MPOOL * mp)3160ac22237SXin LI mpool_sync(MPOOL *mp)
31758f0484fSRodney W. Grimes {
318f1e396bcSPaul Traina 	BKT *bp;
31958f0484fSRodney W. Grimes 
320f1e396bcSPaul Traina 	/* Walk the lru chain, flushing any dirty pages to disk. */
321fabacd3aSPoul-Henning Kamp 	TAILQ_FOREACH(bp, &mp->lqh, q)
322f1e396bcSPaul Traina 		if (bp->flags & MPOOL_DIRTY &&
323f1e396bcSPaul Traina 		    mpool_write(mp, bp) == RET_ERROR)
32458f0484fSRodney W. Grimes 			return (RET_ERROR);
325f1e396bcSPaul Traina 
326f1e396bcSPaul Traina 	/* Sync the file descriptor. */
3279233c4d9SJason Evans 	return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
32858f0484fSRodney W. Grimes }
32958f0484fSRodney W. Grimes 
33058f0484fSRodney W. Grimes /*
331f1e396bcSPaul Traina  * mpool_bkt
332f1e396bcSPaul Traina  *	Get a page from the cache (or create one).
33358f0484fSRodney W. Grimes  */
33458f0484fSRodney W. Grimes static BKT *
mpool_bkt(MPOOL * mp)3350ac22237SXin LI mpool_bkt(MPOOL *mp)
33658f0484fSRodney W. Grimes {
337f1e396bcSPaul Traina 	struct _hqh *head;
338f1e396bcSPaul Traina 	BKT *bp;
33958f0484fSRodney W. Grimes 
340f1e396bcSPaul Traina 	/* If under the max cached, always create a new page. */
34158f0484fSRodney W. Grimes 	if (mp->curcache < mp->maxcache)
34258f0484fSRodney W. Grimes 		goto new;
34358f0484fSRodney W. Grimes 
34458f0484fSRodney W. Grimes 	/*
345f1e396bcSPaul Traina 	 * If the cache is max'd out, walk the lru list for a buffer we
346f1e396bcSPaul Traina 	 * can flush.  If we find one, write it (if necessary) and take it
347f1e396bcSPaul Traina 	 * off any lists.  If we don't find anything we grow the cache anyway.
34858f0484fSRodney W. Grimes 	 * The cache never shrinks.
34958f0484fSRodney W. Grimes 	 */
350fabacd3aSPoul-Henning Kamp 	TAILQ_FOREACH(bp, &mp->lqh, q)
351f1e396bcSPaul Traina 		if (!(bp->flags & MPOOL_PINNED)) {
352f1e396bcSPaul Traina 			/* Flush if dirty. */
353f1e396bcSPaul Traina 			if (bp->flags & MPOOL_DIRTY &&
354f1e396bcSPaul Traina 			    mpool_write(mp, bp) == RET_ERROR)
35558f0484fSRodney W. Grimes 				return (NULL);
35658f0484fSRodney W. Grimes #ifdef STATISTICS
35758f0484fSRodney W. Grimes 			++mp->pageflush;
35858f0484fSRodney W. Grimes #endif
359f1e396bcSPaul Traina 			/* Remove from the hash and lru queues. */
360f1e396bcSPaul Traina 			head = &mp->hqh[HASHKEY(bp->pgno)];
361fabacd3aSPoul-Henning Kamp 			TAILQ_REMOVE(head, bp, hq);
362fabacd3aSPoul-Henning Kamp 			TAILQ_REMOVE(&mp->lqh, bp, q);
36358f0484fSRodney W. Grimes #ifdef DEBUG
364f1e396bcSPaul Traina 			{ void *spage;
365f1e396bcSPaul Traina 				spage = bp->page;
366f1e396bcSPaul Traina 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
367f1e396bcSPaul Traina 				bp->page = spage;
36858f0484fSRodney W. Grimes 			}
36958f0484fSRodney W. Grimes #endif
3709fc74a87SXin LI 			bp->flags = 0;
371f1e396bcSPaul Traina 			return (bp);
37258f0484fSRodney W. Grimes 		}
37358f0484fSRodney W. Grimes 
374c9f30aaaSXin LI new:	if ((bp = (BKT *)calloc(1, sizeof(BKT) + mp->pagesize)) == NULL)
37558f0484fSRodney W. Grimes 		return (NULL);
37658f0484fSRodney W. Grimes #ifdef STATISTICS
37758f0484fSRodney W. Grimes 	++mp->pagealloc;
37858f0484fSRodney W. Grimes #endif
379f1e396bcSPaul Traina 	bp->page = (char *)bp + sizeof(BKT);
3809fc74a87SXin LI 	bp->flags = 0;
38158f0484fSRodney W. Grimes 	++mp->curcache;
382f1e396bcSPaul Traina 	return (bp);
38358f0484fSRodney W. Grimes }
38458f0484fSRodney W. Grimes 
38558f0484fSRodney W. Grimes /*
386f1e396bcSPaul Traina  * mpool_write
387f1e396bcSPaul Traina  *	Write a page to disk.
38858f0484fSRodney W. Grimes  */
38958f0484fSRodney W. Grimes static int
mpool_write(MPOOL * mp,BKT * bp)3900ac22237SXin LI mpool_write(MPOOL *mp, BKT *bp)
39158f0484fSRodney W. Grimes {
39258f0484fSRodney W. Grimes 	off_t off;
39358f0484fSRodney W. Grimes 
39458f0484fSRodney W. Grimes #ifdef STATISTICS
39558f0484fSRodney W. Grimes 	++mp->pagewrite;
39658f0484fSRodney W. Grimes #endif
397f1e396bcSPaul Traina 
398f1e396bcSPaul Traina 	/* Run through the user's filter. */
399f1e396bcSPaul Traina 	if (mp->pgout)
400f1e396bcSPaul Traina 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
401f1e396bcSPaul Traina 
402f1e396bcSPaul Traina 	off = mp->pagesize * bp->pgno;
403e8ee08baSXin LI 	if (pwrite(mp->fd, bp->page, mp->pagesize, off) != (ssize_t)mp->pagesize)
40458f0484fSRodney W. Grimes 		return (RET_ERROR);
405f1e396bcSPaul Traina 
4069fc74a87SXin LI 	/*
4079fc74a87SXin LI 	 * Re-run through the input filter since this page may soon be
4089fc74a87SXin LI 	 * accessed via the cache, and whatever the user's output filter
4099fc74a87SXin LI 	 * did may screw things up if we don't let the input filter
4109fc74a87SXin LI 	 * restore the in-core copy.
4119fc74a87SXin LI 	 */
4129fc74a87SXin LI 	if (mp->pgin)
4139fc74a87SXin LI 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
4149fc74a87SXin LI 
415f1e396bcSPaul Traina 	bp->flags &= ~MPOOL_DIRTY;
41658f0484fSRodney W. Grimes 	return (RET_SUCCESS);
41758f0484fSRodney W. Grimes }
41858f0484fSRodney W. Grimes 
41958f0484fSRodney W. Grimes /*
420f1e396bcSPaul Traina  * mpool_look
421f1e396bcSPaul Traina  *	Lookup a page in the cache.
42258f0484fSRodney W. Grimes  */
42358f0484fSRodney W. Grimes static BKT *
mpool_look(MPOOL * mp,pgno_t pgno)4240ac22237SXin LI mpool_look(MPOOL *mp, pgno_t pgno)
42558f0484fSRodney W. Grimes {
426f1e396bcSPaul Traina 	struct _hqh *head;
427f1e396bcSPaul Traina 	BKT *bp;
42858f0484fSRodney W. Grimes 
429f1e396bcSPaul Traina 	head = &mp->hqh[HASHKEY(pgno)];
430fabacd3aSPoul-Henning Kamp 	TAILQ_FOREACH(bp, head, hq)
4319fc74a87SXin LI 		if ((bp->pgno == pgno) &&
4329fc74a87SXin LI 			((bp->flags & MPOOL_INUSE) == MPOOL_INUSE)) {
43358f0484fSRodney W. Grimes #ifdef STATISTICS
43458f0484fSRodney W. Grimes 			++mp->cachehit;
43558f0484fSRodney W. Grimes #endif
436f1e396bcSPaul Traina 			return (bp);
43758f0484fSRodney W. Grimes 		}
43858f0484fSRodney W. Grimes #ifdef STATISTICS
43958f0484fSRodney W. Grimes 	++mp->cachemiss;
44058f0484fSRodney W. Grimes #endif
44158f0484fSRodney W. Grimes 	return (NULL);
44258f0484fSRodney W. Grimes }
44358f0484fSRodney W. Grimes 
44458f0484fSRodney W. Grimes #ifdef STATISTICS
44558f0484fSRodney W. Grimes /*
446f1e396bcSPaul Traina  * mpool_stat
447f1e396bcSPaul Traina  *	Print out cache statistics.
44858f0484fSRodney W. Grimes  */
44958f0484fSRodney W. Grimes void
mpool_stat(MPOOL * mp)4500ac22237SXin LI mpool_stat(MPOOL *mp)
45158f0484fSRodney W. Grimes {
452f1e396bcSPaul Traina 	BKT *bp;
45358f0484fSRodney W. Grimes 	int cnt;
45458f0484fSRodney W. Grimes 	char *sep;
45558f0484fSRodney W. Grimes 
4567efabbb9SXin LI 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
45758f0484fSRodney W. Grimes 	(void)fprintf(stderr,
4587efabbb9SXin LI 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
45958f0484fSRodney W. Grimes 	    mp->pagesize, mp->curcache, mp->maxcache);
46058f0484fSRodney W. Grimes 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
46158f0484fSRodney W. Grimes 	    mp->pageput, mp->pageget, mp->pagenew);
46258f0484fSRodney W. Grimes 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
46358f0484fSRodney W. Grimes 	    mp->pagealloc, mp->pageflush);
46458f0484fSRodney W. Grimes 	if (mp->cachehit + mp->cachemiss)
46558f0484fSRodney W. Grimes 		(void)fprintf(stderr,
46658f0484fSRodney W. Grimes 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
46758f0484fSRodney W. Grimes 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
46858f0484fSRodney W. Grimes 		    * 100, mp->cachehit, mp->cachemiss);
46958f0484fSRodney W. Grimes 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
47058f0484fSRodney W. Grimes 	    mp->pageread, mp->pagewrite);
47158f0484fSRodney W. Grimes 
47258f0484fSRodney W. Grimes 	sep = "";
47358f0484fSRodney W. Grimes 	cnt = 0;
474fabacd3aSPoul-Henning Kamp 	TAILQ_FOREACH(bp, &mp->lqh, q) {
475f1e396bcSPaul Traina 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
476f1e396bcSPaul Traina 		if (bp->flags & MPOOL_DIRTY)
47758f0484fSRodney W. Grimes 			(void)fprintf(stderr, "d");
478f1e396bcSPaul Traina 		if (bp->flags & MPOOL_PINNED)
47958f0484fSRodney W. Grimes 			(void)fprintf(stderr, "P");
48058f0484fSRodney W. Grimes 		if (++cnt == 10) {
48158f0484fSRodney W. Grimes 			sep = "\n";
48258f0484fSRodney W. Grimes 			cnt = 0;
48358f0484fSRodney W. Grimes 		} else
48458f0484fSRodney W. Grimes 			sep = ", ";
48558f0484fSRodney W. Grimes 
48658f0484fSRodney W. Grimes 	}
48758f0484fSRodney W. Grimes 	(void)fprintf(stderr, "\n");
48858f0484fSRodney W. Grimes }
48958f0484fSRodney W. Grimes #endif
490