xref: /freebsd/lib/libc/db/mpool/mpool.c (revision afe61c15161c324a7af299a9b8457aba5afc92db)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #if defined(LIBC_SCCS) && !defined(lint)
35 static char sccsid[] = "@(#)mpool.c	8.2 (Berkeley) 2/21/94";
36 #endif /* LIBC_SCCS and not lint */
37 
38 #include <sys/param.h>
39 #include <sys/stat.h>
40 
41 #include <errno.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 
47 #include <db.h>
48 #define	__MPOOLINTERFACE_PRIVATE
49 #include "mpool.h"
50 
51 static BKT *mpool_bkt __P((MPOOL *));
52 static BKT *mpool_look __P((MPOOL *, pgno_t));
53 static int  mpool_write __P((MPOOL *, BKT *));
54 #ifdef DEBUG
55 static void __mpoolerr __P((const char *fmt, ...));
56 #endif
57 
58 /*
59  * MPOOL_OPEN -- initialize a memory pool.
60  *
61  * Parameters:
62  *	key:		Shared buffer key.
63  *	fd:		File descriptor.
64  *	pagesize:	File page size.
65  *	maxcache:	Max number of cached pages.
66  *
67  * Returns:
68  *	MPOOL pointer, NULL on error.
69  */
70 MPOOL *
71 mpool_open(key, fd, pagesize, maxcache)
72 	DBT *key;
73 	int fd;
74 	pgno_t pagesize, maxcache;
75 {
76 	struct stat sb;
77 	MPOOL *mp;
78 	int entry;
79 
80 	if (fstat(fd, &sb))
81 		return (NULL);
82 	/* XXX
83 	 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so
84 	 * that stat(2) returns true for ISSOCK on pipes.  Until then, this is
85 	 * fairly close.
86 	 */
87 	if (!S_ISREG(sb.st_mode)) {
88 		errno = ESPIPE;
89 		return (NULL);
90 	}
91 
92 	if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL)
93 		return (NULL);
94 	mp->free.cnext = mp->free.cprev = (BKT *)&mp->free;
95 	mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru;
96 	for (entry = 0; entry < HASHSIZE; ++entry)
97 		mp->hashtable[entry].hnext = mp->hashtable[entry].hprev =
98 		    mp->hashtable[entry].cnext = mp->hashtable[entry].cprev =
99 		    (BKT *)&mp->hashtable[entry];
100 	mp->curcache = 0;
101 	mp->maxcache = maxcache;
102 	mp->pagesize = pagesize;
103 	mp->npages = sb.st_size / pagesize;
104 	mp->fd = fd;
105 	mp->pgcookie = NULL;
106 	mp->pgin = mp->pgout = NULL;
107 
108 #ifdef STATISTICS
109 	mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush =
110 	    mp->pageget = mp->pagenew = mp->pageput = mp->pageread =
111 	    mp->pagewrite = 0;
112 #endif
113 	return (mp);
114 }
115 
116 /*
117  * MPOOL_FILTER -- initialize input/output filters.
118  *
119  * Parameters:
120  *	pgin:		Page in conversion routine.
121  *	pgout:		Page out conversion routine.
122  *	pgcookie:	Cookie for page in/out routines.
123  */
124 void
125 mpool_filter(mp, pgin, pgout, pgcookie)
126 	MPOOL *mp;
127 	void (*pgin) __P((void *, pgno_t, void *));
128 	void (*pgout) __P((void *, pgno_t, void *));
129 	void *pgcookie;
130 {
131 	mp->pgin = pgin;
132 	mp->pgout = pgout;
133 	mp->pgcookie = pgcookie;
134 }
135 
136 /*
137  * MPOOL_NEW -- get a new page
138  *
139  * Parameters:
140  *	mp:		mpool cookie
141  *	pgnoadddr:	place to store new page number
142  * Returns:
143  *	RET_ERROR, RET_SUCCESS
144  */
145 void *
146 mpool_new(mp, pgnoaddr)
147 	MPOOL *mp;
148 	pgno_t *pgnoaddr;
149 {
150 	BKT *b;
151 	BKTHDR *hp;
152 
153 #ifdef STATISTICS
154 	++mp->pagenew;
155 #endif
156 	/*
157 	 * Get a BKT from the cache.  Assign a new page number, attach it to
158 	 * the hash and lru chains and return.
159 	 */
160 	if ((b = mpool_bkt(mp)) == NULL)
161 		return (NULL);
162 	*pgnoaddr = b->pgno = mp->npages++;
163 	b->flags = MPOOL_PINNED;
164 	inshash(b, b->pgno);
165 	inschain(b, &mp->lru);
166 	return (b->page);
167 }
168 
169 /*
170  * MPOOL_GET -- get a page from the pool
171  *
172  * Parameters:
173  *	mp:	mpool cookie
174  *	pgno:	page number
175  *	flags:	not used
176  *
177  * Returns:
178  *	RET_ERROR, RET_SUCCESS
179  */
180 void *
181 mpool_get(mp, pgno, flags)
182 	MPOOL *mp;
183 	pgno_t pgno;
184 	u_int flags;		/* XXX not used? */
185 {
186 	BKT *b;
187 	BKTHDR *hp;
188 	off_t off;
189 	int nr;
190 
191 	/*
192 	 * If asking for a specific page that is already in the cache, find
193 	 * it and return it.
194 	 */
195 	if (b = mpool_look(mp, pgno)) {
196 #ifdef STATISTICS
197 		++mp->pageget;
198 #endif
199 #ifdef DEBUG
200 		if (b->flags & MPOOL_PINNED)
201 			__mpoolerr("mpool_get: page %d already pinned",
202 			    b->pgno);
203 #endif
204 		rmchain(b);
205 		inschain(b, &mp->lru);
206 		b->flags |= MPOOL_PINNED;
207 		return (b->page);
208 	}
209 
210 	/* Not allowed to retrieve a non-existent page. */
211 	if (pgno >= mp->npages) {
212 		errno = EINVAL;
213 		return (NULL);
214 	}
215 
216 	/* Get a page from the cache. */
217 	if ((b = mpool_bkt(mp)) == NULL)
218 		return (NULL);
219 	b->pgno = pgno;
220 	b->flags = MPOOL_PINNED;
221 
222 #ifdef STATISTICS
223 	++mp->pageread;
224 #endif
225 	/* Read in the contents. */
226 	off = mp->pagesize * pgno;
227 	if (lseek(mp->fd, off, SEEK_SET) != off)
228 		return (NULL);
229 	if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) {
230 		if (nr >= 0)
231 			errno = EFTYPE;
232 		return (NULL);
233 	}
234 	if (mp->pgin)
235 		(mp->pgin)(mp->pgcookie, b->pgno, b->page);
236 
237 	inshash(b, b->pgno);
238 	inschain(b, &mp->lru);
239 #ifdef STATISTICS
240 	++mp->pageget;
241 #endif
242 	return (b->page);
243 }
244 
245 /*
246  * MPOOL_PUT -- return a page to the pool
247  *
248  * Parameters:
249  *	mp:	mpool cookie
250  *	page:	page pointer
251  *	pgno:	page number
252  *
253  * Returns:
254  *	RET_ERROR, RET_SUCCESS
255  */
256 int
257 mpool_put(mp, page, flags)
258 	MPOOL *mp;
259 	void *page;
260 	u_int flags;
261 {
262 	BKT *baddr;
263 #ifdef DEBUG
264 	BKT *b;
265 #endif
266 
267 #ifdef STATISTICS
268 	++mp->pageput;
269 #endif
270 	baddr = (BKT *)((char *)page - sizeof(BKT));
271 #ifdef DEBUG
272 	if (!(baddr->flags & MPOOL_PINNED))
273 		__mpoolerr("mpool_put: page %d not pinned", b->pgno);
274 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
275 		if (b == (BKT *)&mp->lru)
276 			__mpoolerr("mpool_put: %0x: bad address", baddr);
277 		if (b == baddr)
278 			break;
279 	}
280 #endif
281 	baddr->flags &= ~MPOOL_PINNED;
282 	baddr->flags |= flags & MPOOL_DIRTY;
283 	return (RET_SUCCESS);
284 }
285 
286 /*
287  * MPOOL_CLOSE -- close the buffer pool
288  *
289  * Parameters:
290  *	mp:	mpool cookie
291  *
292  * Returns:
293  *	RET_ERROR, RET_SUCCESS
294  */
295 int
296 mpool_close(mp)
297 	MPOOL *mp;
298 {
299 	BKT *b, *next;
300 
301 	/* Free up any space allocated to the lru pages. */
302 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) {
303 		next = b->cprev;
304 		free(b);
305 	}
306 	free(mp);
307 	return (RET_SUCCESS);
308 }
309 
310 /*
311  * MPOOL_SYNC -- sync the file to disk.
312  *
313  * Parameters:
314  *	mp:	mpool cookie
315  *
316  * Returns:
317  *	RET_ERROR, RET_SUCCESS
318  */
319 int
320 mpool_sync(mp)
321 	MPOOL *mp;
322 {
323 	BKT *b;
324 
325 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
326 		if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR)
327 			return (RET_ERROR);
328 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
329 }
330 
331 /*
332  * MPOOL_BKT -- get/create a BKT from the cache
333  *
334  * Parameters:
335  *	mp:	mpool cookie
336  *
337  * Returns:
338  *	NULL on failure and a pointer to the BKT on success
339  */
340 static BKT *
341 mpool_bkt(mp)
342 	MPOOL *mp;
343 {
344 	BKT *b;
345 
346 	if (mp->curcache < mp->maxcache)
347 		goto new;
348 
349 	/*
350 	 * If the cache is maxxed out, search the lru list for a buffer we
351 	 * can flush.  If we find one, write it if necessary and take it off
352 	 * any lists.  If we don't find anything we grow the cache anyway.
353 	 * The cache never shrinks.
354 	 */
355 	for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev)
356 		if (!(b->flags & MPOOL_PINNED)) {
357 			if (b->flags & MPOOL_DIRTY &&
358 			    mpool_write(mp, b) == RET_ERROR)
359 				return (NULL);
360 			rmhash(b);
361 			rmchain(b);
362 #ifdef STATISTICS
363 			++mp->pageflush;
364 #endif
365 #ifdef DEBUG
366 			{
367 				void *spage;
368 				spage = b->page;
369 				memset(b, 0xff, sizeof(BKT) + mp->pagesize);
370 				b->page = spage;
371 			}
372 #endif
373 			return (b);
374 		}
375 
376 new:	if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
377 		return (NULL);
378 #ifdef STATISTICS
379 	++mp->pagealloc;
380 #endif
381 #ifdef DEBUG
382 	memset(b, 0xff, sizeof(BKT) + mp->pagesize);
383 #endif
384 	b->page = (char *)b + sizeof(BKT);
385 	++mp->curcache;
386 	return (b);
387 }
388 
389 /*
390  * MPOOL_WRITE -- sync a page to disk
391  *
392  * Parameters:
393  *	mp:	mpool cookie
394  *
395  * Returns:
396  *	RET_ERROR, RET_SUCCESS
397  */
398 static int
399 mpool_write(mp, b)
400 	MPOOL *mp;
401 	BKT *b;
402 {
403 	off_t off;
404 
405 	if (mp->pgout)
406 		(mp->pgout)(mp->pgcookie, b->pgno, b->page);
407 
408 #ifdef STATISTICS
409 	++mp->pagewrite;
410 #endif
411 	off = mp->pagesize * b->pgno;
412 	if (lseek(mp->fd, off, SEEK_SET) != off)
413 		return (RET_ERROR);
414 	if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize)
415 		return (RET_ERROR);
416 	b->flags &= ~MPOOL_DIRTY;
417 	return (RET_SUCCESS);
418 }
419 
420 /*
421  * MPOOL_LOOK -- lookup a page
422  *
423  * Parameters:
424  *	mp:	mpool cookie
425  *	pgno:	page number
426  *
427  * Returns:
428  *	NULL on failure and a pointer to the BKT on success
429  */
430 static BKT *
431 mpool_look(mp, pgno)
432 	MPOOL *mp;
433 	pgno_t pgno;
434 {
435 	register BKT *b;
436 	register BKTHDR *tb;
437 
438 	/* XXX
439 	 * If find the buffer, put it first on the hash chain so can
440 	 * find it again quickly.
441 	 */
442 	tb = &mp->hashtable[HASHKEY(pgno)];
443 	for (b = tb->hnext; b != (BKT *)tb; b = b->hnext)
444 		if (b->pgno == pgno) {
445 #ifdef STATISTICS
446 			++mp->cachehit;
447 #endif
448 			return (b);
449 		}
450 #ifdef STATISTICS
451 	++mp->cachemiss;
452 #endif
453 	return (NULL);
454 }
455 
456 #ifdef STATISTICS
457 /*
458  * MPOOL_STAT -- cache statistics
459  *
460  * Parameters:
461  *	mp:	mpool cookie
462  */
463 void
464 mpool_stat(mp)
465 	MPOOL *mp;
466 {
467 	BKT *b;
468 	int cnt;
469 	char *sep;
470 
471 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
472 	(void)fprintf(stderr,
473 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
474 	    mp->pagesize, mp->curcache, mp->maxcache);
475 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
476 	    mp->pageput, mp->pageget, mp->pagenew);
477 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
478 	    mp->pagealloc, mp->pageflush);
479 	if (mp->cachehit + mp->cachemiss)
480 		(void)fprintf(stderr,
481 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
482 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
483 		    * 100, mp->cachehit, mp->cachemiss);
484 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
485 	    mp->pageread, mp->pagewrite);
486 
487 	sep = "";
488 	cnt = 0;
489 	for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) {
490 		(void)fprintf(stderr, "%s%d", sep, b->pgno);
491 		if (b->flags & MPOOL_DIRTY)
492 			(void)fprintf(stderr, "d");
493 		if (b->flags & MPOOL_PINNED)
494 			(void)fprintf(stderr, "P");
495 		if (++cnt == 10) {
496 			sep = "\n";
497 			cnt = 0;
498 		} else
499 			sep = ", ";
500 
501 	}
502 	(void)fprintf(stderr, "\n");
503 }
504 #endif
505 
506 #ifdef DEBUG
507 #if __STDC__
508 #include <stdarg.h>
509 #else
510 #include <varargs.h>
511 #endif
512 
/*
 * __MPOOLERR -- report an internal consistency failure and abort.
 *
 * Parameters:
 *	fmt:	printf-style format, followed by its arguments
 *
 * The formatted message and a newline go to stderr, then abort() is
 * called; this routine does not return.
 */
static void
#if __STDC__
__mpoolerr(const char *fmt, ...)
#else
__mpoolerr(fmt, va_alist)
	char *fmt;
	va_dcl
#endif
{
	va_list args;

#if __STDC__
	va_start(args, fmt);
#else
	va_start(args);
#endif
	(void)vfprintf(stderr, fmt, args);
	va_end(args);

	(void)fputs("\n", stderr);
	abort();
	/* NOTREACHED */
}
534 #endif
535