xref: /freebsd/crypto/krb5/src/plugins/kdb/db2/libdb2/mpool/mpool.c (revision cb2887746f8b9dd4ad6b1e757cdc053a08b25a2e)
1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #if defined(LIBC_SCCS) && !defined(lint)
35 static char sccsid[] = "@(#)mpool.c	8.7 (Berkeley) 11/2/95";
36 #endif /* LIBC_SCCS and not lint */
37 
38 #include <sys/param.h>
39 #include <sys/stat.h>
40 
41 #include <errno.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46 
47 #include "db-int.h"
48 #include "mpool.h"
49 
50 static BKT *mpool_bkt __P((MPOOL *));
51 static BKT *mpool_look __P((MPOOL *, db_pgno_t));
52 static int  mpool_write __P((MPOOL *, BKT *));
53 
54 /*
55  * mpool_open --
56  *	Initialize a memory pool.
57  */
58 MPOOL *
59 mpool_open(void *key, int fd, db_pgno_t pagesize, db_pgno_t maxcache)
60 {
61 	struct stat sb;
62 	MPOOL *mp;
63 	int entry;
64 
65 	/*
66 	 * Get information about the file.
67 	 *
68 	 * XXX
69 	 * We don't currently handle pipes, although we should.
70 	 */
71 	if (fstat(fd, &sb))
72 		return (NULL);
73 	if (!S_ISREG(sb.st_mode)) {
74 		errno = ESPIPE;
75 		return (NULL);
76 	}
77 
78 	/* Allocate and initialize the MPOOL cookie. */
79 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
80 		return (NULL);
81 	TAILQ_INIT(&mp->lqh);
82 	for (entry = 0; entry < HASHSIZE; ++entry)
83 		TAILQ_INIT(&mp->hqh[entry]);
84 	mp->maxcache = maxcache;
85 	mp->npages = sb.st_size / pagesize;
86 	mp->pagesize = pagesize;
87 	mp->fd = fd;
88 	return (mp);
89 }
90 
91 /*
92  * mpool_filter --
93  *	Initialize input/output filters.
94  */
95 void
96 mpool_filter(MPOOL *mp, void (*pgin) __P((void *, db_pgno_t, void *)),
97 	     void (*pgout) __P((void *, db_pgno_t, void *)), void *pgcookie)
98 {
99 	mp->pgin = pgin;
100 	mp->pgout = pgout;
101 	mp->pgcookie = pgcookie;
102 }
103 
104 /*
105  * mpool_new --
106  *	Get a new page of memory.
107  */
108 void *
109 mpool_new(MPOOL *mp, db_pgno_t *pgnoaddr, u_int flags)
110 {
111 	struct _hqh *head;
112 	BKT *bp;
113 
114 	if (mp->npages == MAX_PAGE_NUMBER) {
115 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
116 		abort();
117 	}
118 #ifdef STATISTICS
119 	++mp->pagenew;
120 #endif
121 	/*
122 	 * Get a BKT from the cache.  Assign a new page number, attach
123 	 * it to the head of the hash chain, the tail of the lru chain,
124 	 * and return.
125 	 */
126 	if ((bp = mpool_bkt(mp)) == NULL)
127 		return (NULL);
128 	if (flags == MPOOL_PAGE_REQUEST) {
129 		mp->npages++;
130 		bp->pgno = *pgnoaddr;
131 	} else
132 		bp->pgno = *pgnoaddr = mp->npages++;
133 
134 	bp->flags = MPOOL_PINNED | MPOOL_INUSE;
135 
136 	head = &mp->hqh[HASHKEY(bp->pgno)];
137 	TAILQ_INSERT_HEAD(head, bp, hq);
138 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
139 	return (bp->page);
140 }
141 
142 int
143 mpool_delete(MPOOL *mp, void *page)
144 {
145 	struct _hqh *head;
146 	BKT *bp;
147 
148 	bp = (void *)((char *)page - sizeof(BKT));
149 
150 #ifdef DEBUG
151 	if (!(bp->flags & MPOOL_PINNED)) {
152 		(void)fprintf(stderr,
153 		    "mpool_delete: page %d not pinned\n", bp->pgno);
154 		abort();
155 	}
156 #endif
157 
158 	/* Remove from the hash and lru queues. */
159 	head = &mp->hqh[HASHKEY(bp->pgno)];
160 	TAILQ_REMOVE(head, bp, hq);
161 	TAILQ_REMOVE(&mp->lqh, bp, q);
162 
163 	free(bp);
164 	return (RET_SUCCESS);
165 }
166 
167 /*
168  * mpool_get
169  *	Get a page.
170  */
171 void *
172 mpool_get(MPOOL *mp, db_pgno_t pgno, u_int flags)
173 {
174 	struct _hqh *head;
175 	BKT *bp;
176 	off_t off;
177 	int nr;
178 
179 #ifdef STATISTICS
180 	++mp->pageget;
181 #endif
182 
183 	/* Check for a page that is cached. */
184 	if ((bp = mpool_look(mp, pgno)) != NULL) {
185 #ifdef DEBUG
186 		if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
187 			(void)fprintf(stderr,
188 			    "mpool_get: page %d already pinned\n", bp->pgno);
189 			abort();
190 		}
191 #endif
192 		/*
193 		 * Move the page to the head of the hash chain and the tail
194 		 * of the lru chain.
195 		 */
196 		head = &mp->hqh[HASHKEY(bp->pgno)];
197 		TAILQ_REMOVE(head, bp, hq);
198 		TAILQ_INSERT_HEAD(head, bp, hq);
199 		TAILQ_REMOVE(&mp->lqh, bp, q);
200 		TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
201 
202 		/* Return a pinned page. */
203 		if (!(flags & MPOOL_IGNOREPIN))
204 			bp->flags |= MPOOL_PINNED;
205 		return (bp->page);
206 	}
207 
208 	/* Get a page from the cache. */
209 	if ((bp = mpool_bkt(mp)) == NULL)
210 		return (NULL);
211 
212 	/* Read in the contents. */
213 #ifdef STATISTICS
214 	++mp->pageread;
215 #endif
216 	off = mp->pagesize * pgno;
217 	if (off / mp->pagesize != pgno) {
218 	    /* Run past the end of the file, or at least the part we
219 	       can address without large-file support?  */
220 	    errno = E2BIG;
221 	    return NULL;
222 	}
223 	if (lseek(mp->fd, off, SEEK_SET) != off)
224 		return (NULL);
225 
226 	if ((nr = read(mp->fd, bp->page, mp->pagesize)) !=
227 	    (ssize_t)mp->pagesize) {
228 		if (nr > 0) {
229 			/* A partial read is definitely bad. */
230 			errno = EINVAL;
231 			return (NULL);
232 		} else {
233 			/*
234 			 * A zero-length reads, means you need to create a
235 			 * new page.
236 			 */
237 			memset(bp->page, 0, mp->pagesize);
238 		}
239 	}
240 
241 	/* Set the page number, pin the page. */
242 	bp->pgno = pgno;
243 	if (!(flags & MPOOL_IGNOREPIN))
244 		bp->flags = MPOOL_PINNED;
245 	bp->flags |= MPOOL_INUSE;
246 
247 	/*
248 	 * Add the page to the head of the hash chain and the tail
249 	 * of the lru chain.
250 	 */
251 	head = &mp->hqh[HASHKEY(bp->pgno)];
252 	TAILQ_INSERT_HEAD(head, bp, hq);
253 	TAILQ_INSERT_TAIL(&mp->lqh, bp, q);
254 
255 	/* Run through the user's filter. */
256 	if (mp->pgin != NULL)
257 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
258 
259 	return (bp->page);
260 }
261 
262 /*
263  * mpool_put
264  *	Return a page.
265  */
266 int
267 mpool_put(MPOOL *mp, void *page, u_int flags)
268 {
269 	BKT *bp;
270 
271 #ifdef STATISTICS
272 	++mp->pageput;
273 #endif
274 	bp = (void *)((char *)page - sizeof(BKT));
275 #ifdef DEBUG
276 	if (!(bp->flags & MPOOL_PINNED)) {
277 		(void)fprintf(stderr,
278 		    "mpool_put: page %d not pinned\n", bp->pgno);
279 		abort();
280 	}
281 #endif
282 	bp->flags &= ~MPOOL_PINNED;
283 	if (flags & MPOOL_DIRTY)
284 		bp->flags |= flags & MPOOL_DIRTY;
285 	return (RET_SUCCESS);
286 }
287 
288 /*
289  * mpool_close
290  *	Close the buffer pool.
291  */
292 int
293 mpool_close(MPOOL *mp)
294 {
295 	BKT *bp;
296 
297 	/* Free up any space allocated to the lru pages. */
298 	while ((bp = mp->lqh.tqh_first) != NULL) {
299 		TAILQ_REMOVE(&mp->lqh, mp->lqh.tqh_first, q);
300 		free(bp);
301 	}
302 
303 	/* Free the MPOOL cookie. */
304 	free(mp);
305 	return (RET_SUCCESS);
306 }
307 
308 /*
309  * mpool_sync
310  *	Sync the pool to disk.
311  */
312 int
313 mpool_sync(MPOOL *mp)
314 {
315 	BKT *bp;
316 
317 	/* Walk the lru chain, flushing any dirty pages to disk. */
318 	for (bp = mp->lqh.tqh_first; bp != NULL; bp = bp->q.tqe_next)
319 		if (bp->flags & MPOOL_DIRTY &&
320 		    mpool_write(mp, bp) == RET_ERROR)
321 			return (RET_ERROR);
322 
323 	/* Sync the file descriptor. */
324 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
325 }
326 
327 /*
328  * mpool_bkt
329  *	Get a page from the cache (or create one).
330  */
331 static BKT *
332 mpool_bkt(MPOOL *mp)
333 {
334 	struct _hqh *head;
335 	BKT *bp;
336 
337 	/* If under the max cached, always create a new page. */
338 	if (mp->curcache < mp->maxcache)
339 		goto new;
340 
341 	/*
342 	 * If the cache is max'd out, walk the lru list for a buffer we
343 	 * can flush.  If we find one, write it (if necessary) and take it
344 	 * off any lists.  If we don't find anything we grow the cache anyway.
345 	 * The cache never shrinks.
346 	 */
347 	for (bp = mp->lqh.tqh_first; bp != NULL; bp = bp->q.tqe_next)
348 		if (!(bp->flags & MPOOL_PINNED)) {
349 			/* Flush if dirty. */
350 			if (bp->flags & MPOOL_DIRTY &&
351 			    mpool_write(mp, bp) == RET_ERROR)
352 				return (NULL);
353 #ifdef STATISTICS
354 			++mp->pageflush;
355 #endif
356 			/* Remove from the hash and lru queues. */
357 			head = &mp->hqh[HASHKEY(bp->pgno)];
358 			TAILQ_REMOVE(head, bp, hq);
359 			TAILQ_REMOVE(&mp->lqh, bp, q);
360 #if defined(DEBUG) && !defined(DEBUG_IDX0SPLIT)
361 			{ void *spage;
362 				spage = bp->page;
363 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
364 				bp->page = spage;
365 			}
366 #endif
367 			bp->flags = 0;
368 			return (bp);
369 		}
370 
371 new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
372 		return (NULL);
373 #ifdef STATISTICS
374 	++mp->pagealloc;
375 #endif
376 #if defined(DEBUG) || defined(PURIFY) || 1
377 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
378 #endif
379 	bp->page = (char *)bp + sizeof(BKT);
380 	bp->flags = 0;
381 	++mp->curcache;
382 	return (bp);
383 }
384 
385 /*
386  * mpool_write
387  *	Write a page to disk.
388  */
389 static int
390 mpool_write(MPOOL *mp, BKT *bp)
391 {
392 	off_t off;
393 
394 #ifdef STATISTICS
395 	++mp->pagewrite;
396 #endif
397 
398 	/* Run through the user's filter. */
399 	if (mp->pgout)
400 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
401 
402 	off = mp->pagesize * bp->pgno;
403 	if (off / mp->pagesize != bp->pgno) {
404 	    /* Run past the end of the file, or at least the part we
405 	       can address without large-file support?  */
406 	    errno = E2BIG;
407 	    return RET_ERROR;
408 	}
409 	if (lseek(mp->fd, off, SEEK_SET) != off)
410 		return (RET_ERROR);
411 	if (write(mp->fd, bp->page, mp->pagesize) !=
412 	    (ssize_t)mp->pagesize)
413 		return (RET_ERROR);
414 
415 	/*
416 	 * Re-run through the input filter since this page may soon be
417 	 * accessed via the cache, and whatever the user's output filter
418 	 * did may screw things up if we don't let the input filter
419 	 * restore the in-core copy.
420 	 */
421 	if (mp->pgin)
422 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
423 	bp->flags &= ~MPOOL_DIRTY;
424 	return (RET_SUCCESS);
425 }
426 
427 /*
428  * mpool_look
429  *	Lookup a page in the cache.
430  */
431 static BKT *
432 mpool_look(MPOOL *mp, db_pgno_t pgno)
433 {
434 	struct _hqh *head;
435 	BKT *bp;
436 
437 	head = &mp->hqh[HASHKEY(pgno)];
438 	for (bp = head->tqh_first; bp != NULL; bp = bp->hq.tqe_next)
439 		if ((bp->pgno == pgno) && (bp->flags & MPOOL_INUSE)) {
440 #ifdef STATISTICS
441 			++mp->cachehit;
442 #endif
443 			return (bp);
444 		}
445 #ifdef STATISTICS
446 	++mp->cachemiss;
447 #endif
448 	return (NULL);
449 }
450 
451 #ifdef STATISTICS
452 /*
453  * mpool_stat
454  *	Print out cache statistics.
455  */
456 void
457 mpool_stat(MPOOL *mp)
458 {
459 	BKT *bp;
460 	int cnt;
461 	char *sep;
462 
463 	(void)fprintf(stderr, "%lu pages in the file\n", mp->npages);
464 	(void)fprintf(stderr,
465 	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
466 	    mp->pagesize, mp->curcache, mp->maxcache);
467 	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
468 	    mp->pageput, mp->pageget, mp->pagenew);
469 	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
470 	    mp->pagealloc, mp->pageflush);
471 	if (mp->cachehit + mp->cachemiss)
472 		(void)fprintf(stderr,
473 		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
474 		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
475 		    * 100, mp->cachehit, mp->cachemiss);
476 	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
477 	    mp->pageread, mp->pagewrite);
478 
479 	sep = "";
480 	cnt = 0;
481 	for (bp = mp->lqh.tqh_first; bp != NULL; bp = bp->q.tqe_next) {
482 		(void)fprintf(stderr, "%s%d", sep, bp->pgno);
483 		if (bp->flags & MPOOL_DIRTY)
484 			(void)fprintf(stderr, "d");
485 		if (bp->flags & MPOOL_PINNED)
486 			(void)fprintf(stderr, "P");
487 		if (++cnt == 10) {
488 			sep = "\n";
489 			cnt = 0;
490 		} else
491 			sep = ", ";
492 
493 	}
494 	(void)fprintf(stderr, "\n");
495 }
496 #else
497 void
498 mpool_stat(MPOOL *mp)
499 {
500 }
501 #endif
502