xref: /illumos-gate/usr/src/lib/krb5/plugins/kdb/db2/libdb2/mpool/mpool.c (revision bea83d026ee1bd1b2a2419e1d0232f107a5d7d9b)
1 #pragma ident	"%Z%%M%	%I%	%E% SMI"
2 
3 /*-
4  * Copyright (c) 1990, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the University of
18  *	California, Berkeley and its contributors.
19  * 4. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #if defined(LIBC_SCCS) && !defined(lint)
37 static char sccsid[] = "@(#)mpool.c	8.7 (Berkeley) 11/2/95";
38 #endif /* LIBC_SCCS and not lint */
39 
40 #include <sys/param.h>
41 #include <sys/stat.h>
42 
43 #include <errno.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 
49 #include "db-int.h"
50 #include "mpool.h"
51 
52 static BKT *mpool_bkt __P((MPOOL *));
53 static BKT *mpool_look __P((MPOOL *, db_pgno_t));
54 static int  mpool_write __P((MPOOL *, BKT *));
55 
56 /*
57  * mpool_open --
58  *	Initialize a memory pool.
59  */
60 MPOOL *
61 mpool_open(key, fd, pagesize, maxcache)
62 	void *key;
63 	int fd;
64 	db_pgno_t pagesize, maxcache;
65 {
66 	struct stat sb;
67 	MPOOL *mp;
68 	int entry;
69 
70 	/*
71 	 * Get information about the file.
72 	 *
73 	 * XXX
74 	 * We don't currently handle pipes, although we should.
75 	 */
76 	if (fstat(fd, &sb))
77 		return (NULL);
78 	if (!S_ISREG(sb.st_mode)) {
79 		errno = ESPIPE;
80 		return (NULL);
81 	}
82 
83 	/* Allocate and initialize the MPOOL cookie. */
84 	if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
85 		return (NULL);
86 	CIRCLEQ_INIT(&mp->lqh);
87 	for (entry = 0; entry < HASHSIZE; ++entry)
88 		CIRCLEQ_INIT(&mp->hqh[entry]);
89 	mp->maxcache = maxcache;
90 	mp->npages = sb.st_size / pagesize;
91 	mp->pagesize = pagesize;
92 	mp->fd = fd;
93 	return (mp);
94 }
95 
96 /*
97  * mpool_filter --
98  *	Initialize input/output filters.
99  */
100 void
101 mpool_filter(mp, pgin, pgout, pgcookie)
102 	MPOOL *mp;
103 	void (*pgin) __P((void *, db_pgno_t, void *));
104 	void (*pgout) __P((void *, db_pgno_t, void *));
105 	void *pgcookie;
106 {
107 	mp->pgin = pgin;
108 	mp->pgout = pgout;
109 	mp->pgcookie = pgcookie;
110 }
111 
112 /*
113  * mpool_new --
114  *	Get a new page of memory.
115  */
116 void *
117 mpool_new(mp, pgnoaddr, flags)
118 	MPOOL *mp;
119 	db_pgno_t *pgnoaddr;
120 	u_int flags;
121 {
122 	struct _hqh *head;
123 	BKT *bp;
124 
125 	if (mp->npages == MAX_PAGE_NUMBER) {
126 		(void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
127 		abort();
128 	}
129 #ifdef STATISTICS
130 	++mp->pagenew;
131 #endif
132 	/*
133 	 * Get a BKT from the cache.  Assign a new page number, attach
134 	 * it to the head of the hash chain, the tail of the lru chain,
135 	 * and return.
136 	 */
137 	if ((bp = mpool_bkt(mp)) == NULL)
138 		return (NULL);
139 	if (flags == MPOOL_PAGE_REQUEST) {
140 		mp->npages++;
141 		bp->pgno = *pgnoaddr;
142 	} else
143 		bp->pgno = *pgnoaddr = mp->npages++;
144 
145 	bp->flags = MPOOL_PINNED | MPOOL_INUSE;
146 
147 	head = &mp->hqh[HASHKEY(bp->pgno)];
148 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
149 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
150 	return (bp->page);
151 }
152 
153 int
154 mpool_delete(mp, page)
155 	MPOOL *mp;
156 	void *page;
157 {
158 	struct _hqh *head;
159 	BKT *bp;
160 
161 	bp = (BKT *)((char *)page - sizeof(BKT));
162 
163 #ifdef DEBUG
164 	if (!(bp->flags & MPOOL_PINNED)) {
165 		(void)fprintf(stderr,
166 		    "mpool_delete: page %d not pinned\n", bp->pgno);
167 		abort();
168 	}
169 #endif
170 
171 	/* Remove from the hash and lru queues. */
172 	head = &mp->hqh[HASHKEY(bp->pgno)];
173 	CIRCLEQ_REMOVE(head, bp, hq);
174 	CIRCLEQ_REMOVE(&mp->lqh, bp, q);
175 
176 	free(bp);
177 	return (RET_SUCCESS);
178 }
179 
180 /*
181  * mpool_get
182  *	Get a page.
183  */
184 void *
185 mpool_get(mp, pgno, flags)
186 	MPOOL *mp;
187 	db_pgno_t pgno;
188 	u_int flags;				/* XXX not used? */
189 {
190 	struct _hqh *head;
191 	BKT *bp;
192 	off_t off;
193 	int nr;
194 
195 #ifdef STATISTICS
196 	++mp->pageget;
197 #endif
198 
199 	/* Check for a page that is cached. */
200 	if ((bp = mpool_look(mp, pgno)) != NULL) {
201 #ifdef DEBUG
202 		if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
203 			(void)fprintf(stderr,
204 			    "mpool_get: page %d already pinned\n", bp->pgno);
205 			abort();
206 		}
207 #endif
208 		/*
209 		 * Move the page to the head of the hash chain and the tail
210 		 * of the lru chain.
211 		 */
212 		head = &mp->hqh[HASHKEY(bp->pgno)];
213 		CIRCLEQ_REMOVE(head, bp, hq);
214 		CIRCLEQ_INSERT_HEAD(head, bp, hq);
215 		CIRCLEQ_REMOVE(&mp->lqh, bp, q);
216 		CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
217 
218 		/* Return a pinned page. */
219 		bp->flags |= MPOOL_PINNED;
220 		return (bp->page);
221 	}
222 
223 	/* Get a page from the cache. */
224 	if ((bp = mpool_bkt(mp)) == NULL)
225 		return (NULL);
226 
227 	/* Read in the contents. */
228 #ifdef STATISTICS
229 	++mp->pageread;
230 #endif
231 	off = mp->pagesize * pgno;
232 	if (off / mp->pagesize != pgno) {
233 	    /* Run past the end of the file, or at least the part we
234 	       can address without large-file support?  */
235 	    errno = E2BIG;
236 	    return NULL;
237 	}
238 	if (lseek(mp->fd, off, SEEK_SET) != off)
239 		return (NULL);
240 
241 	if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
242 		if (nr > 0) {
243 			/* A partial read is definitely bad. */
244 			errno = EINVAL;
245 			return (NULL);
246 		} else {
247 			/*
248 			 * A zero-length reads, means you need to create a
249 			 * new page.
250 			 */
251 			memset(bp->page, 0, mp->pagesize);
252 		}
253 	}
254 
255 	/* Set the page number, pin the page. */
256 	bp->pgno = pgno;
257 	if (!(flags & MPOOL_IGNOREPIN))
258 		bp->flags = MPOOL_PINNED;
259 	bp->flags |= MPOOL_INUSE;
260 
261 	/*
262 	 * Add the page to the head of the hash chain and the tail
263 	 * of the lru chain.
264 	 */
265 	head = &mp->hqh[HASHKEY(bp->pgno)];
266 	CIRCLEQ_INSERT_HEAD(head, bp, hq);
267 	CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
268 
269 	/* Run through the user's filter. */
270 	if (mp->pgin != NULL)
271 		(mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
272 
273 	return (bp->page);
274 }
275 
276 /*
277  * mpool_put
278  *	Return a page.
279  */
280 int
281 mpool_put(mp, page, flags)
282 	MPOOL *mp;
283 	void *page;
284 	u_int flags;
285 {
286 	BKT *bp;
287 
288 #ifdef STATISTICS
289 	++mp->pageput;
290 #endif
291 	bp = (BKT *)((char *)page - sizeof(BKT));
292 #ifdef DEBUG
293 	if (!(bp->flags & MPOOL_PINNED)) {
294 		(void)fprintf(stderr,
295 		    "mpool_put: page %d not pinned\n", bp->pgno);
296 		abort();
297 	}
298 #endif
299 	bp->flags &= ~MPOOL_PINNED;
300 	if (flags & MPOOL_DIRTY)
301 		bp->flags |= flags & MPOOL_DIRTY;
302 	return (RET_SUCCESS);
303 }
304 
305 /*
306  * mpool_close
307  *	Close the buffer pool.
308  */
309 int
310 mpool_close(mp)
311 	MPOOL *mp;
312 {
313 	BKT *bp;
314 
315 	/* Free up any space allocated to the lru pages. */
316 	while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
317 		CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
318 		free(bp);
319 	}
320 
321 	/* Free the MPOOL cookie. */
322 	free(mp);
323 	return (RET_SUCCESS);
324 }
325 
326 /*
327  * mpool_sync
328  *	Sync the pool to disk.
329  */
330 int
331 mpool_sync(mp)
332 	MPOOL *mp;
333 {
334 	BKT *bp;
335 
336 	/* Walk the lru chain, flushing any dirty pages to disk. */
337 	for (bp = mp->lqh.cqh_first;
338 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
339 		if (bp->flags & MPOOL_DIRTY &&
340 		    mpool_write(mp, bp) == RET_ERROR)
341 			return (RET_ERROR);
342 
343 	/* Sync the file descriptor. */
344 	return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
345 }
346 
347 /*
348  * mpool_bkt
349  *	Get a page from the cache (or create one).
350  */
351 static BKT *
352 mpool_bkt(mp)
353 	MPOOL *mp;
354 {
355 	struct _hqh *head;
356 	BKT *bp;
357 
358 	/* If under the max cached, always create a new page. */
359 	if (mp->curcache < mp->maxcache)
360 		goto new;
361 
362 	/*
363 	 * If the cache is max'd out, walk the lru list for a buffer we
364 	 * can flush.  If we find one, write it (if necessary) and take it
365 	 * off any lists.  If we don't find anything we grow the cache anyway.
366 	 * The cache never shrinks.
367 	 */
368 	for (bp = mp->lqh.cqh_first;
369 	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
370 		if (!(bp->flags & MPOOL_PINNED)) {
371 			/* Flush if dirty. */
372 			if (bp->flags & MPOOL_DIRTY &&
373 			    mpool_write(mp, bp) == RET_ERROR)
374 				return (NULL);
375 #ifdef STATISTICS
376 			++mp->pageflush;
377 #endif
378 			/* Remove from the hash and lru queues. */
379 			head = &mp->hqh[HASHKEY(bp->pgno)];
380 			CIRCLEQ_REMOVE(head, bp, hq);
381 			CIRCLEQ_REMOVE(&mp->lqh, bp, q);
382 #ifdef DEBUG
383 			{ void *spage;
384 				spage = bp->page;
385 				memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
386 				bp->page = spage;
387 			}
388 #endif
389 			bp->flags = 0;
390 			return (bp);
391 		}
392 
393 new:	if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
394 		return (NULL);
395 #ifdef STATISTICS
396 	++mp->pagealloc;
397 #endif
398 #if defined(DEBUG) || defined(PURIFY)
399 	memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
400 #endif
401 	bp->page = (char *)bp + sizeof(BKT);
402 	bp->flags = 0;
403 	++mp->curcache;
404 	return (bp);
405 }
406 
407 /*
408  * mpool_write
409  *	Write a page to disk.
410  */
411 static int
412 mpool_write(mp, bp)
413 	MPOOL *mp;
414 	BKT *bp;
415 {
416 	off_t off;
417 
418 #ifdef STATISTICS
419 	++mp->pagewrite;
420 #endif
421 
422 	/* Run through the user's filter. */
423 	if (mp->pgout)
424 		(mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
425 
426 	off = mp->pagesize * bp->pgno;
427 	if (off / mp->pagesize != bp->pgno) {
428 	    /* Run past the end of the file, or at least the part we
429 	       can address without large-file support?  */
430 	    errno = E2BIG;
431 	    return RET_ERROR;
432 	}
433 	if (lseek(mp->fd, off, SEEK_SET) != off)
434 		return (RET_ERROR);
435 	if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
436 		return (RET_ERROR);
437 
438 	bp->flags &= ~MPOOL_DIRTY;
439 	return (RET_SUCCESS);
440 }
441 
442 /*
443  * mpool_look
444  *	Lookup a page in the cache.
445  */
446 static BKT *
447 mpool_look(mp, pgno)
448 	MPOOL *mp;
449 	db_pgno_t pgno;
450 {
451 	struct _hqh *head;
452 	BKT *bp;
453 
454 	head = &mp->hqh[HASHKEY(pgno)];
455 	for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
456 		if ((bp->pgno == pgno) &&
457 			(bp->flags & MPOOL_INUSE == MPOOL_INUSE)) {
458 #ifdef STATISTICS
459 			++mp->cachehit;
460 #endif
461 			return (bp);
462 		}
463 #ifdef STATISTICS
464 	++mp->cachemiss;
465 #endif
466 	return (NULL);
467 }
468 
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 *	Writes the pool's counters to stderr, followed by the page number
 *	of every buffer on the lru chain, ten per line, with a "d" suffix
 *	for dirty and a "P" suffix for pinned buffers.
 *
 *	Every counter is cast to unsigned long so that the argument type
 *	matches the %lu conversion regardless of how the field's typedef is
 *	defined on a given platform.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	/* Dump the lru chain, oldest buffer first. */
	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		/* Page numbers are never negative; print them unsigned. */
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
517