1 /*-
2 * Copyright (c) 1990, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #if defined(LIBC_SCCS) && !defined(lint)
35 static char sccsid[] = "@(#)mpool.c 8.7 (Berkeley) 11/2/95";
36 #endif /* LIBC_SCCS and not lint */
37
38 #include <sys/param.h>
39 #include <sys/stat.h>
40
41 #include <errno.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46
47 #include "db-int.h"
48 #include "mpool.h"
49
/* Forward declarations for the file-local helpers defined further down. */
static BKT *mpool_bkt __P((MPOOL *));		/* obtain a free buffer */
static BKT *mpool_look __P((MPOOL *, db_pgno_t));	/* hash lookup by page number */
static int mpool_write __P((MPOOL *, BKT *));	/* write one buffer to the file */
53
54 /*
55 * mpool_open --
56 * Initialize a memory pool.
57 */
58 MPOOL *
mpool_open(key,fd,pagesize,maxcache)59 mpool_open(key, fd, pagesize, maxcache)
60 void *key;
61 int fd;
62 db_pgno_t pagesize, maxcache;
63 {
64 struct stat sb;
65 MPOOL *mp;
66 int entry;
67
68 /*
69 * Get information about the file.
70 *
71 * XXX
72 * We don't currently handle pipes, although we should.
73 */
74 if (fstat(fd, &sb))
75 return (NULL);
76 if (!S_ISREG(sb.st_mode)) {
77 errno = ESPIPE;
78 return (NULL);
79 }
80
81 /* Allocate and initialize the MPOOL cookie. */
82 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL)
83 return (NULL);
84 CIRCLEQ_INIT(&mp->lqh);
85 for (entry = 0; entry < HASHSIZE; ++entry)
86 CIRCLEQ_INIT(&mp->hqh[entry]);
87 mp->maxcache = maxcache;
88 mp->npages = sb.st_size / pagesize;
89 mp->pagesize = pagesize;
90 mp->fd = fd;
91 return (mp);
92 }
93
94 /*
95 * mpool_filter --
96 * Initialize input/output filters.
97 */
98 void
mpool_filter(mp,pgin,pgout,pgcookie)99 mpool_filter(mp, pgin, pgout, pgcookie)
100 MPOOL *mp;
101 void (*pgin) __P((void *, db_pgno_t, void *));
102 void (*pgout) __P((void *, db_pgno_t, void *));
103 void *pgcookie;
104 {
105 mp->pgin = pgin;
106 mp->pgout = pgout;
107 mp->pgcookie = pgcookie;
108 }
109
110 /*
111 * mpool_new --
112 * Get a new page of memory.
113 */
114 void *
mpool_new(mp,pgnoaddr,flags)115 mpool_new(mp, pgnoaddr, flags)
116 MPOOL *mp;
117 db_pgno_t *pgnoaddr;
118 u_int flags;
119 {
120 struct _hqh *head;
121 BKT *bp;
122
123 if (mp->npages == MAX_PAGE_NUMBER) {
124 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n");
125 abort();
126 }
127 #ifdef STATISTICS
128 ++mp->pagenew;
129 #endif
130 /*
131 * Get a BKT from the cache. Assign a new page number, attach
132 * it to the head of the hash chain, the tail of the lru chain,
133 * and return.
134 */
135 if ((bp = mpool_bkt(mp)) == NULL)
136 return (NULL);
137 if (flags == MPOOL_PAGE_REQUEST) {
138 mp->npages++;
139 bp->pgno = *pgnoaddr;
140 } else
141 bp->pgno = *pgnoaddr = mp->npages++;
142
143 bp->flags = MPOOL_PINNED | MPOOL_INUSE;
144
145 head = &mp->hqh[HASHKEY(bp->pgno)];
146 CIRCLEQ_INSERT_HEAD(head, bp, hq);
147 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
148 return (bp->page);
149 }
150
151 int
mpool_delete(mp,page)152 mpool_delete(mp, page)
153 MPOOL *mp;
154 void *page;
155 {
156 struct _hqh *head;
157 BKT *bp;
158
159 bp = (BKT *)((char *)page - sizeof(BKT));
160
161 #ifdef DEBUG
162 if (!(bp->flags & MPOOL_PINNED)) {
163 (void)fprintf(stderr,
164 "mpool_delete: page %d not pinned\n", bp->pgno);
165 abort();
166 }
167 #endif
168
169 /* Remove from the hash and lru queues. */
170 head = &mp->hqh[HASHKEY(bp->pgno)];
171 CIRCLEQ_REMOVE(head, bp, hq);
172 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
173
174 free(bp);
175 return (RET_SUCCESS);
176 }
177
178 /*
179 * mpool_get
180 * Get a page.
181 */
182 void *
mpool_get(mp,pgno,flags)183 mpool_get(mp, pgno, flags)
184 MPOOL *mp;
185 db_pgno_t pgno;
186 u_int flags; /* XXX not used? */
187 {
188 struct _hqh *head;
189 BKT *bp;
190 off_t off;
191 int nr;
192
193 #ifdef STATISTICS
194 ++mp->pageget;
195 #endif
196
197 /* Check for a page that is cached. */
198 if ((bp = mpool_look(mp, pgno)) != NULL) {
199 #ifdef DEBUG
200 if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) {
201 (void)fprintf(stderr,
202 "mpool_get: page %d already pinned\n", bp->pgno);
203 abort();
204 }
205 #endif
206 /*
207 * Move the page to the head of the hash chain and the tail
208 * of the lru chain.
209 */
210 head = &mp->hqh[HASHKEY(bp->pgno)];
211 CIRCLEQ_REMOVE(head, bp, hq);
212 CIRCLEQ_INSERT_HEAD(head, bp, hq);
213 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
214 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
215
216 /* Return a pinned page. */
217 bp->flags |= MPOOL_PINNED;
218 return (bp->page);
219 }
220
221 /* Get a page from the cache. */
222 if ((bp = mpool_bkt(mp)) == NULL)
223 return (NULL);
224
225 /* Read in the contents. */
226 #ifdef STATISTICS
227 ++mp->pageread;
228 #endif
229 off = mp->pagesize * pgno;
230 if (off / mp->pagesize != pgno) {
231 /* Run past the end of the file, or at least the part we
232 can address without large-file support? */
233 errno = E2BIG;
234 return NULL;
235 }
236 if (lseek(mp->fd, off, SEEK_SET) != off)
237 return (NULL);
238
239 if ((nr = read(mp->fd, bp->page, mp->pagesize)) != mp->pagesize) {
240 if (nr > 0) {
241 /* A partial read is definitely bad. */
242 errno = EINVAL;
243 return (NULL);
244 } else {
245 /*
246 * A zero-length reads, means you need to create a
247 * new page.
248 */
249 memset(bp->page, 0, mp->pagesize);
250 }
251 }
252
253 /* Set the page number, pin the page. */
254 bp->pgno = pgno;
255 if (!(flags & MPOOL_IGNOREPIN))
256 bp->flags = MPOOL_PINNED;
257 bp->flags |= MPOOL_INUSE;
258
259 /*
260 * Add the page to the head of the hash chain and the tail
261 * of the lru chain.
262 */
263 head = &mp->hqh[HASHKEY(bp->pgno)];
264 CIRCLEQ_INSERT_HEAD(head, bp, hq);
265 CIRCLEQ_INSERT_TAIL(&mp->lqh, bp, q);
266
267 /* Run through the user's filter. */
268 if (mp->pgin != NULL)
269 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page);
270
271 return (bp->page);
272 }
273
274 /*
275 * mpool_put
276 * Return a page.
277 */
278 int
mpool_put(mp,page,flags)279 mpool_put(mp, page, flags)
280 MPOOL *mp;
281 void *page;
282 u_int flags;
283 {
284 BKT *bp;
285
286 #ifdef STATISTICS
287 ++mp->pageput;
288 #endif
289 bp = (BKT *)((char *)page - sizeof(BKT));
290 #ifdef DEBUG
291 if (!(bp->flags & MPOOL_PINNED)) {
292 (void)fprintf(stderr,
293 "mpool_put: page %d not pinned\n", bp->pgno);
294 abort();
295 }
296 #endif
297 bp->flags &= ~MPOOL_PINNED;
298 if (flags & MPOOL_DIRTY)
299 bp->flags |= flags & MPOOL_DIRTY;
300 return (RET_SUCCESS);
301 }
302
303 /*
304 * mpool_close
305 * Close the buffer pool.
306 */
307 int
mpool_close(mp)308 mpool_close(mp)
309 MPOOL *mp;
310 {
311 BKT *bp;
312
313 /* Free up any space allocated to the lru pages. */
314 while ((bp = mp->lqh.cqh_first) != (void *)&mp->lqh) {
315 CIRCLEQ_REMOVE(&mp->lqh, mp->lqh.cqh_first, q);
316 free(bp);
317 }
318
319 /* Free the MPOOL cookie. */
320 free(mp);
321 return (RET_SUCCESS);
322 }
323
324 /*
325 * mpool_sync
326 * Sync the pool to disk.
327 */
328 int
mpool_sync(mp)329 mpool_sync(mp)
330 MPOOL *mp;
331 {
332 BKT *bp;
333
334 /* Walk the lru chain, flushing any dirty pages to disk. */
335 for (bp = mp->lqh.cqh_first;
336 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
337 if (bp->flags & MPOOL_DIRTY &&
338 mpool_write(mp, bp) == RET_ERROR)
339 return (RET_ERROR);
340
341 /* Sync the file descriptor. */
342 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS);
343 }
344
345 /*
346 * mpool_bkt
347 * Get a page from the cache (or create one).
348 */
349 static BKT *
mpool_bkt(mp)350 mpool_bkt(mp)
351 MPOOL *mp;
352 {
353 struct _hqh *head;
354 BKT *bp;
355
356 /* If under the max cached, always create a new page. */
357 if (mp->curcache < mp->maxcache)
358 goto new;
359
360 /*
361 * If the cache is max'd out, walk the lru list for a buffer we
362 * can flush. If we find one, write it (if necessary) and take it
363 * off any lists. If we don't find anything we grow the cache anyway.
364 * The cache never shrinks.
365 */
366 for (bp = mp->lqh.cqh_first;
367 bp != (void *)&mp->lqh; bp = bp->q.cqe_next)
368 if (!(bp->flags & MPOOL_PINNED)) {
369 /* Flush if dirty. */
370 if (bp->flags & MPOOL_DIRTY &&
371 mpool_write(mp, bp) == RET_ERROR)
372 return (NULL);
373 #ifdef STATISTICS
374 ++mp->pageflush;
375 #endif
376 /* Remove from the hash and lru queues. */
377 head = &mp->hqh[HASHKEY(bp->pgno)];
378 CIRCLEQ_REMOVE(head, bp, hq);
379 CIRCLEQ_REMOVE(&mp->lqh, bp, q);
380 #ifdef DEBUG
381 { void *spage;
382 spage = bp->page;
383 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
384 bp->page = spage;
385 }
386 #endif
387 bp->flags = 0;
388 return (bp);
389 }
390
391 new: if ((bp = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL)
392 return (NULL);
393 #ifdef STATISTICS
394 ++mp->pagealloc;
395 #endif
396 #if defined(DEBUG) || defined(PURIFY) || 1
397 memset(bp, 0xff, sizeof(BKT) + mp->pagesize);
398 #endif
399 bp->page = (char *)bp + sizeof(BKT);
400 bp->flags = 0;
401 ++mp->curcache;
402 return (bp);
403 }
404
405 /*
406 * mpool_write
407 * Write a page to disk.
408 */
409 static int
mpool_write(mp,bp)410 mpool_write(mp, bp)
411 MPOOL *mp;
412 BKT *bp;
413 {
414 off_t off;
415
416 #ifdef STATISTICS
417 ++mp->pagewrite;
418 #endif
419
420 /* Run through the user's filter. */
421 if (mp->pgout)
422 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page);
423
424 off = mp->pagesize * bp->pgno;
425 if (off / mp->pagesize != bp->pgno) {
426 /* Run past the end of the file, or at least the part we
427 can address without large-file support? */
428 errno = E2BIG;
429 return RET_ERROR;
430 }
431 if (lseek(mp->fd, off, SEEK_SET) != off)
432 return (RET_ERROR);
433 if (write(mp->fd, bp->page, mp->pagesize) != mp->pagesize)
434 return (RET_ERROR);
435
436 bp->flags &= ~MPOOL_DIRTY;
437 return (RET_SUCCESS);
438 }
439
440 /*
441 * mpool_look
442 * Lookup a page in the cache.
443 */
444 static BKT *
mpool_look(mp,pgno)445 mpool_look(mp, pgno)
446 MPOOL *mp;
447 db_pgno_t pgno;
448 {
449 struct _hqh *head;
450 BKT *bp;
451
452 head = &mp->hqh[HASHKEY(pgno)];
453 for (bp = head->cqh_first; bp != (void *)head; bp = bp->hq.cqe_next)
454 if ((bp->pgno == pgno) && (bp->flags & MPOOL_INUSE)) {
455 #ifdef STATISTICS
456 ++mp->cachehit;
457 #endif
458 return (bp);
459 }
460 #ifdef STATISTICS
461 ++mp->cachemiss;
462 #endif
463 return (NULL);
464 }
465
#ifdef STATISTICS
/*
 * mpool_stat
 *	Print out cache statistics.
 *
 * Writes the pool geometry, the operation counters and the hit rate to
 * stderr, followed by the page numbers on the lru chain, ten per line;
 * a trailing 'd' marks a dirty page and 'P' a pinned one.
 *
 * The counter and page-number fields are declared in mpool.h, which is
 * not visible from here, so every integer argument is cast explicitly
 * to the type its conversion specifier requires.
 */
void
mpool_stat(mp)
	MPOOL *mp;
{
	BKT *bp;
	int cnt;
	char *sep;

	(void)fprintf(stderr, "%lu pages in the file\n",
	    (unsigned long)mp->npages);
	(void)fprintf(stderr,
	    "page size %lu, cacheing %lu pages of %lu page max cache\n",
	    (unsigned long)mp->pagesize, (unsigned long)mp->curcache,
	    (unsigned long)mp->maxcache);
	(void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n",
	    (unsigned long)mp->pageput, (unsigned long)mp->pageget,
	    (unsigned long)mp->pagenew);
	(void)fprintf(stderr, "%lu page allocs, %lu page flushes\n",
	    (unsigned long)mp->pagealloc, (unsigned long)mp->pageflush);
	/* Skip the hit rate when there have been no lookups at all. */
	if (mp->cachehit + mp->cachemiss)
		(void)fprintf(stderr,
		    "%.0f%% cache hit rate (%lu hits, %lu misses)\n",
		    ((double)mp->cachehit / (mp->cachehit + mp->cachemiss))
		    * 100, (unsigned long)mp->cachehit,
		    (unsigned long)mp->cachemiss);
	(void)fprintf(stderr, "%lu page reads, %lu page writes\n",
	    (unsigned long)mp->pageread, (unsigned long)mp->pagewrite);

	sep = "";
	cnt = 0;
	for (bp = mp->lqh.cqh_first;
	    bp != (void *)&mp->lqh; bp = bp->q.cqe_next) {
		(void)fprintf(stderr, "%s%lu", sep, (unsigned long)bp->pgno);
		if (bp->flags & MPOOL_DIRTY)
			(void)fprintf(stderr, "d");
		if (bp->flags & MPOOL_PINNED)
			(void)fprintf(stderr, "P");
		/* Start a new output line after every tenth entry. */
		if (++cnt == 10) {
			sep = "\n";
			cnt = 0;
		} else
			sep = ", ";

	}
	(void)fprintf(stderr, "\n");
}
#endif
514