1 /*- 2 * Copyright (c) 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #if defined(LIBC_SCCS) && !defined(lint) 35 static char sccsid[] = "@(#)mpool.c 8.2 (Berkeley) 2/21/94"; 36 #endif /* LIBC_SCCS and not lint */ 37 38 #include <sys/param.h> 39 #include <sys/stat.h> 40 41 #include <errno.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <unistd.h> 46 47 #include <db.h> 48 #define __MPOOLINTERFACE_PRIVATE 49 #include "mpool.h" 50 51 static BKT *mpool_bkt __P((MPOOL *)); 52 static BKT *mpool_look __P((MPOOL *, pgno_t)); 53 static int mpool_write __P((MPOOL *, BKT *)); 54 #ifdef DEBUG 55 static void __mpoolerr __P((const char *fmt, ...)); 56 #endif 57 58 /* 59 * MPOOL_OPEN -- initialize a memory pool. 60 * 61 * Parameters: 62 * key: Shared buffer key. 63 * fd: File descriptor. 64 * pagesize: File page size. 65 * maxcache: Max number of cached pages. 66 * 67 * Returns: 68 * MPOOL pointer, NULL on error. 69 */ 70 MPOOL * 71 mpool_open(key, fd, pagesize, maxcache) 72 DBT *key; 73 int fd; 74 pgno_t pagesize, maxcache; 75 { 76 struct stat sb; 77 MPOOL *mp; 78 int entry; 79 80 if (fstat(fd, &sb)) 81 return (NULL); 82 /* XXX 83 * We should only set st_size to 0 for pipes -- 4.4BSD has the fix so 84 * that stat(2) returns true for ISSOCK on pipes. Until then, this is 85 * fairly close. 86 */ 87 if (!S_ISREG(sb.st_mode)) { 88 errno = ESPIPE; 89 return (NULL); 90 } 91 92 if ((mp = (MPOOL *)malloc(sizeof(MPOOL))) == NULL) 93 return (NULL); 94 mp->free.cnext = mp->free.cprev = (BKT *)&mp->free; 95 mp->lru.cnext = mp->lru.cprev = (BKT *)&mp->lru; 96 for (entry = 0; entry < HASHSIZE; ++entry) 97 mp->hashtable[entry].hnext = mp->hashtable[entry].hprev = 98 mp->hashtable[entry].cnext = mp->hashtable[entry].cprev = 99 (BKT *)&mp->hashtable[entry]; 100 mp->curcache = 0; 101 mp->maxcache = maxcache; 102 mp->pagesize = pagesize; 103 mp->npages = sb.st_size / pagesize; 104 mp->fd = fd; 105 mp->pgcookie = NULL; 106 mp->pgin = mp->pgout = NULL; 107 108 #ifdef STATISTICS 109 mp->cachehit = mp->cachemiss = mp->pagealloc = mp->pageflush = 110 mp->pageget = mp->pagenew = mp->pageput = mp->pageread = 111 mp->pagewrite = 0; 112 #endif 113 return (mp); 114 } 115 116 /* 117 * MPOOL_FILTER -- initialize input/output filters. 118 * 119 * Parameters: 120 * pgin: Page in conversion routine. 121 * pgout: Page out conversion routine. 122 * pgcookie: Cookie for page in/out routines. 123 */ 124 void 125 mpool_filter(mp, pgin, pgout, pgcookie) 126 MPOOL *mp; 127 void (*pgin) __P((void *, pgno_t, void *)); 128 void (*pgout) __P((void *, pgno_t, void *)); 129 void *pgcookie; 130 { 131 mp->pgin = pgin; 132 mp->pgout = pgout; 133 mp->pgcookie = pgcookie; 134 } 135 136 /* 137 * MPOOL_NEW -- get a new page 138 * 139 * Parameters: 140 * mp: mpool cookie 141 * pgnoadddr: place to store new page number 142 * Returns: 143 * RET_ERROR, RET_SUCCESS 144 */ 145 void * 146 mpool_new(mp, pgnoaddr) 147 MPOOL *mp; 148 pgno_t *pgnoaddr; 149 { 150 BKT *b; 151 BKTHDR *hp; 152 153 #ifdef STATISTICS 154 ++mp->pagenew; 155 #endif 156 /* 157 * Get a BKT from the cache. Assign a new page number, attach it to 158 * the hash and lru chains and return. 159 */ 160 if ((b = mpool_bkt(mp)) == NULL) 161 return (NULL); 162 *pgnoaddr = b->pgno = mp->npages++; 163 b->flags = MPOOL_PINNED; 164 inshash(b, b->pgno); 165 inschain(b, &mp->lru); 166 return (b->page); 167 } 168 169 /* 170 * MPOOL_GET -- get a page from the pool 171 * 172 * Parameters: 173 * mp: mpool cookie 174 * pgno: page number 175 * flags: not used 176 * 177 * Returns: 178 * RET_ERROR, RET_SUCCESS 179 */ 180 void * 181 mpool_get(mp, pgno, flags) 182 MPOOL *mp; 183 pgno_t pgno; 184 u_int flags; /* XXX not used? */ 185 { 186 BKT *b; 187 BKTHDR *hp; 188 off_t off; 189 int nr; 190 191 /* 192 * If asking for a specific page that is already in the cache, find 193 * it and return it. 194 */ 195 if (b = mpool_look(mp, pgno)) { 196 #ifdef STATISTICS 197 ++mp->pageget; 198 #endif 199 #ifdef DEBUG 200 if (b->flags & MPOOL_PINNED) 201 __mpoolerr("mpool_get: page %d already pinned", 202 b->pgno); 203 #endif 204 rmchain(b); 205 inschain(b, &mp->lru); 206 b->flags |= MPOOL_PINNED; 207 return (b->page); 208 } 209 210 /* Not allowed to retrieve a non-existent page. */ 211 if (pgno >= mp->npages) { 212 errno = EINVAL; 213 return (NULL); 214 } 215 216 /* Get a page from the cache. */ 217 if ((b = mpool_bkt(mp)) == NULL) 218 return (NULL); 219 b->pgno = pgno; 220 b->flags = MPOOL_PINNED; 221 222 #ifdef STATISTICS 223 ++mp->pageread; 224 #endif 225 /* Read in the contents. */ 226 off = mp->pagesize * pgno; 227 if (lseek(mp->fd, off, SEEK_SET) != off) 228 return (NULL); 229 if ((nr = read(mp->fd, b->page, mp->pagesize)) != mp->pagesize) { 230 if (nr >= 0) 231 errno = EFTYPE; 232 return (NULL); 233 } 234 if (mp->pgin) 235 (mp->pgin)(mp->pgcookie, b->pgno, b->page); 236 237 inshash(b, b->pgno); 238 inschain(b, &mp->lru); 239 #ifdef STATISTICS 240 ++mp->pageget; 241 #endif 242 return (b->page); 243 } 244 245 /* 246 * MPOOL_PUT -- return a page to the pool 247 * 248 * Parameters: 249 * mp: mpool cookie 250 * page: page pointer 251 * pgno: page number 252 * 253 * Returns: 254 * RET_ERROR, RET_SUCCESS 255 */ 256 int 257 mpool_put(mp, page, flags) 258 MPOOL *mp; 259 void *page; 260 u_int flags; 261 { 262 BKT *baddr; 263 #ifdef DEBUG 264 BKT *b; 265 #endif 266 267 #ifdef STATISTICS 268 ++mp->pageput; 269 #endif 270 baddr = (BKT *)((char *)page - sizeof(BKT)); 271 #ifdef DEBUG 272 if (!(baddr->flags & MPOOL_PINNED)) 273 __mpoolerr("mpool_put: page %d not pinned", b->pgno); 274 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 275 if (b == (BKT *)&mp->lru) 276 __mpoolerr("mpool_put: %0x: bad address", baddr); 277 if (b == baddr) 278 break; 279 } 280 #endif 281 baddr->flags &= ~MPOOL_PINNED; 282 baddr->flags |= flags & MPOOL_DIRTY; 283 return (RET_SUCCESS); 284 } 285 286 /* 287 * MPOOL_CLOSE -- close the buffer pool 288 * 289 * Parameters: 290 * mp: mpool cookie 291 * 292 * Returns: 293 * RET_ERROR, RET_SUCCESS 294 */ 295 int 296 mpool_close(mp) 297 MPOOL *mp; 298 { 299 BKT *b, *next; 300 301 /* Free up any space allocated to the lru pages. */ 302 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = next) { 303 next = b->cprev; 304 free(b); 305 } 306 free(mp); 307 return (RET_SUCCESS); 308 } 309 310 /* 311 * MPOOL_SYNC -- sync the file to disk. 312 * 313 * Parameters: 314 * mp: mpool cookie 315 * 316 * Returns: 317 * RET_ERROR, RET_SUCCESS 318 */ 319 int 320 mpool_sync(mp) 321 MPOOL *mp; 322 { 323 BKT *b; 324 325 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 326 if (b->flags & MPOOL_DIRTY && mpool_write(mp, b) == RET_ERROR) 327 return (RET_ERROR); 328 return (fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 329 } 330 331 /* 332 * MPOOL_BKT -- get/create a BKT from the cache 333 * 334 * Parameters: 335 * mp: mpool cookie 336 * 337 * Returns: 338 * NULL on failure and a pointer to the BKT on success 339 */ 340 static BKT * 341 mpool_bkt(mp) 342 MPOOL *mp; 343 { 344 BKT *b; 345 346 if (mp->curcache < mp->maxcache) 347 goto new; 348 349 /* 350 * If the cache is maxxed out, search the lru list for a buffer we 351 * can flush. If we find one, write it if necessary and take it off 352 * any lists. If we don't find anything we grow the cache anyway. 353 * The cache never shrinks. 354 */ 355 for (b = mp->lru.cprev; b != (BKT *)&mp->lru; b = b->cprev) 356 if (!(b->flags & MPOOL_PINNED)) { 357 if (b->flags & MPOOL_DIRTY && 358 mpool_write(mp, b) == RET_ERROR) 359 return (NULL); 360 rmhash(b); 361 rmchain(b); 362 #ifdef STATISTICS 363 ++mp->pageflush; 364 #endif 365 #ifdef DEBUG 366 { 367 void *spage; 368 spage = b->page; 369 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 370 b->page = spage; 371 } 372 #endif 373 return (b); 374 } 375 376 new: if ((b = (BKT *)malloc(sizeof(BKT) + mp->pagesize)) == NULL) 377 return (NULL); 378 #ifdef STATISTICS 379 ++mp->pagealloc; 380 #endif 381 #ifdef DEBUG 382 memset(b, 0xff, sizeof(BKT) + mp->pagesize); 383 #endif 384 b->page = (char *)b + sizeof(BKT); 385 ++mp->curcache; 386 return (b); 387 } 388 389 /* 390 * MPOOL_WRITE -- sync a page to disk 391 * 392 * Parameters: 393 * mp: mpool cookie 394 * 395 * Returns: 396 * RET_ERROR, RET_SUCCESS 397 */ 398 static int 399 mpool_write(mp, b) 400 MPOOL *mp; 401 BKT *b; 402 { 403 off_t off; 404 405 if (mp->pgout) 406 (mp->pgout)(mp->pgcookie, b->pgno, b->page); 407 408 #ifdef STATISTICS 409 ++mp->pagewrite; 410 #endif 411 off = mp->pagesize * b->pgno; 412 if (lseek(mp->fd, off, SEEK_SET) != off) 413 return (RET_ERROR); 414 if (write(mp->fd, b->page, mp->pagesize) != mp->pagesize) 415 return (RET_ERROR); 416 b->flags &= ~MPOOL_DIRTY; 417 return (RET_SUCCESS); 418 } 419 420 /* 421 * MPOOL_LOOK -- lookup a page 422 * 423 * Parameters: 424 * mp: mpool cookie 425 * pgno: page number 426 * 427 * Returns: 428 * NULL on failure and a pointer to the BKT on success 429 */ 430 static BKT * 431 mpool_look(mp, pgno) 432 MPOOL *mp; 433 pgno_t pgno; 434 { 435 register BKT *b; 436 register BKTHDR *tb; 437 438 /* XXX 439 * If find the buffer, put it first on the hash chain so can 440 * find it again quickly. 441 */ 442 tb = &mp->hashtable[HASHKEY(pgno)]; 443 for (b = tb->hnext; b != (BKT *)tb; b = b->hnext) 444 if (b->pgno == pgno) { 445 #ifdef STATISTICS 446 ++mp->cachehit; 447 #endif 448 return (b); 449 } 450 #ifdef STATISTICS 451 ++mp->cachemiss; 452 #endif 453 return (NULL); 454 } 455 456 #ifdef STATISTICS 457 /* 458 * MPOOL_STAT -- cache statistics 459 * 460 * Parameters: 461 * mp: mpool cookie 462 */ 463 void 464 mpool_stat(mp) 465 MPOOL *mp; 466 { 467 BKT *b; 468 int cnt; 469 char *sep; 470 471 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 472 (void)fprintf(stderr, 473 "page size %lu, cacheing %lu pages of %lu page max cache\n", 474 mp->pagesize, mp->curcache, mp->maxcache); 475 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 476 mp->pageput, mp->pageget, mp->pagenew); 477 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 478 mp->pagealloc, mp->pageflush); 479 if (mp->cachehit + mp->cachemiss) 480 (void)fprintf(stderr, 481 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 482 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 483 * 100, mp->cachehit, mp->cachemiss); 484 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 485 mp->pageread, mp->pagewrite); 486 487 sep = ""; 488 cnt = 0; 489 for (b = mp->lru.cnext; b != (BKT *)&mp->lru; b = b->cnext) { 490 (void)fprintf(stderr, "%s%d", sep, b->pgno); 491 if (b->flags & MPOOL_DIRTY) 492 (void)fprintf(stderr, "d"); 493 if (b->flags & MPOOL_PINNED) 494 (void)fprintf(stderr, "P"); 495 if (++cnt == 10) { 496 sep = "\n"; 497 cnt = 0; 498 } else 499 sep = ", "; 500 501 } 502 (void)fprintf(stderr, "\n"); 503 } 504 #endif 505 506 #ifdef DEBUG 507 #if __STDC__ 508 #include <stdarg.h> 509 #else 510 #include <varargs.h> 511 #endif 512 513 static void 514 #if __STDC__ 515 __mpoolerr(const char *fmt, ...) 516 #else 517 __mpoolerr(fmt, va_alist) 518 char *fmt; 519 va_dcl 520 #endif 521 { 522 va_list ap; 523 #if __STDC__ 524 va_start(ap, fmt); 525 #else 526 va_start(ap); 527 #endif 528 (void)vfprintf(stderr, fmt, ap); 529 va_end(ap); 530 (void)fprintf(stderr, "\n"); 531 abort(); 532 /* NOTREACHED */ 533 } 534 #endif 535