1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #include "namespace.h" 33 #include <sys/param.h> 34 #include <sys/queue.h> 35 #include <sys/stat.h> 36 37 #include <errno.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 #include "un-namespace.h" 43 44 #include <db.h> 45 46 #define __MPOOLINTERFACE_PRIVATE 47 #include <mpool.h> 48 49 static BKT *mpool_bkt(MPOOL *); 50 static BKT *mpool_look(MPOOL *, pgno_t); 51 static int mpool_write(MPOOL *, BKT *); 52 53 /* 54 * mpool_open -- 55 * Initialize a memory pool. 56 */ 57 /* ARGSUSED */ 58 MPOOL * 59 mpool_open(void *key, int fd, pgno_t pagesize, pgno_t maxcache) 60 { 61 struct stat sb; 62 MPOOL *mp; 63 int entry; 64 65 /* 66 * Get information about the file. 67 * 68 * XXX 69 * We don't currently handle pipes, although we should. 70 */ 71 if (_fstat(fd, &sb)) 72 return (NULL); 73 if (!S_ISREG(sb.st_mode)) { 74 errno = ESPIPE; 75 return (NULL); 76 } 77 78 /* Allocate and initialize the MPOOL cookie. */ 79 if ((mp = (MPOOL *)calloc(1, sizeof(MPOOL))) == NULL) 80 return (NULL); 81 TAILQ_INIT(&mp->lqh); 82 for (entry = 0; entry < HASHSIZE; ++entry) 83 TAILQ_INIT(&mp->hqh[entry]); 84 mp->maxcache = maxcache; 85 mp->npages = sb.st_size / pagesize; 86 mp->pagesize = pagesize; 87 mp->fd = fd; 88 return (mp); 89 } 90 91 /* 92 * mpool_filter -- 93 * Initialize input/output filters. 94 */ 95 void 96 mpool_filter(MPOOL *mp, void (*pgin) (void *, pgno_t, void *), 97 void (*pgout) (void *, pgno_t, void *), void *pgcookie) 98 { 99 mp->pgin = pgin; 100 mp->pgout = pgout; 101 mp->pgcookie = pgcookie; 102 } 103 104 /* 105 * mpool_new -- 106 * Get a new page of memory. 107 */ 108 void * 109 mpool_new(MPOOL *mp, pgno_t *pgnoaddr, u_int flags) 110 { 111 struct _hqh *head; 112 BKT *bp; 113 114 if (mp->npages == MAX_PAGE_NUMBER) { 115 (void)fprintf(stderr, "mpool_new: page allocation overflow.\n"); 116 abort(); 117 } 118 #ifdef STATISTICS 119 ++mp->pagenew; 120 #endif 121 /* 122 * Get a BKT from the cache. Assign a new page number, attach 123 * it to the head of the hash chain, the tail of the lru chain, 124 * and return. 125 */ 126 if ((bp = mpool_bkt(mp)) == NULL) 127 return (NULL); 128 if (flags == MPOOL_PAGE_REQUEST) { 129 mp->npages++; 130 bp->pgno = *pgnoaddr; 131 } else 132 bp->pgno = *pgnoaddr = mp->npages++; 133 134 bp->flags = MPOOL_PINNED | MPOOL_INUSE; 135 136 head = &mp->hqh[HASHKEY(bp->pgno)]; 137 TAILQ_INSERT_HEAD(head, bp, hq); 138 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 139 return (bp->page); 140 } 141 142 int 143 mpool_delete(MPOOL *mp, void *page) 144 { 145 struct _hqh *head; 146 BKT *bp; 147 148 bp = (BKT *)((char *)page - sizeof(BKT)); 149 150 #ifdef DEBUG 151 if (!(bp->flags & MPOOL_PINNED)) { 152 (void)fprintf(stderr, 153 "mpool_delete: page %d not pinned\n", bp->pgno); 154 abort(); 155 } 156 #endif 157 158 /* Remove from the hash and lru queues. */ 159 head = &mp->hqh[HASHKEY(bp->pgno)]; 160 TAILQ_REMOVE(head, bp, hq); 161 TAILQ_REMOVE(&mp->lqh, bp, q); 162 163 free(bp); 164 mp->curcache--; 165 return (RET_SUCCESS); 166 } 167 168 /* 169 * mpool_get 170 * Get a page. 171 */ 172 /* ARGSUSED */ 173 void * 174 mpool_get(MPOOL *mp, pgno_t pgno, 175 u_int flags) /* XXX not used? */ 176 { 177 struct _hqh *head; 178 BKT *bp; 179 off_t off; 180 int nr; 181 182 #ifdef STATISTICS 183 ++mp->pageget; 184 #endif 185 186 /* Check for a page that is cached. */ 187 if ((bp = mpool_look(mp, pgno)) != NULL) { 188 #ifdef DEBUG 189 if (!(flags & MPOOL_IGNOREPIN) && bp->flags & MPOOL_PINNED) { 190 (void)fprintf(stderr, 191 "mpool_get: page %d already pinned\n", bp->pgno); 192 abort(); 193 } 194 #endif 195 /* 196 * Move the page to the head of the hash chain and the tail 197 * of the lru chain. 198 */ 199 head = &mp->hqh[HASHKEY(bp->pgno)]; 200 TAILQ_REMOVE(head, bp, hq); 201 TAILQ_INSERT_HEAD(head, bp, hq); 202 TAILQ_REMOVE(&mp->lqh, bp, q); 203 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 204 205 /* Return a pinned page. */ 206 bp->flags |= MPOOL_PINNED; 207 return (bp->page); 208 } 209 210 /* Get a page from the cache. */ 211 if ((bp = mpool_bkt(mp)) == NULL) 212 return (NULL); 213 214 /* Read in the contents. */ 215 off = mp->pagesize * pgno; 216 if ((nr = pread(mp->fd, bp->page, mp->pagesize, off)) != (ssize_t)mp->pagesize) { 217 switch (nr) { 218 case -1: 219 /* errno is set for us by pread(). */ 220 free(bp); 221 mp->curcache--; 222 return (NULL); 223 case 0: 224 /* 225 * A zero-length read means you need to create a 226 * new page. 227 */ 228 memset(bp->page, 0, mp->pagesize); 229 break; 230 default: 231 /* A partial read is definitely bad. */ 232 free(bp); 233 mp->curcache--; 234 errno = EINVAL; 235 return (NULL); 236 } 237 } 238 #ifdef STATISTICS 239 ++mp->pageread; 240 #endif 241 242 /* Set the page number, pin the page. */ 243 bp->pgno = pgno; 244 if (!(flags & MPOOL_IGNOREPIN)) 245 bp->flags = MPOOL_PINNED; 246 bp->flags |= MPOOL_INUSE; 247 248 /* 249 * Add the page to the head of the hash chain and the tail 250 * of the lru chain. 251 */ 252 head = &mp->hqh[HASHKEY(bp->pgno)]; 253 TAILQ_INSERT_HEAD(head, bp, hq); 254 TAILQ_INSERT_TAIL(&mp->lqh, bp, q); 255 256 /* Run through the user's filter. */ 257 if (mp->pgin != NULL) 258 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 259 260 return (bp->page); 261 } 262 263 /* 264 * mpool_put 265 * Return a page. 266 */ 267 /* ARGSUSED */ 268 int 269 mpool_put(MPOOL *mp, void *page, u_int flags) 270 { 271 BKT *bp; 272 273 #ifdef STATISTICS 274 ++mp->pageput; 275 #endif 276 bp = (BKT *)((char *)page - sizeof(BKT)); 277 #ifdef DEBUG 278 if (!(bp->flags & MPOOL_PINNED)) { 279 (void)fprintf(stderr, 280 "mpool_put: page %d not pinned\n", bp->pgno); 281 abort(); 282 } 283 #endif 284 bp->flags &= ~MPOOL_PINNED; 285 if (flags & MPOOL_DIRTY) 286 bp->flags |= flags & MPOOL_DIRTY; 287 return (RET_SUCCESS); 288 } 289 290 /* 291 * mpool_close 292 * Close the buffer pool. 293 */ 294 int 295 mpool_close(MPOOL *mp) 296 { 297 BKT *bp; 298 299 /* Free up any space allocated to the lru pages. */ 300 while (!TAILQ_EMPTY(&mp->lqh)) { 301 bp = TAILQ_FIRST(&mp->lqh); 302 TAILQ_REMOVE(&mp->lqh, bp, q); 303 free(bp); 304 } 305 306 /* Free the MPOOL cookie. */ 307 free(mp); 308 return (RET_SUCCESS); 309 } 310 311 /* 312 * mpool_sync 313 * Sync the pool to disk. 314 */ 315 int 316 mpool_sync(MPOOL *mp) 317 { 318 BKT *bp; 319 320 /* Walk the lru chain, flushing any dirty pages to disk. */ 321 TAILQ_FOREACH(bp, &mp->lqh, q) 322 if (bp->flags & MPOOL_DIRTY && 323 mpool_write(mp, bp) == RET_ERROR) 324 return (RET_ERROR); 325 326 /* Sync the file descriptor. */ 327 return (_fsync(mp->fd) ? RET_ERROR : RET_SUCCESS); 328 } 329 330 /* 331 * mpool_bkt 332 * Get a page from the cache (or create one). 333 */ 334 static BKT * 335 mpool_bkt(MPOOL *mp) 336 { 337 struct _hqh *head; 338 BKT *bp; 339 340 /* If under the max cached, always create a new page. */ 341 if (mp->curcache < mp->maxcache) 342 goto new; 343 344 /* 345 * If the cache is max'd out, walk the lru list for a buffer we 346 * can flush. If we find one, write it (if necessary) and take it 347 * off any lists. If we don't find anything we grow the cache anyway. 348 * The cache never shrinks. 349 */ 350 TAILQ_FOREACH(bp, &mp->lqh, q) 351 if (!(bp->flags & MPOOL_PINNED)) { 352 /* Flush if dirty. */ 353 if (bp->flags & MPOOL_DIRTY && 354 mpool_write(mp, bp) == RET_ERROR) 355 return (NULL); 356 #ifdef STATISTICS 357 ++mp->pageflush; 358 #endif 359 /* Remove from the hash and lru queues. */ 360 head = &mp->hqh[HASHKEY(bp->pgno)]; 361 TAILQ_REMOVE(head, bp, hq); 362 TAILQ_REMOVE(&mp->lqh, bp, q); 363 #ifdef DEBUG 364 { void *spage; 365 spage = bp->page; 366 memset(bp, 0xff, sizeof(BKT) + mp->pagesize); 367 bp->page = spage; 368 } 369 #endif 370 bp->flags = 0; 371 return (bp); 372 } 373 374 new: if ((bp = (BKT *)calloc(1, sizeof(BKT) + mp->pagesize)) == NULL) 375 return (NULL); 376 #ifdef STATISTICS 377 ++mp->pagealloc; 378 #endif 379 bp->page = (char *)bp + sizeof(BKT); 380 bp->flags = 0; 381 ++mp->curcache; 382 return (bp); 383 } 384 385 /* 386 * mpool_write 387 * Write a page to disk. 388 */ 389 static int 390 mpool_write(MPOOL *mp, BKT *bp) 391 { 392 off_t off; 393 394 #ifdef STATISTICS 395 ++mp->pagewrite; 396 #endif 397 398 /* Run through the user's filter. */ 399 if (mp->pgout) 400 (mp->pgout)(mp->pgcookie, bp->pgno, bp->page); 401 402 off = mp->pagesize * bp->pgno; 403 if (pwrite(mp->fd, bp->page, mp->pagesize, off) != (ssize_t)mp->pagesize) 404 return (RET_ERROR); 405 406 /* 407 * Re-run through the input filter since this page may soon be 408 * accessed via the cache, and whatever the user's output filter 409 * did may screw things up if we don't let the input filter 410 * restore the in-core copy. 411 */ 412 if (mp->pgin) 413 (mp->pgin)(mp->pgcookie, bp->pgno, bp->page); 414 415 bp->flags &= ~MPOOL_DIRTY; 416 return (RET_SUCCESS); 417 } 418 419 /* 420 * mpool_look 421 * Lookup a page in the cache. 422 */ 423 static BKT * 424 mpool_look(MPOOL *mp, pgno_t pgno) 425 { 426 struct _hqh *head; 427 BKT *bp; 428 429 head = &mp->hqh[HASHKEY(pgno)]; 430 TAILQ_FOREACH(bp, head, hq) 431 if ((bp->pgno == pgno) && 432 ((bp->flags & MPOOL_INUSE) == MPOOL_INUSE)) { 433 #ifdef STATISTICS 434 ++mp->cachehit; 435 #endif 436 return (bp); 437 } 438 #ifdef STATISTICS 439 ++mp->cachemiss; 440 #endif 441 return (NULL); 442 } 443 444 #ifdef STATISTICS 445 /* 446 * mpool_stat 447 * Print out cache statistics. 448 */ 449 void 450 mpool_stat(MPOOL *mp) 451 { 452 BKT *bp; 453 int cnt; 454 char *sep; 455 456 (void)fprintf(stderr, "%lu pages in the file\n", mp->npages); 457 (void)fprintf(stderr, 458 "page size %lu, cacheing %lu pages of %lu page max cache\n", 459 mp->pagesize, mp->curcache, mp->maxcache); 460 (void)fprintf(stderr, "%lu page puts, %lu page gets, %lu page new\n", 461 mp->pageput, mp->pageget, mp->pagenew); 462 (void)fprintf(stderr, "%lu page allocs, %lu page flushes\n", 463 mp->pagealloc, mp->pageflush); 464 if (mp->cachehit + mp->cachemiss) 465 (void)fprintf(stderr, 466 "%.0f%% cache hit rate (%lu hits, %lu misses)\n", 467 ((double)mp->cachehit / (mp->cachehit + mp->cachemiss)) 468 * 100, mp->cachehit, mp->cachemiss); 469 (void)fprintf(stderr, "%lu page reads, %lu page writes\n", 470 mp->pageread, mp->pagewrite); 471 472 sep = ""; 473 cnt = 0; 474 TAILQ_FOREACH(bp, &mp->lqh, q) { 475 (void)fprintf(stderr, "%s%d", sep, bp->pgno); 476 if (bp->flags & MPOOL_DIRTY) 477 (void)fprintf(stderr, "d"); 478 if (bp->flags & MPOOL_PINNED) 479 (void)fprintf(stderr, "P"); 480 if (++cnt == 10) { 481 sep = "\n"; 482 cnt = 0; 483 } else 484 sep = ", "; 485 486 } 487 (void)fprintf(stderr, "\n"); 488 } 489 #endif 490